Commit 0c3afaf24c3c02fde5a39c38b200eb9e5c80aeda
1 parent
f2bebe34
Exists in
refactor
staged.
Showing
4 changed files
with
4 additions
and
477 deletions
Show diff stats
mjpeg/__init__.py
... | ... | @@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks'] |
8 | 8 | # functions from submodules. |
9 | 9 | # |
10 | 10 | # :: |
11 | - | |
11 | +import numpy as np | |
12 | +from numpy import shape | |
12 | 13 | import numpy.random as rnd |
13 | 14 | |
14 | 15 | import base |
... | ... | @@ -169,18 +170,6 @@ class Jpeg(Jsteg): |
169 | 170 | E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf] |
170 | 171 | return np.histogram(A, E) |
171 | 172 | |
172 | - def plotHist(self, mask=base.acMaskBlock, T=8): | |
173 | - """ | |
174 | - Make a histogram of the jpeg coefficients. | |
175 | - The mask is a boolean 8x8 matrix indicating the | |
176 | - frequencies to be included. This defaults to the | |
177 | - AC coefficients. | |
178 | - """ | |
179 | - A = self.rawsignal(mask).tolist() | |
180 | - E = [i for i in range(-T, T + 2)] | |
181 | - plt.hist(A, E, histtype='bar') | |
182 | - plt.show() | |
183 | - | |
184 | 173 | def nzcount(self, *a, **kw): |
185 | 174 | """Number of non-zero AC coefficients. |
186 | 175 | ... | ... |
mjpeg/compress.py
msteg/steganalysis/ChiSquare.py
... | ... | @@ -1,162 +0,0 @@ |
1 | -""" | |
2 | -<p> | |
3 | -This module implements an algorithm described by Andreas Westfeld in [1,2], | |
4 | -which detects if there was data embedded into an image using JSteg. | |
5 | -It uses the property that JSteg generates pairs of values in the | |
6 | -DCT-coefficients histogram, which can be detected by a \chi^2 test. | |
7 | -</p> | |
8 | - | |
9 | -<pre> | |
10 | -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm: High Capacity Despite |
11 | -Better Steganalysis | |
12 | -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme | |
13 | -</pre> | |
14 | -""" | |
15 | - | |
16 | -from collections import defaultdict | |
17 | -import os | |
18 | - | |
19 | -from PIL import Image | |
20 | -import numpy | |
21 | -from scipy.stats import chisquare | |
22 | -import matplotlib.pyplot as plt | |
23 | -import itertools as it | |
24 | - | |
25 | -from .. import * | |
26 | - | |
27 | - | |
28 | -class ChiSquare(StegBase): | |
29 | - """ | |
30 | - The module contains only one method, <b>detect</b>. | |
31 | - """ | |
32 | - | |
33 | - def __init__(self, ui, core): | |
34 | - self.ui = ui | |
35 | - self.core = core | |
36 | - | |
37 | - def detect(self, src, tgt, tgt2): | |
38 | - """ | |
39 | - <p> | |
40 | - Detect if there was data embedded in the <i>source image</i> with the |
41 | - JSteg algorithm. |
42 | - </p> | |
43 | - | |
44 | - <p> | |
45 | - Parameters: | |
46 | - <ol> | |
47 | - <li><pre>Source image</pre> Image which should be tested</li> | |
48 | - <li><pre>Target image</pre> Image which displays a graphic with the | |
49 | - embedding probability</li> | |
50 | - <li><pre>2nd Target image</pre> Image which displays the embedding | |
51 | - positions in the image</li> | |
52 | - </ol> | |
53 | - </p> | |
54 | - """ | |
55 | - # --------------------------- Input ----------------------------------- | |
56 | - # If src is from the image pool, test whether the image exists encoded | |
57 | - # on the file system. Otherwise we can not read DCT-coefficients. | |
58 | - if self.core.media_manager.is_media_key(src): | |
59 | - src = self.core.media_manager.get_file(src) | |
60 | - if hasattr(src, 'tmp_file'): | |
61 | - src = src.tmp_file | |
62 | - self.ui.display_error('Trying file: %s' % src) | |
63 | - else: | |
64 | - self.ui.display_error('Can not detect anything from \ | |
65 | - decoded images.') | |
66 | - return | |
67 | - # Test whether the file exists. | |
68 | - if not os.path.isfile(src): | |
69 | - self.ui.display_error('No such file.') | |
70 | - return | |
71 | - # Test if it is a JPEG file. | |
72 | - if not self._looks_like_jpeg(src): | |
73 | - self.ui.display_error('Input is probably not a JPEG file.') | |
74 | - return | |
75 | - | |
76 | - # ---------------------------- Algorithm ------------------------------ | |
77 | - # Build DCT-histogram in steps of \approx 1% of all coefficients and | |
78 | - # calculate the p-value at each step. | |
79 | - | |
80 | - # dct_data = rw_dct.read_dct_coefficients(src) | |
81 | - dct_data = self._get_cov_data(src) | |
82 | - | |
83 | - hist = defaultdict(int) | |
84 | - cnt = 0 | |
85 | - l = len(dct_data) | |
86 | - one_p = l / 100 | |
87 | - result = [] | |
88 | - for block in dct_data: | |
89 | - # update the histogram with one block of 64 coefficients | |
90 | - for c in block: | |
91 | - hist[c] += 1 | |
92 | - | |
93 | - cnt += 1 | |
94 | - if not cnt % one_p: | |
95 | - # calculate p-value | |
96 | - self.ui.set_progress(cnt * 100 / l) | |
97 | - | |
98 | - # ignore the pair (0, 1), since JSteg does not embed data there | |
99 | - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)] | |
100 | - k = len(hl) / 2 | |
101 | - observed = [] | |
102 | - expected = [] | |
103 | - # calculate observed and expected distribution | |
104 | - for i in range(k): | |
105 | - t = hl[2 * i] + hl[2 * i + 1] | |
106 | - if t > 3: | |
107 | - observed.append(hl[2 * i]) | |
108 | - expected.append(t / 2) | |
109 | - # calculate (\chi^2, p) | |
110 | - p = chisquare(numpy.array(observed), numpy.array(expected))[1] | |
111 | - result.append(p) | |
112 | - | |
113 | - # ----------------------------- Output -------------------------------- | |
114 | - # Graph displaying the embedding probabilities in relation to the | |
115 | - # sample size. | |
116 | - figure = plt.figure() | |
117 | - plot = figure.add_subplot(111) | |
118 | - plot.grid(True) | |
119 | - plot.plot(result, color='r', linewidth=2.0) | |
120 | - plt.axis([0, 100, 0, 1.1]) | |
121 | - plt.title('Embedding probability for different percentages \ | |
122 | -of the file capacity.') | |
123 | - plt.xlabel('% of file capacity') | |
124 | - plt.ylabel('Embedding probability') | |
125 | - | |
126 | - if self.core.media_manager.is_media_key(tgt): | |
127 | - img = figure_to_pil(figure) | |
128 | - self.core.media_manager.put_media(tgt, img) | |
129 | - else: | |
130 | - plt.savefig(tgt) | |
131 | - | |
132 | - # Image displaying the length and position of the embedded data | |
133 | - # within the image | |
134 | - img2 = Image.open(src) | |
135 | - img2.convert("RGB") | |
136 | - width, height = img2.size | |
137 | - | |
138 | - for i in range(100): | |
139 | - result[i] = max(result[i:]) | |
140 | - | |
141 | - cnt2 = 0 | |
142 | - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)): | |
143 | - if not cnt2 % one_p: | |
144 | - r = result[cnt2 / one_p] | |
145 | - if r >= 0.5: | |
146 | - color = (255, int((1 - r) * 2 * 255), 0) | |
147 | - else: | |
148 | - color = (int(r * 2 * 255), 255, 0) | |
149 | - cnt2 += 1 | |
150 | - img2.paste(color, (left, top, min(left + 8, width), | |
151 | - min(top + 8, height))) | |
152 | - self.core.media_manager.put_media(tgt2, img2) | |
153 | - | |
154 | - def __str__(self): | |
155 | - return 'Chi-Square-Test' | |
156 | - | |
157 | - | |
158 | -def figure_to_pil(figure): | |
159 | - figure.canvas.draw() | |
160 | - return Image.fromstring('RGB', | |
161 | - figure.canvas.get_width_height(), | |
162 | - figure.canvas.tostring_rgb()) |
msteg/steganalysis/MPB.py.bak
... | ... | @@ -1,300 +0,0 @@ |
1 | -__author__ = 'chunk' | |
2 | -""" | |
3 | -Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography | |
4 | -""" | |
5 | - | |
6 | -import time | |
7 | -import math | |
8 | -import numpy as np | |
9 | - | |
10 | -from .. import * | |
11 | -from ...mjpeg import Jpeg,colorMap | |
12 | -from ...common import * | |
13 | - | |
14 | -import csv | |
15 | -import json | |
16 | -import pickle | |
17 | -import cv2 | |
18 | -from sklearn import svm | |
19 | - | |
20 | -base_dir = '/home/hadoop/data/HeadShoulder/' | |
21 | - | |
22 | - | |
23 | -class MPB(StegBase): | |
24 | - """ | |
25 | - Markov Process Based Steganalysis Algo. |
26 | - """ | |
27 | - | |
28 | - def __init__(self): | |
29 | - StegBase.__init__(self, sample_key) | |
30 | - self.model = None | |
31 | - self.svm = None | |
32 | - | |
33 | - def _get_trans_prob_mat_orig(self, ciq, T=4): | |
34 | - """ | |
35 | - Original! | |
36 | - Calculate Transition Probability Matrix. | |
37 | - | |
38 | - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs) | |
39 | - :param T: signed integer, usually 1~7 | |
40 | - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4) | |
41 | - """ | |
42 | - ciq = np.absolute(ciq).clip(0, T) | |
43 | - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64) | |
44 | - # Fh = np.diff(ciq, axis=-1) | |
45 | - # Fv = np.diff(ciq, axis=0) | |
46 | - Fh = ciq[:-1, :-1] - ciq[:-1, 1:] | |
47 | - Fv = ciq[:-1, :-1] - ciq[1:, :-1] | |
48 | - Fd = ciq[:-1, :-1] - ciq[1:, 1:] | |
49 | - Fm = ciq[:-1, 1:] - ciq[1:, :-1] | |
50 | - | |
51 | - Fh1 = Fh[:-1, :-1] | |
52 | - Fh2 = Fh[:-1, 1:] | |
53 | - | |
54 | - Fv1 = Fv[:-1, :-1] | |
55 | - Fv2 = Fv[1:, :-1] | |
56 | - | |
57 | - Fd1 = Fd[:-1, :-1] | |
58 | - Fd2 = Fd[1:, 1:] | |
59 | - | |
60 | - Fm1 = Fm[:-1, 1:] | |
61 | - Fm2 = Fm[1:, :-1] | |
62 | - | |
63 | - # original:(very slow!) | |
64 | - for n in range(-T, T + 1): | |
65 | - for m in range(-T, T + 1): | |
66 | - dh = np.sum(Fh1 == m) * 1.0 | |
67 | - dv = np.sum(Fv1 == m) * 1.0 | |
68 | - dd = np.sum(Fd1 == m) * 1.0 | |
69 | - dm = np.sum(Fm1 == m) * 1.0 | |
70 | - | |
71 | - if dh != 0: | |
72 | - TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh | |
73 | - | |
74 | - if dv != 0: | |
75 | - TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv | |
76 | - | |
77 | - if dd != 0: | |
78 | - TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd | |
79 | - | |
80 | - if dm != 0: | |
81 | - TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm | |
82 | - | |
83 | - # 1.422729s | |
84 | - return TPM | |
85 | - | |
86 | - | |
87 | - def get_trans_prob_mat(self, ciq, T=4): | |
88 | - """ | |
89 | - Calculate Transition Probability Matrix. | |
90 | - | |
91 | - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs) | |
92 | - :param T: signed integer, usually 1~7 | |
93 | - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4) | |
94 | - """ | |
95 | - | |
96 | - return self._get_trans_prob_mat_orig(ciq, T) | |
97 | - | |
98 | - | |
99 | - # timer = Timer() | |
100 | - ciq = np.absolute(ciq).clip(0, T) | |
101 | - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64) | |
102 | - # Fh = np.diff(ciq, axis=-1) | |
103 | - # Fv = np.diff(ciq, axis=0) | |
104 | - Fh = ciq[:-1, :-1] - ciq[:-1, 1:] | |
105 | - Fv = ciq[:-1, :-1] - ciq[1:, :-1] | |
106 | - Fd = ciq[:-1, :-1] - ciq[1:, 1:] | |
107 | - Fm = ciq[:-1, 1:] - ciq[1:, :-1] | |
108 | - | |
109 | - Fh1 = Fh[:-1, :-1].ravel() | |
110 | - Fh2 = Fh[:-1, 1:].ravel() | |
111 | - | |
112 | - Fv1 = Fv[:-1, :-1].ravel() | |
113 | - Fv2 = Fv[1:, :-1].ravel() | |
114 | - | |
115 | - Fd1 = Fd[:-1, :-1].ravel() | |
116 | - Fd2 = Fd[1:, 1:].ravel() | |
117 | - | |
118 | - Fm1 = Fm[:-1, 1:].ravel() | |
119 | - Fm2 = Fm[1:, :-1].ravel() | |
120 | - | |
121 | - | |
122 | - | |
123 | - # 0.089754s | |
124 | - # timer.mark() | |
125 | - # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1 | |
126 | - # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1 | |
127 | - # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1 | |
128 | - # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1 | |
129 | - # timer.report() | |
130 | - | |
131 | - # 1.459668s | |
132 | - # timer.mark() | |
133 | - # for i in range(len(Fh1)): | |
134 | - # TPM[Fh1[i], Fh2[i], 0] += 1 | |
135 | - # for i in range(len(Fv1)): | |
136 | - # TPM[Fv1[i], Fv2[i], 1] += 1 | |
137 | - # for i in range(len(Fd1)): | |
138 | - # TPM[Fd1[i], Fd2[i], 2] += 1 | |
139 | - # for i in range(len(Fm1)): | |
140 | - # TPM[Fm1[i], Fm2[i], 3] += 1 | |
141 | - # timer.report() | |
142 | - | |
143 | - # 1.463982s | |
144 | - # timer.mark() | |
145 | - for m, n in zip(Fh1.ravel(), Fh2.ravel()): | |
146 | - TPM[m, n, 0] += 1 | |
147 | - | |
148 | - for m, n in zip(Fv1.ravel(), Fv2.ravel()): | |
149 | - TPM[m, n, 1] += 1 | |
150 | - | |
151 | - for m, n in zip(Fd1.ravel(), Fd2.ravel()): | |
152 | - TPM[m, n, 2] += 1 | |
153 | - | |
154 | - for m, n in zip(Fm1.ravel(), Fm2.ravel()): | |
155 | - TPM[m, n, 3] += 1 | |
156 | - # timer.report() | |
157 | - | |
158 | - # 0.057505s | |
159 | - # timer.mark() | |
160 | - for m in range(-T, T + 1): | |
161 | - dh = np.sum(Fh1 == m) * 1.0 | |
162 | - dv = np.sum(Fv1 == m) * 1.0 | |
163 | - dd = np.sum(Fd1 == m) * 1.0 | |
164 | - dm = np.sum(Fm1 == m) * 1.0 | |
165 | - | |
166 | - if dh != 0: | |
167 | - TPM[m, :, 0] /= dh | |
168 | - | |
169 | - if dv != 0: | |
170 | - TPM[m, :, 1] /= dv | |
171 | - | |
172 | - if dd != 0: | |
173 | - TPM[m, :, 2] /= dd | |
174 | - | |
175 | - if dm != 0: | |
176 | - TPM[m, :, 3] /= dm | |
177 | - # timer.report() | |
178 | - | |
179 | - return TPM | |
180 | - | |
181 | - def load_dataset(self, mode, file): | |
182 | - if mode == 'local': | |
183 | - return self._load_dataset_from_local(file) | |
184 | - elif mode == 'remote' or mode == 'hbase': | |
185 | - return self._load_dataset_from_hbase(file) | |
186 | - else: | |
187 | - raise Exception("Unknown mode!") | |
188 | - | |
189 | - def _load_dataset_from_local(self, list_file='images_map_Train.tsv'): | |
190 | - """ | |
191 | - Load the JPEG dataset described by a file-list file. |
192 | - | |
193 | - :param list_file: a tsv file with each line for a jpeg file path | |
194 | - :return:(X,Y) for SVM | |
195 | - """ | |
196 | - list_file = base_dir + list_file | |
197 | - | |
198 | - X = [] | |
199 | - Y = [] | |
200 | - dict_tagbuf = {} | |
201 | - dict_dataset = {} | |
202 | - | |
203 | - with open(list_file, 'rb') as tsvfile: | |
204 | - tsvfile = csv.reader(tsvfile, delimiter='\t') | |
205 | - for line in tsvfile: | |
206 | - imgname = line[0] + '.jpg' | |
207 | - dict_tagbuf[imgname] = line[1] | |
208 | - | |
209 | - dir = base_dir + 'Feat/' | |
210 | - for path, subdirs, files in os.walk(dir + 'Train/'): | |
211 | - for name in files: | |
212 | - featpath = os.path.join(path, name) | |
213 | - # print featpath | |
214 | - with open(featpath, 'rb') as featfile: | |
215 | - imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg') | |
216 | - dict_dataset[imgname] = json.loads(featfile.read()) | |
217 | - | |
218 | - for imgname, tag in dict_tagbuf.items(): | |
219 | - tag = 1 if tag == 'True' else 0 | |
220 | - X.append(dict_dataset[imgname]) | |
221 | - Y.append(tag) | |
222 | - | |
223 | - return X, Y | |
224 | - | |
225 | - | |
226 | - def _load_dataset_from_hbase(self, table='ImgCV'): | |
227 | - pass | |
228 | - | |
229 | - | |
230 | - def _model_svm_train_sk(self, X, Y): | |
231 | - timer = Timer() | |
232 | - timer.mark() | |
233 | - lin_clf = svm.LinearSVC() | |
234 | - lin_clf.fit(X, Y) | |
235 | - with open('res/tmp.model', 'wb') as modelfile: | |
236 | - model = pickle.dump(lin_clf, modelfile) | |
237 | - | |
238 | - timer.report() | |
239 | - | |
240 | - self.svm = 'sk' | |
241 | - self.model = lin_clf | |
242 | - | |
243 | - return lin_clf | |
244 | - | |
245 | - def _model_svm_predict_sk(self, image, clf=None): | |
246 | - if clf is None: | |
247 | - if self.svm == 'sk' and self.model != None: | |
248 | - clf = self.model | |
249 | - else: | |
250 | - with open('res/tmp.model', 'rb') as modelfile: | |
251 | - clf = pickle.load(modelfile) | |
252 | - | |
253 | - im = mjpeg.Jpeg(image, key=sample_key) | |
254 | - ciq = im.coef_arrays[mjpeg.colorMap['Y']] | |
255 | - tpm = self.get_trans_prob_mat(ciq) | |
256 | - | |
257 | - return clf.predict(tpm) | |
258 | - | |
259 | - | |
260 | - def _model_svm_train_cv(self, X, Y): | |
261 | - svm_params = dict(kernel_type=cv2.SVM_LINEAR, | |
262 | - svm_type=cv2.SVM_C_SVC, | |
263 | - C=2.67, gamma=5.383) | |
264 | - | |
265 | - timer = Timer() | |
266 | - timer.mark() | |
267 | - svm = cv2.SVM() | |
268 | - svm.train(X, Y, params=svm_params) | |
269 | - svm.save('res/svm_data.model') | |
270 | - | |
271 | - self.svm = 'cv' | |
272 | - self.model = svm | |
273 | - | |
274 | - return svm | |
275 | - | |
276 | - def _model_svm_predict_cv(self, image, svm=None): | |
277 | - if svm is None: | |
278 | - if self.svm == 'cv' and self.model != None: | |
279 | - clf = self.model | |
280 | - else: | |
281 | - svm = cv2.SVM() | |
282 | - svm.load('res/svm_data.model') | |
283 | - | |
284 | - im = mjpeg.Jpeg(image, key=sample_key) | |
285 | - ciq = im.coef_arrays[mjpeg.colorMap['Y']] | |
286 | - tpm = self.get_trans_prob_mat(ciq) | |
287 | - | |
288 | - return svm.predict(tpm) | |
289 | - | |
290 | - def train_svm(self): | |
291 | - X, Y = self.load_dataset('local', 'images_map_Train.tsv') | |
292 | - return self._model_svm_train_sk(X, Y) | |
293 | - | |
294 | - def predict_svm(self, image): | |
295 | - return self._model_svm_predict_sk(image) | |
296 | - | |
297 | - | |
298 | - | |
299 | - | |
300 | - |