Commit 0c3afaf24c3c02fde5a39c38b200eb9e5c80aeda
1 parent
f2bebe34
Exists in
refactor
staged.
Showing
4 changed files
with
4 additions
and
477 deletions
Show diff stats
mjpeg/__init__.py
@@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks'] | @@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks'] | ||
8 | # functions from submodules. | 8 | # functions from submodules. |
9 | # | 9 | # |
10 | # :: | 10 | # :: |
11 | - | 11 | +import numpy as np |
12 | +from numpy import shape | ||
12 | import numpy.random as rnd | 13 | import numpy.random as rnd |
13 | 14 | ||
14 | import base | 15 | import base |
@@ -169,18 +170,6 @@ class Jpeg(Jsteg): | @@ -169,18 +170,6 @@ class Jpeg(Jsteg): | ||
169 | E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf] | 170 | E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf] |
170 | return np.histogram(A, E) | 171 | return np.histogram(A, E) |
171 | 172 | ||
172 | - def plotHist(self, mask=base.acMaskBlock, T=8): | ||
173 | - """ | ||
174 | - Make a histogram of the jpeg coefficients. | ||
175 | - The mask is a boolean 8x8 matrix indicating the | ||
176 | - frequencies to be included. This defaults to the | ||
177 | - AC coefficients. | ||
178 | - """ | ||
179 | - A = self.rawsignal(mask).tolist() | ||
180 | - E = [i for i in range(-T, T + 2)] | ||
181 | - plt.hist(A, E, histtype='bar') | ||
182 | - plt.show() | ||
183 | - | ||
184 | def nzcount(self, *a, **kw): | 173 | def nzcount(self, *a, **kw): |
185 | """Number of non-zero AC coefficients. | 174 | """Number of non-zero AC coefficients. |
186 | 175 |
mjpeg/compress.py
msteg/steganalysis/ChiSquare.py
@@ -1,162 +0,0 @@ | @@ -1,162 +0,0 @@ | ||
1 | -""" | ||
2 | -<p> | ||
3 | -This module implements an algorithm described by Andreas Westfeld in [1,2], | ||
4 | -which detects if there was data embedded into an image using JSteg. | ||
5 | -It uses the property that JSteg generates pairs of values in the | ||
6 | -DCT-coefficients histogram, which can be detected by a \chi^2 test. | ||
7 | -</p> | ||
8 | - | ||
9 | -<pre> | ||
10 | -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite | ||
11 | -Better Steganalysis | ||
12 | -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme | ||
13 | -</pre> | ||
14 | -""" | ||
15 | - | ||
16 | -from collections import defaultdict | ||
17 | -import os | ||
18 | - | ||
19 | -from PIL import Image | ||
20 | -import numpy | ||
21 | -from scipy.stats import chisquare | ||
22 | -import matplotlib.pyplot as plt | ||
23 | -import itertools as it | ||
24 | - | ||
25 | -from .. import * | ||
26 | - | ||
27 | - | ||
28 | -class ChiSquare(StegBase): | ||
29 | - """ | ||
30 | - The module contains only one method, <b>detect</b>. | ||
31 | - """ | ||
32 | - | ||
33 | - def __init__(self, ui, core): | ||
34 | - self.ui = ui | ||
35 | - self.core = core | ||
36 | - | ||
37 | - def detect(self, src, tgt, tgt2): | ||
38 | - """ | ||
39 | - <p> | ||
40 | - Detect if there was data embedded in the <i>source image</i> with the | ||
41 | - JSteg algorithm. | ||
42 | - </p> | ||
43 | - | ||
44 | - <p> | ||
45 | - Parameters: | ||
46 | - <ol> | ||
47 | - <li><pre>Source image</pre> Image which should be tested</li> | ||
48 | - <li><pre>Target image</pre> Image which displays a graphic with the | ||
49 | - embedding probability</li> | ||
50 | - <li><pre>2nd Target image</pre> Image which displays the embedding | ||
51 | - positions in the image</li> | ||
52 | - </ol> | ||
53 | - </p> | ||
54 | - """ | ||
55 | - # --------------------------- Input ----------------------------------- | ||
56 | - # If src is from the image pool, test whether the image exists encoded | ||
57 | - # on the file system. Otherwise we can not read DCT-coefficients. | ||
58 | - if self.core.media_manager.is_media_key(src): | ||
59 | - src = self.core.media_manager.get_file(src) | ||
60 | - if hasattr(src, 'tmp_file'): | ||
61 | - src = src.tmp_file | ||
62 | - self.ui.display_error('Trying file: %s' % src) | ||
63 | - else: | ||
64 | - self.ui.display_error('Can not detect anything from \ | ||
65 | - decoded images.') | ||
66 | - return | ||
67 | - # Test whether the file exists. | ||
68 | - if not os.path.isfile(src): | ||
69 | - self.ui.display_error('No such file.') | ||
70 | - return | ||
71 | - # Test if it is a JPEG file. | ||
72 | - if not self._looks_like_jpeg(src): | ||
73 | - self.ui.display_error('Input is probably not a JPEG file.') | ||
74 | - return | ||
75 | - | ||
76 | - # ---------------------------- Algorithm ------------------------------ | ||
77 | - # Build DCT-histogram in steps of \approx 1% of all coefficients and | ||
78 | - # calculate the p-value at each step. | ||
79 | - | ||
80 | - # dct_data = rw_dct.read_dct_coefficients(src) | ||
81 | - dct_data = self._get_cov_data(src) | ||
82 | - | ||
83 | - hist = defaultdict(int) | ||
84 | - cnt = 0 | ||
85 | - l = len(dct_data) | ||
86 | - one_p = l / 100 | ||
87 | - result = [] | ||
88 | - for block in dct_data: | ||
89 | - # update the histogram with one block of 64 coefficients | ||
90 | - for c in block: | ||
91 | - hist[c] += 1 | ||
92 | - | ||
93 | - cnt += 1 | ||
94 | - if not cnt % one_p: | ||
95 | - # calculate p-value | ||
96 | - self.ui.set_progress(cnt * 100 / l) | ||
97 | - | ||
98 | - # ignore the pair (0, 1), since JSteg does not embed data there | ||
99 | - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)] | ||
100 | - k = len(hl) / 2 | ||
101 | - observed = [] | ||
102 | - expected = [] | ||
103 | - # calculate observed and expected distribution | ||
104 | - for i in range(k): | ||
105 | - t = hl[2 * i] + hl[2 * i + 1] | ||
106 | - if t > 3: | ||
107 | - observed.append(hl[2 * i]) | ||
108 | - expected.append(t / 2) | ||
109 | - # calculate (\chi^2, p) | ||
110 | - p = chisquare(numpy.array(observed), numpy.array(expected))[1] | ||
111 | - result.append(p) | ||
112 | - | ||
113 | - # ----------------------------- Output -------------------------------- | ||
114 | - # Graph displaying the embedding probabilities in relation to the | ||
115 | - # sample size. | ||
116 | - figure = plt.figure() | ||
117 | - plot = figure.add_subplot(111) | ||
118 | - plot.grid(True) | ||
119 | - plot.plot(result, color='r', linewidth=2.0) | ||
120 | - plt.axis([0, 100, 0, 1.1]) | ||
121 | - plt.title('Embedding probability for different percentages \ | ||
122 | -of the file capacity.') | ||
123 | - plt.xlabel('% of file capacity') | ||
124 | - plt.ylabel('Embedding probability') | ||
125 | - | ||
126 | - if self.core.media_manager.is_media_key(tgt): | ||
127 | - img = figure_to_pil(figure) | ||
128 | - self.core.media_manager.put_media(tgt, img) | ||
129 | - else: | ||
130 | - plt.savefig(tgt) | ||
131 | - | ||
132 | - # Image displaying the length and position of the embedded data | ||
133 | - # within the image | ||
134 | - img2 = Image.open(src) | ||
135 | - img2.convert("RGB") | ||
136 | - width, height = img2.size | ||
137 | - | ||
138 | - for i in range(100): | ||
139 | - result[i] = max(result[i:]) | ||
140 | - | ||
141 | - cnt2 = 0 | ||
142 | - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)): | ||
143 | - if not cnt2 % one_p: | ||
144 | - r = result[cnt2 / one_p] | ||
145 | - if r >= 0.5: | ||
146 | - color = (255, int((1 - r) * 2 * 255), 0) | ||
147 | - else: | ||
148 | - color = (int(r * 2 * 255), 255, 0) | ||
149 | - cnt2 += 1 | ||
150 | - img2.paste(color, (left, top, min(left + 8, width), | ||
151 | - min(top + 8, height))) | ||
152 | - self.core.media_manager.put_media(tgt2, img2) | ||
153 | - | ||
154 | - def __str__(self): | ||
155 | - return 'Chi-Square-Test' | ||
156 | - | ||
157 | - | ||
158 | -def figure_to_pil(figure): | ||
159 | - figure.canvas.draw() | ||
160 | - return Image.fromstring('RGB', | ||
161 | - figure.canvas.get_width_height(), | ||
162 | - figure.canvas.tostring_rgb()) |
msteg/steganalysis/MPB.py.bak
@@ -1,300 +0,0 @@ | @@ -1,300 +0,0 @@ | ||
1 | -__author__ = 'chunk' | ||
2 | -""" | ||
3 | -Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography | ||
4 | -""" | ||
5 | - | ||
6 | -import time | ||
7 | -import math | ||
8 | -import numpy as np | ||
9 | - | ||
10 | -from .. import * | ||
11 | -from ...mjpeg import Jpeg,colorMap | ||
12 | -from ...common import * | ||
13 | - | ||
14 | -import csv | ||
15 | -import json | ||
16 | -import pickle | ||
17 | -import cv2 | ||
18 | -from sklearn import svm | ||
19 | - | ||
20 | -base_dir = '/home/hadoop/data/HeadShoulder/' | ||
21 | - | ||
22 | - | ||
23 | -class MPB(StegBase): | ||
24 | - """ | ||
25 | - Markov Process Based Steganalyasis Algo. | ||
26 | - """ | ||
27 | - | ||
28 | - def __init__(self): | ||
29 | - StegBase.__init__(self, sample_key) | ||
30 | - self.model = None | ||
31 | - self.svm = None | ||
32 | - | ||
33 | - def _get_trans_prob_mat_orig(self, ciq, T=4): | ||
34 | - """ | ||
35 | - Original! | ||
36 | - Calculate Transition Probability Matrix. | ||
37 | - | ||
38 | - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs) | ||
39 | - :param T: signed integer, usually 1~7 | ||
40 | - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4) | ||
41 | - """ | ||
42 | - ciq = np.absolute(ciq).clip(0, T) | ||
43 | - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64) | ||
44 | - # Fh = np.diff(ciq, axis=-1) | ||
45 | - # Fv = np.diff(ciq, axis=0) | ||
46 | - Fh = ciq[:-1, :-1] - ciq[:-1, 1:] | ||
47 | - Fv = ciq[:-1, :-1] - ciq[1:, :-1] | ||
48 | - Fd = ciq[:-1, :-1] - ciq[1:, 1:] | ||
49 | - Fm = ciq[:-1, 1:] - ciq[1:, :-1] | ||
50 | - | ||
51 | - Fh1 = Fh[:-1, :-1] | ||
52 | - Fh2 = Fh[:-1, 1:] | ||
53 | - | ||
54 | - Fv1 = Fv[:-1, :-1] | ||
55 | - Fv2 = Fv[1:, :-1] | ||
56 | - | ||
57 | - Fd1 = Fd[:-1, :-1] | ||
58 | - Fd2 = Fd[1:, 1:] | ||
59 | - | ||
60 | - Fm1 = Fm[:-1, 1:] | ||
61 | - Fm2 = Fm[1:, :-1] | ||
62 | - | ||
63 | - # original:(very slow!) | ||
64 | - for n in range(-T, T + 1): | ||
65 | - for m in range(-T, T + 1): | ||
66 | - dh = np.sum(Fh1 == m) * 1.0 | ||
67 | - dv = np.sum(Fv1 == m) * 1.0 | ||
68 | - dd = np.sum(Fd1 == m) * 1.0 | ||
69 | - dm = np.sum(Fm1 == m) * 1.0 | ||
70 | - | ||
71 | - if dh != 0: | ||
72 | - TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh | ||
73 | - | ||
74 | - if dv != 0: | ||
75 | - TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv | ||
76 | - | ||
77 | - if dd != 0: | ||
78 | - TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd | ||
79 | - | ||
80 | - if dm != 0: | ||
81 | - TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm | ||
82 | - | ||
83 | - # 1.422729s | ||
84 | - return TPM | ||
85 | - | ||
86 | - | ||
87 | - def get_trans_prob_mat(self, ciq, T=4): | ||
88 | - """ | ||
89 | - Calculate Transition Probability Matrix. | ||
90 | - | ||
91 | - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs) | ||
92 | - :param T: signed integer, usually 1~7 | ||
93 | - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4) | ||
94 | - """ | ||
95 | - | ||
96 | - return self._get_trans_prob_mat_orig(ciq, T) | ||
97 | - | ||
98 | - | ||
99 | - # timer = Timer() | ||
100 | - ciq = np.absolute(ciq).clip(0, T) | ||
101 | - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64) | ||
102 | - # Fh = np.diff(ciq, axis=-1) | ||
103 | - # Fv = np.diff(ciq, axis=0) | ||
104 | - Fh = ciq[:-1, :-1] - ciq[:-1, 1:] | ||
105 | - Fv = ciq[:-1, :-1] - ciq[1:, :-1] | ||
106 | - Fd = ciq[:-1, :-1] - ciq[1:, 1:] | ||
107 | - Fm = ciq[:-1, 1:] - ciq[1:, :-1] | ||
108 | - | ||
109 | - Fh1 = Fh[:-1, :-1].ravel() | ||
110 | - Fh2 = Fh[:-1, 1:].ravel() | ||
111 | - | ||
112 | - Fv1 = Fv[:-1, :-1].ravel() | ||
113 | - Fv2 = Fv[1:, :-1].ravel() | ||
114 | - | ||
115 | - Fd1 = Fd[:-1, :-1].ravel() | ||
116 | - Fd2 = Fd[1:, 1:].ravel() | ||
117 | - | ||
118 | - Fm1 = Fm[:-1, 1:].ravel() | ||
119 | - Fm2 = Fm[1:, :-1].ravel() | ||
120 | - | ||
121 | - | ||
122 | - | ||
123 | - # 0.089754s | ||
124 | - # timer.mark() | ||
125 | - # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1 | ||
126 | - # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1 | ||
127 | - # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1 | ||
128 | - # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1 | ||
129 | - # timer.report() | ||
130 | - | ||
131 | - # 1.459668s | ||
132 | - # timer.mark() | ||
133 | - # for i in range(len(Fh1)): | ||
134 | - # TPM[Fh1[i], Fh2[i], 0] += 1 | ||
135 | - # for i in range(len(Fv1)): | ||
136 | - # TPM[Fv1[i], Fv2[i], 1] += 1 | ||
137 | - # for i in range(len(Fd1)): | ||
138 | - # TPM[Fd1[i], Fd2[i], 2] += 1 | ||
139 | - # for i in range(len(Fm1)): | ||
140 | - # TPM[Fm1[i], Fm2[i], 3] += 1 | ||
141 | - # timer.report() | ||
142 | - | ||
143 | - # 1.463982s | ||
144 | - # timer.mark() | ||
145 | - for m, n in zip(Fh1.ravel(), Fh2.ravel()): | ||
146 | - TPM[m, n, 0] += 1 | ||
147 | - | ||
148 | - for m, n in zip(Fv1.ravel(), Fv2.ravel()): | ||
149 | - TPM[m, n, 1] += 1 | ||
150 | - | ||
151 | - for m, n in zip(Fd1.ravel(), Fd2.ravel()): | ||
152 | - TPM[m, n, 2] += 1 | ||
153 | - | ||
154 | - for m, n in zip(Fm1.ravel(), Fm2.ravel()): | ||
155 | - TPM[m, n, 3] += 1 | ||
156 | - # timer.report() | ||
157 | - | ||
158 | - # 0.057505s | ||
159 | - # timer.mark() | ||
160 | - for m in range(-T, T + 1): | ||
161 | - dh = np.sum(Fh1 == m) * 1.0 | ||
162 | - dv = np.sum(Fv1 == m) * 1.0 | ||
163 | - dd = np.sum(Fd1 == m) * 1.0 | ||
164 | - dm = np.sum(Fm1 == m) * 1.0 | ||
165 | - | ||
166 | - if dh != 0: | ||
167 | - TPM[m, :, 0] /= dh | ||
168 | - | ||
169 | - if dv != 0: | ||
170 | - TPM[m, :, 1] /= dv | ||
171 | - | ||
172 | - if dd != 0: | ||
173 | - TPM[m, :, 2] /= dd | ||
174 | - | ||
175 | - if dm != 0: | ||
176 | - TPM[m, :, 3] /= dm | ||
177 | - # timer.report() | ||
178 | - | ||
179 | - return TPM | ||
180 | - | ||
181 | - def load_dataset(self, mode, file): | ||
182 | - if mode == 'local': | ||
183 | - return self._load_dataset_from_local(file) | ||
184 | - elif mode == 'remote' or mode == 'hbase': | ||
185 | - return self._load_dataset_from_hbase(file) | ||
186 | - else: | ||
187 | - raise Exception("Unknown mode!") | ||
188 | - | ||
189 | - def _load_dataset_from_local(self, list_file='images_map_Train.tsv'): | ||
190 | - """ | ||
191 | - Load a JPEG dataset from a file that lists the image files. | ||
192 | - | ||
193 | - :param list_file: a tsv file with each line for a jpeg file path | ||
194 | - :return:(X,Y) for SVM | ||
195 | - """ | ||
196 | - list_file = base_dir + list_file | ||
197 | - | ||
198 | - X = [] | ||
199 | - Y = [] | ||
200 | - dict_tagbuf = {} | ||
201 | - dict_dataset = {} | ||
202 | - | ||
203 | - with open(list_file, 'rb') as tsvfile: | ||
204 | - tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
205 | - for line in tsvfile: | ||
206 | - imgname = line[0] + '.jpg' | ||
207 | - dict_tagbuf[imgname] = line[1] | ||
208 | - | ||
209 | - dir = base_dir + 'Feat/' | ||
210 | - for path, subdirs, files in os.walk(dir + 'Train/'): | ||
211 | - for name in files: | ||
212 | - featpath = os.path.join(path, name) | ||
213 | - # print featpath | ||
214 | - with open(featpath, 'rb') as featfile: | ||
215 | - imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg') | ||
216 | - dict_dataset[imgname] = json.loads(featfile.read()) | ||
217 | - | ||
218 | - for imgname, tag in dict_tagbuf.items(): | ||
219 | - tag = 1 if tag == 'True' else 0 | ||
220 | - X.append(dict_dataset[imgname]) | ||
221 | - Y.append(tag) | ||
222 | - | ||
223 | - return X, Y | ||
224 | - | ||
225 | - | ||
226 | - def _load_dataset_from_hbase(self, table='ImgCV'): | ||
227 | - pass | ||
228 | - | ||
229 | - | ||
230 | - def _model_svm_train_sk(self, X, Y): | ||
231 | - timer = Timer() | ||
232 | - timer.mark() | ||
233 | - lin_clf = svm.LinearSVC() | ||
234 | - lin_clf.fit(X, Y) | ||
235 | - with open('res/tmp.model', 'wb') as modelfile: | ||
236 | - model = pickle.dump(lin_clf, modelfile) | ||
237 | - | ||
238 | - timer.report() | ||
239 | - | ||
240 | - self.svm = 'sk' | ||
241 | - self.model = lin_clf | ||
242 | - | ||
243 | - return lin_clf | ||
244 | - | ||
245 | - def _model_svm_predict_sk(self, image, clf=None): | ||
246 | - if clf is None: | ||
247 | - if self.svm == 'sk' and self.model != None: | ||
248 | - clf = self.model | ||
249 | - else: | ||
250 | - with open('res/tmp.model', 'rb') as modelfile: | ||
251 | - clf = pickle.load(modelfile) | ||
252 | - | ||
253 | - im = mjpeg.Jpeg(image, key=sample_key) | ||
254 | - ciq = im.coef_arrays[mjpeg.colorMap['Y']] | ||
255 | - tpm = self.get_trans_prob_mat(ciq) | ||
256 | - | ||
257 | - return clf.predict(tpm) | ||
258 | - | ||
259 | - | ||
260 | - def _model_svm_train_cv(self, X, Y): | ||
261 | - svm_params = dict(kernel_type=cv2.SVM_LINEAR, | ||
262 | - svm_type=cv2.SVM_C_SVC, | ||
263 | - C=2.67, gamma=5.383) | ||
264 | - | ||
265 | - timer = Timer() | ||
266 | - timer.mark() | ||
267 | - svm = cv2.SVM() | ||
268 | - svm.train(X, Y, params=svm_params) | ||
269 | - svm.save('res/svm_data.model') | ||
270 | - | ||
271 | - self.svm = 'cv' | ||
272 | - self.model = svm | ||
273 | - | ||
274 | - return svm | ||
275 | - | ||
276 | - def _model_svm_predict_cv(self, image, svm=None): | ||
277 | - if svm is None: | ||
278 | - if self.svm == 'cv' and self.model != None: | ||
279 | - clf = self.model | ||
280 | - else: | ||
281 | - svm = cv2.SVM() | ||
282 | - svm.load('res/svm_data.model') | ||
283 | - | ||
284 | - im = mjpeg.Jpeg(image, key=sample_key) | ||
285 | - ciq = im.coef_arrays[mjpeg.colorMap['Y']] | ||
286 | - tpm = self.get_trans_prob_mat(ciq) | ||
287 | - | ||
288 | - return svm.predict(tpm) | ||
289 | - | ||
290 | - def train_svm(self): | ||
291 | - X, Y = self.load_dataset('local', 'images_map_Train.tsv') | ||
292 | - return self._model_svm_train_sk(X, Y) | ||
293 | - | ||
294 | - def predict_svm(self, image): | ||
295 | - return self._model_svm_predict_sk(image) | ||
296 | - | ||
297 | - | ||
298 | - | ||
299 | - | ||
300 | - |