Commit 0c3afaf24c3c02fde5a39c38b200eb9e5c80aeda

Authored by Chunk
1 parent f2bebe34
Exists in refactor

staged.

mjpeg/__init__.py
@@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks'] @@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks']
8 # functions from submodules. 8 # functions from submodules.
9 # 9 #
10 # :: 10 # ::
11 - 11 +import numpy as np
  12 +from numpy import shape
12 import numpy.random as rnd 13 import numpy.random as rnd
13 14
14 import base 15 import base
@@ -169,18 +170,6 @@ class Jpeg(Jsteg): @@ -169,18 +170,6 @@ class Jpeg(Jsteg):
169 E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf] 170 E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf]
170 return np.histogram(A, E) 171 return np.histogram(A, E)
171 172
172 - def plotHist(self, mask=base.acMaskBlock, T=8):  
173 - """  
174 - Make a histogram of the jpeg coefficients.  
175 - The mask is a boolean 8x8 matrix indicating the  
176 - frequencies to be included. This defaults to the  
177 - AC coefficients.  
178 - """  
179 - A = self.rawsignal(mask).tolist()  
180 - E = [i for i in range(-T, T + 2)]  
181 - plt.hist(A, E, histtype='bar')  
182 - plt.show()  
183 -  
184 def nzcount(self, *a, **kw): 173 def nzcount(self, *a, **kw):
185 """Number of non-zero AC coefficients. 174 """Number of non-zero AC coefficients.
186 175
mjpeg/compress.py
1 ## -*- coding: utf-8 -*- 1 ## -*- coding: utf-8 -*-
2 2
3 -  
4 -from pylab import * 3 +from numpy import array
  4 +# from pylab import *
5 5
6 # The standard quantisation tables for JPEG:: 6 # The standard quantisation tables for JPEG::
7 7
msteg/steganalysis/ChiSquare.py
@@ -1,162 +0,0 @@ @@ -1,162 +0,0 @@
1 -"""  
2 -<p>  
3 -This module implements an algorithm described by Andreas Westfeld in [1,2],  
4 -which detects if there was data embedded into an image using JSteg.  
5 -It uses the property that JSteg generates pairs of values in the  
6 -DCT-coefficients histogram, which can be detected by a \chi^2 test.  
7 -</p>  
8 -  
9 -<pre>  
10 -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite  
11 -Better Steganalysis  
12 -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme  
13 -</pre>  
14 -"""  
15 -  
16 -from collections import defaultdict  
17 -import os  
18 -  
19 -from PIL import Image  
20 -import numpy  
21 -from scipy.stats import chisquare  
22 -import matplotlib.pyplot as plt  
23 -import itertools as it  
24 -  
25 -from .. import *  
26 -  
27 -  
28 -class ChiSquare(StegBase):  
29 - """  
30 - The module contains only one method, <b>detect</b>.  
31 - """  
32 -  
33 - def __init__(self, ui, core):  
34 - self.ui = ui  
35 - self.core = core  
36 -  
37 - def detect(self, src, tgt, tgt2):  
38 - """  
39 - <p>  
40 - Detect whether data was embedded in the <i>source image</i> with the  
41 - JSteg algorithm.  
42 - </p>  
43 -  
44 - <p>  
45 - Parameters:  
46 - <ol>  
47 - <li><pre>Source image</pre> Image which should be tested</li>  
48 - <li><pre>Target image</pre> Image which displays a graphic with the  
49 - embedding probability</li>  
50 - <li><pre>2nd Target image</pre> Image which displays the embedding  
51 - positions in the image</li>  
52 - </ol>  
53 - </p>  
54 - """  
55 - # --------------------------- Input -----------------------------------  
56 - # If src is from the image pool, test whether the image exists encoded  
57 - # on the file system. Otherwise we can not read DCT-coefficients.  
58 - if self.core.media_manager.is_media_key(src):  
59 - src = self.core.media_manager.get_file(src)  
60 - if hasattr(src, 'tmp_file'):  
61 - src = src.tmp_file  
62 - self.ui.display_error('Trying file: %s' % src)  
63 - else:  
64 - self.ui.display_error('Can not detect anything from \  
65 - decoded images.')  
66 - return  
67 - # Test whether the file exists.  
68 - if not os.path.isfile(src):  
69 - self.ui.display_error('No such file.')  
70 - return  
71 - # Test if it is a JPEG file.  
72 - if not self._looks_like_jpeg(src):  
73 - self.ui.display_error('Input is probably not a JPEG file.')  
74 - return  
75 -  
76 - # ---------------------------- Algorithm ------------------------------  
77 - # Build DCT-histogram in steps of \approx 1% of all coefficients and  
78 - # calculate the p-value at each step.  
79 -  
80 - # dct_data = rw_dct.read_dct_coefficients(src)  
81 - dct_data = self._get_cov_data(src)  
82 -  
83 - hist = defaultdict(int)  
84 - cnt = 0  
85 - l = len(dct_data)  
86 - one_p = l / 100  
87 - result = []  
88 - for block in dct_data:  
89 - # update the histogram with one block of 64 coefficients  
90 - for c in block:  
91 - hist[c] += 1  
92 -  
93 - cnt += 1  
94 - if not cnt % one_p:  
95 - # calculate p-value  
96 - self.ui.set_progress(cnt * 100 / l)  
97 -  
98 - # ignore the pair (0, 1), since JSteg does not embed data there  
99 - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]  
100 - k = len(hl) / 2  
101 - observed = []  
102 - expected = []  
103 - # calculate observed and expected distribution  
104 - for i in range(k):  
105 - t = hl[2 * i] + hl[2 * i + 1]  
106 - if t > 3:  
107 - observed.append(hl[2 * i])  
108 - expected.append(t / 2)  
109 - # calculate (\chi^2, p)  
110 - p = chisquare(numpy.array(observed), numpy.array(expected))[1]  
111 - result.append(p)  
112 -  
113 - # ----------------------------- Output --------------------------------  
114 - # Graph displaying the embedding probabilities in relation to the  
115 - # sample size.  
116 - figure = plt.figure()  
117 - plot = figure.add_subplot(111)  
118 - plot.grid(True)  
119 - plot.plot(result, color='r', linewidth=2.0)  
120 - plt.axis([0, 100, 0, 1.1])  
121 - plt.title('Embedding probability for different percentages \  
122 -of the file capacity.')  
123 - plt.xlabel('% of file capacity')  
124 - plt.ylabel('Embedding probability')  
125 -  
126 - if self.core.media_manager.is_media_key(tgt):  
127 - img = figure_to_pil(figure)  
128 - self.core.media_manager.put_media(tgt, img)  
129 - else:  
130 - plt.savefig(tgt)  
131 -  
132 - # Image displaying the length and position of the embedded data  
133 - # within the image  
134 - img2 = Image.open(src)  
135 - img2.convert("RGB")  
136 - width, height = img2.size  
137 -  
138 - for i in range(100):  
139 - result[i] = max(result[i:])  
140 -  
141 - cnt2 = 0  
142 - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):  
143 - if not cnt2 % one_p:  
144 - r = result[cnt2 / one_p]  
145 - if r >= 0.5:  
146 - color = (255, int((1 - r) * 2 * 255), 0)  
147 - else:  
148 - color = (int(r * 2 * 255), 255, 0)  
149 - cnt2 += 1  
150 - img2.paste(color, (left, top, min(left + 8, width),  
151 - min(top + 8, height)))  
152 - self.core.media_manager.put_media(tgt2, img2)  
153 -  
154 - def __str__(self):  
155 - return 'Chi-Square-Test'  
156 -  
157 -  
158 -def figure_to_pil(figure):  
159 - figure.canvas.draw()  
160 - return Image.fromstring('RGB',  
161 - figure.canvas.get_width_height(),  
162 - figure.canvas.tostring_rgb())  
msteg/steganalysis/MPB.py.bak
@@ -1,300 +0,0 @@ @@ -1,300 +0,0 @@
1 -__author__ = 'chunk'  
2 -"""  
3 -Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography  
4 -"""  
5 -  
6 -import time  
7 -import math  
8 -import numpy as np  
9 -  
10 -from .. import *  
11 -from ...mjpeg import Jpeg,colorMap  
12 -from ...common import *  
13 -  
14 -import csv  
15 -import json  
16 -import pickle  
17 -import cv2  
18 -from sklearn import svm  
19 -  
20 -base_dir = '/home/hadoop/data/HeadShoulder/'  
21 -  
22 -  
23 -class MPB(StegBase):  
24 - """  
25 - Markov Process Based Steganalysis Algorithm.  
26 - """  
27 -  
28 - def __init__(self):  
29 - StegBase.__init__(self, sample_key)  
30 - self.model = None  
31 - self.svm = None  
32 -  
33 - def _get_trans_prob_mat_orig(self, ciq, T=4):  
34 - """  
35 - Original!  
36 - Calculate Transition Probability Matrix.  
37 -  
38 - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)  
39 - :param T: signed integer, usually 1~7  
40 - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)  
41 - """  
42 - ciq = np.absolute(ciq).clip(0, T)  
43 - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)  
44 - # Fh = np.diff(ciq, axis=-1)  
45 - # Fv = np.diff(ciq, axis=0)  
46 - Fh = ciq[:-1, :-1] - ciq[:-1, 1:]  
47 - Fv = ciq[:-1, :-1] - ciq[1:, :-1]  
48 - Fd = ciq[:-1, :-1] - ciq[1:, 1:]  
49 - Fm = ciq[:-1, 1:] - ciq[1:, :-1]  
50 -  
51 - Fh1 = Fh[:-1, :-1]  
52 - Fh2 = Fh[:-1, 1:]  
53 -  
54 - Fv1 = Fv[:-1, :-1]  
55 - Fv2 = Fv[1:, :-1]  
56 -  
57 - Fd1 = Fd[:-1, :-1]  
58 - Fd2 = Fd[1:, 1:]  
59 -  
60 - Fm1 = Fm[:-1, 1:]  
61 - Fm2 = Fm[1:, :-1]  
62 -  
63 - # Original implementation (very slow!):  
64 - for n in range(-T, T + 1):  
65 - for m in range(-T, T + 1):  
66 - dh = np.sum(Fh1 == m) * 1.0  
67 - dv = np.sum(Fv1 == m) * 1.0  
68 - dd = np.sum(Fd1 == m) * 1.0  
69 - dm = np.sum(Fm1 == m) * 1.0  
70 -  
71 - if dh != 0:  
72 - TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh  
73 -  
74 - if dv != 0:  
75 - TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv  
76 -  
77 - if dd != 0:  
78 - TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd  
79 -  
80 - if dm != 0:  
81 - TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm  
82 -  
83 - # 1.422729s  
84 - return TPM  
85 -  
86 -  
87 - def get_trans_prob_mat(self, ciq, T=4):  
88 - """  
89 - Calculate Transition Probability Matrix.  
90 -  
91 - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)  
92 - :param T: signed integer, usually 1~7  
93 - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)  
94 - """  
95 -  
96 - return self._get_trans_prob_mat_orig(ciq, T)  
97 -  
98 -  
99 - # timer = Timer()  
100 - ciq = np.absolute(ciq).clip(0, T)  
101 - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)  
102 - # Fh = np.diff(ciq, axis=-1)  
103 - # Fv = np.diff(ciq, axis=0)  
104 - Fh = ciq[:-1, :-1] - ciq[:-1, 1:]  
105 - Fv = ciq[:-1, :-1] - ciq[1:, :-1]  
106 - Fd = ciq[:-1, :-1] - ciq[1:, 1:]  
107 - Fm = ciq[:-1, 1:] - ciq[1:, :-1]  
108 -  
109 - Fh1 = Fh[:-1, :-1].ravel()  
110 - Fh2 = Fh[:-1, 1:].ravel()  
111 -  
112 - Fv1 = Fv[:-1, :-1].ravel()  
113 - Fv2 = Fv[1:, :-1].ravel()  
114 -  
115 - Fd1 = Fd[:-1, :-1].ravel()  
116 - Fd2 = Fd[1:, 1:].ravel()  
117 -  
118 - Fm1 = Fm[:-1, 1:].ravel()  
119 - Fm2 = Fm[1:, :-1].ravel()  
120 -  
121 -  
122 -  
123 - # 0.089754s  
124 - # timer.mark()  
125 - # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1  
126 - # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1  
127 - # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1  
128 - # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1  
129 - # timer.report()  
130 -  
131 - # 1.459668s  
132 - # timer.mark()  
133 - # for i in range(len(Fh1)):  
134 - # TPM[Fh1[i], Fh2[i], 0] += 1  
135 - # for i in range(len(Fv1)):  
136 - # TPM[Fv1[i], Fv2[i], 1] += 1  
137 - # for i in range(len(Fd1)):  
138 - # TPM[Fd1[i], Fd2[i], 2] += 1  
139 - # for i in range(len(Fm1)):  
140 - # TPM[Fm1[i], Fm2[i], 3] += 1  
141 - # timer.report()  
142 -  
143 - # 1.463982s  
144 - # timer.mark()  
145 - for m, n in zip(Fh1.ravel(), Fh2.ravel()):  
146 - TPM[m, n, 0] += 1  
147 -  
148 - for m, n in zip(Fv1.ravel(), Fv2.ravel()):  
149 - TPM[m, n, 1] += 1  
150 -  
151 - for m, n in zip(Fd1.ravel(), Fd2.ravel()):  
152 - TPM[m, n, 2] += 1  
153 -  
154 - for m, n in zip(Fm1.ravel(), Fm2.ravel()):  
155 - TPM[m, n, 3] += 1  
156 - # timer.report()  
157 -  
158 - # 0.057505s  
159 - # timer.mark()  
160 - for m in range(-T, T + 1):  
161 - dh = np.sum(Fh1 == m) * 1.0  
162 - dv = np.sum(Fv1 == m) * 1.0  
163 - dd = np.sum(Fd1 == m) * 1.0  
164 - dm = np.sum(Fm1 == m) * 1.0  
165 -  
166 - if dh != 0:  
167 - TPM[m, :, 0] /= dh  
168 -  
169 - if dv != 0:  
170 - TPM[m, :, 1] /= dv  
171 -  
172 - if dd != 0:  
173 - TPM[m, :, 2] /= dd  
174 -  
175 - if dm != 0:  
176 - TPM[m, :, 3] /= dm  
177 - # timer.report()  
178 -  
179 - return TPM  
180 -  
181 - def load_dataset(self, mode, file):  
182 - if mode == 'local':  
183 - return self._load_dataset_from_local(file)  
184 - elif mode == 'remote' or mode == 'hbase':  
185 - return self._load_dataset_from_hbase(file)  
186 - else:  
187 - raise Exception("Unknown mode!")  
188 -  
189 - def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):  
190 - """  
191 - Load the JPEG dataset described by a file-list file.  
192 -  
193 - :param list_file: a TSV file with one row per JPEG image (image name, then tag)  
194 - :return:(X,Y) for SVM  
195 - """  
196 - list_file = base_dir + list_file  
197 -  
198 - X = []  
199 - Y = []  
200 - dict_tagbuf = {}  
201 - dict_dataset = {}  
202 -  
203 - with open(list_file, 'rb') as tsvfile:  
204 - tsvfile = csv.reader(tsvfile, delimiter='\t')  
205 - for line in tsvfile:  
206 - imgname = line[0] + '.jpg'  
207 - dict_tagbuf[imgname] = line[1]  
208 -  
209 - dir = base_dir + 'Feat/'  
210 - for path, subdirs, files in os.walk(dir + 'Train/'):  
211 - for name in files:  
212 - featpath = os.path.join(path, name)  
213 - # print featpath  
214 - with open(featpath, 'rb') as featfile:  
215 - imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')  
216 - dict_dataset[imgname] = json.loads(featfile.read())  
217 -  
218 - for imgname, tag in dict_tagbuf.items():  
219 - tag = 1 if tag == 'True' else 0  
220 - X.append(dict_dataset[imgname])  
221 - Y.append(tag)  
222 -  
223 - return X, Y  
224 -  
225 -  
226 - def _load_dataset_from_hbase(self, table='ImgCV'):  
227 - pass  
228 -  
229 -  
230 - def _model_svm_train_sk(self, X, Y):  
231 - timer = Timer()  
232 - timer.mark()  
233 - lin_clf = svm.LinearSVC()  
234 - lin_clf.fit(X, Y)  
235 - with open('res/tmp.model', 'wb') as modelfile:  
236 - model = pickle.dump(lin_clf, modelfile)  
237 -  
238 - timer.report()  
239 -  
240 - self.svm = 'sk'  
241 - self.model = lin_clf  
242 -  
243 - return lin_clf  
244 -  
245 - def _model_svm_predict_sk(self, image, clf=None):  
246 - if clf is None:  
247 - if self.svm == 'sk' and self.model != None:  
248 - clf = self.model  
249 - else:  
250 - with open('res/tmp.model', 'rb') as modelfile:  
251 - clf = pickle.load(modelfile)  
252 -  
253 - im = mjpeg.Jpeg(image, key=sample_key)  
254 - ciq = im.coef_arrays[mjpeg.colorMap['Y']]  
255 - tpm = self.get_trans_prob_mat(ciq)  
256 -  
257 - return clf.predict(tpm)  
258 -  
259 -  
260 - def _model_svm_train_cv(self, X, Y):  
261 - svm_params = dict(kernel_type=cv2.SVM_LINEAR,  
262 - svm_type=cv2.SVM_C_SVC,  
263 - C=2.67, gamma=5.383)  
264 -  
265 - timer = Timer()  
266 - timer.mark()  
267 - svm = cv2.SVM()  
268 - svm.train(X, Y, params=svm_params)  
269 - svm.save('res/svm_data.model')  
270 -  
271 - self.svm = 'cv'  
272 - self.model = svm  
273 -  
274 - return svm  
275 -  
276 - def _model_svm_predict_cv(self, image, svm=None):  
277 - if svm is None:  
278 - if self.svm == 'cv' and self.model != None:  
279 - clf = self.model  
280 - else:  
281 - svm = cv2.SVM()  
282 - svm.load('res/svm_data.model')  
283 -  
284 - im = mjpeg.Jpeg(image, key=sample_key)  
285 - ciq = im.coef_arrays[mjpeg.colorMap['Y']]  
286 - tpm = self.get_trans_prob_mat(ciq)  
287 -  
288 - return svm.predict(tpm)  
289 -  
290 - def train_svm(self):  
291 - X, Y = self.load_dataset('local', 'images_map_Train.tsv')  
292 - return self._model_svm_train_sk(X, Y)  
293 -  
294 - def predict_svm(self, image):  
295 - return self._model_svm_predict_sk(image)  
296 -  
297 -  
298 -  
299 -  
300 -