Commit 0c3afaf24c3c02fde5a39c38b200eb9e5c80aeda

Authored by Chunk
1 parent f2bebe34
Exists in refactor

staged.

mjpeg/__init__.py
... ... @@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks']
8 8 # functions from submodules.
9 9 #
10 10 # ::
11   -
  11 +import numpy as np
  12 +from numpy import shape
12 13 import numpy.random as rnd
13 14  
14 15 import base
... ... @@ -169,18 +170,6 @@ class Jpeg(Jsteg):
169 170 E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf]
170 171 return np.histogram(A, E)
171 172  
172   - def plotHist(self, mask=base.acMaskBlock, T=8):
173   - """
174   - Make a histogram of the jpeg coefficients.
175   - The mask is a boolean 8x8 matrix indicating the
176   - frequencies to be included. This defaults to the
177   - AC coefficients.
178   - """
179   - A = self.rawsignal(mask).tolist()
180   - E = [i for i in range(-T, T + 2)]
181   - plt.hist(A, E, histtype='bar')
182   - plt.show()
183   -
184 173 def nzcount(self, *a, **kw):
185 174 """Number of non-zero AC coefficients.
186 175  
... ...
mjpeg/compress.py
1 1 ## -*- coding: utf-8 -*-
2 2  
3   -
4   -from pylab import *
  3 +from numpy import array
  4 +# from pylab import *
5 5  
6 6 # The standard quantisation tables for JPEG::
7 7  
... ...
msteg/steganalysis/ChiSquare.py
... ... @@ -1,162 +0,0 @@
1   -"""
2   -<p>
3   -This module implements an algorithm described by Andreas Westfeld in [1,2],
4   -which detects if there was data embedded into an image using JSteg.
5   -It uses the property that JSteg generates pairs of values in the
6   -DCT-coefficients histogram, which can be detected by a \chi^2 test.
7   -</p>
8   -
9   -<pre>
10   -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
11   -Better Steganalysis
12   -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
13   -</pre>
14   -"""
15   -
16   -from collections import defaultdict
17   -import os
18   -
19   -from PIL import Image
20   -import numpy
21   -from scipy.stats import chisquare
22   -import matplotlib.pyplot as plt
23   -import itertools as it
24   -
25   -from .. import *
26   -
27   -
28   -class ChiSquare(StegBase):
29   - """
30   - The module contains only one method, <b>detect</b>.
31   - """
32   -
33   - def __init__(self, ui, core):
34   - self.ui = ui
35   - self.core = core
36   -
37   - def detect(self, src, tgt, tgt2):
38   - """
39   - <p>
40   - Detect if there was data embedded in the <i>source image</i> with
41   - the JSteg algorithm.
42   - </p>
43   -
44   - <p>
45   - Parameters:
46   - <ol>
47   - <li><pre>Source image</pre> Image which should be tested</li>
48   - <li><pre>Target image</pre> Image which displays a graphic with the
49   - embedding probability</li>
50   - <li><pre>2nd Target image</pre> Image which displays the embedding
51   - positions in the image</li>
52   - </ol>
53   - </p>
54   - """
55   - # --------------------------- Input -----------------------------------
56   - # If src is from the image pool, test whether the image exists encoded
57   - # on the file system. Otherwise we can not read DCT-coefficients.
58   - if self.core.media_manager.is_media_key(src):
59   - src = self.core.media_manager.get_file(src)
60   - if hasattr(src, 'tmp_file'):
61   - src = src.tmp_file
62   - self.ui.display_error('Trying file: %s' % src)
63   - else:
64   - self.ui.display_error('Can not detect anything from \
65   - decoded images.')
66   - return
67   - # Test whether the file exists.
68   - if not os.path.isfile(src):
69   - self.ui.display_error('No such file.')
70   - return
71   - # Test if it is a JPEG file.
72   - if not self._looks_like_jpeg(src):
73   - self.ui.display_error('Input is probably not a JPEG file.')
74   - return
75   -
76   - # ---------------------------- Algorithm ------------------------------
77   - # Build DCT-histogram in steps of \approx 1% of all coefficients and
78   - # calculate the p-value at each step.
79   -
80   - # dct_data = rw_dct.read_dct_coefficients(src)
81   - dct_data = self._get_cov_data(src)
82   -
83   - hist = defaultdict(int)
84   - cnt = 0
85   - l = len(dct_data)
86   - one_p = l / 100
87   - result = []
88   - for block in dct_data:
89   - # update the histogram with one block of 64 coefficients
90   - for c in block:
91   - hist[c] += 1
92   -
93   - cnt += 1
94   - if not cnt % one_p:
95   - # calculate p-value
96   - self.ui.set_progress(cnt * 100 / l)
97   -
98   - # ignore the pair (0, 1), since JSteg does not embed data there
99   - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
100   - k = len(hl) / 2
101   - observed = []
102   - expected = []
103   - # calculate observed and expected distribution
104   - for i in range(k):
105   - t = hl[2 * i] + hl[2 * i + 1]
106   - if t > 3:
107   - observed.append(hl[2 * i])
108   - expected.append(t / 2)
109   - # calculate (\chi^2, p)
110   - p = chisquare(numpy.array(observed), numpy.array(expected))[1]
111   - result.append(p)
112   -
113   - # ----------------------------- Output --------------------------------
114   - # Graph displaying the embedding probabilities in relation to the
115   - # sample size.
116   - figure = plt.figure()
117   - plot = figure.add_subplot(111)
118   - plot.grid(True)
119   - plot.plot(result, color='r', linewidth=2.0)
120   - plt.axis([0, 100, 0, 1.1])
121   - plt.title('Embedding probability for different percentages \
122   -of the file capacity.')
123   - plt.xlabel('% of file capacity')
124   - plt.ylabel('Embedding probability')
125   -
126   - if self.core.media_manager.is_media_key(tgt):
127   - img = figure_to_pil(figure)
128   - self.core.media_manager.put_media(tgt, img)
129   - else:
130   - plt.savefig(tgt)
131   -
132   - # Image displaying the length and position of the embedded data
133   - # within the image
134   - img2 = Image.open(src)
135   - img2.convert("RGB")
136   - width, height = img2.size
137   -
138   - for i in range(100):
139   - result[i] = max(result[i:])
140   -
141   - cnt2 = 0
142   - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
143   - if not cnt2 % one_p:
144   - r = result[cnt2 / one_p]
145   - if r >= 0.5:
146   - color = (255, int((1 - r) * 2 * 255), 0)
147   - else:
148   - color = (int(r * 2 * 255), 255, 0)
149   - cnt2 += 1
150   - img2.paste(color, (left, top, min(left + 8, width),
151   - min(top + 8, height)))
152   - self.core.media_manager.put_media(tgt2, img2)
153   -
154   - def __str__(self):
155   - return 'Chi-Square-Test'
156   -
157   -
158   -def figure_to_pil(figure):
159   - figure.canvas.draw()
160   - return Image.fromstring('RGB',
161   - figure.canvas.get_width_height(),
162   - figure.canvas.tostring_rgb())
msteg/steganalysis/MPB.py.bak
... ... @@ -1,300 +0,0 @@
1   -__author__ = 'chunk'
2   -"""
3   -Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
4   -"""
5   -
6   -import time
7   -import math
8   -import numpy as np
9   -
10   -from .. import *
11   -from ...mjpeg import Jpeg,colorMap
12   -from ...common import *
13   -
14   -import csv
15   -import json
16   -import pickle
17   -import cv2
18   -from sklearn import svm
19   -
20   -base_dir = '/home/hadoop/data/HeadShoulder/'
21   -
22   -
23   -class MPB(StegBase):
24   - """
25   - Markov Process Based Steganalysis Algorithm.
26   - """
27   -
28   - def __init__(self):
29   - StegBase.__init__(self, sample_key)
30   - self.model = None
31   - self.svm = None
32   -
33   - def _get_trans_prob_mat_orig(self, ciq, T=4):
34   - """
35   - Original!
36   - Calculate Transition Probability Matrix.
37   -
38   - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
39   - :param T: signed integer, usually 1~7
40   - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
41   - """
42   - ciq = np.absolute(ciq).clip(0, T)
43   - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
44   - # Fh = np.diff(ciq, axis=-1)
45   - # Fv = np.diff(ciq, axis=0)
46   - Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
47   - Fv = ciq[:-1, :-1] - ciq[1:, :-1]
48   - Fd = ciq[:-1, :-1] - ciq[1:, 1:]
49   - Fm = ciq[:-1, 1:] - ciq[1:, :-1]
50   -
51   - Fh1 = Fh[:-1, :-1]
52   - Fh2 = Fh[:-1, 1:]
53   -
54   - Fv1 = Fv[:-1, :-1]
55   - Fv2 = Fv[1:, :-1]
56   -
57   - Fd1 = Fd[:-1, :-1]
58   - Fd2 = Fd[1:, 1:]
59   -
60   - Fm1 = Fm[:-1, 1:]
61   - Fm2 = Fm[1:, :-1]
62   -
63   - # original:(very slow!)
64   - for n in range(-T, T + 1):
65   - for m in range(-T, T + 1):
66   - dh = np.sum(Fh1 == m) * 1.0
67   - dv = np.sum(Fv1 == m) * 1.0
68   - dd = np.sum(Fd1 == m) * 1.0
69   - dm = np.sum(Fm1 == m) * 1.0
70   -
71   - if dh != 0:
72   - TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
73   -
74   - if dv != 0:
75   - TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
76   -
77   - if dd != 0:
78   - TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
79   -
80   - if dm != 0:
81   - TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
82   -
83   - # 1.422729s
84   - return TPM
85   -
86   -
87   - def get_trans_prob_mat(self, ciq, T=4):
88   - """
89   - Calculate Transition Probability Matrix.
90   -
91   - :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
92   - :param T: signed integer, usually 1~7
93   - :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
94   - """
95   -
96   - return self._get_trans_prob_mat_orig(ciq, T)
97   -
98   -
99   - # timer = Timer()
100   - ciq = np.absolute(ciq).clip(0, T)
101   - TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
102   - # Fh = np.diff(ciq, axis=-1)
103   - # Fv = np.diff(ciq, axis=0)
104   - Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
105   - Fv = ciq[:-1, :-1] - ciq[1:, :-1]
106   - Fd = ciq[:-1, :-1] - ciq[1:, 1:]
107   - Fm = ciq[:-1, 1:] - ciq[1:, :-1]
108   -
109   - Fh1 = Fh[:-1, :-1].ravel()
110   - Fh2 = Fh[:-1, 1:].ravel()
111   -
112   - Fv1 = Fv[:-1, :-1].ravel()
113   - Fv2 = Fv[1:, :-1].ravel()
114   -
115   - Fd1 = Fd[:-1, :-1].ravel()
116   - Fd2 = Fd[1:, 1:].ravel()
117   -
118   - Fm1 = Fm[:-1, 1:].ravel()
119   - Fm2 = Fm[1:, :-1].ravel()
120   -
121   -
122   -
123   - # 0.089754s
124   - # timer.mark()
125   - # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
126   - # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
127   - # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
128   - # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
129   - # timer.report()
130   -
131   - # 1.459668s
132   - # timer.mark()
133   - # for i in range(len(Fh1)):
134   - # TPM[Fh1[i], Fh2[i], 0] += 1
135   - # for i in range(len(Fv1)):
136   - # TPM[Fv1[i], Fv2[i], 1] += 1
137   - # for i in range(len(Fd1)):
138   - # TPM[Fd1[i], Fd2[i], 2] += 1
139   - # for i in range(len(Fm1)):
140   - # TPM[Fm1[i], Fm2[i], 3] += 1
141   - # timer.report()
142   -
143   - # 1.463982s
144   - # timer.mark()
145   - for m, n in zip(Fh1.ravel(), Fh2.ravel()):
146   - TPM[m, n, 0] += 1
147   -
148   - for m, n in zip(Fv1.ravel(), Fv2.ravel()):
149   - TPM[m, n, 1] += 1
150   -
151   - for m, n in zip(Fd1.ravel(), Fd2.ravel()):
152   - TPM[m, n, 2] += 1
153   -
154   - for m, n in zip(Fm1.ravel(), Fm2.ravel()):
155   - TPM[m, n, 3] += 1
156   - # timer.report()
157   -
158   - # 0.057505s
159   - # timer.mark()
160   - for m in range(-T, T + 1):
161   - dh = np.sum(Fh1 == m) * 1.0
162   - dv = np.sum(Fv1 == m) * 1.0
163   - dd = np.sum(Fd1 == m) * 1.0
164   - dm = np.sum(Fm1 == m) * 1.0
165   -
166   - if dh != 0:
167   - TPM[m, :, 0] /= dh
168   -
169   - if dv != 0:
170   - TPM[m, :, 1] /= dv
171   -
172   - if dd != 0:
173   - TPM[m, :, 2] /= dd
174   -
175   - if dm != 0:
176   - TPM[m, :, 3] /= dm
177   - # timer.report()
178   -
179   - return TPM
180   -
181   - def load_dataset(self, mode, file):
182   - if mode == 'local':
183   - return self._load_dataset_from_local(file)
184   - elif mode == 'remote' or mode == 'hbase':
185   - return self._load_dataset_from_hbase(file)
186   - else:
187   - raise Exception("Unknown mode!")
188   -
189   - def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):
190   - """
191   - Load the JPEG dataset described by a file-list file.
192   -
193   - :param list_file: a TSV file; each line holds an image name and its tag
194   - :return: (X, Y) for SVM
195   - """
196   - list_file = base_dir + list_file
197   -
198   - X = []
199   - Y = []
200   - dict_tagbuf = {}
201   - dict_dataset = {}
202   -
203   - with open(list_file, 'rb') as tsvfile:
204   - tsvfile = csv.reader(tsvfile, delimiter='\t')
205   - for line in tsvfile:
206   - imgname = line[0] + '.jpg'
207   - dict_tagbuf[imgname] = line[1]
208   -
209   - dir = base_dir + 'Feat/'
210   - for path, subdirs, files in os.walk(dir + 'Train/'):
211   - for name in files:
212   - featpath = os.path.join(path, name)
213   - # print featpath
214   - with open(featpath, 'rb') as featfile:
215   - imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
216   - dict_dataset[imgname] = json.loads(featfile.read())
217   -
218   - for imgname, tag in dict_tagbuf.items():
219   - tag = 1 if tag == 'True' else 0
220   - X.append(dict_dataset[imgname])
221   - Y.append(tag)
222   -
223   - return X, Y
224   -
225   -
226   - def _load_dataset_from_hbase(self, table='ImgCV'):
227   - pass
228   -
229   -
230   - def _model_svm_train_sk(self, X, Y):
231   - timer = Timer()
232   - timer.mark()
233   - lin_clf = svm.LinearSVC()
234   - lin_clf.fit(X, Y)
235   - with open('res/tmp.model', 'wb') as modelfile:
236   - model = pickle.dump(lin_clf, modelfile)
237   -
238   - timer.report()
239   -
240   - self.svm = 'sk'
241   - self.model = lin_clf
242   -
243   - return lin_clf
244   -
245   - def _model_svm_predict_sk(self, image, clf=None):
246   - if clf is None:
247   - if self.svm == 'sk' and self.model != None:
248   - clf = self.model
249   - else:
250   - with open('res/tmp.model', 'rb') as modelfile:
251   - clf = pickle.load(modelfile)
252   -
253   - im = mjpeg.Jpeg(image, key=sample_key)
254   - ciq = im.coef_arrays[mjpeg.colorMap['Y']]
255   - tpm = self.get_trans_prob_mat(ciq)
256   -
257   - return clf.predict(tpm)
258   -
259   -
260   - def _model_svm_train_cv(self, X, Y):
261   - svm_params = dict(kernel_type=cv2.SVM_LINEAR,
262   - svm_type=cv2.SVM_C_SVC,
263   - C=2.67, gamma=5.383)
264   -
265   - timer = Timer()
266   - timer.mark()
267   - svm = cv2.SVM()
268   - svm.train(X, Y, params=svm_params)
269   - svm.save('res/svm_data.model')
270   -
271   - self.svm = 'cv'
272   - self.model = svm
273   -
274   - return svm
275   -
276   - def _model_svm_predict_cv(self, image, svm=None):
277   - if svm is None:
278   - if self.svm == 'cv' and self.model != None:
279   - clf = self.model
280   - else:
281   - svm = cv2.SVM()
282   - svm.load('res/svm_data.model')
283   -
284   - im = mjpeg.Jpeg(image, key=sample_key)
285   - ciq = im.coef_arrays[mjpeg.colorMap['Y']]
286   - tpm = self.get_trans_prob_mat(ciq)
287   -
288   - return svm.predict(tpm)
289   -
290   - def train_svm(self):
291   - X, Y = self.load_dataset('local', 'images_map_Train.tsv')
292   - return self._model_svm_train_sk(X, Y)
293   -
294   - def predict_svm(self, image):
295   - return self._model_svm_predict_sk(image)
296   -
297   -
298   -
299   -
300   -