staged.

Chunk
1 parent f2bebe34
Showing 4 changed files with 4 additions and 477 deletions Show diff stats
mjpeg/__init__.py
mjpeg/compress.py
msteg/steganalysis/ChiSquare.py
msteg/steganalysis/MPB.py.bak
@@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks']
 # functions from submodules.
 #
 # ::
-
+import numpy as np
+from numpy import shape
 import numpy.random as rnd
  
 import base
@@ -169,18 +170,6 @@ class Jpeg(Jsteg):
         E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf]
         return np.histogram(A, E)
  
-    def plotHist(self, mask=base.acMaskBlock, T=8):
-        """
-          Make a histogram of the jpeg coefficients.
-          The mask is a boolean 8x8 matrix indicating the
-          frequencies to be included.  This defaults to the
-          AC coefficients.
-        """
-        A = self.rawsignal(mask).tolist()
-        E = [i for i in range(-T, T + 2)]
-        plt.hist(A, E, histtype='bar')
-        plt.show()
-
     def nzcount(self, *a, **kw):
         """Number of non-zero AC coefficients.
  
 ## -*- coding: utf-8 -*-
  
-
-from pylab import *
+from numpy import array
+# from pylab import *
  
 # The standard quantisation tables for JPEG::
  
@@ -1,162 +0,0 @@
-"""
-<p>
-This module implements an algorithm described by Andreas Westfeld in [1,2],
-which detects if there was data embedded into an image using JSteg.
-It uses the property that JSteg generates pairs of values in the
-DCT-coefficients histogram, which can be detected by a \chi^2 test.
-</p>
-
-<pre>
-[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
-Better Steganalysis
-[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
-</pre>
-"""
-
-from collections import defaultdict
-import os
-
-from PIL import Image
-import numpy
-from scipy.stats import chisquare
-import matplotlib.pyplot as plt
-import itertools as it
-
-from .. import *
-
-
-class ChiSquare(StegBase):
-    """
-    The module contains only one method, <b>detect</b>.
-    """
-
-    def __init__(self, ui, core):
-        self.ui = ui
-        self.core = core
-
-    def detect(self, src, tgt, tgt2):
-        """
-        <p>
-        Detect if there was data embedded in the <i>source image</i> image with
-        JSteg algorithm.
-        </p>
-
-        <p>
-        Parameters:
-        <ol>
-        <li><pre>Source image</pre> Image which should be tested</li>
-        <li><pre>Target image</pre> Image which displays a graphic with the
-        embedding probability</li>
-        <li><pre>2nd Target image</pre> Image which displays the embedding
-        positions in the image</li>
-        </ol>
-        </p>
-        """
-        # --------------------------- Input -----------------------------------
-        # If src is from the image pool, test whether the image exists encoded
-        # on the file system. Otherwise we can not read DCT-coefficients.
-        if self.core.media_manager.is_media_key(src):
-            src = self.core.media_manager.get_file(src)
-            if hasattr(src, 'tmp_file'):
-                src = src.tmp_file
-                self.ui.display_error('Trying file: %s' % src)
-            else:
-                self.ui.display_error('Can not detect anything from \
-                        decoded images.')
-                return
-        # Test whether the file exists.
-        if not os.path.isfile(src):
-            self.ui.display_error('No such file.')
-            return
-        # Test if it is a JPEG file.
-        if not self._looks_like_jpeg(src):
-            self.ui.display_error('Input is probably not a JPEG file.')
-            return
-
-        # ---------------------------- Algorithm ------------------------------
-        # Build DCT-histogram in steps of \approx 1% of all coefficients and
-        # calculate the p-value at each step.
-
-        # dct_data = rw_dct.read_dct_coefficients(src)
-        dct_data = self._get_cov_data(src)
-
-        hist = defaultdict(int)
-        cnt = 0
-        l = len(dct_data)
-        one_p = l / 100
-        result = []
-        for block in dct_data:
-            # update the histogram with one block of 64 coefficients
-            for c in block:
-                hist[c] += 1
-
-            cnt += 1
-            if not cnt % one_p:
-                # calculate p-value
-                self.ui.set_progress(cnt * 100 / l)
-
-                # ignore the pair (0, 1), since JSteg does not embed data there
-                hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
-                k = len(hl) / 2
-                observed = []
-                expected = []
-                # calculate observed and expected distribution
-                for i in range(k):
-                    t = hl[2 * i] + hl[2 * i + 1]
-                    if t > 3:
-                        observed.append(hl[2 * i])
-                        expected.append(t / 2)
-                # calculate (\chi^2, p)
-                p = chisquare(numpy.array(observed), numpy.array(expected))[1]
-                result.append(p)
-
-        # ----------------------------- Output --------------------------------
-        # Graph displaying the embedding probabilities in relation to the
-        # sample size.
-        figure = plt.figure()
-        plot = figure.add_subplot(111)
-        plot.grid(True)
-        plot.plot(result, color='r', linewidth=2.0)
-        plt.axis([0, 100, 0, 1.1])
-        plt.title('Embedding probability for different percentages \
-of the file capacity.')
-        plt.xlabel('% of file capacity')
-        plt.ylabel('Embedding probability')
-
-        if self.core.media_manager.is_media_key(tgt):
-            img = figure_to_pil(figure)
-            self.core.media_manager.put_media(tgt, img)
-        else:
-            plt.savefig(tgt)
-
-        # Image displaying the length and position of the embedded data
-        # within the image
-        img2 = Image.open(src)
-        img2.convert("RGB")
-        width, height = img2.size
-
-        for i in range(100):
-            result[i] = max(result[i:])
-
-        cnt2 = 0
-        for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
-            if not cnt2 % one_p:
-                r = result[cnt2 / one_p]
-                if r >= 0.5:
-                    color = (255, int((1 - r) * 2 * 255), 0)
-                else:
-                    color = (int(r * 2 * 255), 255, 0)
-            cnt2 += 1
-            img2.paste(color, (left, top, min(left + 8, width),
-                               min(top + 8, height)))
-        self.core.media_manager.put_media(tgt2, img2)
-
-    def __str__(self):
-        return 'Chi-Square-Test'
-
-
-def figure_to_pil(figure):
-    figure.canvas.draw()
-    return Image.fromstring('RGB',
-                            figure.canvas.get_width_height(),
-                            figure.canvas.tostring_rgb())
@@ -1,300 +0,0 @@
-__author__ = 'chunk'
-"""
-Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
-"""
-
-import time
-import math
-import numpy as np
-
-from .. import *
-from ...mjpeg import Jpeg,colorMap
-from ...common import *
-
-import csv
-import json
-import pickle
-import cv2
-from sklearn import svm
-
-base_dir = '/home/hadoop/data/HeadShoulder/'
-
-
-class MPB(StegBase):
-    """
-    Markov Process Based Steganalyasis Algo.
-    """
-
-    def __init__(self):
-        StegBase.__init__(self, sample_key)
-        self.model = None
-        self.svm = None
-
-    def _get_trans_prob_mat_orig(self, ciq, T=4):
-        """
-        Original!
-        Calculate Transition Probability Matrix.
-
-        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
-        :param T: signed integer, usually 1~7
-        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
-        """
-        ciq = np.absolute(ciq).clip(0, T)
-        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
-        # Fh = np.diff(ciq, axis=-1)
-        # Fv = np.diff(ciq, axis=0)
-        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
-        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
-        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
-        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
-
-        Fh1 = Fh[:-1, :-1]
-        Fh2 = Fh[:-1, 1:]
-
-        Fv1 = Fv[:-1, :-1]
-        Fv2 = Fv[1:, :-1]
-
-        Fd1 = Fd[:-1, :-1]
-        Fd2 = Fd[1:, 1:]
-
-        Fm1 = Fm[:-1, 1:]
-        Fm2 = Fm[1:, :-1]
-
-        # original:(very slow!)
-        for n in range(-T, T + 1):
-            for m in range(-T, T + 1):
-                dh = np.sum(Fh1 == m) * 1.0
-                dv = np.sum(Fv1 == m) * 1.0
-                dd = np.sum(Fd1 == m) * 1.0
-                dm = np.sum(Fm1 == m) * 1.0
-
-                if dh != 0:
-                    TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
-
-                if dv != 0:
-                    TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
-
-                if dd != 0:
-                    TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
-
-                if dm != 0:
-                    TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
-
-        # 1.422729s
-        return TPM
-
-
-    def get_trans_prob_mat(self, ciq, T=4):
-        """
-        Calculate Transition Probability Matrix.
-
-        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
-        :param T: signed integer, usually 1~7
-        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
-        """
-
-        return self._get_trans_prob_mat_orig(ciq, T)
-
-
-        # timer = Timer()
-        ciq = np.absolute(ciq).clip(0, T)
-        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
-        # Fh = np.diff(ciq, axis=-1)
-        # Fv = np.diff(ciq, axis=0)
-        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
-        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
-        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
-        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
-
-        Fh1 = Fh[:-1, :-1].ravel()
-        Fh2 = Fh[:-1, 1:].ravel()
-
-        Fv1 = Fv[:-1, :-1].ravel()
-        Fv2 = Fv[1:, :-1].ravel()
-
-        Fd1 = Fd[:-1, :-1].ravel()
-        Fd2 = Fd[1:, 1:].ravel()
-
-        Fm1 = Fm[:-1, 1:].ravel()
-        Fm2 = Fm[1:, :-1].ravel()
-
-
-
-        # 0.089754s
-        # timer.mark()
-        # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
-        # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
-        # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
-        # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
-        # timer.report()
-
-        # 1.459668s
-        # timer.mark()
-        # for i in range(len(Fh1)):
-        #     TPM[Fh1[i], Fh2[i], 0] += 1
-        # for i in range(len(Fv1)):
-        #     TPM[Fv1[i], Fv2[i], 1] += 1
-        # for i in range(len(Fd1)):
-        #     TPM[Fd1[i], Fd2[i], 2] += 1
-        # for i in range(len(Fm1)):
-        #     TPM[Fm1[i], Fm2[i], 3] += 1
-        # timer.report()
-
-        # 1.463982s
-        # timer.mark()
-        for m, n in zip(Fh1.ravel(), Fh2.ravel()):
-            TPM[m, n, 0] += 1
-
-        for m, n in zip(Fv1.ravel(), Fv2.ravel()):
-            TPM[m, n, 1] += 1
-
-        for m, n in zip(Fd1.ravel(), Fd2.ravel()):
-            TPM[m, n, 2] += 1
-
-        for m, n in zip(Fm1.ravel(), Fm2.ravel()):
-            TPM[m, n, 3] += 1
-        # timer.report()
-
-        # 0.057505s
-        # timer.mark()
-        for m in range(-T, T + 1):
-            dh = np.sum(Fh1 == m) * 1.0
-            dv = np.sum(Fv1 == m) * 1.0
-            dd = np.sum(Fd1 == m) * 1.0
-            dm = np.sum(Fm1 == m) * 1.0
-
-            if dh != 0:
-                TPM[m, :, 0] /= dh
-
-            if dv != 0:
-                TPM[m, :, 1] /= dv
-
-            if dd != 0:
-                TPM[m, :, 2] /= dd
-
-            if dm != 0:
-                TPM[m, :, 3] /= dm
-        # timer.report()
-
-        return TPM
-
-    def load_dataset(self, mode, file):
-        if mode == 'local':
-            return self._load_dataset_from_local(file)
-        elif mode == 'remote' or mode == 'hbase':
-            return self._load_dataset_from_hbase(file)
-        else:
-            raise Exception("Unknown mode!")
-
-    def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):
-        """
-        load jpeg dataset according to a file of file-list.
-
-        :param list_file: a tsv file with each line for a jpeg file path
-        :return:(X,Y) for SVM
-        """
-        list_file = base_dir + list_file
-
-        X = []
-        Y = []
-        dict_tagbuf = {}
-        dict_dataset = {}
-
-        with open(list_file, 'rb') as tsvfile:
-            tsvfile = csv.reader(tsvfile, delimiter='\t')
-            for line in tsvfile:
-                imgname = line[0] + '.jpg'
-                dict_tagbuf[imgname] = line[1]
-
-        dir = base_dir + 'Feat/'
-        for path, subdirs, files in os.walk(dir + 'Train/'):
-            for name in files:
-                featpath = os.path.join(path, name)
-                # print featpath
-                with open(featpath, 'rb') as featfile:
-                    imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
-                    dict_dataset[imgname] = json.loads(featfile.read())
-
-        for imgname, tag in dict_tagbuf.items():
-            tag = 1 if tag == 'True' else 0
-            X.append(dict_dataset[imgname])
-            Y.append(tag)
-
-        return X, Y
-
-
-    def _load_dataset_from_hbase(self, table='ImgCV'):
-        pass
-
-
-    def _model_svm_train_sk(self, X, Y):
-        timer = Timer()
-        timer.mark()
-        lin_clf = svm.LinearSVC()
-        lin_clf.fit(X, Y)
-        with open('res/tmp.model', 'wb') as modelfile:
-            model = pickle.dump(lin_clf, modelfile)
-
-        timer.report()
-
-        self.svm = 'sk'
-        self.model = lin_clf
-
-        return lin_clf
-
-    def _model_svm_predict_sk(self, image, clf=None):
-        if clf is None:
-            if self.svm == 'sk' and self.model != None:
-                clf = self.model
-            else:
-                with open('res/tmp.model', 'rb') as modelfile:
-                    clf = pickle.load(modelfile)
-
-        im = mjpeg.Jpeg(image, key=sample_key)
-        ciq = im.coef_arrays[mjpeg.colorMap['Y']]
-        tpm = self.get_trans_prob_mat(ciq)
-
-        return clf.predict(tpm)
-
-
-    def _model_svm_train_cv(self, X, Y):
-        svm_params = dict(kernel_type=cv2.SVM_LINEAR,
-                          svm_type=cv2.SVM_C_SVC,
-                          C=2.67, gamma=5.383)
-
-        timer = Timer()
-        timer.mark()
-        svm = cv2.SVM()
-        svm.train(X, Y, params=svm_params)
-        svm.save('res/svm_data.model')
-
-        self.svm = 'cv'
-        self.model = svm
-
-        return svm
-
-    def _model_svm_predict_cv(self, image, svm=None):
-        if svm is None:
-            if self.svm == 'cv' and self.model != None:
-                clf = self.model
-            else:
-                svm = cv2.SVM()
-                svm.load('res/svm_data.model')
-
-        im = mjpeg.Jpeg(image, key=sample_key)
-        ciq = im.coef_arrays[mjpeg.colorMap['Y']]
-        tpm = self.get_trans_prob_mat(ciq)
-
-        return svm.predict(tpm)
-
-    def train_svm(self):
-        X, Y = self.load_dataset('local', 'images_map_Train.tsv')
-        return self._model_svm_train_sk(X, Y)
-
-    def predict_svm(self, image):
-        return self._model_svm_predict_sk(image)
-
-
-
-
-
...	...	@@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks']
8	8	# functions from submodules.
9	9	#
10	10	# ::
11		-
	11	+import numpy as np
	12	+from numpy import shape
12	13	import numpy.random as rnd
13	14
14	15	import base
...	...	@@ -169,18 +170,6 @@ class Jpeg(Jsteg):
169	170	E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf]
170	171	return np.histogram(A, E)
171	172
172		- def plotHist(self, mask=base.acMaskBlock, T=8):
173		- """
174		- Make a histogram of the jpeg coefficients.
175		- The mask is a boolean 8x8 matrix indicating the
176		- frequencies to be included. This defaults to the
177		- AC coefficients.
178		- """
179		- A = self.rawsignal(mask).tolist()
180		- E = [i for i in range(-T, T + 2)]
181		- plt.hist(A, E, histtype='bar')
182		- plt.show()
183		-
184	173	def nzcount(self, *a, **kw):
185	174	"""Number of non-zero AC coefficients.
186	175
...	...
1	1	## -*- coding: utf-8 -*-
2	2
3		-
4		-from pylab import *
	3	+from numpy import array
	4	+# from pylab import *
5	5
6	6	# The standard quantisation tables for JPEG::
7	7
...	...
...	...	@@ -1,162 +0,0 @@
1		-"""
2		-<p>
3		-This module implements an algorithm described by Andreas Westfeld in [1,2],
4		-which detects if there was data embedded into an image using JSteg.
5		-It uses the property that JSteg generates pairs of values in the
6		-DCT-coefficients histogram, which can be detected by a \chi^2 test.
7		-</p>
8		-
9		-<pre>
10		-[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
11		-Better Steganalysis
12		-[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
13		-</pre>
14		-"""
15		-
16		-from collections import defaultdict
17		-import os
18		-
19		-from PIL import Image
20		-import numpy
21		-from scipy.stats import chisquare
22		-import matplotlib.pyplot as plt
23		-import itertools as it
24		-
25		-from .. import *
26		-
27		-
28		-class ChiSquare(StegBase):
29		- """
30		- The module contains only one method, <b>detect</b>.
31		- """
32		-
33		- def __init__(self, ui, core):
34		- self.ui = ui
35		- self.core = core
36		-
37		- def detect(self, src, tgt, tgt2):
38		- """
39		- <p>
40		- Detect if there was data embedded in the <i>source image</i> image with
41		- JSteg algorithm.
42		- </p>
43		-
44		- <p>
45		- Parameters:
46		- <ol>
47		- <li><pre>Source image</pre> Image which should be tested</li>
48		- <li><pre>Target image</pre> Image which displays a graphic with the
49		- embedding probability</li>
50		- <li><pre>2nd Target image</pre> Image which displays the embedding
51		- positions in the image</li>
52		- </ol>
53		- </p>
54		- """
55		- # --------------------------- Input -----------------------------------
56		- # If src is from the image pool, test whether the image exists encoded
57		- # on the file system. Otherwise we can not read DCT-coefficients.
58		- if self.core.media_manager.is_media_key(src):
59		- src = self.core.media_manager.get_file(src)
60		- if hasattr(src, 'tmp_file'):
61		- src = src.tmp_file
62		- self.ui.display_error('Trying file: %s' % src)
63		- else:
64		- self.ui.display_error('Can not detect anything from \
65		- decoded images.')
66		- return
67		- # Test whether the file exists.
68		- if not os.path.isfile(src):
69		- self.ui.display_error('No such file.')
70		- return
71		- # Test if it is a JPEG file.
72		- if not self._looks_like_jpeg(src):
73		- self.ui.display_error('Input is probably not a JPEG file.')
74		- return
75		-
76		- # ---------------------------- Algorithm ------------------------------
77		- # Build DCT-histogram in steps of \approx 1% of all coefficients and
78		- # calculate the p-value at each step.
79		-
80		- # dct_data = rw_dct.read_dct_coefficients(src)
81		- dct_data = self._get_cov_data(src)
82		-
83		- hist = defaultdict(int)
84		- cnt = 0
85		- l = len(dct_data)
86		- one_p = l / 100
87		- result = []
88		- for block in dct_data:
89		- # update the histogram with one block of 64 coefficients
90		- for c in block:
91		- hist[c] += 1
92		-
93		- cnt += 1
94		- if not cnt % one_p:
95		- # calculate p-value
96		- self.ui.set_progress(cnt * 100 / l)
97		-
98		- # ignore the pair (0, 1), since JSteg does not embed data there
99		- hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
100		- k = len(hl) / 2
101		- observed = []
102		- expected = []
103		- # calculate observed and expected distribution
104		- for i in range(k):
105		- t = hl[2 * i] + hl[2 * i + 1]
106		- if t > 3:
107		- observed.append(hl[2 * i])
108		- expected.append(t / 2)
109		- # calculate (\chi^2, p)
110		- p = chisquare(numpy.array(observed), numpy.array(expected))[1]
111		- result.append(p)
112		-
113		- # ----------------------------- Output --------------------------------
114		- # Graph displaying the embedding probabilities in relation to the
115		- # sample size.
116		- figure = plt.figure()
117		- plot = figure.add_subplot(111)
118		- plot.grid(True)
119		- plot.plot(result, color='r', linewidth=2.0)
120		- plt.axis([0, 100, 0, 1.1])
121		- plt.title('Embedding probability for different percentages \
122		-of the file capacity.')
123		- plt.xlabel('% of file capacity')
124		- plt.ylabel('Embedding probability')
125		-
126		- if self.core.media_manager.is_media_key(tgt):
127		- img = figure_to_pil(figure)
128		- self.core.media_manager.put_media(tgt, img)
129		- else:
130		- plt.savefig(tgt)
131		-
132		- # Image displaying the length and position of the embedded data
133		- # within the image
134		- img2 = Image.open(src)
135		- img2.convert("RGB")
136		- width, height = img2.size
137		-
138		- for i in range(100):
139		- result[i] = max(result[i:])
140		-
141		- cnt2 = 0
142		- for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
143		- if not cnt2 % one_p:
144		- r = result[cnt2 / one_p]
145		- if r >= 0.5:
146		- color = (255, int((1 - r) * 2 * 255), 0)
147		- else:
148		- color = (int(r * 2 * 255), 255, 0)
149		- cnt2 += 1
150		- img2.paste(color, (left, top, min(left + 8, width),
151		- min(top + 8, height)))
152		- self.core.media_manager.put_media(tgt2, img2)
153		-
154		- def __str__(self):
155		- return 'Chi-Square-Test'
156		-
157		-
158		-def figure_to_pil(figure):
159		- figure.canvas.draw()
160		- return Image.fromstring('RGB',
161		- figure.canvas.get_width_height(),
162		- figure.canvas.tostring_rgb())
...	...	@@ -1,300 +0,0 @@
1		-__author__ = 'chunk'
2		-"""
3		-Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
4		-"""
5		-
6		-import time
7		-import math
8		-import numpy as np
9		-
10		-from .. import *
11		-from ...mjpeg import Jpeg,colorMap
12		-from ...common import *
13		-
14		-import csv
15		-import json
16		-import pickle
17		-import cv2
18		-from sklearn import svm
19		-
20		-base_dir = '/home/hadoop/data/HeadShoulder/'
21		-
22		-
23		-class MPB(StegBase):
24		- """
25		- Markov Process Based Steganalyasis Algo.
26		- """
27		-
28		- def __init__(self):
29		- StegBase.__init__(self, sample_key)
30		- self.model = None
31		- self.svm = None
32		-
33		- def _get_trans_prob_mat_orig(self, ciq, T=4):
34		- """
35		- Original!
36		- Calculate Transition Probability Matrix.
37		-
38		- :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
39		- :param T: signed integer, usually 1~7
40		- :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
41		- """
42		- ciq = np.absolute(ciq).clip(0, T)
43		- TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
44		- # Fh = np.diff(ciq, axis=-1)
45		- # Fv = np.diff(ciq, axis=0)
46		- Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
47		- Fv = ciq[:-1, :-1] - ciq[1:, :-1]
48		- Fd = ciq[:-1, :-1] - ciq[1:, 1:]
49		- Fm = ciq[:-1, 1:] - ciq[1:, :-1]
50		-
51		- Fh1 = Fh[:-1, :-1]
52		- Fh2 = Fh[:-1, 1:]
53		-
54		- Fv1 = Fv[:-1, :-1]
55		- Fv2 = Fv[1:, :-1]
56		-
57		- Fd1 = Fd[:-1, :-1]
58		- Fd2 = Fd[1:, 1:]
59		-
60		- Fm1 = Fm[:-1, 1:]
61		- Fm2 = Fm[1:, :-1]
62		-
63		- # original:(very slow!)
64		- for n in range(-T, T + 1):
65		- for m in range(-T, T + 1):
66		- dh = np.sum(Fh1 == m) * 1.0
67		- dv = np.sum(Fv1 == m) * 1.0
68		- dd = np.sum(Fd1 == m) * 1.0
69		- dm = np.sum(Fm1 == m) * 1.0
70		-
71		- if dh != 0:
72		- TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
73		-
74		- if dv != 0:
75		- TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
76		-
77		- if dd != 0:
78		- TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
79		-
80		- if dm != 0:
81		- TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
82		-
83		- # 1.422729s
84		- return TPM
85		-
86		-
87		- def get_trans_prob_mat(self, ciq, T=4):
88		- """
89		- Calculate Transition Probability Matrix.
90		-
91		- :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
92		- :param T: signed integer, usually 1~7
93		- :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
94		- """
95		-
96		- return self._get_trans_prob_mat_orig(ciq, T)
97		-
98		-
99		- # timer = Timer()
100		- ciq = np.absolute(ciq).clip(0, T)
101		- TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
102		- # Fh = np.diff(ciq, axis=-1)
103		- # Fv = np.diff(ciq, axis=0)
104		- Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
105		- Fv = ciq[:-1, :-1] - ciq[1:, :-1]
106		- Fd = ciq[:-1, :-1] - ciq[1:, 1:]
107		- Fm = ciq[:-1, 1:] - ciq[1:, :-1]
108		-
109		- Fh1 = Fh[:-1, :-1].ravel()
110		- Fh2 = Fh[:-1, 1:].ravel()
111		-
112		- Fv1 = Fv[:-1, :-1].ravel()
113		- Fv2 = Fv[1:, :-1].ravel()
114		-
115		- Fd1 = Fd[:-1, :-1].ravel()
116		- Fd2 = Fd[1:, 1:].ravel()
117		-
118		- Fm1 = Fm[:-1, 1:].ravel()
119		- Fm2 = Fm[1:, :-1].ravel()
120		-
121		-
122		-
123		- # 0.089754s
124		- # timer.mark()
125		- # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
126		- # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
127		- # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
128		- # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
129		- # timer.report()
130		-
131		- # 1.459668s
132		- # timer.mark()
133		- # for i in range(len(Fh1)):
134		- # TPM[Fh1[i], Fh2[i], 0] += 1
135		- # for i in range(len(Fv1)):
136		- # TPM[Fv1[i], Fv2[i], 1] += 1
137		- # for i in range(len(Fd1)):
138		- # TPM[Fd1[i], Fd2[i], 2] += 1
139		- # for i in range(len(Fm1)):
140		- # TPM[Fm1[i], Fm2[i], 3] += 1
141		- # timer.report()
142		-
143		- # 1.463982s
144		- # timer.mark()
145		- for m, n in zip(Fh1.ravel(), Fh2.ravel()):
146		- TPM[m, n, 0] += 1
147		-
148		- for m, n in zip(Fv1.ravel(), Fv2.ravel()):
149		- TPM[m, n, 1] += 1
150		-
151		- for m, n in zip(Fd1.ravel(), Fd2.ravel()):
152		- TPM[m, n, 2] += 1
153		-
154		- for m, n in zip(Fm1.ravel(), Fm2.ravel()):
155		- TPM[m, n, 3] += 1
156		- # timer.report()
157		-
158		- # 0.057505s
159		- # timer.mark()
160		- for m in range(-T, T + 1):
161		- dh = np.sum(Fh1 == m) * 1.0
162		- dv = np.sum(Fv1 == m) * 1.0
163		- dd = np.sum(Fd1 == m) * 1.0
164		- dm = np.sum(Fm1 == m) * 1.0
165		-
166		- if dh != 0:
167		- TPM[m, :, 0] /= dh
168		-
169		- if dv != 0:
170		- TPM[m, :, 1] /= dv
171		-
172		- if dd != 0:
173		- TPM[m, :, 2] /= dd
174		-
175		- if dm != 0:
176		- TPM[m, :, 3] /= dm
177		- # timer.report()
178		-
179		- return TPM
180		-
181		- def load_dataset(self, mode, file):
182		- if mode == 'local':
183		- return self._load_dataset_from_local(file)
184		- elif mode == 'remote' or mode == 'hbase':
185		- return self._load_dataset_from_hbase(file)
186		- else:
187		- raise Exception("Unknown mode!")
188		-
189		- def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):
190		- """
191		- load jpeg dataset according to a file of file-list.
192		-
193		- :param list_file: a tsv file with each line for a jpeg file path
194		- :return:(X,Y) for SVM
195		- """
196		- list_file = base_dir + list_file
197		-
198		- X = []
199		- Y = []
200		- dict_tagbuf = {}
201		- dict_dataset = {}
202		-
203		- with open(list_file, 'rb') as tsvfile:
204		- tsvfile = csv.reader(tsvfile, delimiter='\t')
205		- for line in tsvfile:
206		- imgname = line[0] + '.jpg'
207		- dict_tagbuf[imgname] = line[1]
208		-
209		- dir = base_dir + 'Feat/'
210		- for path, subdirs, files in os.walk(dir + 'Train/'):
211		- for name in files:
212		- featpath = os.path.join(path, name)
213		- # print featpath
214		- with open(featpath, 'rb') as featfile:
215		- imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
216		- dict_dataset[imgname] = json.loads(featfile.read())
217		-
218		- for imgname, tag in dict_tagbuf.items():
219		- tag = 1 if tag == 'True' else 0
220		- X.append(dict_dataset[imgname])
221		- Y.append(tag)
222		-
223		- return X, Y
224		-
225		-
226		- def _load_dataset_from_hbase(self, table='ImgCV'):
227		- pass
228		-
229		-
230		- def _model_svm_train_sk(self, X, Y):
231		- timer = Timer()
232		- timer.mark()
233		- lin_clf = svm.LinearSVC()
234		- lin_clf.fit(X, Y)
235		- with open('res/tmp.model', 'wb') as modelfile:
236		- model = pickle.dump(lin_clf, modelfile)
237		-
238		- timer.report()
239		-
240		- self.svm = 'sk'
241		- self.model = lin_clf
242		-
243		- return lin_clf
244		-
245		- def _model_svm_predict_sk(self, image, clf=None):
246		- if clf is None:
247		- if self.svm == 'sk' and self.model != None:
248		- clf = self.model
249		- else:
250		- with open('res/tmp.model', 'rb') as modelfile:
251		- clf = pickle.load(modelfile)
252		-
253		- im = mjpeg.Jpeg(image, key=sample_key)
254		- ciq = im.coef_arrays[mjpeg.colorMap['Y']]
255		- tpm = self.get_trans_prob_mat(ciq)
256		-
257		- return clf.predict(tpm)
258		-
259		-
260		- def _model_svm_train_cv(self, X, Y):
261		- svm_params = dict(kernel_type=cv2.SVM_LINEAR,
262		- svm_type=cv2.SVM_C_SVC,
263		- C=2.67, gamma=5.383)
264		-
265		- timer = Timer()
266		- timer.mark()
267		- svm = cv2.SVM()
268		- svm.train(X, Y, params=svm_params)
269		- svm.save('res/svm_data.model')
270		-
271		- self.svm = 'cv'
272		- self.model = svm
273		-
274		- return svm
275		-
276		- def _model_svm_predict_cv(self, image, svm=None):
277		- if svm is None:
278		- if self.svm == 'cv' and self.model != None:
279		- clf = self.model
280		- else:
281		- svm = cv2.SVM()
282		- svm.load('res/svm_data.model')
283		-
284		- im = mjpeg.Jpeg(image, key=sample_key)
285		- ciq = im.coef_arrays[mjpeg.colorMap['Y']]
286		- tpm = self.get_trans_prob_mat(ciq)
287		-
288		- return svm.predict(tpm)
289		-
290		- def train_svm(self):
291		- X, Y = self.load_dataset('local', 'images_map_Train.tsv')
292		- return self._model_svm_train_sk(X, Y)
293		-
294		- def predict_svm(self, image):
295		- return self._model_svm_predict_sk(image)
296		-
297		-
298		-
299		-
300		-