staged.

Chunk
1 parent f2bebe34
Showing 4 changed files with 4 additions and 477 deletions Show diff stats
mjpeg/__init__.py
mjpeg/compress.py
msteg/steganalysis/ChiSquare.py
msteg/steganalysis/MPB.py.bak
@@ -8,7 +8,8 @@ __all__ = ['Jpeg', 'colorMap', 'diffblock', 'diffblocks']
 # functions from submodules.
 #
 # ::
-
+import numpy as np
+from numpy import shape
 import numpy.random as rnd
 import base
@@ -169,18 +170,6 @@ class Jpeg(Jsteg):
         E = [-np.inf] + [i for i in range(-T, T + 2)] + [np.inf]
         return np.histogram(A, E)
-    def plotHist(self, mask=base.acMaskBlock, T=8):
-        """
-          Make a histogram of the jpeg coefficients.
-          The mask is a boolean 8x8 matrix indicating the
-          frequencies to be included.  This defaults to the
-          AC coefficients.
-        """
-        A = self.rawsignal(mask).tolist()
-        E = [i for i in range(-T, T + 2)]
-        plt.hist(A, E, histtype='bar')
-        plt.show()
-
     def nzcount(self, *a, **kw):
         """Number of non-zero AC coefficients.
 ## -*- coding: utf-8 -*-
-
-from pylab import *
+from numpy import array
+# from pylab import *
 # The standard quantisation tables for JPEG::
@@ -1,162 +0,0 @@
-"""
-<p>
-This module implements an algorithm described by Andreas Westfeld in [1,2],
-which detects if there was data embedded into an image using JSteg.
-It uses the property that JSteg generates pairs of values in the
-DCT-coefficients histogram, which can be detected by a \chi^2 test.
-</p>
-
-<pre>
-[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
-Better Steganalysis
-[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
-</pre>
-"""
-
-from collections import defaultdict
-import os
-
-from PIL import Image
-import numpy
-from scipy.stats import chisquare
-import matplotlib.pyplot as plt
-import itertools as it
-
-from .. import *
-
-
-class ChiSquare(StegBase):
-    """
-    The module contains only one method, <b>detect</b>.
-    """
-
-    def __init__(self, ui, core):
-        self.ui = ui
-        self.core = core
-
-    def detect(self, src, tgt, tgt2):
-        """
-        <p>
-        Detect if there was data embedded in the <i>source image</i> image with
-        JSteg algorithm.
-        </p>
-
-        <p>
-        Parameters:
-        <ol>
-        <li><pre>Source image</pre> Image which should be tested</li>
-        <li><pre>Target image</pre> Image which displays a graphic with the
-        embedding probability</li>
-        <li><pre>2nd Target image</pre> Image which displays the embedding
-        positions in the image</li>
-        </ol>
-        </p>
-        """
-        # --------------------------- Input -----------------------------------
-        # If src is from the image pool, test whether the image exists encoded
-        # on the file system. Otherwise we can not read DCT-coefficients.
-        if self.core.media_manager.is_media_key(src):
-            src = self.core.media_manager.get_file(src)
-            if hasattr(src, 'tmp_file'):
-                src = src.tmp_file
-                self.ui.display_error('Trying file: %s' % src)
-            else:
-                self.ui.display_error('Can not detect anything from \
-                        decoded images.')
-                return
-        # Test whether the file exists.
-        if not os.path.isfile(src):
-            self.ui.display_error('No such file.')
-            return
-        # Test if it is a JPEG file.
-        if not self._looks_like_jpeg(src):
-            self.ui.display_error('Input is probably not a JPEG file.')
-            return
-
-        # ---------------------------- Algorithm ------------------------------
-        # Build DCT-histogram in steps of \approx 1% of all coefficients and
-        # calculate the p-value at each step.
-
-        # dct_data = rw_dct.read_dct_coefficients(src)
-        dct_data = self._get_cov_data(src)
-
-        hist = defaultdict(int)
-        cnt = 0
-        l = len(dct_data)
-        one_p = l / 100
-        result = []
-        for block in dct_data:
-            # update the histogram with one block of 64 coefficients
-            for c in block:
-                hist[c] += 1
-
-            cnt += 1
-            if not cnt % one_p:
-                # calculate p-value
-                self.ui.set_progress(cnt * 100 / l)
-
-                # ignore the pair (0, 1), since JSteg does not embed data there
-                hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
-                k = len(hl) / 2
-                observed = []
-                expected = []
-                # calculate observed and expected distribution
-                for i in range(k):
-                    t = hl[2 * i] + hl[2 * i + 1]
-                    if t > 3:
-                        observed.append(hl[2 * i])
-                        expected.append(t / 2)
-                # calculate (\chi^2, p)
-                p = chisquare(numpy.array(observed), numpy.array(expected))[1]
-                result.append(p)
-
-        # ----------------------------- Output --------------------------------
-        # Graph displaying the embedding probabilities in relation to the
-        # sample size.
-        figure = plt.figure()
-        plot = figure.add_subplot(111)
-        plot.grid(True)
-        plot.plot(result, color='r', linewidth=2.0)
-        plt.axis([0, 100, 0, 1.1])
-        plt.title('Embedding probability for different percentages \
-of the file capacity.')
-        plt.xlabel('% of file capacity')
-        plt.ylabel('Embedding probability')
-
-        if self.core.media_manager.is_media_key(tgt):
-            img = figure_to_pil(figure)
-            self.core.media_manager.put_media(tgt, img)
-        else:
-            plt.savefig(tgt)
-
-        # Image displaying the length and position of the embedded data
-        # within the image
-        img2 = Image.open(src)
-        img2.convert("RGB")
-        width, height = img2.size
-
-        for i in range(100):
-            result[i] = max(result[i:])
-
-        cnt2 = 0
-        for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
-            if not cnt2 % one_p:
-                r = result[cnt2 / one_p]
-                if r >= 0.5:
-                    color = (255, int((1 - r) * 2 * 255), 0)
-                else:
-                    color = (int(r * 2 * 255), 255, 0)
-            cnt2 += 1
-            img2.paste(color, (left, top, min(left + 8, width),
-                               min(top + 8, height)))
-        self.core.media_manager.put_media(tgt2, img2)
-
-    def __str__(self):
-        return 'Chi-Square-Test'
-
-
-def figure_to_pil(figure):
-    figure.canvas.draw()
-    return Image.fromstring('RGB',
-                            figure.canvas.get_width_height(),
-                            figure.canvas.tostring_rgb())
@@ -1,300 +0,0 @@
-__author__ = 'chunk'
-"""
-Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
-"""
-
-import time
-import math
-import numpy as np
-
-from .. import *
-from ...mjpeg import Jpeg,colorMap
-from ...common import *
-
-import csv
-import json
-import pickle
-import cv2
-from sklearn import svm
-
-base_dir = '/home/hadoop/data/HeadShoulder/'
-
-
-class MPB(StegBase):
-    """
-    Markov Process Based Steganalyasis Algo.
-    """
-
-    def __init__(self):
-        StegBase.__init__(self, sample_key)
-        self.model = None
-        self.svm = None
-
-    def _get_trans_prob_mat_orig(self, ciq, T=4):
-        """
-        Original!
-        Calculate Transition Probability Matrix.
-
-        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
-        :param T: signed integer, usually 1~7
-        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
-        """
-        ciq = np.absolute(ciq).clip(0, T)
-        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
-        # Fh = np.diff(ciq, axis=-1)
-        # Fv = np.diff(ciq, axis=0)
-        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
-        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
-        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
-        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
-
-        Fh1 = Fh[:-1, :-1]
-        Fh2 = Fh[:-1, 1:]
-
-        Fv1 = Fv[:-1, :-1]
-        Fv2 = Fv[1:, :-1]
-
-        Fd1 = Fd[:-1, :-1]
-        Fd2 = Fd[1:, 1:]
-
-        Fm1 = Fm[:-1, 1:]
-        Fm2 = Fm[1:, :-1]
-
-        # original:(very slow!)
-        for n in range(-T, T + 1):
-            for m in range(-T, T + 1):
-                dh = np.sum(Fh1 == m) * 1.0
-                dv = np.sum(Fv1 == m) * 1.0
-                dd = np.sum(Fd1 == m) * 1.0
-                dm = np.sum(Fm1 == m) * 1.0
-
-                if dh != 0:
-                    TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
-
-                if dv != 0:
-                    TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
-
-                if dd != 0:
-                    TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
-
-                if dm != 0:
-                    TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
-
-        # 1.422729s
-        return TPM
-
-
-    def get_trans_prob_mat(self, ciq, T=4):
-        """
-        Calculate Transition Probability Matrix.
-
-        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
-        :param T: signed integer, usually 1~7
-        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
-        """
-
-        return self._get_trans_prob_mat_orig(ciq, T)
-
-
-        # timer = Timer()
-        ciq = np.absolute(ciq).clip(0, T)
-        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
-        # Fh = np.diff(ciq, axis=-1)
-        # Fv = np.diff(ciq, axis=0)
-        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
-        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
-        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
-        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
-
-        Fh1 = Fh[:-1, :-1].ravel()
-        Fh2 = Fh[:-1, 1:].ravel()
-
-        Fv1 = Fv[:-1, :-1].ravel()
-        Fv2 = Fv[1:, :-1].ravel()
-
-        Fd1 = Fd[:-1, :-1].ravel()
-        Fd2 = Fd[1:, 1:].ravel()
-
-        Fm1 = Fm[:-1, 1:].ravel()
-        Fm2 = Fm[1:, :-1].ravel()
-
-
-
-        # 0.089754s
-        # timer.mark()
-        # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
-        # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
-        # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
-        # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
-        # timer.report()
-
-        # 1.459668s
-        # timer.mark()
-        # for i in range(len(Fh1)):
-        #     TPM[Fh1[i], Fh2[i], 0] += 1
-        # for i in range(len(Fv1)):
-        #     TPM[Fv1[i], Fv2[i], 1] += 1
-        # for i in range(len(Fd1)):
-        #     TPM[Fd1[i], Fd2[i], 2] += 1
-        # for i in range(len(Fm1)):
-        #     TPM[Fm1[i], Fm2[i], 3] += 1
-        # timer.report()
-
-        # 1.463982s
-        # timer.mark()
-        for m, n in zip(Fh1.ravel(), Fh2.ravel()):
-            TPM[m, n, 0] += 1
-
-        for m, n in zip(Fv1.ravel(), Fv2.ravel()):
-            TPM[m, n, 1] += 1
-
-        for m, n in zip(Fd1.ravel(), Fd2.ravel()):
-            TPM[m, n, 2] += 1
-
-        for m, n in zip(Fm1.ravel(), Fm2.ravel()):
-            TPM[m, n, 3] += 1
-        # timer.report()
-
-        # 0.057505s
-        # timer.mark()
-        for m in range(-T, T + 1):
-            dh = np.sum(Fh1 == m) * 1.0
-            dv = np.sum(Fv1 == m) * 1.0
-            dd = np.sum(Fd1 == m) * 1.0
-            dm = np.sum(Fm1 == m) * 1.0
-
-            if dh != 0:
-                TPM[m, :, 0] /= dh
-
-            if dv != 0:
-                TPM[m, :, 1] /= dv
-
-            if dd != 0:
-                TPM[m, :, 2] /= dd
-
-            if dm != 0:
-                TPM[m, :, 3] /= dm
-        # timer.report()
-
-        return TPM
-
-    def load_dataset(self, mode, file):
-        if mode == 'local':
-            return self._load_dataset_from_local(file)
-        elif mode == 'remote' or mode == 'hbase':
-            return self._load_dataset_from_hbase(file)
-        else:
-            raise Exception("Unknown mode!")
-
-    def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):
-        """
-        load jpeg dataset according to a file of file-list.
-
-        :param list_file: a tsv file with each line for a jpeg file path
-        :return:(X,Y) for SVM
-        """
-        list_file = base_dir + list_file
-
-        X = []
-        Y = []
-        dict_tagbuf = {}
-        dict_dataset = {}
-
-        with open(list_file, 'rb') as tsvfile:
-            tsvfile = csv.reader(tsvfile, delimiter='\t')
-            for line in tsvfile:
-                imgname = line[0] + '.jpg'
-                dict_tagbuf[imgname] = line[1]
-
-        dir = base_dir + 'Feat/'
-        for path, subdirs, files in os.walk(dir + 'Train/'):
-            for name in files:
-                featpath = os.path.join(path, name)
-                # print featpath
-                with open(featpath, 'rb') as featfile:
-                    imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
-                    dict_dataset[imgname] = json.loads(featfile.read())
-
-        for imgname, tag in dict_tagbuf.items():
-            tag = 1 if tag == 'True' else 0
-            X.append(dict_dataset[imgname])
-            Y.append(tag)
-
-        return X, Y
-
-
-    def _load_dataset_from_hbase(self, table='ImgCV'):
-        pass
-
-
-    def _model_svm_train_sk(self, X, Y):
-        timer = Timer()
-        timer.mark()
-        lin_clf = svm.LinearSVC()
-        lin_clf.fit(X, Y)
-        with open('res/tmp.model', 'wb') as modelfile:
-            model = pickle.dump(lin_clf, modelfile)
-
-        timer.report()
-
-        self.svm = 'sk'
-        self.model = lin_clf
-
-        return lin_clf
-
-    def _model_svm_predict_sk(self, image, clf=None):
-        if clf is None:
-            if self.svm == 'sk' and self.model != None:
-                clf = self.model
-            else:
-                with open('res/tmp.model', 'rb') as modelfile:
-                    clf = pickle.load(modelfile)
-
-        im = mjpeg.Jpeg(image, key=sample_key)
-        ciq = im.coef_arrays[mjpeg.colorMap['Y']]
-        tpm = self.get_trans_prob_mat(ciq)
-
-        return clf.predict(tpm)
-
-
-    def _model_svm_train_cv(self, X, Y):
-        svm_params = dict(kernel_type=cv2.SVM_LINEAR,
-                          svm_type=cv2.SVM_C_SVC,
-                          C=2.67, gamma=5.383)
-
-        timer = Timer()
-        timer.mark()
-        svm = cv2.SVM()
-        svm.train(X, Y, params=svm_params)
-        svm.save('res/svm_data.model')
-
-        self.svm = 'cv'
-        self.model = svm
-
-        return svm
-
-    def _model_svm_predict_cv(self, image, svm=None):
-        if svm is None:
-            if self.svm == 'cv' and self.model != None:
-                clf = self.model
-            else:
-                svm = cv2.SVM()
-                svm.load('res/svm_data.model')
-
-        im = mjpeg.Jpeg(image, key=sample_key)
-        ciq = im.coef_arrays[mjpeg.colorMap['Y']]
-        tpm = self.get_trans_prob_mat(ciq)
-
-        return svm.predict(tpm)
-
-    def train_svm(self):
-        X, Y = self.load_dataset('local', 'images_map_Train.tsv')
-        return self._model_svm_train_sk(X, Y)
-
-    def predict_svm(self, image):
-        return self._model_svm_predict_sk(image)
-
-
-
-
-
1	## -*- coding: utf-8 -*-	1	## -*- coding: utf-8 -*-
2		2
3	-
4	-from pylab import *	3	+from numpy import array
		4	+# from pylab import *
5		5
6	# The standard quantisation tables for JPEG::	6	# The standard quantisation tables for JPEG::
7		7
	@@ -1,162 +0,0 @@	@@ -1,162 +0,0 @@
1	-"""
2	-<p>
3	-This module implements an algorithm described by Andreas Westfeld in [1,2],
4	-which detects if there was data embedded into an image using JSteg.
5	-It uses the property that JSteg generates pairs of values in the
6	-DCT-coefficients histogram, which can be detected by a \chi^2 test.
7	-</p>
8	-
9	-<pre>
10	-[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
11	-Better Steganalysis
12	-[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
13	-</pre>
14	-"""
15	-
16	-from collections import defaultdict
17	-import os
18	-
19	-from PIL import Image
20	-import numpy
21	-from scipy.stats import chisquare
22	-import matplotlib.pyplot as plt
23	-import itertools as it
24	-
25	-from .. import *
26	-
27	-
28	-class ChiSquare(StegBase):
29	- """
30	- The module contains only one method, <b>detect</b>.
31	- """
32	-
33	- def __init__(self, ui, core):
34	- self.ui = ui
35	- self.core = core
36	-
37	- def detect(self, src, tgt, tgt2):
38	- """
39	- <p>
40	- Detect if there was data embedded in the <i>source image</i> image with
41	- JSteg algorithm.
42	- </p>
43	-
44	- <p>
45	- Parameters:
46	- <ol>
47	- <li><pre>Source image</pre> Image which should be tested</li>
48	- <li><pre>Target image</pre> Image which displays a graphic with the
49	- embedding probability</li>
50	- <li><pre>2nd Target image</pre> Image which displays the embedding
51	- positions in the image</li>
52	- </ol>
53	- </p>
54	- """
55	- # --------------------------- Input -----------------------------------
56	- # If src is from the image pool, test whether the image exists encoded
57	- # on the file system. Otherwise we can not read DCT-coefficients.
58	- if self.core.media_manager.is_media_key(src):
59	- src = self.core.media_manager.get_file(src)
60	- if hasattr(src, 'tmp_file'):
61	- src = src.tmp_file
62	- self.ui.display_error('Trying file: %s' % src)
63	- else:
64	- self.ui.display_error('Can not detect anything from \
65	- decoded images.')
66	- return
67	- # Test whether the file exists.
68	- if not os.path.isfile(src):
69	- self.ui.display_error('No such file.')
70	- return
71	- # Test if it is a JPEG file.
72	- if not self._looks_like_jpeg(src):
73	- self.ui.display_error('Input is probably not a JPEG file.')
74	- return
75	-
76	- # ---------------------------- Algorithm ------------------------------
77	- # Build DCT-histogram in steps of \approx 1% of all coefficients and
78	- # calculate the p-value at each step.
79	-
80	- # dct_data = rw_dct.read_dct_coefficients(src)
81	- dct_data = self._get_cov_data(src)
82	-
83	- hist = defaultdict(int)
84	- cnt = 0
85	- l = len(dct_data)
86	- one_p = l / 100
87	- result = []
88	- for block in dct_data:
89	- # update the histogram with one block of 64 coefficients
90	- for c in block:
91	- hist[c] += 1
92	-
93	- cnt += 1
94	- if not cnt % one_p:
95	- # calculate p-value
96	- self.ui.set_progress(cnt * 100 / l)
97	-
98	- # ignore the pair (0, 1), since JSteg does not embed data there
99	- hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
100	- k = len(hl) / 2
101	- observed = []
102	- expected = []
103	- # calculate observed and expected distribution
104	- for i in range(k):
105	- t = hl[2 * i] + hl[2 * i + 1]
106	- if t > 3:
107	- observed.append(hl[2 * i])
108	- expected.append(t / 2)
109	- # calculate (\chi^2, p)
110	- p = chisquare(numpy.array(observed), numpy.array(expected))[1]
111	- result.append(p)
112	-
113	- # ----------------------------- Output --------------------------------
114	- # Graph displaying the embedding probabilities in relation to the
115	- # sample size.
116	- figure = plt.figure()
117	- plot = figure.add_subplot(111)
118	- plot.grid(True)
119	- plot.plot(result, color='r', linewidth=2.0)
120	- plt.axis([0, 100, 0, 1.1])
121	- plt.title('Embedding probability for different percentages \
122	-of the file capacity.')
123	- plt.xlabel('% of file capacity')
124	- plt.ylabel('Embedding probability')
125	-
126	- if self.core.media_manager.is_media_key(tgt):
127	- img = figure_to_pil(figure)
128	- self.core.media_manager.put_media(tgt, img)
129	- else:
130	- plt.savefig(tgt)
131	-
132	- # Image displaying the length and position of the embedded data
133	- # within the image
134	- img2 = Image.open(src)
135	- img2.convert("RGB")
136	- width, height = img2.size
137	-
138	- for i in range(100):
139	- result[i] = max(result[i:])
140	-
141	- cnt2 = 0
142	- for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
143	- if not cnt2 % one_p:
144	- r = result[cnt2 / one_p]
145	- if r >= 0.5:
146	- color = (255, int((1 - r) * 2 * 255), 0)
147	- else:
148	- color = (int(r * 2 * 255), 255, 0)
149	- cnt2 += 1
150	- img2.paste(color, (left, top, min(left + 8, width),
151	- min(top + 8, height)))
152	- self.core.media_manager.put_media(tgt2, img2)
153	-
154	- def __str__(self):
155	- return 'Chi-Square-Test'
156	-
157	-
158	-def figure_to_pil(figure):
159	- figure.canvas.draw()
160	- return Image.fromstring('RGB',
161	- figure.canvas.get_width_height(),
162	- figure.canvas.tostring_rgb())
	@@ -1,300 +0,0 @@	@@ -1,300 +0,0 @@
1	-__author__ = 'chunk'
2	-"""
3	-Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
4	-"""
5	-
6	-import time
7	-import math
8	-import numpy as np
9	-
10	-from .. import *
11	-from ...mjpeg import Jpeg,colorMap
12	-from ...common import *
13	-
14	-import csv
15	-import json
16	-import pickle
17	-import cv2
18	-from sklearn import svm
19	-
20	-base_dir = '/home/hadoop/data/HeadShoulder/'
21	-
22	-
23	-class MPB(StegBase):
24	- """
25	- Markov Process Based Steganalyasis Algo.
26	- """
27	-
28	- def __init__(self):
29	- StegBase.__init__(self, sample_key)
30	- self.model = None
31	- self.svm = None
32	-
33	- def _get_trans_prob_mat_orig(self, ciq, T=4):
34	- """
35	- Original!
36	- Calculate Transition Probability Matrix.
37	-
38	- :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
39	- :param T: signed integer, usually 1~7
40	- :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
41	- """
42	- ciq = np.absolute(ciq).clip(0, T)
43	- TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
44	- # Fh = np.diff(ciq, axis=-1)
45	- # Fv = np.diff(ciq, axis=0)
46	- Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
47	- Fv = ciq[:-1, :-1] - ciq[1:, :-1]
48	- Fd = ciq[:-1, :-1] - ciq[1:, 1:]
49	- Fm = ciq[:-1, 1:] - ciq[1:, :-1]
50	-
51	- Fh1 = Fh[:-1, :-1]
52	- Fh2 = Fh[:-1, 1:]
53	-
54	- Fv1 = Fv[:-1, :-1]
55	- Fv2 = Fv[1:, :-1]
56	-
57	- Fd1 = Fd[:-1, :-1]
58	- Fd2 = Fd[1:, 1:]
59	-
60	- Fm1 = Fm[:-1, 1:]
61	- Fm2 = Fm[1:, :-1]
62	-
63	- # original:(very slow!)
64	- for n in range(-T, T + 1):
65	- for m in range(-T, T + 1):
66	- dh = np.sum(Fh1 == m) * 1.0
67	- dv = np.sum(Fv1 == m) * 1.0
68	- dd = np.sum(Fd1 == m) * 1.0
69	- dm = np.sum(Fm1 == m) * 1.0
70	-
71	- if dh != 0:
72	- TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
73	-
74	- if dv != 0:
75	- TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
76	-
77	- if dd != 0:
78	- TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
79	-
80	- if dm != 0:
81	- TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
82	-
83	- # 1.422729s
84	- return TPM
85	-
86	-
87	- def get_trans_prob_mat(self, ciq, T=4):
88	- """
89	- Calculate Transition Probability Matrix.
90	-
91	- :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
92	- :param T: signed integer, usually 1~7
93	- :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
94	- """
95	-
96	- return self._get_trans_prob_mat_orig(ciq, T)
97	-
98	-
99	- # timer = Timer()
100	- ciq = np.absolute(ciq).clip(0, T)
101	- TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
102	- # Fh = np.diff(ciq, axis=-1)
103	- # Fv = np.diff(ciq, axis=0)
104	- Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
105	- Fv = ciq[:-1, :-1] - ciq[1:, :-1]
106	- Fd = ciq[:-1, :-1] - ciq[1:, 1:]
107	- Fm = ciq[:-1, 1:] - ciq[1:, :-1]
108	-
109	- Fh1 = Fh[:-1, :-1].ravel()
110	- Fh2 = Fh[:-1, 1:].ravel()
111	-
112	- Fv1 = Fv[:-1, :-1].ravel()
113	- Fv2 = Fv[1:, :-1].ravel()
114	-
115	- Fd1 = Fd[:-1, :-1].ravel()
116	- Fd2 = Fd[1:, 1:].ravel()
117	-
118	- Fm1 = Fm[:-1, 1:].ravel()
119	- Fm2 = Fm[1:, :-1].ravel()
120	-
121	-
122	-
123	- # 0.089754s
124	- # timer.mark()
125	- # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
126	- # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
127	- # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
128	- # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
129	- # timer.report()
130	-
131	- # 1.459668s
132	- # timer.mark()
133	- # for i in range(len(Fh1)):
134	- # TPM[Fh1[i], Fh2[i], 0] += 1
135	- # for i in range(len(Fv1)):
136	- # TPM[Fv1[i], Fv2[i], 1] += 1
137	- # for i in range(len(Fd1)):
138	- # TPM[Fd1[i], Fd2[i], 2] += 1
139	- # for i in range(len(Fm1)):
140	- # TPM[Fm1[i], Fm2[i], 3] += 1
141	- # timer.report()
142	-
143	- # 1.463982s
144	- # timer.mark()
145	- for m, n in zip(Fh1.ravel(), Fh2.ravel()):
146	- TPM[m, n, 0] += 1
147	-
148	- for m, n in zip(Fv1.ravel(), Fv2.ravel()):
149	- TPM[m, n, 1] += 1
150	-
151	- for m, n in zip(Fd1.ravel(), Fd2.ravel()):
152	- TPM[m, n, 2] += 1
153	-
154	- for m, n in zip(Fm1.ravel(), Fm2.ravel()):
155	- TPM[m, n, 3] += 1
156	- # timer.report()
157	-
158	- # 0.057505s
159	- # timer.mark()
160	- for m in range(-T, T + 1):
161	- dh = np.sum(Fh1 == m) * 1.0
162	- dv = np.sum(Fv1 == m) * 1.0
163	- dd = np.sum(Fd1 == m) * 1.0
164	- dm = np.sum(Fm1 == m) * 1.0
165	-
166	- if dh != 0:
167	- TPM[m, :, 0] /= dh
168	-
169	- if dv != 0:
170	- TPM[m, :, 1] /= dv
171	-
172	- if dd != 0:
173	- TPM[m, :, 2] /= dd
174	-
175	- if dm != 0:
176	- TPM[m, :, 3] /= dm
177	- # timer.report()
178	-
179	- return TPM
180	-
181	- def load_dataset(self, mode, file):
182	- if mode == 'local':
183	- return self._load_dataset_from_local(file)
184	- elif mode == 'remote' or mode == 'hbase':
185	- return self._load_dataset_from_hbase(file)
186	- else:
187	- raise Exception("Unknown mode!")
188	-
189	- def _load_dataset_from_local(self, list_file='images_map_Train.tsv'):
190	- """
191	- load jpeg dataset according to a file of file-list.
192	-
193	- :param list_file: a tsv file with each line for a jpeg file path
194	- :return:(X,Y) for SVM
195	- """
196	- list_file = base_dir + list_file
197	-
198	- X = []
199	- Y = []
200	- dict_tagbuf = {}
201	- dict_dataset = {}
202	-
203	- with open(list_file, 'rb') as tsvfile:
204	- tsvfile = csv.reader(tsvfile, delimiter='\t')
205	- for line in tsvfile:
206	- imgname = line[0] + '.jpg'
207	- dict_tagbuf[imgname] = line[1]
208	-
209	- dir = base_dir + 'Feat/'
210	- for path, subdirs, files in os.walk(dir + 'Train/'):
211	- for name in files:
212	- featpath = os.path.join(path, name)
213	- # print featpath
214	- with open(featpath, 'rb') as featfile:
215	- imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
216	- dict_dataset[imgname] = json.loads(featfile.read())
217	-
218	- for imgname, tag in dict_tagbuf.items():
219	- tag = 1 if tag == 'True' else 0
220	- X.append(dict_dataset[imgname])
221	- Y.append(tag)
222	-
223	- return X, Y
224	-
225	-
226	- def _load_dataset_from_hbase(self, table='ImgCV'):
227	- pass
228	-
229	-
230	- def _model_svm_train_sk(self, X, Y):
231	- timer = Timer()
232	- timer.mark()
233	- lin_clf = svm.LinearSVC()
234	- lin_clf.fit(X, Y)
235	- with open('res/tmp.model', 'wb') as modelfile:
236	- model = pickle.dump(lin_clf, modelfile)
237	-
238	- timer.report()
239	-
240	- self.svm = 'sk'
241	- self.model = lin_clf
242	-
243	- return lin_clf
244	-
245	- def _model_svm_predict_sk(self, image, clf=None):
246	- if clf is None:
247	- if self.svm == 'sk' and self.model != None:
248	- clf = self.model
249	- else:
250	- with open('res/tmp.model', 'rb') as modelfile:
251	- clf = pickle.load(modelfile)
252	-
253	- im = mjpeg.Jpeg(image, key=sample_key)
254	- ciq = im.coef_arrays[mjpeg.colorMap['Y']]
255	- tpm = self.get_trans_prob_mat(ciq)
256	-
257	- return clf.predict(tpm)
258	-
259	-
260	- def _model_svm_train_cv(self, X, Y):
261	- svm_params = dict(kernel_type=cv2.SVM_LINEAR,
262	- svm_type=cv2.SVM_C_SVC,
263	- C=2.67, gamma=5.383)
264	-
265	- timer = Timer()
266	- timer.mark()
267	- svm = cv2.SVM()
268	- svm.train(X, Y, params=svm_params)
269	- svm.save('res/svm_data.model')
270	-
271	- self.svm = 'cv'
272	- self.model = svm
273	-
274	- return svm
275	-
276	- def _model_svm_predict_cv(self, image, svm=None):
277	- if svm is None:
278	- if self.svm == 'cv' and self.model != None:
279	- clf = self.model
280	- else:
281	- svm = cv2.SVM()
282	- svm.load('res/svm_data.model')
283	-
284	- im = mjpeg.Jpeg(image, key=sample_key)
285	- ciq = im.coef_arrays[mjpeg.colorMap['Y']]
286	- tpm = self.get_trans_prob_mat(ciq)
287	-
288	- return svm.predict(tpm)
289	-
290	- def train_svm(self):
291	- X, Y = self.load_dataset('local', 'images_map_Train.tsv')
292	- return self._model_svm_train_sk(X, Y)
293	-
294	- def predict_svm(self, image):
295	- return self._model_svm_predict_sk(image)
296	-
297	-
298	-
299	-
300	-