From 26e2fe9fe66de4396c58244e9e2e8cfb440e3292 Mon Sep 17 00:00:00 2001
From: Chunk <chunkplus@gmail.com>
Date: Tue, 3 Mar 2015 23:59:33 +0800
Subject: [PATCH] MPB steganalysis algo half-finished

---
 jpegObj/__init__.py             |  25 +++++++++++++++++++++++++
 jpegObj/__init__.pyc            | Bin 13621 -> 0 bytes
 msteg/steganalysis/ChiSquare.py |   8 +++-----
 msteg/steganalysis/MPB.py       | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 msteg/steganalysis/MPB.pyc      | Bin 0 -> 4462 bytes
 msteg/steganography/F5.py       |   2 ++
 msteg/steganography/F5.pyc      | Bin 9457 -> 0 bytes
 test_jpeg.py                    |  31 +++++--------------------------
 test_steganal.py                |  49 +++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 292 insertions(+), 31 deletions(-)
 create mode 100644 msteg/steganalysis/MPB.py
 create mode 100644 msteg/steganalysis/MPB.pyc
 create mode 100644 test_steganal.py
diff --git a/jpegObj/__init__.py b/jpegObj/__init__.py
index 297376e..e208c0c 100644
--- a/jpegObj/__init__.py
+++ b/jpegObj/__init__.py
@@ -31,6 +31,7 @@ colorCode = {
 }
 
 colorParam = ['Y', 'Cb', 'Cr']
+colorMap = {'Y': 0, 'Cb': 1, 'Cr': 2}
 
 # The JPEG class
 # ==============
@@ -64,6 +65,7 @@ class Jpeg(Jsteg):
         else:
             self.key = None
 
+
     def getkey(self):
         """Return the key used to shuffle the coefficients."""
         return self.key
@@ -380,4 +382,27 @@ class Jpeg(Jsteg):
         return S.astype(np.uint8)
 
 
+def diffblock(c1, c2):
+    diff = False
+    if np.array_equal(c1, c2):
+        print("blocks match")
+    else:
+        print("blocks not match")
+        diff = True
+
+    return diff
+
+
+def diffblocks(a, b):
+    diff = False
+    cnt = 0
+    for comp in range(a.image_components):
+        xmax, ymax = a.Jgetcompdim(comp)
+        for y in range(ymax):
+            for x in range(xmax):
+                if a.Jgetblock(x, y, comp) != b.Jgetblock(x, y, comp):
+                    print("blocks({},{}) in component {} not match".format(y, x, comp))
+                    diff = True
+                    cnt += 1
+    return diff, cnt
 
diff --git a/jpegObj/__init__.pyc b/jpegObj/__init__.pyc
index 9df081c..b60cc33 100644
Binary files a/jpegObj/__init__.pyc and b/jpegObj/__init__.pyc differ
diff --git a/msteg/steganalysis/ChiSquare.py b/msteg/steganalysis/ChiSquare.py
index 5bf976c..43f8b67 100644
--- a/msteg/steganalysis/ChiSquare.py
+++ b/msteg/steganalysis/ChiSquare.py
@@ -23,12 +23,10 @@ import matplotlib.pyplot as plt
 import itertools as it
 from msteg.StegBase import StegBase
 
-from stegotool.util.plugins import describe_and_annotate
-from stegotool.util.plugins import ImagePath, NewFilePath
-from stegotool.util.JPEGSteg import JPEGSteg
-from stegotool.util import rw_dct
+from msteg.StegBase import *
 
-class ChiSquare(JPEGSteg):
+
+class ChiSquare(StegBase):
     """
     The module contains only one method, <b>detect</b>.
     """
diff --git a/msteg/steganalysis/MPB.py b/msteg/steganalysis/MPB.py
new file mode 100644
index 0000000..6db9a50
--- /dev/null
+++ b/msteg/steganalysis/MPB.py
@@ -0,0 +1,208 @@
+__author__ = 'chunk'
+"""
+Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
+"""
+
+import time
+import math
+import numpy as np
+from msteg.StegBase import *
+import mjsteg
+import jpegObj
+from common import *
+
+import csv
+import json
+import pickle
+from sklearn import svm
+
+base_dir = '/home/hadoop/data/HeadShoulder/'
+
+class MPB(StegBase):
+    """
+    Markov Process Based Steganalysis algorithm.
+    """
+
+    def __init__(self):
+        StegBase.__init__(self, sample_key)
+
+    def get_trans_prob_mat_orig(self, ciq, T=4):
+        """
+        Original (very slow) implementation.
+        Calculate the Transition Probability Matrix.
+
+        :param ciq: JPEG DCT coefficient matrix, 2-D numpy array of int16 (the absolute value is taken internally)
+        :param T: positive integer threshold; absolute coefficients are clipped to [0, T], usually 1~7
+        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
+        """
+        ciq = np.absolute(ciq).clip(0, T)
+        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
+        # Fh = np.diff(ciq, axis=-1)
+        # Fv = np.diff(ciq, axis=0)
+        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
+        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
+        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
+        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
+
+        Fh1 = Fh[:-1, :-1]
+        Fh2 = Fh[:-1, 1:]
+
+        Fv1 = Fv[:-1, :-1]
+        Fv2 = Fv[1:, :-1]
+
+        Fd1 = Fd[:-1, :-1]
+        Fd2 = Fd[1:, 1:]
+
+        Fm1 = Fm[:-1, 1:]
+        Fm2 = Fm[1:, :-1]
+
+        # original:(very slow!)
+        for n in range(-T, T + 1):
+            for m in range(-T, T + 1):
+                dh = np.sum(Fh1 == m) * 1.0
+                dv = np.sum(Fv1 == m) * 1.0
+                dd = np.sum(Fd1 == m) * 1.0
+                dm = np.sum(Fm1 == m) * 1.0
+
+                if dh != 0:
+                    TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh
+
+                if dv != 0:
+                    TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv
+
+                if dd != 0:
+                    TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd
+
+                if dm != 0:
+                    TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
+
+        # 1.422729s
+        return TPM
+
+
+    def get_trans_prob_mat(self, ciq, T=4):
+        """
+        Calculate Transition Probability Matrix.
+
+        :param ciq: JPEG DCT coefficient matrix, 2-D numpy array of int16 (the absolute value is taken internally)
+        :param T: positive integer threshold; absolute coefficients are clipped to [0, T], usually 1~7
+        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
+        """
+        # return self.get_trans_prob_mat_orig(ciq, T)
+        # timer = Timer()
+        ciq = np.absolute(ciq).clip(0, T)
+        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
+        # Fh = np.diff(ciq, axis=-1)
+        # Fv = np.diff(ciq, axis=0)
+        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
+        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
+        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
+        Fm = ciq[:-1, 1:] - ciq[1:, :-1]
+
+        Fh1 = Fh[:-1, :-1]
+        Fh2 = Fh[:-1, 1:]
+
+        Fv1 = Fv[:-1, :-1]
+        Fv2 = Fv[1:, :-1]
+
+        Fd1 = Fd[:-1, :-1]
+        Fd2 = Fd[1:, 1:]
+
+        Fm1 = Fm[:-1, 1:]
+        Fm2 = Fm[1:, :-1]
+
+
+
+        # 0.089754s
+        # timer.mark()
+        # TPM[Fh1.ravel(), Fh2.ravel(), 0] += 1
+        # TPM[Fv1.ravel(), Fv2.ravel(), 1] += 1
+        # TPM[Fd1.ravel(), Fd2.ravel(), 2] += 1
+        # TPM[Fm1.ravel(), Fm2.ravel(), 3] += 1
+        # timer.report()
+
+        # 1.936746s
+        # timer.mark()
+        for m, n in zip(Fh1.ravel(), Fh2.ravel()):
+            TPM[m, n, 0] += 1
+
+        for m, n in zip(Fv1.ravel(), Fv2.ravel()):
+            TPM[m, n, 1] += 1
+
+        for m, n in zip(Fd1.ravel(), Fd2.ravel()):
+            TPM[m, n, 2] += 1
+
+        for m, n in zip(Fm1.ravel(), Fm2.ravel()):
+            TPM[m, n, 3] += 1
+        # timer.report()
+
+        # 0.057505s
+        # timer.mark()
+        for m in range(-T, T + 1):
+            dh = np.sum(Fh1 == m) * 1.0
+            dv = np.sum(Fv1 == m) * 1.0
+            dd = np.sum(Fd1 == m) * 1.0
+            dm = np.sum(Fm1 == m) * 1.0
+
+            if dh != 0:
+                TPM[m, :, 0] /= dh
+
+            if dv != 0:
+                TPM[m, :, 1] /= dv
+
+            if dd != 0:
+                TPM[m, :, 2] /= dd
+
+            if dm != 0:
+                TPM[m, :, 3] /= dm
+        # timer.report()
+
+        return TPM
+
+    def _load_dataset(self,list_file):
+        """
+        Load the JPEG dataset described by a list file.
+
+        :param list_file: a tsv file; each line holds an image name (without '.jpg') and its tag
+        :return: (X, Y) - list of loaded features and list of 0/1 labels, for SVM
+        """
+        X = []
+        Y = []
+        dict_tagbuf = {}
+        dict_dataset = {}
+
+        with open(list_file, 'rb') as tsvfile:
+            tsvfile = csv.reader(tsvfile, delimiter='\t')
+            for line in tsvfile:
+                imgname = line[0] + '.jpg'
+                dict_tagbuf[imgname] = line[1]
+
+        dir = base_dir + 'Feat/'
+        for path, subdirs, files in os.walk(dir + 'Train/'):
+            for name in files:
+                featpath = os.path.join(path, name)
+                # print featpath
+                with open(featpath, 'rb') as featfile:
+                    imgname = path.split('/')[-1] + name.replace('.mpb', '.jpg')
+                    dict_dataset[imgname] = json.loads(featfile.read())
+
+        for imgname, tag in dict_tagbuf.items():
+            tag = 1 if tag == 'True' else 0
+            X.append(dict_dataset[imgname])
+            Y.append(tag)
+
+        return X, Y
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/msteg/steganalysis/MPB.pyc b/msteg/steganalysis/MPB.pyc
new file mode 100644
index 0000000..92d0b75
Binary files /dev/null and b/msteg/steganalysis/MPB.pyc differ
diff --git a/msteg/steganography/F5.py b/msteg/steganography/F5.py
index f45853a..d1eecb2 100644
--- a/msteg/steganography/F5.py
+++ b/msteg/steganography/F5.py
@@ -1,6 +1,8 @@
 __author__ = 'chunk'
 
 """
+ref - http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.115.3651&rep=rep1&type=pdf
+
 <p>This module implements the rather sophisticated F5 algorithm which was
 invented by Andreas Westfeld.</p>
 
diff --git a/msteg/steganography/F5.pyc b/msteg/steganography/F5.pyc
index bc47764..4d08df6 100644
Binary files a/msteg/steganography/F5.pyc and b/msteg/steganography/F5.pyc differ
diff --git a/test_jpeg.py b/test_jpeg.py
index b200f24..d68966d 100644
--- a/test_jpeg.py
+++ b/test_jpeg.py
@@ -4,6 +4,7 @@ import numpy as np
 import mjsteg
 import jpegObj
 from jpegObj import base
+
 from common import *
 
 timer = Timer()
@@ -21,30 +22,8 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565
               5908L, 59816L, 56765L]
 
 
-def diffblock(c1, c2):
-    diff = False
-    if np.array_equal(c1, c2):
-        print("blocks match")
-    else:
-        print("blocks not match")
-        diff = True
-
-    return diff
 
 
-def diffblocks(a, b):
-    diff = False
-    cnt = 0
-    for comp in range(a.image_components):
-        xmax, ymax = a.Jgetcompdim(comp)
-        for y in range(ymax):
-            for x in range(xmax):
-                if a.Jgetblock(x, y, comp) != b.Jgetblock(x, y, comp):
-                    print("blocks({},{}) in component {} not match".format(y, x, comp))
-                    diff = True
-                    cnt += 1
-    return diff, cnt
-
 
 def test_setblocks():
     """
@@ -63,7 +42,7 @@ def test_setblocks():
 
     ima = jpegObj.Jpeg("res/test3.jpg")
     imb = jpegObj.Jpeg("res/test4.jpg")
-    diffblocks(ima, imb)
+    jpegObj.diffblocks(ima, imb)
 
 
 def test_setblocks2():
@@ -88,7 +67,7 @@ def test_setblocks2():
 
     ima = jpegObj.Jpeg("res/test3.jpg")
     imb = jpegObj.Jpeg("res/test4.jpg")
-    diffblocks(ima, imb)
+    jpegObj.diffblocks(ima, imb)
 
 
 def test_setblock():
@@ -106,7 +85,7 @@ def test_setblock():
     blocks2 = imb.Jgetblock(1, 0, 0)
     block_to_show = np.frombuffer(blocks2, dtype=np.int16, count=-1, offset=0).reshape(8, 8)
     print block_to_show
-    diffblock(blocks1, block_to_show)
+    jpegObj.diffblock(blocks1, block_to_show)
 
 
 def test_split():
@@ -197,7 +176,7 @@ if __name__ == '__main__':
     imc = jpegObj.Jpeg("res/steged.jpg", key=sample_key)
     print ima.Jgetcompdim(0)
     print ima.getkey(), imc.getkey()
-    print diffblocks(ima, imc)
+    print jpegObj.diffblocks(ima, imc)
 
     # c1 = ima.getCoefBlocks()
     # c2 =  imb.getCoefBlocks()
diff --git a/test_steganal.py b/test_steganal.py
new file mode 100644
index 0000000..2fa1a03
--- /dev/null
+++ b/test_steganal.py
@@ -0,0 +1,49 @@
+__author__ = 'chunk'
+
+import numpy as np
+import pylab as P
+import pylab as plt
+
+import mjpeg
+import mjsteg
+import jpegObj
+from  msteg.steganography import LSB, F3, F4, F5
+from  msteg.steganalysis import MPB
+
+from common import *
+
+
+timer = Timer()
+
+sample = [[7, 12, 14, -12, 1, 0, -1, 0],
+          [6, 5, -10, 0, 6, 0, 0, 0],
+          [0, 6, -5, 4, 0, -1, 0, 0],
+          [0, -3, 0, 1, -1, 0, 0, 0],
+          [-3, 5, 0, 0, 0, 0, 0, 0],
+          [2, -1, 0, 0, 0, 0, 0, 0],
+          [0, 0, 0, 0, 0, 0, 0, 0],
+          [0, 0, 0, 0, 0, 0, 0, 0]]
+
+sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 56572L, 36476L, 25695L, 61908L, 63014L,
+              5908L, 59816L, 56765L]
+
+txtsample = [116, 104, 105, 115, 32, 105, 115, 32, 116, 111, 32, 98, 101, 32, 101, 109, 98, 101, 100, 101, 100, 46, 10]
+
+if __name__ == '__main__':
+    timer = Timer()
+
+    timer.mark()
+    ima = jpegObj.Jpeg("res/test3.jpg", key=sample_key)
+    timer.report() # 0.006490s
+
+    ciq = ima.coef_arrays[jpegObj.colorMap['Y']]
+    timer.report() # 0.000019s
+
+    mpbSteg = MPB.MPB()
+    tpm = mpbSteg.get_trans_prob_mat(ciq)
+    timer.report() # 1.365718s
+
+    print tpm, tpm.shape
+    pass
+
+
--
libgit2 0.21.2