Blame view

msteg/steganalysis/MPB.py 8.11 KB
1d19f0e7   Chunk   staged.
1
2
3
4
5
6
7
8
__author__ = 'chunk'
"""
Yun Q. Shi, et al - A Markov Process Based Approach to Effective Attacking JPEG Steganography
"""

import time
import math
import numpy as np
ca73c96f   Chunk   Transformed into ...
9
10

from .. import *
ca73c96f   Chunk   Transformed into ...
11
from ...mjpeg import Jpeg,colorMap
1d19f0e7   Chunk   staged.
12
from ...common import *
080c30c2   Chunk   F5 lib updated. I...
13
14
15

import csv
import json
1d19f0e7   Chunk   staged.
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pickle
import cv2
from sklearn import svm

base_dir = '/home/hadoop/data/HeadShoulder/'


class MPB(StegBase):
    """
    Markov Process Based Steganalysis Algorithm.
    """

    def __init__(self):
        """
        Initialise the StegBase part with `sample_key` and start with neither
        a model nor an SVM attached (both attributes are set to None).
        """
        StegBase.__init__(self, sample_key)
        self.model = self.svm = None

    def _get_trans_prob_mat_orig(self, ciq, T=4):
        """
        Original!
        Calculate Transition Probability Matrix.

080c30c2   Chunk   F5 lib updated. I...
38
        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
1d19f0e7   Chunk   staged.
39
40
41
        :param T: signed integer, usually 1~7
        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
        """
080c30c2   Chunk   F5 lib updated. I...
42
43
44
45
        ciq = np.absolute(ciq).clip(0, T)
        TPM = np.zeros((2 * T + 1, 2 * T + 1, 4), np.float64)
        # Fh = np.diff(ciq, axis=-1)
        # Fv = np.diff(ciq, axis=0)
1d19f0e7   Chunk   staged.
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
        Fm = ciq[:-1, 1:] - ciq[1:, :-1]

        Fh1 = Fh[:-1, :-1]
        Fh2 = Fh[:-1, 1:]

        Fv1 = Fv[:-1, :-1]
        Fv2 = Fv[1:, :-1]

        Fd1 = Fd[:-1, :-1]
        Fd2 = Fd[1:, 1:]

        Fm1 = Fm[:-1, 1:]
        Fm2 = Fm[1:, :-1]

        # original:(very slow!)
        for n in range(-T, T + 1):
            for m in range(-T, T + 1):
                dh = np.sum(Fh1 == m) * 1.0
                dv = np.sum(Fv1 == m) * 1.0
                dd = np.sum(Fd1 == m) * 1.0
                dm = np.sum(Fm1 == m) * 1.0

                if dh != 0:
                    TPM[m, n, 0] = np.sum(np.logical_and(Fh1 == m, Fh2 == n)) / dh

                if dv != 0:
                    TPM[m, n, 1] = np.sum(np.logical_and(Fv1 == m, Fv2 == n)) / dv

                if dd != 0:
                    TPM[m, n, 2] = np.sum(np.logical_and(Fd1 == m, Fd2 == n)) / dd

                if dm != 0:
                    TPM[m, n, 3] = np.sum(np.logical_and(Fm1 == m, Fm2 == n)) / dm
080c30c2   Chunk   F5 lib updated. I...
82

1d19f0e7   Chunk   staged.
83
84
85
86
87
88
89
90
91
        # 1.422729s
        return TPM


    def get_trans_prob_mat(self, ciq, T=4):
        """
        Calculate Transition Probability Matrix.

        The coefficient magnitudes are clipped to [0, T]; four difference
        arrays are then formed (horizontal, vertical, main diagonal, minor
        diagonal). For every direction k, TPM[m, n, k] is the fraction of
        positions holding difference value m whose neighbour in that
        direction holds difference value n. Rows for values m that never
        occur stay all-zero.

        A negative difference value v is stored at index 2*T+1+v (the slot
        numpy negative indexing would select) and a non-negative value v at
        index v, which is the same layout the mask-counting reference
        implementation produces.

        All transitions of one direction are counted in a single
        np.bincount pass instead of per-cell Python loops.

        :param ciq: jpeg DCT coeff matrix, 2-D numpy array of int16 (pre-abs)
        :param T: non-negative integer threshold, usually 1~7
        :return: TPM - 3-D tensor, numpy array of size (2*T+1, 2*T+1, 4)
        """
        size = 2 * T + 1
        # Cast to a signed integer type so the differences below can go
        # negative without wrapping, whatever integer dtype the caller used.
        ciq = np.absolute(ciq).clip(0, T).astype(np.intp)
        TPM = np.zeros((size, size, 4), np.float64)

        Fh = ciq[:-1, :-1] - ciq[:-1, 1:]
        Fv = ciq[:-1, :-1] - ciq[1:, :-1]
        Fd = ciq[:-1, :-1] - ciq[1:, 1:]
        Fm = ciq[:-1, 1:] - ciq[1:, :-1]

        # (current value, neighbouring value) views, one pair per direction,
        # in the order of the last TPM axis: 0=h, 1=v, 2=diag, 3=minor diag.
        transitions = (
            (Fh[:-1, :-1], Fh[:-1, 1:]),
            (Fv[:-1, :-1], Fv[1:, :-1]),
            (Fd[:-1, :-1], Fd[1:, 1:]),
            (Fm[:-1, 1:], Fm[1:, :-1]),
        )

        for k, (cur, nxt) in enumerate(transitions):
            # "% size" maps a value v in [-T, T] to v (v >= 0) or size + v
            # (v < 0), i.e. to the non-negative form of the index v.
            rows = cur.ravel() % size
            cols = nxt.ravel() % size

            # One flat bin per (m, n) cell; bincount accumulates repeated
            # pairs, which "TPM[rows, cols, k] += 1" would not.
            counts = np.bincount(rows * size + cols, minlength=size * size)
            counts = counts.reshape(size, size).astype(np.float64)

            # Every occurrence of m has exactly one neighbour, so the row
            # sum is the number of positions holding m.
            totals = counts.sum(axis=1, keepdims=True)
            TPM[:, :, k] = np.divide(counts, totals,
                                     out=np.zeros_like(counts),
                                     where=totals != 0)

        return TPM

    def load_dataset(self, mode, file):
        if mode == 'local':
            return self._load_dataset_from_local(file)
1d19f0e7   Chunk   staged.
184
        elif mode == 'remote' or mode == 'hbase':