Blame view

msteg/steganalysis/ChiSquare.py 5.63 KB
eb820443   Chunk   staged.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
<p>
This module implements an algorithm described by Andreas Westfeld in [1,2],
which detects if there was data embedded into an image using JSteg.
It uses the property that JSteg generates pairs of values in the
DCT-coefficients histogram, which can be detected by a \chi^2 test.
</p>

<pre>
[1]: Andreas Westfeld, F5 - A Steganographic Algorithm: High Capacity Despite
Better Steganalysis
[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
</pre>
"""

from collections import defaultdict
import os

45a82355   Chunk   staged.
19
from PIL import Image
eb820443   Chunk   staged.
20
21
22
23
import numpy
from scipy.stats import chisquare
import matplotlib.pyplot as plt
import itertools as it
eb820443   Chunk   staged.
24
from msteg.StegBase import StegBase
b69b6985   Chunk   py module refract...
25

eb820443   Chunk   staged.
26
from msteg.StegBase import *
26e2fe9f   Chunk   MPB steganalysis ...
27
28


eb820443   Chunk   staged.
29
30
31
32
33
34
35
36
class ChiSquare(StegBase):
    """
    The module contains only one method, <b>detect</b>.
    """
    # NOTE(review): the docstrings in this class are HTML fragments and are
    # kept verbatim; presumably the UI renders them -- confirm before editing.

    def __init__(self, ui, core):
        # ui:   front-end object; detect() reports errors and progress to it.
        # core: application core; detect() uses its ``media_manager``.
        self.ui = ui
        self.core = core

    @describe_and_annotate((None, None),
                           ("Source image", ImagePath),
                           ("Target image", NewFilePath),
                           ("2nd Target image", NewFilePath))
    def detect(self, src, tgt, tgt2):
        """
        <p>
        Detect if there was data embedded in the <i>source image</i> image with
        JSteg algorithm.
        </p>

        <p>
        Parameters:
        <ol>
        <li><pre>Source image</pre> Image which should be tested</li>
        <li><pre>Target image</pre> Image which displays a graphic with the
        embedding probability</li>
        <li><pre>2nd Target image</pre> Image which displays the embedding
        positions in the image</li>
        </ol>
        </p>
        """
        # Overview:
        #   1. Resolve ``src`` to a JPEG file on disk (media keys are looked
        #      up through the media manager).
        #   2. Walk the DCT blocks, growing a coefficient histogram, and run
        #      a chi-square test on the value pairs after every sample step.
        #   3. Plot the p-values into ``tgt`` (media key or file path).
        #   4. Paint the 8x8 grid of the source image with a colour derived
        #      from the p-values and store it under ``tgt2``.
        # Returns None in every case; problems are reported through
        # ``self.ui.display_error``.
        media = self.core.media_manager

        # --------------------------- Input -----------------------------------
        # If src is from the image pool, test whether the image exists encoded
        # on the file system. Otherwise we can not read DCT-coefficients.
        if media.is_media_key(src):
            src = media.get_file(src)
            if hasattr(src, 'tmp_file'):
                src = src.tmp_file
                # NOTE(review): this informational message goes through
                # display_error; left unchanged because no other UI call is
                # visible from here.
                self.ui.display_error('Trying file: %s' % src)
            else:
                # Adjacent literals instead of a backslash continuation, so
                # the message no longer contains the source indentation.
                self.ui.display_error('Can not detect anything from '
                                      'decoded images.')
                return
        # Test whether the file exists.
        if not os.path.isfile(src):
            self.ui.display_error('No such file.')
            return
        # Test if it is a JPEG file.
        if not self._looks_like_jpeg(src):
            self.ui.display_error('Input is probably not a JPEG file.')
            return

        # ---------------------------- Algorithm ------------------------------
        # Build DCT-histogram in steps of \approx 1% of all coefficients and
        # calculate the p-value at each step.
        dct_data = rw_dct.read_dct_coefficients(src)
        total_blocks = len(dct_data)
        if total_blocks == 0:
            # Without any block there is nothing to test or to draw.
            self.ui.display_error('No DCT coefficients found.')
            return

        # Blocks per sample step. Floor division keeps this an integer on
        # Python 2 and 3; the lower bound of 1 avoids a modulo-by-zero for
        # inputs with fewer than 100 blocks.
        one_p = max(total_blocks // 100, 1)

        # Coefficient values grouped into consecutive pairs. The pair (0, 1)
        # is ignored, since JSteg does not embed data there. The value list
        # has odd length, so the last value (2048) stays unpaired.
        values = [v for v in range(-2048, 2049) if v not in (0, 1)]
        pairs = list(zip(values[0::2], values[1::2]))

        hist = defaultdict(int)
        result = []
        for cnt, block in enumerate(dct_data, 1):
            # update the histogram with one block of 64 coefficients
            for c in block:
                hist[c] += 1

            if cnt % one_p:
                continue

            self.ui.set_progress(cnt * 100 // total_blocks)

            # calculate observed and expected distribution; sparsely
            # populated pairs (total of 3 or less) are left out
            observed = []
            expected = []
            for low, high in pairs:
                pair_total = hist[low] + hist[high]
                if pair_total > 3:
                    observed.append(hist[low])
                    # Arithmetic mean of the pair. True division, so odd
                    # totals are not truncated under Python 2.
                    expected.append(pair_total / 2.0)
            # calculate (\chi^2, p)
            p = chisquare(numpy.array(observed), numpy.array(expected))[1]
            result.append(p)

        # ----------------------------- Output --------------------------------
        # Graph displaying the embedding probabilities in relation to the
        # sample size.
        figure = plt.figure()
        try:
            plot = figure.add_subplot(111)
            plot.grid(True)
            plot.plot(result, color='r', linewidth=2.0)
            plot.axis([0, 100, 0, 1.1])
            plot.set_title('Embedding probability for different percentages '
                           'of the file capacity.')
            plot.set_xlabel('% of file capacity')
            plot.set_ylabel('Embedding probability')

            if media.is_media_key(tgt):
                media.put_media(tgt, figure_to_pil(figure))
            else:
                figure.savefig(tgt)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(figure)

        # Image displaying the length and position of the embedded data
        # within the image
        # convert() returns a new image, so its result has to be kept.
        img2 = Image.open(src).convert("RGB")
        width, height = img2.size

        # Replace each of the first 100 samples by the maximum of itself and
        # all later samples (done after plotting, so the graph shows the raw
        # values). Bounded by len(result) for inputs with under 100 samples.
        for i in range(min(100, len(result))):
            result[i] = max(result[i:])

        last = len(result) - 1
        grid = it.product(range(0, height, 8), range(0, width, 8))
        for cnt2, (top, left) in enumerate(grid):
            if not cnt2 % one_p:
                # Clamp the index in case the image grid holds more cells
                # than there are samples.
                r = result[min(cnt2 // one_p, last)]
                # r >= 0.5: from yellow (r = 0.5) to red (r = 1);
                # r <  0.5: from green (r = 0) towards yellow.
                if r >= 0.5:
                    color = (255, int((1 - r) * 2 * 255), 0)
                else:
                    color = (int(r * 2 * 255), 255, 0)
            img2.paste(color, (left, top, min(left + 8, width),
                               min(top + 8, height)))
        media.put_media(tgt2, img2)

    def __str__(self):
        # String representation of this module.
        return 'Chi-Square-Test'


def figure_to_pil(figure):
    figure.canvas.draw()
    return Image.fromstring('RGB',