# ChiSquare.py (5.41 KB)
"""
<p>
This module implements an algorithm described by Andreas Westfeld in [1,2],
which detects if there was data embedded into an image using JSteg.
It uses the property that JSteg generates pairs of values in the
DCT-coefficients histogram, which can be detected by a \\chi^2 test.
</p>

<pre>
[1]: Andreas Westfeld, F5 - A Steganographic Algorithm: High Capacity Despite
Better Steganalysis
[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
</pre>
"""

from collections import defaultdict
import itertools as it
import os

from PIL import Image
import matplotlib.pyplot as plt
import numpy
from scipy.stats import chi2
from scipy.stats import chisquare

from msteg import *


class ChiSquare(StegBase):
    """
    The module contains only one method, <b>detect</b>.
    """

    def __init__(self, ui, core):
        # Handles used by detect(): `ui` for messages/progress, `core` for
        # access to the media manager.
        self.ui = ui
        self.core = core

    def detect(self, src, tgt, tgt2):
        """
        <p>
        Detect if there was data embedded in the <i>source image</i> with
        JSteg algorithm.
        </p>

        <p>
        Parameters:
        <ol>
        <li><pre>Source image</pre> Image which should be tested</li>
        <li><pre>Target image</pre> Image which displays a graphic with the
        embedding probability</li>
        <li><pre>2nd Target image</pre> Image which displays the embedding
        positions in the image</li>
        </ol>
        </p>
        """
        # --------------------------- Input -----------------------------------
        # If src is from the image pool, test whether the image exists encoded
        # on the file system. Otherwise we can not read DCT-coefficients.
        if self.core.media_manager.is_media_key(src):
            src = self.core.media_manager.get_file(src)
            if hasattr(src, 'tmp_file'):
                src = src.tmp_file
                self.ui.display_error('Trying file: %s' % src)
            else:
                # The message is written without a backslash continuation so
                # that no run of indentation spaces ends up in the text.
                self.ui.display_error(
                    'Can not detect anything from decoded images.')
                return
        # Test whether the file exists.
        if not os.path.isfile(src):
            self.ui.display_error('No such file.')
            return
        # Test if it is a JPEG file.
        if not self._looks_like_jpeg(src):
            self.ui.display_error('Input is probably not a JPEG file.')
            return

        # ---------------------------- Algorithm ------------------------------
        # Build DCT-histogram in steps of \approx 1% of all coefficients and
        # calculate the p-value at each step.
        dct_data = self._get_cov_data(src)
        total = len(dct_data)
        if total == 0:
            self.ui.display_error('No DCT coefficients found.')
            return
        # Floor division keeps the step an integer on Python 2 and 3; the
        # lower bound of 1 avoids a modulo-by-zero for fewer than 100 blocks.
        step = max(1, total // 100)

        hist = defaultdict(int)
        result = []
        for cnt, block in enumerate(dct_data, 1):
            # update the histogram with one block of 64 coefficients
            for c in block:
                hist[c] += 1

            if cnt % step == 0:
                self.ui.set_progress(cnt * 100 // total)
                result.append(self._embedding_probability(hist))

        # ----------------------------- Output --------------------------------
        # Graph displaying the embedding probabilities in relation to the
        # sample size.
        figure = plt.figure()
        try:
            plot = figure.add_subplot(111)
            plot.grid(True)
            plot.plot(result, color='r', linewidth=2.0)
            plot.axis([0, 100, 0, 1.1])
            plot.set_title('Embedding probability for different percentages '
                           'of the file capacity.')
            plot.set_xlabel('% of file capacity')
            plot.set_ylabel('Embedding probability')

            if self.core.media_manager.is_media_key(tgt):
                img = figure_to_pil(figure)
                self.core.media_manager.put_media(tgt, img)
            else:
                figure.savefig(tgt)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(figure)

        # Image displaying the length and position of the embedded data
        # within the image
        # convert() returns a new image, so its result has to be kept for the
        # RGB colour tuples pasted below to match the image mode.
        img2 = Image.open(src).convert("RGB")
        width, height = img2.size

        # Replace every sample by the maximum of itself and all later
        # samples (running maximum from the end), for however many samples
        # were actually collected.
        for i in range(len(result) - 2, -1, -1):
            if result[i + 1] > result[i]:
                result[i] = result[i + 1]

        last = len(result) - 1
        color = None
        blocks = it.product(range(0, height, 8), range(0, width, 8))
        for cnt2, (top, left) in enumerate(blocks):
            if cnt2 % step == 0:
                # Clamp the index: the number of 8x8 tiles in the picture is
                # not guaranteed to match the number of sampled blocks.
                color = self._probability_color(
                    result[min(cnt2 // step, last)])
            img2.paste(color, (left, top, min(left + 8, width),
                               min(top + 8, height)))
        self.core.media_manager.put_media(tgt2, img2)

    @staticmethod
    def _embedding_probability(hist):
        """Return the chi-square p-value for the pairs-of-values histogram.

        `hist` maps a DCT coefficient value to its number of occurrences.
        """
        # ignore the pair (0, 1), since JSteg does not embed data there
        hl = [hist.get(i, 0) for i in range(-2048, 2049) if i not in (0, 1)]
        observed = []
        expected = []
        # calculate observed and expected distribution; zip() drops the
        # unpaired last value, like the original index loop did
        for even, odd in zip(hl[0::2], hl[1::2]):
            t = even + odd
            if t > 3:
                observed.append(even)
                # true division: the expected count is the exact pair mean
                expected.append(t / 2.0)
        if len(observed) < 2:
            # Fewer than two categories leave no degrees of freedom; report
            # "no evidence of embedding" instead of NaN.
            return 0.0
        observed = numpy.array(observed, dtype=float)
        expected = numpy.array(expected, dtype=float)
        # calculate (\chi^2, p). Done by hand with k - 1 degrees of freedom
        # because recent SciPy releases of chisquare() reject inputs whose
        # observed and expected sums differ, which is the normal case here.
        statistic = ((observed - expected) ** 2 / expected).sum()
        return float(chi2.sf(statistic, len(observed) - 1))

    @staticmethod
    def _probability_color(r):
        """Map a probability in [0, 1] to an RGB tuple (green-yellow-red)."""
        if r >= 0.5:
            return (255, int((1 - r) * 2 * 255), 0)
        return (int(r * 2 * 255), 255, 0)

    def __str__(self):
        return 'Chi-Square-Test'


def figure_to_pil(figure):
    """Render a matplotlib figure and return it as an RGB PIL image."""
    canvas = figure.canvas
    canvas.draw()
    size = canvas.get_width_height()
    # Pillow replaced Image.fromstring() with Image.frombytes(); prefer the
    # new name and fall back to the old one for legacy PIL installations.
    from_bytes = getattr(Image, 'frombytes', None) or Image.fromstring
    if hasattr(canvas, 'tostring_rgb'):
        return from_bytes('RGB', size, canvas.tostring_rgb())
    # Recent Matplotlib versions no longer provide tostring_rgb(); read the
    # RGBA buffer instead and drop the alpha channel.
    rgba = from_bytes('RGBA', size, bytes(canvas.buffer_rgba()))
    return rgba.convert('RGB')