Commit 163652ee61cb303235c445b8a1698b6b2300d571

Authored by Chunk
1 parent 774baa70
Exists in refactor

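staged: make the HBase/Spark host configurable in DataILSVRC and DataILSVRC_S (new `host` parameter, default 'HPC-server'), delete msteg/steganalysis/ChiSquare.py, and remove the matplotlib/seaborn histogram test from test/test_jpeg.py.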

mdata/ILSVRC.py
... ... @@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__))
33 33  
34 34  
35 35 class DataILSVRC(DataDumperBase):
36   - def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'):
  36 + def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train',
  37 + host='HPC-server'):
37 38 DataDumperBase.__init__(self, base_dir, category)
38 39  
39 40 self.base_dir = base_dir
... ... @@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase):
45 46 self.feat_dir = os.path.join(self.dst_dir, 'Feat')
46 47 self.img_dir = os.path.join(self.dst_dir, 'Img')
47 48  
  49 + self.host = host
  50 + self.master = 'spark://%s:7077' % self.host
  51 + self.appname = 'ImageILSVRC'
  52 +
48 53 self.dict_data = {}
49 54  
50 55 self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category
... ... @@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase):
316 321 return self.table
317 322  
318 323 if self.connection is None:
319   - c = happybase.Connection('HPC-server')
  324 + c = happybase.Connection(host=self.host)
320 325 self.connection = c
321 326  
322 327 tables = self.connection.tables()
... ... @@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase):
345 350 table_name = self.table_name
346 351  
347 352 if self.connection is None:
348   - c = happybase.Connection('HPC-server')
  353 + c = happybase.Connection(host=self.host)
349 354 self.connection = c
350 355  
351 356 tables = self.connection.tables()
... ... @@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase):
506 511  
507 512 elif mode == "spark": # cluster
508 513 if self.sparker == None:
509   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
  514 + self.sparker = SC.Sparker(host=self.host, appname=self.appname, master=self.master)
510 515  
511 516 result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...}
512 517 for feat, tag in result:
... ...
mdata/ILSVRC_S.py
... ... @@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase):
39 39 copyright(c) 2015 chunkplus@gmail.com
40 40 """
41 41  
42   - def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', tablename=None):
  42 + def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', host='HPC-server', tablename=None):
43 43 DataDumperBase.__init__(self, base, category)
44 44  
45 45 self.base = base
... ... @@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase):
55 55 else:
56 56 self.table_name = tablename
57 57  
58   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
59   - master='spark://HPC-server:7077')
  58 + self.host = host
  59 + self.master = 'spark://%s:7077' % self.host
  60 + self.appname = 'ImageILSVRC-S'
  61 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  62 + master=self.master)
60 63  
61 64 self.steger = F5.F5(sample_key, 1)
62 65  
... ... @@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase):
67 70 return self.table
68 71  
69 72 if self.connection is None:
70   - c = happybase.Connection('HPC-server')
  73 + c = happybase.Connection(host=self.host)
71 74 self.connection = c
72 75  
73 76 tables = self.connection.tables()
... ... @@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase):
91 94 table_name = self.table_name
92 95  
93 96 if self.connection is None:
94   - c = happybase.Connection('HPC-server')
  97 + c = happybase.Connection(host=self.host)
95 98 self.connection = c
96 99  
97 100 tables = self.connection.tables()
... ... @@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase):
251 254  
252 255 elif mode == 'spark':
253 256 if self.sparker == None:
254   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
255   - master='spark://HPC-server:7077')
  257 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  258 + master=self.master)
256 259  
257 260 cols = [
258 261 'cf_pic:data',
... ... @@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase):
285 288 withdata=withdata)
286 289 elif mode == 'analysis':
287 290 if self.sparker == None:
288   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
289   - master='spark://HPC-server:7077')
  291 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  292 + master=self.master)
290 293  
291 294 cols = [
292 295 'cf_pic:data',
... ... @@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase):
401 404  
402 405 elif mode == 'spark':
403 406 if self.sparker == None:
404   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
405   - master='spark://HPC-server:7077')
  407 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  408 + master=self.master)
406 409  
407 410 cols = [
408 411 'cf_pic:data',
... ... @@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase):
496 499  
497 500 elif mode == 'spark':
498 501 if self.sparker == None:
499   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
500   - master='spark://HPC-server:7077')
  502 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  503 + master=self.master)
501 504  
502 505 cols = [
503 506 'cf_pic:data',
... ... @@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase):
532 535 def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False):
533 536 if mode == 'analysis':
534 537 if self.sparker == None:
535   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
536   - master='spark://HPC-server:7077')
  538 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  539 + master=self.master)
537 540  
538 541 cols = [
539 542 'cf_pic:data',
... ... @@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase):
618 621  
619 622 elif mode == "spark" or mode == "cluster":
620 623 if self.sparker == None:
621   - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
622   - master='spark://HPC-server:7077')
  624 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  625 + master=self.master)
623 626  
624 627 rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False)
625 628 if not collect:
... ...
msteg/steganalysis/ChiSquare.py
... ... @@ -1,162 +0,0 @@
1   -"""
2   -<p>
3   -This module implements an algorithm described by Andreas Westfeld in [1,2],
4   -which detects if there was data embedded into an image using JSteg.
5   -It uses the property that JSteg generates pairs of values in the
6   -DCT-coefficients histogram, which can be detected by a \chi^2 test.
7   -</p>
8   -
9   -<pre>
10   -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
11   -Better Steganalysis
12   -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
13   -</pre>
14   -"""
15   -
16   -from collections import defaultdict
17   -import os
18   -
19   -from PIL import Image
20   -import numpy
21   -from scipy.stats import chisquare
22   -import matplotlib.pyplot as plt
23   -import itertools as it
24   -
25   -from .. import *
26   -
27   -
28   -class ChiSquare(StegBase):
29   - """
30   - The module contains only one method, <b>detect</b>.
31   - """
32   -
33   - def __init__(self, ui, core):
34   - self.ui = ui
35   - self.core = core
36   -
37   - def detect(self, src, tgt, tgt2):
38   - """
39   - <p>
40   - Detect if there was data embedded in the <i>source image</i> image with
41   - JSteg algorithm.
42   - </p>
43   -
44   - <p>
45   - Parameters:
46   - <ol>
47   - <li><pre>Source image</pre> Image which should be tested</li>
48   - <li><pre>Target image</pre> Image which displays a graphic with the
49   - embedding probability</li>
50   - <li><pre>2nd Target image</pre> Image which displays the embedding
51   - positions in the image</li>
52   - </ol>
53   - </p>
54   - """
55   - # --------------------------- Input -----------------------------------
56   - # If src is from the image pool, test whether the image exists encoded
57   - # on the file system. Otherwise we can not read DCT-coefficients.
58   - if self.core.media_manager.is_media_key(src):
59   - src = self.core.media_manager.get_file(src)
60   - if hasattr(src, 'tmp_file'):
61   - src = src.tmp_file
62   - self.ui.display_error('Trying file: %s' % src)
63   - else:
64   - self.ui.display_error('Can not detect anything from \
65   - decoded images.')
66   - return
67   - # Test whether the file exists.
68   - if not os.path.isfile(src):
69   - self.ui.display_error('No such file.')
70   - return
71   - # Test if it is a JPEG file.
72   - if not self._looks_like_jpeg(src):
73   - self.ui.display_error('Input is probably not a JPEG file.')
74   - return
75   -
76   - # ---------------------------- Algorithm ------------------------------
77   - # Build DCT-histogram in steps of \approx 1% of all coefficients and
78   - # calculate the p-value at each step.
79   -
80   - # dct_data = rw_dct.read_dct_coefficients(src)
81   - dct_data = self._get_cov_data(src)
82   -
83   - hist = defaultdict(int)
84   - cnt = 0
85   - l = len(dct_data)
86   - one_p = l / 100
87   - result = []
88   - for block in dct_data:
89   - # update the histogram with one block of 64 coefficients
90   - for c in block:
91   - hist[c] += 1
92   -
93   - cnt += 1
94   - if not cnt % one_p:
95   - # calculate p-value
96   - self.ui.set_progress(cnt * 100 / l)
97   -
98   - # ignore the pair (0, 1), since JSteg does not embed data there
99   - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]
100   - k = len(hl) / 2
101   - observed = []
102   - expected = []
103   - # calculate observed and expected distribution
104   - for i in range(k):
105   - t = hl[2 * i] + hl[2 * i + 1]
106   - if t > 3:
107   - observed.append(hl[2 * i])
108   - expected.append(t / 2)
109   - # calculate (\chi^2, p)
110   - p = chisquare(numpy.array(observed), numpy.array(expected))[1]
111   - result.append(p)
112   -
113   - # ----------------------------- Output --------------------------------
114   - # Graph displaying the embedding probabilities in relation to the
115   - # sample size.
116   - figure = plt.figure()
117   - plot = figure.add_subplot(111)
118   - plot.grid(True)
119   - plot.plot(result, color='r', linewidth=2.0)
120   - plt.axis([0, 100, 0, 1.1])
121   - plt.title('Embedding probability for different percentages \
122   -of the file capacity.')
123   - plt.xlabel('% of file capacity')
124   - plt.ylabel('Embedding probability')
125   -
126   - if self.core.media_manager.is_media_key(tgt):
127   - img = figure_to_pil(figure)
128   - self.core.media_manager.put_media(tgt, img)
129   - else:
130   - plt.savefig(tgt)
131   -
132   - # Image displaying the length and position of the embedded data
133   - # within the image
134   - img2 = Image.open(src)
135   - img2.convert("RGB")
136   - width, height = img2.size
137   -
138   - for i in range(100):
139   - result[i] = max(result[i:])
140   -
141   - cnt2 = 0
142   - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):
143   - if not cnt2 % one_p:
144   - r = result[cnt2 / one_p]
145   - if r >= 0.5:
146   - color = (255, int((1 - r) * 2 * 255), 0)
147   - else:
148   - color = (int(r * 2 * 255), 255, 0)
149   - cnt2 += 1
150   - img2.paste(color, (left, top, min(left + 8, width),
151   - min(top + 8, height)))
152   - self.core.media_manager.put_media(tgt2, img2)
153   -
154   - def __str__(self):
155   - return 'Chi-Square-Test'
156   -
157   -
158   -def figure_to_pil(figure):
159   - figure.canvas.draw()
160   - return Image.fromstring('RGB',
161   - figure.canvas.get_width_height(),
162   - figure.canvas.tostring_rgb())
test/test_jpeg.py
1 1 __author__ = 'chunk'
2 2  
3 3 import numpy as np
4   -import matplotlib.pyplot as plt
5   -import seaborn as sns
6 4 from .. import mjpeg
7 5 from ..mjpeg import base
8 6 from ..msteg.steganography import LSB, F3, F4, F5
... ... @@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565
24 22 61908L, 63014L,
25 23 5908L, 59816L, 56765L]
26 24  
27   -# plt.ticklabel_format(style='sci', axis='both', scilimits=(1, 4))
28   -
29   -plt.ticklabel_format(style='sci', axis='both')
30 25  
31 26 package_dir = os.path.dirname(os.path.abspath(__file__))
32 27  
... ... @@ -182,38 +177,6 @@ def test_jpeg():
182 177 print mjpeg.diffblocks(ima, imc)
183 178  
184 179  
185   -def test_hist():
186   - ima = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), key=sample_key)
187   - print ima.getQuality()
188   - print ima.getCapacity('All')
189   -
190   - capacity = ima.getCapacity()
191   - print capacity
192   - rate = 0.65
193   - hidden = np.random.bytes(int(int(capacity) * rate) / 8)
194   - steger = F5.F5(sample_key, 1)
195   - steger2 = F4.F4(key=None)
196   - steger3 = LSB.LSB(key=None)
197   - embed_rate = steger3.embed_raw_data(os.path.join(package_dir, "../res/high/pic3_orig.jpg"),
198   - hidden,
199   - os.path.join(package_dir, "../res/high/pic3_dest.jpg"),
200   - frommem=True)
201   - #
202   - print embed_rate
203   -
204   - imb = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_dest.jpg"), key=sample_key)
205   - print imb.getQuality()
206   - print imb.getCapacity('All')
207   -
208   - A = imb.rawsignal().tolist()
209   - E = [i for i in range(-8, 10)]
210   - plt.hist(A, E, histtype='bar', rwidth=0.8, align='left')
211   - plt.xlabel("JPEG coefficients after quantisation")
212   - plt.ylabel("Frequency")
213   - plt.xticks([i for i in range(-8, 9)])
214   - plt.ylim(ymax=300000)
215   - plt.show()
216   -
217 180  
218 181 if __name__ == '__main__':
219 182 # timer.mark()
... ...