Commit 163652ee61cb303235c445b8a1698b6b2300d571 (1 parent: 774baa70)
Exists in
refactor
staged.
Showing 4 changed files with 29 additions and 220 deletions
Show diff stats
mdata/ILSVRC.py
... | ... | @@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) |
33 | 33 | |
34 | 34 | |
35 | 35 | class DataILSVRC(DataDumperBase): |
36 | - def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'): | |
36 | + def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train', | |
37 | + host='HPC-server'): | |
37 | 38 | DataDumperBase.__init__(self, base_dir, category) |
38 | 39 | |
39 | 40 | self.base_dir = base_dir |
... | ... | @@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase): |
45 | 46 | self.feat_dir = os.path.join(self.dst_dir, 'Feat') |
46 | 47 | self.img_dir = os.path.join(self.dst_dir, 'Img') |
47 | 48 | |
49 | + self.host = host | |
50 | + self.master = 'spark://%s:7077' % self.host | |
51 | + self.appname = 'ImageILSVRC' | |
52 | + | |
48 | 53 | self.dict_data = {} |
49 | 54 | |
50 | 55 | self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category |
... | ... | @@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase): |
316 | 321 | return self.table |
317 | 322 | |
318 | 323 | if self.connection is None: |
319 | - c = happybase.Connection('HPC-server') | |
324 | + c = happybase.Connection(host=self.host) | |
320 | 325 | self.connection = c |
321 | 326 | |
322 | 327 | tables = self.connection.tables() |
... | ... | @@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase): |
345 | 350 | table_name = self.table_name |
346 | 351 | |
347 | 352 | if self.connection is None: |
348 | - c = happybase.Connection('HPC-server') | |
353 | + c = happybase.Connection(host=self.host) | |
349 | 354 | self.connection = c |
350 | 355 | |
351 | 356 | tables = self.connection.tables() |
... | ... | @@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase): |
506 | 511 | |
507 | 512 | elif mode == "spark": # cluster |
508 | 513 | if self.sparker == None: |
509 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') | |
514 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, master=self.master) | |
510 | 515 | |
511 | 516 | result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...} |
512 | 517 | for feat, tag in result: | ... | ... |
mdata/ILSVRC_S.py
... | ... | @@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase): |
39 | 39 | copyright(c) 2015 chunkplus@gmail.com |
40 | 40 | """ |
41 | 41 | |
42 | - def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', tablename=None): | |
42 | + def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', host='HPC-server', tablename=None): | |
43 | 43 | DataDumperBase.__init__(self, base, category) |
44 | 44 | |
45 | 45 | self.base = base |
... | ... | @@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase): |
55 | 55 | else: |
56 | 56 | self.table_name = tablename |
57 | 57 | |
58 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
59 | - master='spark://HPC-server:7077') | |
58 | + self.host = host | |
59 | + self.master = 'spark://%s:7077' % self.host | |
60 | + self.appname = 'ImageILSVRC-S' | |
61 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
62 | + master=self.master) | |
60 | 63 | |
61 | 64 | self.steger = F5.F5(sample_key, 1) |
62 | 65 | |
... | ... | @@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase): |
67 | 70 | return self.table |
68 | 71 | |
69 | 72 | if self.connection is None: |
70 | - c = happybase.Connection('HPC-server') | |
73 | + c = happybase.Connection(host=self.host) | |
71 | 74 | self.connection = c |
72 | 75 | |
73 | 76 | tables = self.connection.tables() |
... | ... | @@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase): |
91 | 94 | table_name = self.table_name |
92 | 95 | |
93 | 96 | if self.connection is None: |
94 | - c = happybase.Connection('HPC-server') | |
97 | + c = happybase.Connection(host=self.host) | |
95 | 98 | self.connection = c |
96 | 99 | |
97 | 100 | tables = self.connection.tables() |
... | ... | @@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase): |
251 | 254 | |
252 | 255 | elif mode == 'spark': |
253 | 256 | if self.sparker == None: |
254 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
255 | - master='spark://HPC-server:7077') | |
257 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
258 | + master=self.master) | |
256 | 259 | |
257 | 260 | cols = [ |
258 | 261 | 'cf_pic:data', |
... | ... | @@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase): |
285 | 288 | withdata=withdata) |
286 | 289 | elif mode == 'analysis': |
287 | 290 | if self.sparker == None: |
288 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
289 | - master='spark://HPC-server:7077') | |
291 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
292 | + master=self.master) | |
290 | 293 | |
291 | 294 | cols = [ |
292 | 295 | 'cf_pic:data', |
... | ... | @@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase): |
401 | 404 | |
402 | 405 | elif mode == 'spark': |
403 | 406 | if self.sparker == None: |
404 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
405 | - master='spark://HPC-server:7077') | |
407 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
408 | + master=self.master) | |
406 | 409 | |
407 | 410 | cols = [ |
408 | 411 | 'cf_pic:data', |
... | ... | @@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase): |
496 | 499 | |
497 | 500 | elif mode == 'spark': |
498 | 501 | if self.sparker == None: |
499 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
500 | - master='spark://HPC-server:7077') | |
502 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
503 | + master=self.master) | |
501 | 504 | |
502 | 505 | cols = [ |
503 | 506 | 'cf_pic:data', |
... | ... | @@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase): |
532 | 535 | def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False): |
533 | 536 | if mode == 'analysis': |
534 | 537 | if self.sparker == None: |
535 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
536 | - master='spark://HPC-server:7077') | |
538 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
539 | + master=self.master) | |
537 | 540 | |
538 | 541 | cols = [ |
539 | 542 | 'cf_pic:data', |
... | ... | @@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase): |
618 | 621 | |
619 | 622 | elif mode == "spark" or mode == "cluster": |
620 | 623 | if self.sparker == None: |
621 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | |
622 | - master='spark://HPC-server:7077') | |
624 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | |
625 | + master=self.master) | |
623 | 626 | |
624 | 627 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False) |
625 | 628 | if not collect: | ... | ... |
msteg/steganalysis/ChiSquare.py
... | ... | @@ -1,162 +0,0 @@ |
1 | -""" | |
2 | -<p> | |
3 | -This module implements an algorithm described by Andreas Westfeld in [1,2], | |
4 | -which detects if there was data embedded into an image using JSteg. | |
5 | -It uses the property that JSteg generates pairs of values in the | |
6 | -DCT-coefficients histogram, which can be detected by a \chi^2 test. | |
7 | -</p> | |
8 | - | |
9 | -<pre> | |
10 | -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite | |
11 | -Better Steganalysis | |
12 | -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme | |
13 | -</pre> | |
14 | -""" | |
15 | - | |
16 | -from collections import defaultdict | |
17 | -import os | |
18 | - | |
19 | -from PIL import Image | |
20 | -import numpy | |
21 | -from scipy.stats import chisquare | |
22 | -import matplotlib.pyplot as plt | |
23 | -import itertools as it | |
24 | - | |
25 | -from .. import * | |
26 | - | |
27 | - | |
28 | -class ChiSquare(StegBase): | |
29 | - """ | |
30 | - The module contains only one method, <b>detect</b>. | |
31 | - """ | |
32 | - | |
33 | - def __init__(self, ui, core): | |
34 | - self.ui = ui | |
35 | - self.core = core | |
36 | - | |
37 | - def detect(self, src, tgt, tgt2): | |
38 | - """ | |
39 | - <p> | |
40 | - Detect if there was data embedded in the <i>source image</i> image with | |
41 | - JSteg algorithm. | |
42 | - </p> | |
43 | - | |
44 | - <p> | |
45 | - Parameters: | |
46 | - <ol> | |
47 | - <li><pre>Source image</pre> Image which should be tested</li> | |
48 | - <li><pre>Target image</pre> Image which displays a graphic with the | |
49 | - embedding probability</li> | |
50 | - <li><pre>2nd Target image</pre> Image which displays the embedding | |
51 | - positions in the image</li> | |
52 | - </ol> | |
53 | - </p> | |
54 | - """ | |
55 | - # --------------------------- Input ----------------------------------- | |
56 | - # If src is from the image pool, test whether the image exists encoded | |
57 | - # on the file system. Otherwise we can not read DCT-coefficients. | |
58 | - if self.core.media_manager.is_media_key(src): | |
59 | - src = self.core.media_manager.get_file(src) | |
60 | - if hasattr(src, 'tmp_file'): | |
61 | - src = src.tmp_file | |
62 | - self.ui.display_error('Trying file: %s' % src) | |
63 | - else: | |
64 | - self.ui.display_error('Can not detect anything from \ | |
65 | - decoded images.') | |
66 | - return | |
67 | - # Test whether the file exists. | |
68 | - if not os.path.isfile(src): | |
69 | - self.ui.display_error('No such file.') | |
70 | - return | |
71 | - # Test if it is a JPEG file. | |
72 | - if not self._looks_like_jpeg(src): | |
73 | - self.ui.display_error('Input is probably not a JPEG file.') | |
74 | - return | |
75 | - | |
76 | - # ---------------------------- Algorithm ------------------------------ | |
77 | - # Build DCT-histogram in steps of \approx 1% of all coefficients and | |
78 | - # calculate the p-value at each step. | |
79 | - | |
80 | - # dct_data = rw_dct.read_dct_coefficients(src) | |
81 | - dct_data = self._get_cov_data(src) | |
82 | - | |
83 | - hist = defaultdict(int) | |
84 | - cnt = 0 | |
85 | - l = len(dct_data) | |
86 | - one_p = l / 100 | |
87 | - result = [] | |
88 | - for block in dct_data: | |
89 | - # update the histogram with one block of 64 coefficients | |
90 | - for c in block: | |
91 | - hist[c] += 1 | |
92 | - | |
93 | - cnt += 1 | |
94 | - if not cnt % one_p: | |
95 | - # calculate p-value | |
96 | - self.ui.set_progress(cnt * 100 / l) | |
97 | - | |
98 | - # ignore the pair (0, 1), since JSteg does not embed data there | |
99 | - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)] | |
100 | - k = len(hl) / 2 | |
101 | - observed = [] | |
102 | - expected = [] | |
103 | - # calculate observed and expected distribution | |
104 | - for i in range(k): | |
105 | - t = hl[2 * i] + hl[2 * i + 1] | |
106 | - if t > 3: | |
107 | - observed.append(hl[2 * i]) | |
108 | - expected.append(t / 2) | |
109 | - # calculate (\chi^2, p) | |
110 | - p = chisquare(numpy.array(observed), numpy.array(expected))[1] | |
111 | - result.append(p) | |
112 | - | |
113 | - # ----------------------------- Output -------------------------------- | |
114 | - # Graph displaying the embedding probabilities in relation to the | |
115 | - # sample size. | |
116 | - figure = plt.figure() | |
117 | - plot = figure.add_subplot(111) | |
118 | - plot.grid(True) | |
119 | - plot.plot(result, color='r', linewidth=2.0) | |
120 | - plt.axis([0, 100, 0, 1.1]) | |
121 | - plt.title('Embedding probability for different percentages \ | |
122 | -of the file capacity.') | |
123 | - plt.xlabel('% of file capacity') | |
124 | - plt.ylabel('Embedding probability') | |
125 | - | |
126 | - if self.core.media_manager.is_media_key(tgt): | |
127 | - img = figure_to_pil(figure) | |
128 | - self.core.media_manager.put_media(tgt, img) | |
129 | - else: | |
130 | - plt.savefig(tgt) | |
131 | - | |
132 | - # Image displaying the length and position of the embedded data | |
133 | - # within the image | |
134 | - img2 = Image.open(src) | |
135 | - img2.convert("RGB") | |
136 | - width, height = img2.size | |
137 | - | |
138 | - for i in range(100): | |
139 | - result[i] = max(result[i:]) | |
140 | - | |
141 | - cnt2 = 0 | |
142 | - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)): | |
143 | - if not cnt2 % one_p: | |
144 | - r = result[cnt2 / one_p] | |
145 | - if r >= 0.5: | |
146 | - color = (255, int((1 - r) * 2 * 255), 0) | |
147 | - else: | |
148 | - color = (int(r * 2 * 255), 255, 0) | |
149 | - cnt2 += 1 | |
150 | - img2.paste(color, (left, top, min(left + 8, width), | |
151 | - min(top + 8, height))) | |
152 | - self.core.media_manager.put_media(tgt2, img2) | |
153 | - | |
154 | - def __str__(self): | |
155 | - return 'Chi-Square-Test' | |
156 | - | |
157 | - | |
158 | -def figure_to_pil(figure): | |
159 | - figure.canvas.draw() | |
160 | - return Image.fromstring('RGB', | |
161 | - figure.canvas.get_width_height(), | |
162 | - figure.canvas.tostring_rgb()) |
test/test_jpeg.py
1 | 1 | __author__ = 'chunk' |
2 | 2 | |
3 | 3 | import numpy as np |
4 | -import matplotlib.pyplot as plt | |
5 | -import seaborn as sns | |
6 | 4 | from .. import mjpeg |
7 | 5 | from ..mjpeg import base |
8 | 6 | from ..msteg.steganography import LSB, F3, F4, F5 |
... | ... | @@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565 |
24 | 22 | 61908L, 63014L, |
25 | 23 | 5908L, 59816L, 56765L] |
26 | 24 | |
27 | -# plt.ticklabel_format(style='sci', axis='both', scilimits=(1, 4)) | |
28 | - | |
29 | -plt.ticklabel_format(style='sci', axis='both') | |
30 | 25 | |
31 | 26 | package_dir = os.path.dirname(os.path.abspath(__file__)) |
32 | 27 | |
... | ... | @@ -182,38 +177,6 @@ def test_jpeg(): |
182 | 177 | print mjpeg.diffblocks(ima, imc) |
183 | 178 | |
184 | 179 | |
185 | -def test_hist(): | |
186 | - ima = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), key=sample_key) | |
187 | - print ima.getQuality() | |
188 | - print ima.getCapacity('All') | |
189 | - | |
190 | - capacity = ima.getCapacity() | |
191 | - print capacity | |
192 | - rate = 0.65 | |
193 | - hidden = np.random.bytes(int(int(capacity) * rate) / 8) | |
194 | - steger = F5.F5(sample_key, 1) | |
195 | - steger2 = F4.F4(key=None) | |
196 | - steger3 = LSB.LSB(key=None) | |
197 | - embed_rate = steger3.embed_raw_data(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), | |
198 | - hidden, | |
199 | - os.path.join(package_dir, "../res/high/pic3_dest.jpg"), | |
200 | - frommem=True) | |
201 | - # | |
202 | - print embed_rate | |
203 | - | |
204 | - imb = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_dest.jpg"), key=sample_key) | |
205 | - print imb.getQuality() | |
206 | - print imb.getCapacity('All') | |
207 | - | |
208 | - A = imb.rawsignal().tolist() | |
209 | - E = [i for i in range(-8, 10)] | |
210 | - plt.hist(A, E, histtype='bar', rwidth=0.8, align='left') | |
211 | - plt.xlabel("JPEG coefficients after quantisation") | |
212 | - plt.ylabel("Frequency") | |
213 | - plt.xticks([i for i in range(-8, 9)]) | |
214 | - plt.ylim(ymax=300000) | |
215 | - plt.show() | |
216 | - | |
217 | 180 | |
218 | 181 | if __name__ == '__main__': |
219 | 182 | # timer.mark() | ... | ... |