Commit 163652ee61cb303235c445b8a1698b6b2300d571
1 parent: 774baa70
Exists in: refactor
staged.
Showing 4 changed files with 29 additions and 220 deletions
Show diff stats
mdata/ILSVRC.py
@@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) | @@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) | ||
33 | 33 | ||
34 | 34 | ||
35 | class DataILSVRC(DataDumperBase): | 35 | class DataILSVRC(DataDumperBase): |
36 | - def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'): | 36 | + def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train', |
37 | + host='HPC-server'): | ||
37 | DataDumperBase.__init__(self, base_dir, category) | 38 | DataDumperBase.__init__(self, base_dir, category) |
38 | 39 | ||
39 | self.base_dir = base_dir | 40 | self.base_dir = base_dir |
@@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase): | @@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase): | ||
45 | self.feat_dir = os.path.join(self.dst_dir, 'Feat') | 46 | self.feat_dir = os.path.join(self.dst_dir, 'Feat') |
46 | self.img_dir = os.path.join(self.dst_dir, 'Img') | 47 | self.img_dir = os.path.join(self.dst_dir, 'Img') |
47 | 48 | ||
49 | + self.host = host | ||
50 | + self.master = 'spark://%s:7077' % self.host | ||
51 | + self.appname = 'ImageILSVRC' | ||
52 | + | ||
48 | self.dict_data = {} | 53 | self.dict_data = {} |
49 | 54 | ||
50 | self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category | 55 | self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category |
@@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase): | @@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase): | ||
316 | return self.table | 321 | return self.table |
317 | 322 | ||
318 | if self.connection is None: | 323 | if self.connection is None: |
319 | - c = happybase.Connection('HPC-server') | 324 | + c = happybase.Connection(host=self.host) |
320 | self.connection = c | 325 | self.connection = c |
321 | 326 | ||
322 | tables = self.connection.tables() | 327 | tables = self.connection.tables() |
@@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase): | @@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase): | ||
345 | table_name = self.table_name | 350 | table_name = self.table_name |
346 | 351 | ||
347 | if self.connection is None: | 352 | if self.connection is None: |
348 | - c = happybase.Connection('HPC-server') | 353 | + c = happybase.Connection(host=self.host) |
349 | self.connection = c | 354 | self.connection = c |
350 | 355 | ||
351 | tables = self.connection.tables() | 356 | tables = self.connection.tables() |
@@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase): | @@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase): | ||
506 | 511 | ||
507 | elif mode == "spark": # cluster | 512 | elif mode == "spark": # cluster |
508 | if self.sparker == None: | 513 | if self.sparker == None: |
509 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') | 514 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, master=self.master) |
510 | 515 | ||
511 | result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...} | 516 | result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...} |
512 | for feat, tag in result: | 517 | for feat, tag in result: |
mdata/ILSVRC_S.py
@@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase): | ||
39 | copyright(c) 2015 chunkplus@gmail.com | 39 | copyright(c) 2015 chunkplus@gmail.com |
40 | """ | 40 | """ |
41 | 41 | ||
42 | - def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', tablename=None): | 42 | + def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', host='HPC-server', tablename=None): |
43 | DataDumperBase.__init__(self, base, category) | 43 | DataDumperBase.__init__(self, base, category) |
44 | 44 | ||
45 | self.base = base | 45 | self.base = base |
@@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase): | @@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase): | ||
55 | else: | 55 | else: |
56 | self.table_name = tablename | 56 | self.table_name = tablename |
57 | 57 | ||
58 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
59 | - master='spark://HPC-server:7077') | 58 | + self.host = host |
59 | + self.master = 'spark://%s:7077' % self.host | ||
60 | + self.appname = 'ImageILSVRC-S' | ||
61 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, | ||
62 | + master=self.master) | ||
60 | 63 | ||
61 | self.steger = F5.F5(sample_key, 1) | 64 | self.steger = F5.F5(sample_key, 1) |
62 | 65 | ||
@@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase): | ||
67 | return self.table | 70 | return self.table |
68 | 71 | ||
69 | if self.connection is None: | 72 | if self.connection is None: |
70 | - c = happybase.Connection('HPC-server') | 73 | + c = happybase.Connection(host=self.host) |
71 | self.connection = c | 74 | self.connection = c |
72 | 75 | ||
73 | tables = self.connection.tables() | 76 | tables = self.connection.tables() |
@@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase): | ||
91 | table_name = self.table_name | 94 | table_name = self.table_name |
92 | 95 | ||
93 | if self.connection is None: | 96 | if self.connection is None: |
94 | - c = happybase.Connection('HPC-server') | 97 | + c = happybase.Connection(host=self.host) |
95 | self.connection = c | 98 | self.connection = c |
96 | 99 | ||
97 | tables = self.connection.tables() | 100 | tables = self.connection.tables() |
@@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase): | ||
251 | 254 | ||
252 | elif mode == 'spark': | 255 | elif mode == 'spark': |
253 | if self.sparker == None: | 256 | if self.sparker == None: |
254 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
255 | - master='spark://HPC-server:7077') | 257 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
258 | + master=self.master) | ||
256 | 259 | ||
257 | cols = [ | 260 | cols = [ |
258 | 'cf_pic:data', | 261 | 'cf_pic:data', |
@@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase): | ||
285 | withdata=withdata) | 288 | withdata=withdata) |
286 | elif mode == 'analysis': | 289 | elif mode == 'analysis': |
287 | if self.sparker == None: | 290 | if self.sparker == None: |
288 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
289 | - master='spark://HPC-server:7077') | 291 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
292 | + master=self.master) | ||
290 | 293 | ||
291 | cols = [ | 294 | cols = [ |
292 | 'cf_pic:data', | 295 | 'cf_pic:data', |
@@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase): | ||
401 | 404 | ||
402 | elif mode == 'spark': | 405 | elif mode == 'spark': |
403 | if self.sparker == None: | 406 | if self.sparker == None: |
404 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
405 | - master='spark://HPC-server:7077') | 407 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
408 | + master=self.master) | ||
406 | 409 | ||
407 | cols = [ | 410 | cols = [ |
408 | 'cf_pic:data', | 411 | 'cf_pic:data', |
@@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase): | ||
496 | 499 | ||
497 | elif mode == 'spark': | 500 | elif mode == 'spark': |
498 | if self.sparker == None: | 501 | if self.sparker == None: |
499 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
500 | - master='spark://HPC-server:7077') | 502 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
503 | + master=self.master) | ||
501 | 504 | ||
502 | cols = [ | 505 | cols = [ |
503 | 'cf_pic:data', | 506 | 'cf_pic:data', |
@@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase): | ||
532 | def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False): | 535 | def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False): |
533 | if mode == 'analysis': | 536 | if mode == 'analysis': |
534 | if self.sparker == None: | 537 | if self.sparker == None: |
535 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
536 | - master='spark://HPC-server:7077') | 538 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
539 | + master=self.master) | ||
537 | 540 | ||
538 | cols = [ | 541 | cols = [ |
539 | 'cf_pic:data', | 542 | 'cf_pic:data', |
@@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase): | @@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase): | ||
618 | 621 | ||
619 | elif mode == "spark" or mode == "cluster": | 622 | elif mode == "spark" or mode == "cluster": |
620 | if self.sparker == None: | 623 | if self.sparker == None: |
621 | - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | ||
622 | - master='spark://HPC-server:7077') | 624 | + self.sparker = SC.Sparker(host=self.host, appname=self.appname, |
625 | + master=self.master) | ||
623 | 626 | ||
624 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False) | 627 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False) |
625 | if not collect: | 628 | if not collect: |
msteg/steganalysis/ChiSquare.py
@@ -1,162 +0,0 @@ | @@ -1,162 +0,0 @@ | ||
1 | -""" | ||
2 | -<p> | ||
3 | -This module implements an algorithm described by Andreas Westfeld in [1,2], | ||
4 | -which detects if there was data embedded into an image using JSteg. | ||
5 | -It uses the property that JSteg generates pairs of values in the | ||
6 | -DCT-coefficients histogram, which can be detected by a \chi^2 test. | ||
7 | -</p> | ||
8 | - | ||
9 | -<pre> | ||
10 | -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite | ||
11 | -Better Steganalysis | ||
12 | -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme | ||
13 | -</pre> | ||
14 | -""" | ||
15 | - | ||
16 | -from collections import defaultdict | ||
17 | -import os | ||
18 | - | ||
19 | -from PIL import Image | ||
20 | -import numpy | ||
21 | -from scipy.stats import chisquare | ||
22 | -import matplotlib.pyplot as plt | ||
23 | -import itertools as it | ||
24 | - | ||
25 | -from .. import * | ||
26 | - | ||
27 | - | ||
28 | -class ChiSquare(StegBase): | ||
29 | - """ | ||
30 | - The module contains only one method, <b>detect</b>. | ||
31 | - """ | ||
32 | - | ||
33 | - def __init__(self, ui, core): | ||
34 | - self.ui = ui | ||
35 | - self.core = core | ||
36 | - | ||
37 | - def detect(self, src, tgt, tgt2): | ||
38 | - """ | ||
39 | - <p> | ||
40 | - Detect if there was data embedded in the <i>source image</i> image with | ||
41 | - JSteg algorithm. | ||
42 | - </p> | ||
43 | - | ||
44 | - <p> | ||
45 | - Parameters: | ||
46 | - <ol> | ||
47 | - <li><pre>Source image</pre> Image which should be tested</li> | ||
48 | - <li><pre>Target image</pre> Image which displays a graphic with the | ||
49 | - embedding probability</li> | ||
50 | - <li><pre>2nd Target image</pre> Image which displays the embedding | ||
51 | - positions in the image</li> | ||
52 | - </ol> | ||
53 | - </p> | ||
54 | - """ | ||
55 | - # --------------------------- Input ----------------------------------- | ||
56 | - # If src is from the image pool, test whether the image exists encoded | ||
57 | - # on the file system. Otherwise we can not read DCT-coefficients. | ||
58 | - if self.core.media_manager.is_media_key(src): | ||
59 | - src = self.core.media_manager.get_file(src) | ||
60 | - if hasattr(src, 'tmp_file'): | ||
61 | - src = src.tmp_file | ||
62 | - self.ui.display_error('Trying file: %s' % src) | ||
63 | - else: | ||
64 | - self.ui.display_error('Can not detect anything from \ | ||
65 | - decoded images.') | ||
66 | - return | ||
67 | - # Test whether the file exists. | ||
68 | - if not os.path.isfile(src): | ||
69 | - self.ui.display_error('No such file.') | ||
70 | - return | ||
71 | - # Test if it is a JPEG file. | ||
72 | - if not self._looks_like_jpeg(src): | ||
73 | - self.ui.display_error('Input is probably not a JPEG file.') | ||
74 | - return | ||
75 | - | ||
76 | - # ---------------------------- Algorithm ------------------------------ | ||
77 | - # Build DCT-histogram in steps of \approx 1% of all coefficients and | ||
78 | - # calculate the p-value at each step. | ||
79 | - | ||
80 | - # dct_data = rw_dct.read_dct_coefficients(src) | ||
81 | - dct_data = self._get_cov_data(src) | ||
82 | - | ||
83 | - hist = defaultdict(int) | ||
84 | - cnt = 0 | ||
85 | - l = len(dct_data) | ||
86 | - one_p = l / 100 | ||
87 | - result = [] | ||
88 | - for block in dct_data: | ||
89 | - # update the histogram with one block of 64 coefficients | ||
90 | - for c in block: | ||
91 | - hist[c] += 1 | ||
92 | - | ||
93 | - cnt += 1 | ||
94 | - if not cnt % one_p: | ||
95 | - # calculate p-value | ||
96 | - self.ui.set_progress(cnt * 100 / l) | ||
97 | - | ||
98 | - # ignore the pair (0, 1), since JSteg does not embed data there | ||
99 | - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)] | ||
100 | - k = len(hl) / 2 | ||
101 | - observed = [] | ||
102 | - expected = [] | ||
103 | - # calculate observed and expected distribution | ||
104 | - for i in range(k): | ||
105 | - t = hl[2 * i] + hl[2 * i + 1] | ||
106 | - if t > 3: | ||
107 | - observed.append(hl[2 * i]) | ||
108 | - expected.append(t / 2) | ||
109 | - # calculate (\chi^2, p) | ||
110 | - p = chisquare(numpy.array(observed), numpy.array(expected))[1] | ||
111 | - result.append(p) | ||
112 | - | ||
113 | - # ----------------------------- Output -------------------------------- | ||
114 | - # Graph displaying the embedding probabilities in relation to the | ||
115 | - # sample size. | ||
116 | - figure = plt.figure() | ||
117 | - plot = figure.add_subplot(111) | ||
118 | - plot.grid(True) | ||
119 | - plot.plot(result, color='r', linewidth=2.0) | ||
120 | - plt.axis([0, 100, 0, 1.1]) | ||
121 | - plt.title('Embedding probability for different percentages \ | ||
122 | -of the file capacity.') | ||
123 | - plt.xlabel('% of file capacity') | ||
124 | - plt.ylabel('Embedding probability') | ||
125 | - | ||
126 | - if self.core.media_manager.is_media_key(tgt): | ||
127 | - img = figure_to_pil(figure) | ||
128 | - self.core.media_manager.put_media(tgt, img) | ||
129 | - else: | ||
130 | - plt.savefig(tgt) | ||
131 | - | ||
132 | - # Image displaying the length and position of the embedded data | ||
133 | - # within the image | ||
134 | - img2 = Image.open(src) | ||
135 | - img2.convert("RGB") | ||
136 | - width, height = img2.size | ||
137 | - | ||
138 | - for i in range(100): | ||
139 | - result[i] = max(result[i:]) | ||
140 | - | ||
141 | - cnt2 = 0 | ||
142 | - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)): | ||
143 | - if not cnt2 % one_p: | ||
144 | - r = result[cnt2 / one_p] | ||
145 | - if r >= 0.5: | ||
146 | - color = (255, int((1 - r) * 2 * 255), 0) | ||
147 | - else: | ||
148 | - color = (int(r * 2 * 255), 255, 0) | ||
149 | - cnt2 += 1 | ||
150 | - img2.paste(color, (left, top, min(left + 8, width), | ||
151 | - min(top + 8, height))) | ||
152 | - self.core.media_manager.put_media(tgt2, img2) | ||
153 | - | ||
154 | - def __str__(self): | ||
155 | - return 'Chi-Square-Test' | ||
156 | - | ||
157 | - | ||
158 | -def figure_to_pil(figure): | ||
159 | - figure.canvas.draw() | ||
160 | - return Image.fromstring('RGB', | ||
161 | - figure.canvas.get_width_height(), | ||
162 | - figure.canvas.tostring_rgb()) |
test/test_jpeg.py
1 | __author__ = 'chunk' | 1 | __author__ = 'chunk' |
2 | 2 | ||
3 | import numpy as np | 3 | import numpy as np |
4 | -import matplotlib.pyplot as plt | ||
5 | -import seaborn as sns | ||
6 | from .. import mjpeg | 4 | from .. import mjpeg |
7 | from ..mjpeg import base | 5 | from ..mjpeg import base |
8 | from ..msteg.steganography import LSB, F3, F4, F5 | 6 | from ..msteg.steganography import LSB, F3, F4, F5 |
@@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565 | @@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565 | ||
24 | 61908L, 63014L, | 22 | 61908L, 63014L, |
25 | 5908L, 59816L, 56765L] | 23 | 5908L, 59816L, 56765L] |
26 | 24 | ||
27 | -# plt.ticklabel_format(style='sci', axis='both', scilimits=(1, 4)) | ||
28 | - | ||
29 | -plt.ticklabel_format(style='sci', axis='both') | ||
30 | 25 | ||
31 | package_dir = os.path.dirname(os.path.abspath(__file__)) | 26 | package_dir = os.path.dirname(os.path.abspath(__file__)) |
32 | 27 | ||
@@ -182,38 +177,6 @@ def test_jpeg(): | @@ -182,38 +177,6 @@ def test_jpeg(): | ||
182 | print mjpeg.diffblocks(ima, imc) | 177 | print mjpeg.diffblocks(ima, imc) |
183 | 178 | ||
184 | 179 | ||
185 | -def test_hist(): | ||
186 | - ima = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), key=sample_key) | ||
187 | - print ima.getQuality() | ||
188 | - print ima.getCapacity('All') | ||
189 | - | ||
190 | - capacity = ima.getCapacity() | ||
191 | - print capacity | ||
192 | - rate = 0.65 | ||
193 | - hidden = np.random.bytes(int(int(capacity) * rate) / 8) | ||
194 | - steger = F5.F5(sample_key, 1) | ||
195 | - steger2 = F4.F4(key=None) | ||
196 | - steger3 = LSB.LSB(key=None) | ||
197 | - embed_rate = steger3.embed_raw_data(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), | ||
198 | - hidden, | ||
199 | - os.path.join(package_dir, "../res/high/pic3_dest.jpg"), | ||
200 | - frommem=True) | ||
201 | - # | ||
202 | - print embed_rate | ||
203 | - | ||
204 | - imb = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_dest.jpg"), key=sample_key) | ||
205 | - print imb.getQuality() | ||
206 | - print imb.getCapacity('All') | ||
207 | - | ||
208 | - A = imb.rawsignal().tolist() | ||
209 | - E = [i for i in range(-8, 10)] | ||
210 | - plt.hist(A, E, histtype='bar', rwidth=0.8, align='left') | ||
211 | - plt.xlabel("JPEG coefficients after quantisation") | ||
212 | - plt.ylabel("Frequency") | ||
213 | - plt.xticks([i for i in range(-8, 9)]) | ||
214 | - plt.ylim(ymax=300000) | ||
215 | - plt.show() | ||
216 | - | ||
217 | 180 | ||
218 | if __name__ == '__main__': | 181 | if __name__ == '__main__': |
219 | # timer.mark() | 182 | # timer.mark() |