Commit 163652ee61cb303235c445b8a1698b6b2300d571

Authored by Chunk
1 parent 774baa70
Exists in refactor

staged.

mdata/ILSVRC.py
@@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) @@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__))
33 33
34 34
35 class DataILSVRC(DataDumperBase): 35 class DataILSVRC(DataDumperBase):
36 - def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'): 36 + def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train',
  37 + host='HPC-server'):
37 DataDumperBase.__init__(self, base_dir, category) 38 DataDumperBase.__init__(self, base_dir, category)
38 39
39 self.base_dir = base_dir 40 self.base_dir = base_dir
@@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase): @@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase):
45 self.feat_dir = os.path.join(self.dst_dir, 'Feat') 46 self.feat_dir = os.path.join(self.dst_dir, 'Feat')
46 self.img_dir = os.path.join(self.dst_dir, 'Img') 47 self.img_dir = os.path.join(self.dst_dir, 'Img')
47 48
  49 + self.host = host
  50 + self.master = 'spark://%s:7077' % self.host
  51 + self.appname = 'ImageILSVRC'
  52 +
48 self.dict_data = {} 53 self.dict_data = {}
49 54
50 self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category 55 self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category
@@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase): @@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase):
316 return self.table 321 return self.table
317 322
318 if self.connection is None: 323 if self.connection is None:
319 - c = happybase.Connection('HPC-server') 324 + c = happybase.Connection(host=self.host)
320 self.connection = c 325 self.connection = c
321 326
322 tables = self.connection.tables() 327 tables = self.connection.tables()
@@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase): @@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase):
345 table_name = self.table_name 350 table_name = self.table_name
346 351
347 if self.connection is None: 352 if self.connection is None:
348 - c = happybase.Connection('HPC-server') 353 + c = happybase.Connection(host=self.host)
349 self.connection = c 354 self.connection = c
350 355
351 tables = self.connection.tables() 356 tables = self.connection.tables()
@@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase): @@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase):
506 511
507 elif mode == "spark": # cluster 512 elif mode == "spark": # cluster
508 if self.sparker == None: 513 if self.sparker == None:
509 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') 514 + self.sparker = SC.Sparker(host=self.host, appname=self.appname, master=self.master)
510 515
511 result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...} 516 result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...}
512 for feat, tag in result: 517 for feat, tag in result:
mdata/ILSVRC_S.py
@@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase): @@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase):
39 copyright(c) 2015 chunkplus@gmail.com 39 copyright(c) 2015 chunkplus@gmail.com
40 """ 40 """
41 41
42 - def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', tablename=None): 42 + def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', host='HPC-server', tablename=None):
43 DataDumperBase.__init__(self, base, category) 43 DataDumperBase.__init__(self, base, category)
44 44
45 self.base = base 45 self.base = base
@@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase): @@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase):
55 else: 55 else:
56 self.table_name = tablename 56 self.table_name = tablename
57 57
58 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
59 - master='spark://HPC-server:7077') 58 + self.host = host
  59 + self.master = 'spark://%s:7077' % self.host
  60 + self.appname = 'ImageILSVRC-S'
  61 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  62 + master=self.master)
60 63
61 self.steger = F5.F5(sample_key, 1) 64 self.steger = F5.F5(sample_key, 1)
62 65
@@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase): @@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase):
67 return self.table 70 return self.table
68 71
69 if self.connection is None: 72 if self.connection is None:
70 - c = happybase.Connection('HPC-server') 73 + c = happybase.Connection(host=self.host)
71 self.connection = c 74 self.connection = c
72 75
73 tables = self.connection.tables() 76 tables = self.connection.tables()
@@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase): @@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase):
91 table_name = self.table_name 94 table_name = self.table_name
92 95
93 if self.connection is None: 96 if self.connection is None:
94 - c = happybase.Connection('HPC-server') 97 + c = happybase.Connection(host=self.host)
95 self.connection = c 98 self.connection = c
96 99
97 tables = self.connection.tables() 100 tables = self.connection.tables()
@@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase): @@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase):
251 254
252 elif mode == 'spark': 255 elif mode == 'spark':
253 if self.sparker == None: 256 if self.sparker == None:
254 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
255 - master='spark://HPC-server:7077') 257 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  258 + master=self.master)
256 259
257 cols = [ 260 cols = [
258 'cf_pic:data', 261 'cf_pic:data',
@@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase): @@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase):
285 withdata=withdata) 288 withdata=withdata)
286 elif mode == 'analysis': 289 elif mode == 'analysis':
287 if self.sparker == None: 290 if self.sparker == None:
288 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
289 - master='spark://HPC-server:7077') 291 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  292 + master=self.master)
290 293
291 cols = [ 294 cols = [
292 'cf_pic:data', 295 'cf_pic:data',
@@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase): @@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase):
401 404
402 elif mode == 'spark': 405 elif mode == 'spark':
403 if self.sparker == None: 406 if self.sparker == None:
404 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
405 - master='spark://HPC-server:7077') 407 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  408 + master=self.master)
406 409
407 cols = [ 410 cols = [
408 'cf_pic:data', 411 'cf_pic:data',
@@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase): @@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase):
496 499
497 elif mode == 'spark': 500 elif mode == 'spark':
498 if self.sparker == None: 501 if self.sparker == None:
499 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
500 - master='spark://HPC-server:7077') 502 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  503 + master=self.master)
501 504
502 cols = [ 505 cols = [
503 'cf_pic:data', 506 'cf_pic:data',
@@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase): @@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase):
532 def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False): 535 def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False):
533 if mode == 'analysis': 536 if mode == 'analysis':
534 if self.sparker == None: 537 if self.sparker == None:
535 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
536 - master='spark://HPC-server:7077') 538 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  539 + master=self.master)
537 540
538 cols = [ 541 cols = [
539 'cf_pic:data', 542 'cf_pic:data',
@@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase): @@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase):
618 621
619 elif mode == "spark" or mode == "cluster": 622 elif mode == "spark" or mode == "cluster":
620 if self.sparker == None: 623 if self.sparker == None:
621 - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',  
622 - master='spark://HPC-server:7077') 624 + self.sparker = SC.Sparker(host=self.host, appname=self.appname,
  625 + master=self.master)
623 626
624 rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False) 627 rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False)
625 if not collect: 628 if not collect:
msteg/steganalysis/ChiSquare.py
@@ -1,162 +0,0 @@ @@ -1,162 +0,0 @@
1 -"""  
2 -<p>  
3 -This module implements an algorithm described by Andreas Westfeld in [1,2],  
4 -which detects if there was data embedded into an image using JSteg.  
5 -It uses the property that JSteg generates pairs of values in the  
6 -DCT-coefficients histogram, which can be detected by a \chi^2 test.  
7 -</p>  
8 -  
9 -<pre>  
10 -[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite  
11 -Better Steganalysis  
12 -[2]: Andreas Westfeld, Angriffe auf steganographische Systeme  
13 -</pre>  
14 -"""  
15 -  
16 -from collections import defaultdict  
17 -import os  
18 -  
19 -from PIL import Image  
20 -import numpy  
21 -from scipy.stats import chisquare  
22 -import matplotlib.pyplot as plt  
23 -import itertools as it  
24 -  
25 -from .. import *  
26 -  
27 -  
28 -class ChiSquare(StegBase):  
29 - """  
30 - The module contains only one method, <b>detect</b>.  
31 - """  
32 -  
33 - def __init__(self, ui, core):  
34 - self.ui = ui  
35 - self.core = core  
36 -  
37 - def detect(self, src, tgt, tgt2):  
38 - """  
39 - <p>  
40 - Detect if there was data embedded in the <i>source image</i> image with  
41 - JSteg algorithm.  
42 - </p>  
43 -  
44 - <p>  
45 - Parameters:  
46 - <ol>  
47 - <li><pre>Source image</pre> Image which should be tested</li>  
48 - <li><pre>Target image</pre> Image which displays a graphic with the  
49 - embedding probability</li>  
50 - <li><pre>2nd Target image</pre> Image which displays the embedding  
51 - positions in the image</li>  
52 - </ol>  
53 - </p>  
54 - """  
55 - # --------------------------- Input -----------------------------------  
56 - # If src is from the image pool, test whether the image exists encoded  
57 - # on the file system. Otherwise we can not read DCT-coefficients.  
58 - if self.core.media_manager.is_media_key(src):  
59 - src = self.core.media_manager.get_file(src)  
60 - if hasattr(src, 'tmp_file'):  
61 - src = src.tmp_file  
62 - self.ui.display_error('Trying file: %s' % src)  
63 - else:  
64 - self.ui.display_error('Can not detect anything from \  
65 - decoded images.')  
66 - return  
67 - # Test whether the file exists.  
68 - if not os.path.isfile(src):  
69 - self.ui.display_error('No such file.')  
70 - return  
71 - # Test if it is a JPEG file.  
72 - if not self._looks_like_jpeg(src):  
73 - self.ui.display_error('Input is probably not a JPEG file.')  
74 - return  
75 -  
76 - # ---------------------------- Algorithm ------------------------------  
77 - # Build DCT-histogram in steps of \approx 1% of all coefficients and  
78 - # calculate the p-value at each step.  
79 -  
80 - # dct_data = rw_dct.read_dct_coefficients(src)  
81 - dct_data = self._get_cov_data(src)  
82 -  
83 - hist = defaultdict(int)  
84 - cnt = 0  
85 - l = len(dct_data)  
86 - one_p = l / 100  
87 - result = []  
88 - for block in dct_data:  
89 - # update the histogram with one block of 64 coefficients  
90 - for c in block:  
91 - hist[c] += 1  
92 -  
93 - cnt += 1  
94 - if not cnt % one_p:  
95 - # calculate p-value  
96 - self.ui.set_progress(cnt * 100 / l)  
97 -  
98 - # ignore the pair (0, 1), since JSteg does not embed data there  
99 - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)]  
100 - k = len(hl) / 2  
101 - observed = []  
102 - expected = []  
103 - # calculate observed and expected distribution  
104 - for i in range(k):  
105 - t = hl[2 * i] + hl[2 * i + 1]  
106 - if t > 3:  
107 - observed.append(hl[2 * i])  
108 - expected.append(t / 2)  
109 - # calculate (\chi^2, p)  
110 - p = chisquare(numpy.array(observed), numpy.array(expected))[1]  
111 - result.append(p)  
112 -  
113 - # ----------------------------- Output --------------------------------  
114 - # Graph displaying the embedding probabilities in relation to the  
115 - # sample size.  
116 - figure = plt.figure()  
117 - plot = figure.add_subplot(111)  
118 - plot.grid(True)  
119 - plot.plot(result, color='r', linewidth=2.0)  
120 - plt.axis([0, 100, 0, 1.1])  
121 - plt.title('Embedding probability for different percentages \  
122 -of the file capacity.')  
123 - plt.xlabel('% of file capacity')  
124 - plt.ylabel('Embedding probability')  
125 -  
126 - if self.core.media_manager.is_media_key(tgt):  
127 - img = figure_to_pil(figure)  
128 - self.core.media_manager.put_media(tgt, img)  
129 - else:  
130 - plt.savefig(tgt)  
131 -  
132 - # Image displaying the length and position of the embedded data  
133 - # within the image  
134 - img2 = Image.open(src)  
135 - img2.convert("RGB")  
136 - width, height = img2.size  
137 -  
138 - for i in range(100):  
139 - result[i] = max(result[i:])  
140 -  
141 - cnt2 = 0  
142 - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)):  
143 - if not cnt2 % one_p:  
144 - r = result[cnt2 / one_p]  
145 - if r >= 0.5:  
146 - color = (255, int((1 - r) * 2 * 255), 0)  
147 - else:  
148 - color = (int(r * 2 * 255), 255, 0)  
149 - cnt2 += 1  
150 - img2.paste(color, (left, top, min(left + 8, width),  
151 - min(top + 8, height)))  
152 - self.core.media_manager.put_media(tgt2, img2)  
153 -  
154 - def __str__(self):  
155 - return 'Chi-Square-Test'  
156 -  
157 -  
158 -def figure_to_pil(figure):  
159 - figure.canvas.draw()  
160 - return Image.fromstring('RGB',  
161 - figure.canvas.get_width_height(),  
162 - figure.canvas.tostring_rgb())  
test/test_jpeg.py
1 __author__ = 'chunk' 1 __author__ = 'chunk'
2 2
3 import numpy as np 3 import numpy as np
4 -import matplotlib.pyplot as plt  
5 -import seaborn as sns  
6 from .. import mjpeg 4 from .. import mjpeg
7 from ..mjpeg import base 5 from ..mjpeg import base
8 from ..msteg.steganography import LSB, F3, F4, F5 6 from ..msteg.steganography import LSB, F3, F4, F5
@@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565 @@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565
24 61908L, 63014L, 22 61908L, 63014L,
25 5908L, 59816L, 56765L] 23 5908L, 59816L, 56765L]
26 24
27 -# plt.ticklabel_format(style='sci', axis='both', scilimits=(1, 4))  
28 -  
29 -plt.ticklabel_format(style='sci', axis='both')  
30 25
31 package_dir = os.path.dirname(os.path.abspath(__file__)) 26 package_dir = os.path.dirname(os.path.abspath(__file__))
32 27
@@ -182,38 +177,6 @@ def test_jpeg(): @@ -182,38 +177,6 @@ def test_jpeg():
182 print mjpeg.diffblocks(ima, imc) 177 print mjpeg.diffblocks(ima, imc)
183 178
184 179
185 -def test_hist():  
186 - ima = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), key=sample_key)  
187 - print ima.getQuality()  
188 - print ima.getCapacity('All')  
189 -  
190 - capacity = ima.getCapacity()  
191 - print capacity  
192 - rate = 0.65  
193 - hidden = np.random.bytes(int(int(capacity) * rate) / 8)  
194 - steger = F5.F5(sample_key, 1)  
195 - steger2 = F4.F4(key=None)  
196 - steger3 = LSB.LSB(key=None)  
197 - embed_rate = steger3.embed_raw_data(os.path.join(package_dir, "../res/high/pic3_orig.jpg"),  
198 - hidden,  
199 - os.path.join(package_dir, "../res/high/pic3_dest.jpg"),  
200 - frommem=True)  
201 - #  
202 - print embed_rate  
203 -  
204 - imb = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_dest.jpg"), key=sample_key)  
205 - print imb.getQuality()  
206 - print imb.getCapacity('All')  
207 -  
208 - A = imb.rawsignal().tolist()  
209 - E = [i for i in range(-8, 10)]  
210 - plt.hist(A, E, histtype='bar', rwidth=0.8, align='left')  
211 - plt.xlabel("JPEG coefficients after quantisation")  
212 - plt.ylabel("Frequency")  
213 - plt.xticks([i for i in range(-8, 9)])  
214 - plt.ylim(ymax=300000)  
215 - plt.show()  
216 -  
217 180
218 if __name__ == '__main__': 181 if __name__ == '__main__':
219 # timer.mark() 182 # timer.mark()