Commit 92d488d8731a722a81487a5150ce7775941940ee
1 parent
7e755616
Exists in
master
.
Showing
1 changed file
with
380 additions
and
0 deletions
Show diff stats
... | ... | @@ -0,0 +1,380 @@ |
1 | +__author__ = 'chunk' | |
2 | + | |
3 | +import os | |
4 | +import numpy as np | |
5 | +from numpy.random import randn | |
6 | +import pandas as pd | |
7 | +from scipy import stats | |
8 | +import matplotlib as mpl | |
9 | +import matplotlib.pyplot as plt | |
10 | +import seaborn as sns | |
11 | + | |
12 | +import numpy as np | |
13 | +import matplotlib.pyplot as plt | |
14 | +import seaborn as sns | |
15 | +from .. import mjpeg | |
16 | +from ..mjpeg import base | |
17 | +from ..msteg.steganography import LSB, F3, F4, F5 | |
18 | + | |
# Seed NumPy's global RNG with a fixed value (the sum of the code points of
# "whoami") so that random draws made after import are repeatable.
np.random.seed(sum(map(ord, "whoami")))

# Sample key of 16 integers. Written as plain int literals: the Python-2-only
# ``L`` suffix is a syntax error on Python 3, and the values are unchanged.
sample_key = [46812, 20559, 31360, 16681, 27536, 39553, 5427, 63029, 56572, 36476, 25695,
              61908, 63014, 5908, 59816, 56765]
23 | + | |
# plt.ticklabel_format(style='sci', axis='both', scilimits=(0, 0))
# Ask pyplot for scientific tick-label notation on both axes.
# NOTE(review): this executes at import time, before any plotting function runs.
plt.ticklabel_format(style='sci', axis='both')

# Absolute path of the directory that contains this module; joined with
# relative resource paths further down in the file.
package_dir = os.path.dirname(os.path.abspath(__file__))
28 | + | |
29 | + | |
def anal_ILSVRC(tag_path='../res/file-tag.tsv', out_path='../res/test.tsv'):
    """Print summary statistics of the image tag table and write a labelled copy.

    The tab-separated table at ``tag_path`` (no header; columns hash, width,
    height, size, quality) is loaded, several size/quality statistics are
    printed, the rows are sorted by (size, quality), a random 0/1 ``class``
    column drawn from Bernoulli(0.3) is appended, and the result is written
    to ``out_path`` as a header-less TSV.

    Args:
        tag_path: Input TSV. Defaults to the path the function always used.
        out_path: Output TSV. Defaults to the path the function always used.

    Note:
        Uses ``sort_values``, which replaced the removed ``Series.order`` /
        ``DataFrame.sort`` in pandas 0.17.
    """
    df_ILS = pd.read_csv(tag_path,
                         names=['hash', 'width', 'height', 'size', 'quality'], sep='\t')

    # The column must be fetched by key: attribute access ``df.size`` yields
    # the DataFrame's element count, not the 'size' column.
    sizes = df_ILS['size']
    quality = df_ILS['quality']

    print(df_ILS[sizes < 2000000])
    print(df_ILS.describe())

    length = df_ILS.shape[0]

    # Row positions at 1/3, 2/3 and 0.9999 of the table; truncated to int
    # because positional indexing needs integer positions.
    cut_points = [int(frac * length) for frac in (1.0 / 3, 2.0 / 3, 0.9999)]

    print(sizes.sort_values().iloc[cut_points])
    # Recorded output:
    #   7082     108514
    #   3826     150389
    #   8761    4814541

    print(sizes[sizes <= 102400].count())
    print(sizes[(sizes > 102400) & (sizes <= 153600)].count())
    print(sizes[sizes > 153600].count())
    # Recorded output:
    #   (-,100K,150K,+): 4519 / 6163 / 4831
    #   (-,100K,500K,+): 4519 / 10932 / 62

    # Quality
    print(quality.sort_values().iloc[cut_points])
    # Recorded output:
    #   13507     96
    #   831       96
    #   6529     100

    df_new = df_ILS.sort_values(['size', 'quality'], ascending=True)
    print(df_new)

    # One Bernoulli(0.3) draw per row, assigned positionally to the sorted table.
    rand_class = stats.bernoulli.rvs(0.3, size=length)
    df_new['class'] = rand_class

    print(rand_class[:100])
    print(df_new)

    df_new.to_csv(out_path, header=False, index=False, sep='\t')
81 | + | |
82 | + | |
def anal_ILSVRC_Test(tag_path='../res/file-tag-test.tsv'):
    """Print size statistics of the test tag table and append a zero ``class2`` column.

    The header-less TSV at ``tag_path`` is read with six columns (hash,
    width, height, size, quality, class), size statistics are printed, an
    int32 ``class2`` column of zeros is appended, and the table is written
    back to the SAME path — so the file has seven columns afterwards.

    Args:
        tag_path: TSV that is read and then overwritten in place. Defaults to
            the path the function always used.
    """
    df_ILS_T = pd.read_csv(tag_path,
                           names=['hash', 'width', 'height', 'size', 'quality', 'class'],
                           sep='\t')

    # Fetch the column by key: ``df.size`` is the DataFrame's element-count
    # property and would shadow the 'size' column.
    sizes = df_ILS_T['size']

    print(df_ILS_T)
    print(sizes.describe())

    # Row counts in the three size bands (<=100K, 100K-150K, >150K).
    print(sizes[sizes <= 102400].count())
    print(sizes[(sizes > 102400) & (sizes <= 153600)].count())
    print(sizes[sizes > 153600].count())

    length = df_ILS_T.shape[0]
    df_ILS_T['class2'] = np.zeros(length, np.int32)
    df_ILS_T.to_csv(tag_path, header=False, index=False, sep='\t')
96 | + | |
97 | + | |
def anal_0000():
    """Print size statistics of the test tag table and show a size histogram.

    Reads ``../res/file-tag-test.tsv`` (relative to ``package_dir``) with the
    columns hash, width, height, size, quality, chosen, class; prints a
    description of the size column, the sizes at 1/3, 2/3 and 0.9999 of the
    sorted table, the fraction of rows with three specific sizes, and the row
    counts in three size bands; then displays a 100-bin histogram of sizes.

    Note:
        Uses ``Series.sort_values``, which replaced the removed
        ``Series.order`` in pandas 0.17.
    """
    df_ILS = pd.read_csv(os.path.join(package_dir, '../res/file-tag-test.tsv'),
                         names=['hash', 'width', 'height', 'size', 'quality', 'chosen', 'class'],
                         sep='\t')
    length = df_ILS.shape[0]

    # Fetch the column by key: ``df.size`` is the DataFrame's element-count
    # property and would shadow the 'size' column.
    sizes = df_ILS['size']

    print(sizes.describe())

    # Integer row positions (positional indexing rejects floats).
    cut_points = [int(frac * length) for frac in (1.0 / 3, 2.0 / 3, 0.9999)]
    print(sizes.sort_values().iloc[cut_points])

    # Share of rows having exactly each of these sizes. The divisor is the
    # table's row count (the former literal 4592.0 was this file's row count).
    for exact_size in (166500, 187500, 250000):
        print(sizes[sizes == exact_size].count() / float(length))

    print(sizes[sizes <= 166500].count())
    print(sizes[(sizes > 166500) & (sizes <= 187500)].count())
    print(sizes[sizes > 187500].count())

    # NOTE(review): this is issued before the histogram is drawn; if
    # ``DataFrame.hist`` opens its own figure, the setting will not apply to
    # the histogram axes — confirm the intended order.
    plt.ticklabel_format(style='sci', axis='both')
    df_ILS.hist(column='size', bins=100)
    plt.title('')
    plt.xlabel("Image size")
    plt.ylabel("Frequency")
    plt.show()
120 | + | |
121 | + | |
def pre_crop(min_width=300, min_height=300):
    """Print how many images are at least ``min_width`` x ``min_height``.

    Reads ``../res/file-tag-test.tsv`` (relative to ``package_dir``) and
    prints the shape of the whole table followed by the shape of the subset
    whose width and height both meet the minimums.

    Args:
        min_width: Smallest accepted width; defaults to the original 300.
        min_height: Smallest accepted height; defaults to the original 300.
    """
    df_ILS = pd.read_csv(os.path.join(package_dir, '../res/file-tag-test.tsv'),
                         names=['hash', 'width', 'height', 'size', 'quality', 'chosen', 'class'],
                         sep='\t')
    print(df_ILS.shape)

    large_enough = (df_ILS['width'] >= min_width) & (df_ILS['height'] >= min_height)
    print(df_ILS[large_enough].shape)

    # Recorded results (threshold, rows kept, fraction):
    # 300x300 4213 0.917 *
    # 200x200 4534 0.987
    # 400x400 932 0.202
132 | + | |
133 | + | |
def plot_hist():
    """Show a stacked bar chart of the IO and CPU time per data size.

    For each data size (column 0 of the table below) the ``spark_io`` series
    (column 8) is drawn as the lower bar and the ``spark_proc`` series
    (column 6) is stacked on top, labelled 'IO' and 'CPU' in the legend.
    Above every stack the IO share of the stacked height is printed as a
    whole percentage.
    """
    dat_performance = np.array([
        [100, 0.583396, 30.847788, 57.884814, 89.315998, 1.471087, 29.364628, 9.114235, 10.585322,
         39.94995, 2.235697366],
        [200, 1.147411, 62.815709, 118.217859, 182.180979, 3.008692, 37.920278, 19.589578, 22.59827,
         60.518548, 3.010332948],
        [500, 2.763806, 162.806317, 299.778606, 465.348729, 6.81705, 88.291989, 73.446282,
         80.263332, 168.555321, 2.760807112],
        [1000, 6.372794, 329.023151, 600.438977, 935.834922, 15.644418, 159.951099, 186.335413,
         201.979831, 361.93093, 2.585672692],
        [2000, 14.960961, 679.357936, 1256.341536, 1950.660433, 31.699596, 313.154748, 387.063702,
         418.763298, 731.918046, 2.665135043],
        [5000, 39.880657, 1652.537536, 3067.98039, 4760.398583, 73.070203, 694.454719, 898.458633,
         971.528836, 1665.983555, 2.857410308]])

    data_size = dat_performance[:, 0].astype(int)
    spark_io = dat_performance[:, 8]
    spark_proc = dat_performance[:, 6]

    positions = np.arange(len(data_size))
    bar_width = 0.5

    fig, ax = plt.subplots()
    # align='edge' is spelled out so each bar spans [x, x + width] on every
    # matplotlib version; the tick offset of width / 2 below relies on that.
    rects1 = ax.bar(positions, spark_io, bar_width, align='edge')
    rects2 = ax.bar(positions, spark_proc, bar_width, color='#e74c3c', bottom=spark_io,
                    align='edge')

    # Axis labels and ticks: one tick centred under each stack.
    ax.set_xlabel("Data size")
    ax.set_ylabel('Time(s)')
    ax.set_xticks(positions + bar_width / 2)
    ax.set_xticklabels(data_size)

    ax.legend((rects1[0], rects2[0]), ('IO', 'CPU'), loc=2)

    # Annotate every stack with the IO bar's share of the stacked height.
    for io_rect, cpu_rect in zip(rects1, rects2):
        io_height = io_rect.get_height()
        total_height = io_height + cpu_rect.get_height()
        ax.text(io_rect.get_x() + io_rect.get_width() / 2, 1.005 * total_height,
                '%d%%' % int(100 * 1.0 * io_height / total_height),
                ha='center', va='bottom')

    plt.show()
202 | + | |
203 | + | |
def plot_line_performance():
    """Plot four timing series of the performance table against data size.

    Column 0 of the table is the x axis (data size). Columns 4, 9, 8 and 6
    are drawn, in that order, as 'serial total', 'spark total', 'spark io'
    and 'spark proc', and the figure is shown.
    """
    # performance
    timings = np.array([
        [100, 0.583396, 30.847788, 57.884814, 89.315998, 1.471087, 29.364628, 9.114235, 10.585322,
         39.94995, 2.235697366],
        [200, 1.147411, 62.815709, 118.217859, 182.180979, 3.008692, 37.920278, 19.589578, 22.59827,
         60.518548, 3.010332948],
        [500, 2.763806, 162.806317, 299.778606, 465.348729, 6.81705, 88.291989, 73.446282,
         80.263332, 168.555321, 2.760807112],
        [1000, 6.372794, 329.023151, 600.438977, 935.834922, 15.644418, 159.951099, 186.335413,
         201.979831, 361.93093, 2.585672692],
        [2000, 14.960961, 679.357936, 1256.341536, 1950.660433, 31.699596, 313.154748, 387.063702,
         418.763298, 731.918046, 2.665135043],
        [5000, 39.880657, 1652.537536, 3067.98039, 4760.398583, 73.070203, 694.454719, 898.458633,
         971.528836, 1665.983555, 2.857410308]])

    x_values = timings[:, 0]

    # (table column, extra plot keywords) for every curve, in drawing order.
    curves = [
        (4, dict(label='serial total')),
        (9, dict(linestyle='--', label='spark total')),
        (8, dict(linestyle=':', label='spark io')),
        (6, dict(linestyle='-.', label='spark proc')),
    ]
    for column, extra in curves:
        plt.plot(x_values, timings[:, column], marker='o', **extra)

    plt.xlabel("Data size")
    plt.ylabel("Time(s)")
    plt.legend(loc=2)
    plt.show()
245 | + | |
246 | + | |
def plot_line_io():
    """Plot the IO comparison table: eight timing series against data size.

    Column 0 of the table is the x axis. The four 'total' series are drawn
    first with solid lines, then the happybase io/cpu series with dashed
    lines. Uncompressed series use the 'o' marker, compressed ones 'D'.
    """
    # io
    io_table = np.array([
        [100, 10.585322, 29.364628, 39.94995, 10.286684, 27.079774, 37.366458, 49.995647,
         55.280739],
        [200, 22.59827, 37.920278, 60.518548, 22.731275, 38.491461, 61.222736, 76.258928,
         83.836657],
        [500, 80.263332, 88.291989, 168.555321, 64.610839, 88.241193, 152.852032, 177.039349,
         143.524813],
        [1000, 201.979831, 159.951099, 361.93093, 172.359455, 158.694248, 331.053703, 467.126756,
         315.578952],
        [2000, 418.763298, 313.154748, 731.918046, 390.990209, 313.085707, 704.075916, 802.138669,
         734.133909],
        [5000, 971.528836, 694.454719, 1665.983555, 898.468232, 717.603061, 1616.071293,
         1860.610954, 1677.044038]])

    x_values = io_table[:, 0]

    # Solid curves: (table column, marker, legend label), in drawing order.
    totals = [
        (7, 'o', 'dist-uncompressed total'),
        (8, 'D', 'dist-compressed total'),
        (3, 'o', 'happybase-uncompressed total'),
        (6, 'D', 'happybase-compressed total'),
    ]
    for column, marker, label in totals:
        plt.plot(x_values, io_table[:, column], marker=marker, label=label)

    # Dashed curves: the io and cpu parts of the happybase runs.
    parts = [
        (1, 'o', 'happybase-uncompressed io'),
        (4, 'D', 'happybase-compressed io'),
        (2, 'o', 'happybase-uncompressed cpu'),
        (5, 'D', 'happybase-compressed cpu'),
    ]
    for column, marker, label in parts:
        plt.plot(x_values, io_table[:, column], marker=marker, linestyle='--', label=label)

    plt.xlabel("Data size")
    plt.ylabel("Time")
    plt.legend(loc=2)
    plt.show()
304 | + | |
305 | + | |
306 | + # plt.subplot(2, 2, 1) | |
307 | + # plt.plot(data_size, dist_uncomp, marker='o', label='dist-uncompressed total') | |
308 | + # plt.plot(data_size, dist_comp, marker='o', label='dist-compressed total') | |
309 | + # # plt.title('Performance with(out) Compression') | |
310 | + # plt.ylabel("Time") | |
311 | + # plt.legend(loc=2) | |
312 | + # | |
313 | + # plt.subplot(2, 2, 2) | |
314 | + # plt.plot(data_size, happybase_uncomp_tot, marker='o', label='happybase-uncompressed total') | |
315 | + # plt.plot(data_size, happybase_comp_tot, marker='o', label='happybase-compressed total') | |
316 | + # plt.legend(loc=2) | |
317 | + # | |
318 | + # plt.subplot(2, 2, 3) | |
319 | + # plt.plot(data_size, happybase_uncomp_io, marker='o', linestyle='--', | |
320 | + # label='happybase-uncompressed io') | |
321 | + # plt.plot(data_size, happybase_comp_io, marker='o', linestyle='--', | |
322 | + # label='happybase-compressed io') | |
323 | + # plt.ylabel("Time") | |
324 | + # plt.xlabel("Data size") | |
325 | + # plt.legend(loc=2) | |
326 | + # | |
327 | + # plt.subplot(2, 2, 4) | |
328 | + # plt.plot(data_size, happybase_uncomp_cpu, marker='o', linestyle='--', | |
329 | + # label='happybase-uncompressed cpu') | |
330 | + # plt.plot(data_size, happybase_comp_cpu, marker='o', linestyle='--', | |
331 | + # label='happybase-compressed cpu') | |
332 | + # plt.xlabel("Data size") | |
333 | + # plt.legend(loc=2) | |
334 | + # plt.show() | |
335 | + | |
336 | + # plt.plot(data_size, dist_uncomp, marker='o', label='dist-uncompressed total') | |
337 | + # plt.plot(data_size, dist_comp, marker='D', linestyle='--',label='dist-compressed total') | |
338 | + # plt.xlabel("Data size") | |
339 | + # plt.ylabel("Time(s)") | |
340 | + # plt.legend(loc=2) | |
341 | + # plt.show() | |
342 | + # | |
343 | + # plt.plot(data_size, happybase_uncomp_tot, marker='o', label='happybase-uncompressed total') | |
344 | + # plt.plot(data_size, happybase_comp_tot, marker='D', linestyle='--',label='happybase-compressed total') | |
345 | + # plt.xlabel("Data size") | |
346 | + # plt.ylabel("Time(s)") | |
347 | + # plt.legend(loc=2) | |
348 | + # plt.show() | |
349 | + # | |
350 | + # plt.plot(data_size, happybase_uncomp_io, marker='o', | |
351 | + # label='happybase-uncompressed io') | |
352 | + # plt.plot(data_size, happybase_comp_io, marker='D', linestyle='--', | |
353 | + # label='happybase-compressed io') | |
354 | + # plt.xlabel("Data size") | |
355 | + # plt.ylabel("Time(s)") | |
356 | + # plt.legend(loc=2) | |
357 | + # plt.show() | |
358 | + # | |
359 | + # plt.plot(data_size, happybase_uncomp_cpu, marker='o', | |
360 | + # label='happybase-uncompressed cpu') | |
361 | + # plt.plot(data_size, happybase_comp_cpu, marker='D', linestyle='--', | |
362 | + # label='happybase-compressed cpu') | |
363 | + # | |
364 | + # plt.xlabel("Data size") | |
365 | + # plt.ylabel("Time(s)") | |
366 | + # plt.legend(loc=2) | |
367 | + # plt.show() | |
368 | + | |
369 | + | |
if __name__ == '__main__':
    # Script entry point: runs the size analysis of the test tag table.
    # Other routines in this module, switched on by hand when needed:
    #   anal_ILSVRC()
    #   anal_ILSVRC_Test()
    #   pre_crop()
    anal_0000()