Commit 92d488d8731a722a81487a5150ce7775941940ee
1 parent
7e755616
Exists in
master
.
Showing
1 changed file
with
380 additions
and
0 deletions
Show diff stats
@@ -0,0 +1,380 @@ | @@ -0,0 +1,380 @@ | ||
1 | +__author__ = 'chunk' | ||
2 | + | ||
3 | +import os | ||
4 | +import numpy as np | ||
5 | +from numpy.random import randn | ||
6 | +import pandas as pd | ||
7 | +from scipy import stats | ||
8 | +import matplotlib as mpl | ||
9 | +import matplotlib.pyplot as plt | ||
10 | +import seaborn as sns | ||
11 | + | ||
12 | +import numpy as np | ||
13 | +import matplotlib.pyplot as plt | ||
14 | +import seaborn as sns | ||
15 | +from .. import mjpeg | ||
16 | +from ..mjpeg import base | ||
17 | +from ..msteg.steganography import LSB, F3, F4, F5 | ||
18 | + | ||
# Seed NumPy's global RNG with a fixed value so random draws are repeatable.
np.random.seed(sum(map(ord, "whoami")))

# Sample key of 16 integers (presumably for the steganography classes imported
# above -- it is not used in this part of the file; confirm against callers).
# Plain integer literals replace the Python-2-only ``L`` (long) suffix, which
# is a syntax error on Python 3; the values themselves are unchanged.
sample_key = [
    46812, 20559, 31360, 16681, 27536, 39553, 5427, 63029,
    56572, 36476, 25695, 61908, 63014, 5908, 59816, 56765,
]

# Ask pyplot for scientific-notation tick labels on both axes.
plt.ticklabel_format(style='sci', axis='both')

# Absolute directory that contains this module; data files are located
# relative to it (``../res/...``).
package_dir = os.path.dirname(os.path.abspath(__file__))
28 | + | ||
29 | + | ||
def anal_ILSVRC():
    """Print statistics for the ILSVRC file-tag table and export a labelled copy.

    Reads ``../res/file-tag.tsv`` (resolved against the current working
    directory), prints size and quality statistics, then sorts the rows by
    ``size`` and ``quality``, appends a random 0/1 ``class`` column drawn from
    a Bernoulli(0.3) distribution and writes the result, without header or
    index, to ``../res/test.tsv``.

    The ``size`` column is always read as ``df['size']``: attribute access
    (``df.size``) collides with the ``DataFrame.size`` property on pandas
    versions that define it, which yields the element count, not the column.
    """

    def ascending(series):
        # Series.order() was renamed to sort_values() in pandas 0.17 and later
        # removed; use whichever the installed pandas provides.
        if hasattr(series, 'sort_values'):
            return series.sort_values()
        return series.order()

    df_ILS = pd.read_csv('../res/file-tag.tsv',
                         names=['hash', 'width', 'height', 'size', 'quality'], sep='\t')
    sizes = df_ILS['size']
    print(df_ILS[sizes < 2000000])
    print(df_ILS.describe())

    length = df_ILS.shape[0]
    # Positions at 1/3, 2/3 and (almost) the end of the sorted order.  They are
    # truncated to int because .iloc only accepts integer positions.
    positions = [int(fraction * length) for fraction in (1.0 / 3, 2.0 / 3, 0.9999)]

    print(ascending(sizes).iloc[positions])
    # Output noted by the original author:
    #   7082   108514
    #   3826   150389
    #   8761  4814541

    # Row counts per size bucket: <=100K, (100K, 150K], >150K.
    print(sizes[sizes <= 102400].count())
    print(sizes[(sizes > 102400) & (sizes <= 153600)].count())
    print(sizes[sizes > 153600].count())
    # Output noted by the original author:
    #   (-,100K,150K,+): 4519 / 6163 / 4831
    #   (-,100K,500K,+): 4519 / 10932 / 62

    ## Quality
    print(ascending(df_ILS['quality']).iloc[positions])
    # Output noted by the original author:
    #   13507   96
    #   831     96
    #   6529   100

    sort_keys = ['size', 'quality']
    # DataFrame.sort() was likewise replaced by sort_values(); support both.
    if hasattr(df_ILS, 'sort_values'):
        df_new = df_ILS.sort_values(by=sort_keys, ascending=True)
    else:
        df_new = df_ILS.sort(sort_keys, ascending=True)
    print(df_new)

    # One Bernoulli(0.3) draw per row, assigned positionally to the sorted rows.
    rand_class = stats.bernoulli.rvs(0.3, size=length)
    df_new['class'] = rand_class

    print(rand_class[:100])
    print(df_new)

    df_new.to_csv('../res/test.tsv', header=False, index=False, sep='\t')
81 | + | ||
82 | + | ||
def anal_ILSVRC_Test():
    """Print size statistics for the test table and append a zero ``class2`` column.

    Reads ``../res/file-tag-test.tsv`` (resolved against the current working
    directory) as six columns, prints the table, a summary of ``size`` and the
    row counts of three size buckets, then adds an all-zero int32 ``class2``
    column and writes the table back to the same path without header or index.

    NOTE(review): the file is overwritten with one more column than was read,
    so running this twice on the same file changes how it is parsed the second
    time -- confirm this is intended as a one-off migration.

    The ``size`` column is read as ``df['size']`` because attribute access
    (``df.size``) collides with the ``DataFrame.size`` property on pandas
    versions that define it.
    """
    df_ILS_T = pd.read_csv('../res/file-tag-test.tsv',
                           names=['hash', 'width', 'height', 'size', 'quality', 'class'],
                           sep='\t')
    print(df_ILS_T)

    sizes = df_ILS_T['size']
    print(sizes.describe())

    # Row counts per size bucket: <=100K, (100K, 150K], >150K.
    print(sizes[sizes <= 102400].count())
    print(sizes[(sizes > 102400) & (sizes <= 153600)].count())
    print(sizes[sizes > 153600].count())

    length = df_ILS_T.shape[0]
    df_ILS_T['class2'] = np.zeros(length, np.int32)
    df_ILS_T.to_csv('../res/file-tag-test.tsv', header=False, index=False, sep='\t')
96 | + | ||
97 | + | ||
def anal_0000():
    """Print size statistics for the test table and show a histogram of ``size``.

    Reads ``<package_dir>/../res/file-tag-test.tsv`` as seven columns, prints a
    summary of ``size``, three values picked from the sorted sizes, the share
    of rows at three exact sizes and the row counts of three size buckets, and
    finally displays a 100-bin histogram of ``size``.

    The ``size`` column is read as ``df['size']`` because attribute access
    (``df.size``) collides with the ``DataFrame.size`` property on pandas
    versions that define it.
    """
    df_ILS = pd.read_csv(os.path.join(package_dir, '../res/file-tag-test.tsv'),
                         names=['hash', 'width', 'height', 'size', 'quality', 'chosen', 'class'],
                         sep='\t')
    length = df_ILS.shape[0]
    sizes = df_ILS['size']
    print(sizes.describe())

    # Series.order() was renamed to sort_values() in pandas 0.17 and later
    # removed; use whichever the installed pandas provides.
    ordered = sizes.sort_values() if hasattr(sizes, 'sort_values') else sizes.order()
    # Positions at 1/3, 2/3 and (almost) the end of the sorted order, truncated
    # to int because .iloc only accepts integer positions.
    positions = [int(fraction * length) for fraction in (1.0 / 3, 2.0 / 3, 0.9999)]
    print(ordered.iloc[positions])

    # NOTE(review): 4592 is presumably the row count of the table -- confirm.
    # It is kept hard-coded so the printed ratios match the original script.
    reference_total = 4592.0
    for exact_size in (166500, 187500, 250000):
        print(sizes[sizes == exact_size].count() / reference_total)

    # Row counts per size bucket: <=166500, (166500, 187500], >187500.
    print(sizes[sizes <= 166500].count())
    print(sizes[(sizes > 166500) & (sizes <= 187500)].count())
    print(sizes[sizes > 187500].count())

    plt.ticklabel_format(style='sci', axis='both')
    df_ILS.hist(column='size', bins=100)
    plt.title('')
    plt.xlabel("Image size")
    plt.ylabel("Frequency")
    plt.show()
120 | + | ||
121 | + | ||
def pre_crop():
    """Print the table shape and the shape of the rows that are at least 300x300.

    Reads ``<package_dir>/../res/file-tag-test.tsv`` as seven columns and
    keeps the rows whose ``width`` and ``height`` are both >= 300.
    """
    table_path = os.path.join(package_dir, '../res/file-tag-test.tsv')
    column_names = ['hash', 'width', 'height', 'size', 'quality', 'chosen', 'class']
    df_ILS = pd.read_csv(table_path, names=column_names, sep='\t')
    print(df_ILS.shape)

    wide_enough = df_ILS['width'] >= 300
    tall_enough = df_ILS['height'] >= 300
    print(df_ILS[wide_enough & tall_enough].shape)

    # Figures noted by the original author (presumably threshold, matching
    # row count and fraction of all rows; '*' marks the 300x300 line):
    #   300x300  4213  0.917  *
    #   200x200  4534  0.987
    #   400x400   932  0.202
132 | + | ||
133 | + | ||
def plot_hist():
    """Show a stacked bar chart of IO time and CPU time for each data size.

    For every row of the embedded measurement table one bar is drawn: the
    lower segment is the IO time (column 8), the upper red segment is the
    processing time (column 6).  The label above each bar is the IO share of
    the bar's total height, as a whole percentage.

    Bars are drawn with ``align='edge'`` so that the ticks placed at
    ``x + bar_width / 2`` sit under the bar centres regardless of the
    matplotlib default (which changed from 'edge' to 'center' in 2.0).
    """
    dat_performance = np.array([
        [100, 0.583396, 30.847788, 57.884814, 89.315998, 1.471087, 29.364628, 9.114235, 10.585322,
         39.94995, 2.235697366],
        [200, 1.147411, 62.815709, 118.217859, 182.180979, 3.008692, 37.920278, 19.589578, 22.59827,
         60.518548, 3.010332948],
        [500, 2.763806, 162.806317, 299.778606, 465.348729, 6.81705, 88.291989, 73.446282,
         80.263332, 168.555321, 2.760807112],
        [1000, 6.372794, 329.023151, 600.438977, 935.834922, 15.644418, 159.951099, 186.335413,
         201.979831, 361.93093, 2.585672692],
        [2000, 14.960961, 679.357936, 1256.341536, 1950.660433, 31.699596, 313.154748, 387.063702,
         418.763298, 731.918046, 2.665135043],
        [5000, 39.880657, 1652.537536, 3067.98039, 4760.398583, 73.070203, 694.454719, 898.458633,
         971.528836, 1665.983555, 2.857410308]])

    # Only three columns of the table are needed for this chart.
    data_size = dat_performance[:, 0].astype(int)
    spark_io = dat_performance[:, 8]
    spark_proc = dat_performance[:, 6]

    E = np.arange(len(data_size))
    bar_width = 0.5

    fig, ax = plt.subplots()
    rects1 = ax.bar(E, spark_io, bar_width, align='edge')
    rects2 = ax.bar(E, spark_proc, bar_width, color='#e74c3c', bottom=spark_io,
                    align='edge')

    # Axis labels and one tick per bar, centred under the bar.
    plt.xlabel("Data size")
    ax.set_ylabel('Time(s)')
    ax.set_xticks(E + bar_width / 2)
    ax.set_xticklabels(data_size)

    ax.legend((rects1[0], rects2[0]), ('IO', 'CPU'), loc=2)

    # Annotate each stacked bar with the IO percentage of its total height.
    for io_rect, cpu_rect in zip(rects1, rects2):
        io_height = io_rect.get_height()
        total_height = io_height + cpu_rect.get_height()
        ax.text(io_rect.get_x() + io_rect.get_width() / 2, 1.005 * total_height,
                '%d%%' % int(100 * 1.0 * io_height / total_height),
                ha='center', va='bottom')

    plt.show()
202 | + | ||
203 | + | ||
def plot_line_performance():
    """Plot four timing series from the embedded table against data size.

    Column 0 of the table is the x axis.  Columns 4, 9, 8 and 6 are drawn, in
    that order, as 'serial total', 'spark total', 'spark io' and 'spark proc',
    each with circle markers and its own line style.
    """
    table = np.array([
        [100, 0.583396, 30.847788, 57.884814, 89.315998, 1.471087, 29.364628, 9.114235, 10.585322,
         39.94995, 2.235697366],
        [200, 1.147411, 62.815709, 118.217859, 182.180979, 3.008692, 37.920278, 19.589578, 22.59827,
         60.518548, 3.010332948],
        [500, 2.763806, 162.806317, 299.778606, 465.348729, 6.81705, 88.291989, 73.446282,
         80.263332, 168.555321, 2.760807112],
        [1000, 6.372794, 329.023151, 600.438977, 935.834922, 15.644418, 159.951099, 186.335413,
         201.979831, 361.93093, 2.585672692],
        [2000, 14.960961, 679.357936, 1256.341536, 1950.660433, 31.699596, 313.154748, 387.063702,
         418.763298, 731.918046, 2.665135043],
        [5000, 39.880657, 1652.537536, 3067.98039, 4760.398583, 73.070203, 694.454719, 898.458633,
         971.528836, 1665.983555, 2.857410308]])

    data_size = table[:, 0]

    # (table column, legend label, extra plot keywords), in drawing order.
    series = [
        (4, 'serial total', {}),
        (9, 'spark total', {'linestyle': '--'}),
        (8, 'spark io', {'linestyle': ':'}),
        (6, 'spark proc', {'linestyle': '-.'}),
    ]
    for column, label, extra in series:
        plt.plot(data_size, table[:, column], marker='o', label=label, **extra)

    plt.xlabel("Data size")
    plt.ylabel("Time(s)")
    plt.legend(loc=2)
    plt.show()
245 | + | ||
246 | + | ||
def plot_line_io():
    """Plot the eight timing series of the embedded IO table against data size.

    Column 0 of the table is the x axis.  The four "total" series are drawn
    first with solid lines, then the happybase io/cpu series with dashed
    lines.  Uncompressed series use circle markers, compressed ones diamonds.
    """
    measurements = np.array([
        [100, 10.585322, 29.364628, 39.94995, 10.286684, 27.079774, 37.366458, 49.995647,
         55.280739],
        [200, 22.59827, 37.920278, 60.518548, 22.731275, 38.491461, 61.222736, 76.258928,
         83.836657],
        [500, 80.263332, 88.291989, 168.555321, 64.610839, 88.241193, 152.852032, 177.039349,
         143.524813],
        [1000, 201.979831, 159.951099, 361.93093, 172.359455, 158.694248, 331.053703, 467.126756,
         315.578952],
        [2000, 418.763298, 313.154748, 731.918046, 390.990209, 313.085707, 704.075916, 802.138669,
         734.133909],
        [5000, 971.528836, 694.454719, 1665.983555, 898.468232, 717.603061, 1616.071293,
         1860.610954, 1677.044038]])

    data_size = measurements[:, 0]

    # (table column, marker, legend label), in drawing order.
    solid_series = [
        (7, 'o', 'dist-uncompressed total'),
        (8, 'D', 'dist-compressed total'),
        (3, 'o', 'happybase-uncompressed total'),
        (6, 'D', 'happybase-compressed total'),
    ]
    dashed_series = [
        (1, 'o', 'happybase-uncompressed io'),
        (4, 'D', 'happybase-compressed io'),
        (2, 'o', 'happybase-uncompressed cpu'),
        (5, 'D', 'happybase-compressed cpu'),
    ]

    for column, marker, label in solid_series:
        plt.plot(data_size, measurements[:, column], marker=marker, label=label)
    for column, marker, label in dashed_series:
        plt.plot(data_size, measurements[:, column], marker=marker, linestyle='--',
                 label=label)

    plt.xlabel("Data size")
    plt.ylabel("Time")
    plt.legend(loc=2)
    plt.show()
368 | + | ||
369 | + | ||
if __name__ == '__main__':
    # Script entry point: run the size analysis of the test table.
    # Swap in anal_ILSVRC(), anal_ILSVRC_Test() or pre_crop() to run those
    # steps instead.
    anal_0000()