From 201f2fd1d87181505dafcef1ecafbd6ed2dddf3a Mon Sep 17 00:00:00 2001
From: Chunk
Date: Sun, 19 Apr 2015 17:12:37 +0800
Subject: [PATCH] (ง •̀_•́)ง we're heading for modeling!

---
Review note: the added code was revised after export (crop.py now creates
its output directory and skips files PIL cannot open); the blob ids on the
"index" lines still describe the pre-revision content.

 mdata/ANALYSIS.py | 21 ++++++++++++++++++++-
 mdata/crop.py     | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 mdata/crop.py

diff --git a/mdata/ANALYSIS.py b/mdata/ANALYSIS.py
index 5b944e7..739a712 100644
--- a/mdata/ANALYSIS.py
+++ b/mdata/ANALYSIS.py
@@ -93,11 +93,30 @@ def anal_0000():
     df_ILS.hist(column='size',bins=100)
     plt.show()
 
+
+
+def pre_crop():
+    """Print the shape of the tag table and of its rows with width and height both >= 300."""
+    df_ILS = pd.read_csv('../res/file-tag-test.tsv', names=['hash', 'width', 'height', 'size', 'quality','chosen','class'], sep='\t')
+    print df_ILS.shape
+    print df_ILS[(df_ILS['width'] >= 300) & (df_ILS['height'] >= 300)].shape
+
+    # Recorded results (presumably: threshold, rows passing, share of all rows; '*' = threshold used):
+    # 300x300 4213 0.917 *
+    # 200x200 4534 0.987
+    # 400x400 932 0.202
+
+
+
+
 if __name__ == '__main__':
     # anal_ILSVRC()
     # anal_ILSVRC_Test()
-    anal_0000()
+    # anal_0000()
     # print timeit.timeit("anal_ILSVRC()", setup="from __main__ import anal_ILSVRC", number=1)
+
+
+    pre_crop()
     pass
 
 
diff --git a/mdata/crop.py b/mdata/crop.py
new file mode 100644
index 0000000..21accb8
--- /dev/null
+++ b/mdata/crop.py
@@ -0,0 +1,54 @@
+__author__ = 'chunk'
+
+import os, sys
+from PIL import Image
+from common import *  # NOTE(review): presumably the source of Timer -- confirm
+import errno
+import random
+
+base_dir = '/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/'
+category = 'Test'
+
+
+def crop_Test(size=300):
+    """Save one random size x size crop of each large-enough image.
+
+    Walks base_dir/category recursively and writes the crops, under the
+    original file names, into the flat directory base_dir/<category>_crop
+    (created if missing).  Images narrower or shorter than `size` and files
+    that PIL cannot open are skipped.
+
+    NOTE(review): equal file names in different sub-directories overwrite
+    each other in the flat output directory.
+    """
+    src_dir = os.path.join(base_dir, category)
+    dst_dir = os.path.join(base_dir, category + '_crop')
+
+    # Image.save() does not create missing directories.
+    try:
+        os.makedirs(dst_dir)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+
+    for path, _, files in os.walk(src_dir):
+        for name in files:
+            try:
+                im = Image.open(os.path.join(path, name))
+            except IOError:
+                # Not an image PIL can read: skip it, keep the batch going.
+                continue
+            w, h = im.size
+            if w < size or h < size:
+                continue
+            left, upper = random.randint(0, w - size), random.randint(0, h - size)
+            im = im.crop((left, upper, left + size, upper + size))
+            im.save(os.path.join(dst_dir, name))
+
+
+if __name__ == '__main__':
+    timer = Timer()
+
+    timer.mark()
+    crop_Test()
+    timer.report()
-- 
libgit2 0.21.2