From ec755e37654e61074fe94b3fb10932bb14c0d856 Mon Sep 17 00:00:00 2001 From: Chunk Date: Mon, 20 Apr 2015 11:00:41 +0800 Subject: [PATCH] cropping. --- mdata/ILSVRC.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- test/test_data.py | 14 +++++++++++++- 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py index 266684c..58d80a8 100644 --- a/mdata/ILSVRC.py +++ b/mdata/ILSVRC.py @@ -22,6 +22,7 @@ import numpy as np from numpy.random import randn import pandas as pd from scipy import stats +import random from subprocess import Popen, PIPE, STDOUT @@ -268,6 +269,39 @@ class DataILSVRC(DataDumperBase): def embed(self, rate=None): self._embed_inner(rate) + + def crop(self, size=(300, 300)): + for path, subdirs, files in os.walk(self.data_dir): + for name in files: + image = os.path.join(path, name) + print image + + W, H = size + try: + im = Image.open(image) + w, h = im.size + if w < W or h < H: + continue + left, upper = random.randint(0, w - W), random.randint(0, h - H) + im = im.crop((left, upper, left + W, upper + H)) + im.save(os.path.join(self.data_dir + '_crop_pil', name)) + except Exception as e: + print '[EXCPT]', e + pass + + # try: + # img = cv2.imread(image, cv2.CV_LOAD_IMAGE_UNCHANGED) + # h, w = img.shape[:2] + # if w < 300 or h < 300: + # continue + # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) + # img_crop = img[upper:upper + 300, left:left + 300] + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) + # except Exception as e: + # print '[EXCPT]', e + # pass + + def get_table(self): if self.table != None: return self.table @@ -410,30 +444,44 @@ class DataILSVRC(DataDumperBase): dict_dataset = {} - with open(self.list_file, 'rb') as tsvfile: - tsvfile = csv.reader(tsvfile, delimiter='\t') - for line in tsvfile: - hash = line[0] - tag = line[-1] - path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) - if path_feat: - with open(path_feat, 'rb') as featfile: - dict_dataset[hash] = (tag, json.loads(featfile.read())) + if feattype == 'coef': # raw + with open(self.list_file, 'rb') as tsvfile: + tsvfile = csv.reader(tsvfile, delimiter='\t') + for line in tsvfile: + hash = line[0] + tag = line[-1] + image = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.jpg') + if image: + im = Jpeg(image, key=sample_key) + dict_dataset[hash] = (tag, im.getCoefBlocks('Y')) + + else: + with open(self.list_file, 'rb') as tsvfile: + tsvfile = csv.reader(tsvfile, delimiter='\t') + for line in tsvfile: + hash = line[0] + tag = line[-1] + path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) + if path_feat: + with open(path_feat, 'rb') as featfile: + dict_dataset[hash] = (tag, json.loads(featfile.read())) for tag, feat in dict_dataset.values(): - X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) + # X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) + X.append(np.array(feat).ravel().tolist()) Y.append(int(tag)) - elif mode == "remote" or mode == "hbase": + elif mode == "hbase": # remote if self.table == None: self.table = self.get_table() col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype for key, data in self.table.scan(columns=[col_feat, col_tag]): - X.append([item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) + X.append( + [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) Y.append(int(data[col_tag])) - elif mode == "spark" or mode == "cluster": + elif mode == "spark": # cluster if self.sparker == None: self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') diff --git a/test/test_data.py b/test/test_data.py index a183365..80835d6 100755 --- a/test/test_data.py +++ b/test/test_data.py @@ -135,7 +135,19 @@ def test_pipeline(): def test_crop(): - crop.crop_Test() + # crop.crop_Test() + dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1') + dil.crop() + + dil2 = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1_crop_pil') + + dil.format() + dil.embed(rate=0.2) + + X,Y = dil2.load_data(mode='local',feattype='coef') + print X[0] + print Y + if __name__ == '__main__': # test_MSR() -- libgit2 0.21.2