From ec755e37654e61074fe94b3fb10932bb14c0d856 Mon Sep 17 00:00:00 2001
From: Chunk <chunkplus@gmail.com>
Date: Mon, 20 Apr 2015 11:00:41 +0800
Subject: [PATCH] Add DataILSVRC.crop() and a 'coef' feature type to load_data

---
 mdata/ILSVRC.py   | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
 test/test_data.py | 14 +++++++++++++-
 2 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py
index 266684c..58d80a8 100644
--- a/mdata/ILSVRC.py
+++ b/mdata/ILSVRC.py
@@ -22,6 +22,7 @@ import numpy as np
 from numpy.random import randn
 import pandas as pd
 from scipy import stats
+import random
 
 from subprocess import Popen, PIPE, STDOUT
 
@@ -268,6 +269,39 @@ class DataILSVRC(DataDumperBase):
     def embed(self, rate=None):
         self._embed_inner(rate)
 
+
+    def crop(self, size=(300, 300)):
+        """Save one random crop of every image found under ``self.data_dir``.
+
+        Walks ``self.data_dir`` recursively, takes a random ``size`` window
+        from each image at least that large, and writes it under the same
+        file name into the sibling directory ``self.data_dir + '_crop_pil'``.
+        Images smaller than ``size`` are skipped; per-file failures are
+        printed and do not stop the walk.
+
+        :param size: ``(width, height)`` of the crop window in pixels.
+        """
+        W, H = size
+        out_dir = self.data_dir + '_crop_pil'
+        # Image.save does not create missing directories, so make it up front.
+        if not os.path.isdir(out_dir):
+            os.makedirs(out_dir)
+
+        for path, subdirs, files in os.walk(self.data_dir):
+            for name in files:
+                image = os.path.join(path, name)
+                print image
+                try:
+                    im = Image.open(image)
+                    w, h = im.size
+                    if w < W or h < H:
+                        continue
+                    left, upper = random.randint(0, w - W), random.randint(0, h - H)
+                    im.crop((left, upper, left + W, upper + H)).save(os.path.join(out_dir, name))
+                except Exception as e:
+                    print '[EXCPT]', e
+
+
     def get_table(self):
         if self.table != None:
             return self.table
@@ -410,30 +444,44 @@ class DataILSVRC(DataDumperBase):
 
             dict_dataset = {}
 
-            with open(self.list_file, 'rb') as tsvfile:
-                tsvfile = csv.reader(tsvfile, delimiter='\t')
-                for line in tsvfile:
-                    hash = line[0]
-                    tag = line[-1]
-                    path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype)
-                    if path_feat:
-                        with open(path_feat, 'rb') as featfile:
-                            dict_dataset[hash] = (tag, json.loads(featfile.read()))
+            if feattype == 'coef':  # raw
+                with open(self.list_file, 'rb') as tsvfile:
+                    tsvfile = csv.reader(tsvfile, delimiter='\t')
+                    for line in tsvfile:
+                        hash = line[0]
+                        tag = line[-1]
+                        image = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.jpg')
+                        if image:
+                            im = Jpeg(image, key=sample_key)
+                            dict_dataset[hash] = (tag, im.getCoefBlocks('Y'))
+
+            else:
+                with open(self.list_file, 'rb') as tsvfile:
+                    tsvfile = csv.reader(tsvfile, delimiter='\t')
+                    for line in tsvfile:
+                        hash = line[0]
+                        tag = line[-1]
+                        path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype)
+                        if path_feat:
+                            with open(path_feat, 'rb') as featfile:
+                                dict_dataset[hash] = (tag, json.loads(featfile.read()))
 
             for tag, feat in dict_dataset.values():
-                X.append([item for sublist in feat for subsublist in sublist for item in subsublist])
+                # X.append([item for sublist in feat for subsublist in sublist for item in subsublist])
+                X.append(np.array(feat).ravel().tolist())
                 Y.append(int(tag))
 
-        elif mode == "remote" or mode == "hbase":
+        elif mode == "hbase":  # remote
             if self.table == None:
                 self.table = self.get_table()
 
             col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype
             for key, data in self.table.scan(columns=[col_feat, col_tag]):
-                X.append([item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist])
+                X.append(
+                    [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist])
                 Y.append(int(data[col_tag]))
 
-        elif mode == "spark" or mode == "cluster":
+        elif mode == "spark":  # cluster
             if self.sparker == None:
                 self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
 
diff --git a/test/test_data.py b/test/test_data.py
index a183365..80835d6 100755
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -135,7 +135,19 @@ def test_pipeline():
 
 
 def test_crop():
-    crop.crop_Test()
+    # Exercise DataILSVRC.crop(), then load 'coef' data (replaces crop.crop_Test()).
+    dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1')
+    dil.crop()  # saves random crops into dil.data_dir + '_crop_pil'
+    # presumably points at the directory crop() just wrote -- TODO confirm data_dir is base_dir/category
+    dil2 = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1_crop_pil')
+    # NOTE(review): format()/embed() run on dil, but the data below is loaded from dil2 -- confirm intended
+    dil.format()
+    dil.embed(rate=0.2)
+    # feattype='coef' selects the load_data branch that reads 'Y' coefficient blocks from .jpg files
+    X,Y = dil2.load_data(mode='local',feattype='coef')
+    print X[0]
+    print Y
+
 
 if __name__ == '__main__':
     # test_MSR()
--
libgit2 0.21.2