staged.

Chunk
1 parent 0d9a20ea
Showing 13 changed files with 44 additions and 29 deletions Show diff stats
.idea/ImageR.iml
.idea/misc.xml
.idea/workspace.xml
common.pyc
mdata/CV.py
mdata/CV.pyc
mdata/MSR.py
mdata/MSR.pyc
mdata/__init__.pyc
mfeat/HOG.pyc
mfeat/__init__.pyc
res/tmp.jpg
test_data.py
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
 \ No newline at end of file
@@ -20,7 +20,7 @@
   <component name="ProjectModuleManager">
     <modules />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" />
   <component name="RunManager">
     <list size="0" />
   </component>
@@ -30,9 +30,7 @@
   <component name="ShelveChangesManager" show_recycled="false" />
   <component name="TaskManager">
     <task active="true" id="Default" summary="Default task">
-      <created>1425541040516</created>
       <option name="number" value="Default" />
-      <updated>1425541040516</updated>
     </task>
     <servers />
   </component>
@@ -20,6 +20,8 @@ class DataCV(DataDumperBase):
         self.data_dir = self.base_dir + self.category + '/'
         self.dict_data = {}
+        self.table_name = self.base_dir.split('/')[-2] + '-' + self.category
+
     def format(self):
         self.extract()
@@ -29,7 +31,7 @@ class DataCV(DataDumperBase):
             img.save('res/tmp.jpg', format='JPEG')
             image = 'res/tmp.jpg'
-        with open(image, 'rb') as f:
+        with open('res/tmp.jpg', 'rb') as f:
             index = md5(f.read()).hexdigest()
         self.dict_data[index] = ispos
@@ -39,11 +41,11 @@ class DataCV(DataDumperBase):
         dir = self.img_dir + index[:3] + '/'
         if not os.path.exists(dir):
             os.makedirs(dir)
-        image = dir + index[3:] + '.jpg'
-        print image
+        image_path = dir + index[3:] + '.jpg'
+        # print image_path
-        if not os.path.exists(image):
-            shutil.copy(image, image)
+        if not os.path.exists(image_path):
+            shutil.copy(image, image_path)
         else:
             pass
@@ -58,6 +60,10 @@ class DataCV(DataDumperBase):
                 else:
                     self._hash_copy(imagepath, False)
+
+    def build_list(self):
+        assert self.list_file != None
+
         ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0]))
         with open(self.list_file, 'w') as f:
@@ -98,7 +104,7 @@ class DataCV(DataDumperBase):
         with open(self.list_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg'
+                path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg'
                 if path_img:
                     with open(path_img, 'rb') as fpic:
                         dict_databuf[line[0] + '.jpg'] = fpic.read()
@@ -107,8 +113,8 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgdata in dict_databuf.items():
                     b.put(imgname, {'cf_pic:data': imgdata})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
@@ -127,8 +133,8 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgtag in dict_tagbuf.items():
                     b.put(imgname, {'cf_tag:' + feattype: imgtag})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
@@ -174,7 +180,7 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, featdesc in dict_featbuf.items():
                     b.put(imgname, {'cf_feat:' + feattype: featdesc})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
@@ -51,11 +51,11 @@ class DataMSR(DataDumperBase):
         dir = self.img_dir + index[:3] + '/'
         if not os.path.exists(dir):
             os.makedirs(dir)
-        image = dir + index[3:] + '.jpg'
-        print image
+        image_path = dir + index[3:] + '.jpg'
+        print image_path
-        if not os.path.exists(image):
-            shutil.copy('res/tmp.jpg', image)
+        if not os.path.exists(image_path):
+            shutil.copy('res/tmp.jpg', image_path)
             # or :
             # img.save(image, format='JPEG')
@@ -115,12 +115,12 @@ class DataMSR(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgdata in dict_buffer.items():
                     b.put(imgname, {'cf_pic:data': imgdata})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
-    def store_tag(self):
+    def store_tag(self, feattype='retrieve'):
         if self.table == None:
             self.table = self.get_table()
@@ -130,19 +130,20 @@ class DataMSR(DataDumperBase):
         with open(self.map_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                dict_namebuf[line[0]] = line[2]
+                dict_namebuf[line[2]] = line[0]
         with open(self.tag_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                dict_tagbuf[line[-2]] = (line[:-2], line[-1])
+                dict_tagbuf[line[-2]] = (line[0].split(), line[-1])
         try:
             with self.table.batch(batch_size=5000) as b:
-                for key, value in dict_tagbuf.items():
-                    b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]})
-                raise ValueError("Something went wrong!")
+                for key, data in self.table.scan():
+                    value = dict_tagbuf[dict_namebuf[key[:-4]]]
+                    b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]})
         except ValueError:
+            raise
             pass
     def get_feat(self, feattype):
 __author__ = 'chunk'
+from mdata import MSR, CV
-from mdata import MSR
+def test_MSR():
+    dmsr = MSR.DataMSR()
+    # msrd.format()
+    # msrd.build_list()
-msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv')
-# msrd.format()
-# msrd.build_list()
+    dmsr.store_image()
+    dmsr.store_tag()
-print 'helllo'
 \ No newline at end of file
+def test_CV():
+    dcv = CV.DataCV()
+    dcv.format()
+    dcv.build_list()
+
+if __name__ == '__main__':
+    test_CV()
+    print 'helllo '
 \ No newline at end of file
	@@ -2,7 +2,7 @@		@@ -2,7 +2,7 @@
2	<module type="PYTHON_MODULE" version="4">	2	<module type="PYTHON_MODULE" version="4">
3	<component name="NewModuleRootManager">	3	<component name="NewModuleRootManager">
4	<content url="file://$MODULE_DIR$" />	4	<content url="file://$MODULE_DIR$" />
5	- <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" />	5	+ <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" />
6	<orderEntry type="sourceFolder" forTests="false" />	6	<orderEntry type="sourceFolder" forTests="false" />
7	</component>	7	</component>
8	</module>	8	</module>
9	\ No newline at end of file	9	\ No newline at end of file
	@@ -20,7 +20,7 @@		@@ -20,7 +20,7 @@
20	<component name="ProjectModuleManager">	20	<component name="ProjectModuleManager">
21	<modules />	21	<modules />
22	</component>	22	</component>
23	- <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" />	23	+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" />
24	<component name="RunManager">	24	<component name="RunManager">
25	<list size="0" />	25	<list size="0" />
26	</component>	26	</component>
1	__author__ = 'chunk'	1	__author__ = 'chunk'
2		2
		3	+from mdata import MSR, CV
3		4
4	-from mdata import MSR	5	+def test_MSR():
		6	+ dmsr = MSR.DataMSR()
		7	+ # msrd.format()
		8	+ # msrd.build_list()
5		9
6	-msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv')
7	-# msrd.format()
8	-# msrd.build_list()	10	+ dmsr.store_image()
		11	+ dmsr.store_tag()
9		12
10	-print 'helllo'
11	\ No newline at end of file	13	\ No newline at end of file
		14	+def test_CV():
		15	+ dcv = CV.DataCV()
		16	+ dcv.format()
		17	+ dcv.build_list()
		18	+
		19	+if __name__ == '__main__':
		20	+ test_CV()
		21	+ print 'helllo '
12	\ No newline at end of file	22	\ No newline at end of file