staged.

Chunk
1 parent 0d9a20ea
Showing 13 changed files with 44 additions and 29 deletions Show diff stats
.idea/ImageR.iml
.idea/misc.xml
.idea/workspace.xml
common.pyc
mdata/CV.py
mdata/CV.pyc
mdata/MSR.py
mdata/MSR.pyc
mdata/__init__.pyc
mfeat/HOG.pyc
mfeat/__init__.pyc
res/tmp.jpg
test_data.py
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
 \ No newline at end of file
@@ -20,7 +20,7 @@
   <component name="ProjectModuleManager">
     <modules />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" />
   <component name="RunManager">
     <list size="0" />
   </component>
@@ -30,9 +30,7 @@
   <component name="ShelveChangesManager" show_recycled="false" />
   <component name="TaskManager">
     <task active="true" id="Default" summary="Default task">
-      <created>1425541040516</created>
       <option name="number" value="Default" />
-      <updated>1425541040516</updated>
     </task>
     <servers />
   </component>
@@ -20,6 +20,8 @@ class DataCV(DataDumperBase):
         self.data_dir = self.base_dir + self.category + '/'
         self.dict_data = {}
  
+        self.table_name = self.base_dir.split('/')[-2] + '-' + self.category
+
     def format(self):
         self.extract()
  
@@ -29,7 +31,7 @@ class DataCV(DataDumperBase):
             img.save('res/tmp.jpg', format='JPEG')
             image = 'res/tmp.jpg'
  
-        with open(image, 'rb') as f:
+        with open('res/tmp.jpg', 'rb') as f:
             index = md5(f.read()).hexdigest()
  
         self.dict_data[index] = ispos
@@ -39,11 +41,11 @@ class DataCV(DataDumperBase):
         dir = self.img_dir + index[:3] + '/'
         if not os.path.exists(dir):
             os.makedirs(dir)
-        image = dir + index[3:] + '.jpg'
-        print image
+        image_path = dir + index[3:] + '.jpg'
+        # print image_path
  
-        if not os.path.exists(image):
-            shutil.copy(image, image)
+        if not os.path.exists(image_path):
+            shutil.copy(image, image_path)
         else:
             pass
  
@@ -58,6 +60,10 @@ class DataCV(DataDumperBase):
                 else:
                     self._hash_copy(imagepath, False)
  
+
+    def build_list(self):
+        assert self.list_file != None
+
         ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0]))
  
         with open(self.list_file, 'w') as f:
@@ -98,7 +104,7 @@ class DataCV(DataDumperBase):
         with open(self.list_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg'
+                path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg'
                 if path_img:
                     with open(path_img, 'rb') as fpic:
                         dict_databuf[line[0] + '.jpg'] = fpic.read()
@@ -107,8 +113,8 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgdata in dict_databuf.items():
                     b.put(imgname, {'cf_pic:data': imgdata})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
  
  
@@ -127,8 +133,8 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgtag in dict_tagbuf.items():
                     b.put(imgname, {'cf_tag:' + feattype: imgtag})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
  
  
@@ -174,7 +180,7 @@ class DataCV(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, featdesc in dict_featbuf.items():
                     b.put(imgname, {'cf_feat:' + feattype: featdesc})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
  
@@ -51,11 +51,11 @@ class DataMSR(DataDumperBase):
         dir = self.img_dir + index[:3] + '/'
         if not os.path.exists(dir):
             os.makedirs(dir)
-        image = dir + index[3:] + '.jpg'
-        print image
+        image_path = dir + index[3:] + '.jpg'
+        print image_path
  
-        if not os.path.exists(image):
-            shutil.copy('res/tmp.jpg', image)
+        if not os.path.exists(image_path):
+            shutil.copy('res/tmp.jpg', image_path)
             # or :
             # img.save(image, format='JPEG')
  
@@ -115,12 +115,12 @@ class DataMSR(DataDumperBase):
             with self.table.batch(batch_size=5000) as b:
                 for imgname, imgdata in dict_buffer.items():
                     b.put(imgname, {'cf_pic:data': imgdata})
-                raise ValueError("Something went wrong!")
         except ValueError:
+            raise
             pass
  
  
-    def store_tag(self):
+    def store_tag(self, feattype='retrieve'):
         if self.table == None:
             self.table = self.get_table()
  
@@ -130,19 +130,20 @@ class DataMSR(DataDumperBase):
         with open(self.map_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                dict_namebuf[line[0]] = line[2]
+                dict_namebuf[line[2]] = line[0]
  
         with open(self.tag_file, 'rb') as tsvfile:
             tsvfile = csv.reader(tsvfile, delimiter='\t')
             for line in tsvfile:
-                dict_tagbuf[line[-2]] = (line[:-2], line[-1])
+                dict_tagbuf[line[-2]] = (line[0].split(), line[-1])
  
         try:
             with self.table.batch(batch_size=5000) as b:
-                for key, value in dict_tagbuf.items():
-                    b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]})
-                raise ValueError("Something went wrong!")
+                for key, data in self.table.scan():
+                    value = dict_tagbuf[dict_namebuf[key[:-4]]]
+                    b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]})
         except ValueError:
+            raise
             pass
  
     def get_feat(self, feattype):
 __author__ = 'chunk'
  
+from mdata import MSR, CV
  
-from mdata import MSR
+def test_MSR():
+    dmsr = MSR.DataMSR()
+    # msrd.format()
+    # msrd.build_list()
  
-msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv')
-# msrd.format()
-# msrd.build_list()
+    dmsr.store_image()
+    dmsr.store_tag()
  
-print 'helllo'
 \ No newline at end of file
+def test_CV():
+    dcv = CV.DataCV()
+    dcv.format()
+    dcv.build_list()
+
+if __name__ == '__main__':
+    test_CV()
+    print 'helllo '
 \ No newline at end of file
...	...	@@ -2,7 +2,7 @@
2	2	<module type="PYTHON_MODULE" version="4">
3	3	<component name="NewModuleRootManager">
4	4	<content url="file://$MODULE_DIR$" />
5		- <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" />
	5	+ <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" />
6	6	<orderEntry type="sourceFolder" forTests="false" />
7	7	</component>
8	8	</module>
9	9	\ No newline at end of file
...	...
...	...	@@ -20,7 +20,7 @@
20	20	<component name="ProjectModuleManager">
21	21	<modules />
22	22	</component>
23		- <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" />
	23	+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" />
24	24	<component name="RunManager">
25	25	<list size="0" />
26	26	</component>
...	...
...	...	@@ -30,9 +30,7 @@
30	30	<component name="ShelveChangesManager" show_recycled="false" />
31	31	<component name="TaskManager">
32	32	<task active="true" id="Default" summary="Default task">
33		- <created>1425541040516</created>
34	33	<option name="number" value="Default" />
35		- <updated>1425541040516</updated>
36	34	</task>
37	35	<servers />
38	36	</component>
...	...
...	...	@@ -20,6 +20,8 @@ class DataCV(DataDumperBase):
20	20	self.data_dir = self.base_dir + self.category + '/'
21	21	self.dict_data = {}
22	22
	23	+ self.table_name = self.base_dir.split('/')[-2] + '-' + self.category
	24	+
23	25	def format(self):
24	26	self.extract()
25	27
...	...	@@ -29,7 +31,7 @@ class DataCV(DataDumperBase):
29	31	img.save('res/tmp.jpg', format='JPEG')
30	32	image = 'res/tmp.jpg'
31	33
32		- with open(image, 'rb') as f:
	34	+ with open('res/tmp.jpg', 'rb') as f:
33	35	index = md5(f.read()).hexdigest()
34	36
35	37	self.dict_data[index] = ispos
...	...	@@ -39,11 +41,11 @@ class DataCV(DataDumperBase):
39	41	dir = self.img_dir + index[:3] + '/'
40	42	if not os.path.exists(dir):
41	43	os.makedirs(dir)
42		- image = dir + index[3:] + '.jpg'
43		- print image
	44	+ image_path = dir + index[3:] + '.jpg'
	45	+ # print image_path
44	46
45		- if not os.path.exists(image):
46		- shutil.copy(image, image)
	47	+ if not os.path.exists(image_path):
	48	+ shutil.copy(image, image_path)
47	49	else:
48	50	pass
49	51
...	...	@@ -58,6 +60,10 @@ class DataCV(DataDumperBase):
58	60	else:
59	61	self._hash_copy(imagepath, False)
60	62
	63	+
	64	+ def build_list(self):
	65	+ assert self.list_file != None
	66	+
61	67	ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0]))
62	68
63	69	with open(self.list_file, 'w') as f:
...	...	@@ -98,7 +104,7 @@ class DataCV(DataDumperBase):
98	104	with open(self.list_file, 'rb') as tsvfile:
99	105	tsvfile = csv.reader(tsvfile, delimiter='\t')
100	106	for line in tsvfile:
101		- path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg'
	107	+ path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg'
102	108	if path_img:
103	109	with open(path_img, 'rb') as fpic:
104	110	dict_databuf[line[0] + '.jpg'] = fpic.read()
...	...	@@ -107,8 +113,8 @@ class DataCV(DataDumperBase):
107	113	with self.table.batch(batch_size=5000) as b:
108	114	for imgname, imgdata in dict_databuf.items():
109	115	b.put(imgname, {'cf_pic:data': imgdata})
110		- raise ValueError("Something went wrong!")
111	116	except ValueError:
	117	+ raise
112	118	pass
113	119
114	120
...	...	@@ -127,8 +133,8 @@ class DataCV(DataDumperBase):
127	133	with self.table.batch(batch_size=5000) as b:
128	134	for imgname, imgtag in dict_tagbuf.items():
129	135	b.put(imgname, {'cf_tag:' + feattype: imgtag})
130		- raise ValueError("Something went wrong!")
131	136	except ValueError:
	137	+ raise
132	138	pass
133	139
134	140
...	...	@@ -174,7 +180,7 @@ class DataCV(DataDumperBase):
174	180	with self.table.batch(batch_size=5000) as b:
175	181	for imgname, featdesc in dict_featbuf.items():
176	182	b.put(imgname, {'cf_feat:' + feattype: featdesc})
177		- raise ValueError("Something went wrong!")
178	183	except ValueError:
	184	+ raise
179	185	pass
180	186
...	...
...	...	@@ -51,11 +51,11 @@ class DataMSR(DataDumperBase):
51	51	dir = self.img_dir + index[:3] + '/'
52	52	if not os.path.exists(dir):
53	53	os.makedirs(dir)
54		- image = dir + index[3:] + '.jpg'
55		- print image
	54	+ image_path = dir + index[3:] + '.jpg'
	55	+ print image_path
56	56
57		- if not os.path.exists(image):
58		- shutil.copy('res/tmp.jpg', image)
	57	+ if not os.path.exists(image_path):
	58	+ shutil.copy('res/tmp.jpg', image_path)
59	59	# or :
60	60	# img.save(image, format='JPEG')
61	61
...	...	@@ -115,12 +115,12 @@ class DataMSR(DataDumperBase):
115	115	with self.table.batch(batch_size=5000) as b:
116	116	for imgname, imgdata in dict_buffer.items():
117	117	b.put(imgname, {'cf_pic:data': imgdata})
118		- raise ValueError("Something went wrong!")
119	118	except ValueError:
	119	+ raise
120	120	pass
121	121
122	122
123		- def store_tag(self):
	123	+ def store_tag(self, feattype='retrieve'):
124	124	if self.table == None:
125	125	self.table = self.get_table()
126	126
...	...	@@ -130,19 +130,20 @@ class DataMSR(DataDumperBase):
130	130	with open(self.map_file, 'rb') as tsvfile:
131	131	tsvfile = csv.reader(tsvfile, delimiter='\t')
132	132	for line in tsvfile:
133		- dict_namebuf[line[0]] = line[2]
	133	+ dict_namebuf[line[2]] = line[0]
134	134
135	135	with open(self.tag_file, 'rb') as tsvfile:
136	136	tsvfile = csv.reader(tsvfile, delimiter='\t')
137	137	for line in tsvfile:
138		- dict_tagbuf[line[-2]] = (line[:-2], line[-1])
	138	+ dict_tagbuf[line[-2]] = (line[0].split(), line[-1])
139	139
140	140	try:
141	141	with self.table.batch(batch_size=5000) as b:
142		- for key, value in dict_tagbuf.items():
143		- b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]})
144		- raise ValueError("Something went wrong!")
	142	+ for key, data in self.table.scan():
	143	+ value = dict_tagbuf[dict_namebuf[key[:-4]]]
	144	+ b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]})
145	145	except ValueError:
	146	+ raise
146	147	pass
147	148
148	149	def get_feat(self, feattype):
...	...
1	1	__author__ = 'chunk'
2	2
	3	+from mdata import MSR, CV
3	4
4		-from mdata import MSR
	5	+def test_MSR():
	6	+ dmsr = MSR.DataMSR()
	7	+ # msrd.format()
	8	+ # msrd.build_list()
5	9
6		-msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv')
7		-# msrd.format()
8		-# msrd.build_list()
	10	+ dmsr.store_image()
	11	+ dmsr.store_tag()
9	12
10		-print 'helllo'
11	13	\ No newline at end of file
	14	+def test_CV():
	15	+ dcv = CV.DataCV()
	16	+ dcv.format()
	17	+ dcv.build_list()
	18	+
	19	+if __name__ == '__main__':
	20	+ test_CV()
	21	+ print 'helllo '
12	22	\ No newline at end of file
...	...