Commit 24b3b616fd755a7830933426906f0049f1a68408
1 parent
0d9a20ea
Exists in
master
and in
2 other branches
staged.
Showing
13 changed files
with
44 additions
and
29 deletions
Show diff stats
.idea/ImageR.iml
@@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
2 | <module type="PYTHON_MODULE" version="4"> | 2 | <module type="PYTHON_MODULE" version="4"> |
3 | <component name="NewModuleRootManager"> | 3 | <component name="NewModuleRootManager"> |
4 | <content url="file://$MODULE_DIR$" /> | 4 | <content url="file://$MODULE_DIR$" /> |
5 | - <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" /> | 5 | + <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" /> |
6 | <orderEntry type="sourceFolder" forTests="false" /> | 6 | <orderEntry type="sourceFolder" forTests="false" /> |
7 | </component> | 7 | </component> |
8 | </module> | 8 | </module> |
9 | \ No newline at end of file | 9 | \ No newline at end of file |
.idea/misc.xml
@@ -20,7 +20,7 @@ | @@ -20,7 +20,7 @@ | ||
20 | <component name="ProjectModuleManager"> | 20 | <component name="ProjectModuleManager"> |
21 | <modules /> | 21 | <modules /> |
22 | </component> | 22 | </component> |
23 | - <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" /> | 23 | + <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" /> |
24 | <component name="RunManager"> | 24 | <component name="RunManager"> |
25 | <list size="0" /> | 25 | <list size="0" /> |
26 | </component> | 26 | </component> |
.idea/workspace.xml
@@ -30,9 +30,7 @@ | @@ -30,9 +30,7 @@ | ||
30 | <component name="ShelveChangesManager" show_recycled="false" /> | 30 | <component name="ShelveChangesManager" show_recycled="false" /> |
31 | <component name="TaskManager"> | 31 | <component name="TaskManager"> |
32 | <task active="true" id="Default" summary="Default task"> | 32 | <task active="true" id="Default" summary="Default task"> |
33 | - <created>1425541040516</created> | ||
34 | <option name="number" value="Default" /> | 33 | <option name="number" value="Default" /> |
35 | - <updated>1425541040516</updated> | ||
36 | </task> | 34 | </task> |
37 | <servers /> | 35 | <servers /> |
38 | </component> | 36 | </component> |
common.pyc
No preview for this file type
mdata/CV.py
@@ -20,6 +20,8 @@ class DataCV(DataDumperBase): | @@ -20,6 +20,8 @@ class DataCV(DataDumperBase): | ||
20 | self.data_dir = self.base_dir + self.category + '/' | 20 | self.data_dir = self.base_dir + self.category + '/' |
21 | self.dict_data = {} | 21 | self.dict_data = {} |
22 | 22 | ||
23 | + self.table_name = self.base_dir.split('/')[-2] + '-' + self.category | ||
24 | + | ||
23 | def format(self): | 25 | def format(self): |
24 | self.extract() | 26 | self.extract() |
25 | 27 | ||
@@ -29,7 +31,7 @@ class DataCV(DataDumperBase): | @@ -29,7 +31,7 @@ class DataCV(DataDumperBase): | ||
29 | img.save('res/tmp.jpg', format='JPEG') | 31 | img.save('res/tmp.jpg', format='JPEG') |
30 | image = 'res/tmp.jpg' | 32 | image = 'res/tmp.jpg' |
31 | 33 | ||
32 | - with open(image, 'rb') as f: | 34 | + with open('res/tmp.jpg', 'rb') as f: |
33 | index = md5(f.read()).hexdigest() | 35 | index = md5(f.read()).hexdigest() |
34 | 36 | ||
35 | self.dict_data[index] = ispos | 37 | self.dict_data[index] = ispos |
@@ -39,11 +41,11 @@ class DataCV(DataDumperBase): | @@ -39,11 +41,11 @@ class DataCV(DataDumperBase): | ||
39 | dir = self.img_dir + index[:3] + '/' | 41 | dir = self.img_dir + index[:3] + '/' |
40 | if not os.path.exists(dir): | 42 | if not os.path.exists(dir): |
41 | os.makedirs(dir) | 43 | os.makedirs(dir) |
42 | - image = dir + index[3:] + '.jpg' | ||
43 | - print image | 44 | + image_path = dir + index[3:] + '.jpg' |
45 | + # print image_path | ||
44 | 46 | ||
45 | - if not os.path.exists(image): | ||
46 | - shutil.copy(image, image) | 47 | + if not os.path.exists(image_path): |
48 | + shutil.copy(image, image_path) | ||
47 | else: | 49 | else: |
48 | pass | 50 | pass |
49 | 51 | ||
@@ -58,6 +60,10 @@ class DataCV(DataDumperBase): | @@ -58,6 +60,10 @@ class DataCV(DataDumperBase): | ||
58 | else: | 60 | else: |
59 | self._hash_copy(imagepath, False) | 61 | self._hash_copy(imagepath, False) |
60 | 62 | ||
63 | + | ||
64 | + def build_list(self): | ||
65 | + assert self.list_file != None | ||
66 | + | ||
61 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) | 67 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) |
62 | 68 | ||
63 | with open(self.list_file, 'w') as f: | 69 | with open(self.list_file, 'w') as f: |
@@ -98,7 +104,7 @@ class DataCV(DataDumperBase): | @@ -98,7 +104,7 @@ class DataCV(DataDumperBase): | ||
98 | with open(self.list_file, 'rb') as tsvfile: | 104 | with open(self.list_file, 'rb') as tsvfile: |
99 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 105 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
100 | for line in tsvfile: | 106 | for line in tsvfile: |
101 | - path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg' | 107 | + path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg' |
102 | if path_img: | 108 | if path_img: |
103 | with open(path_img, 'rb') as fpic: | 109 | with open(path_img, 'rb') as fpic: |
104 | dict_databuf[line[0] + '.jpg'] = fpic.read() | 110 | dict_databuf[line[0] + '.jpg'] = fpic.read() |
@@ -107,8 +113,8 @@ class DataCV(DataDumperBase): | @@ -107,8 +113,8 @@ class DataCV(DataDumperBase): | ||
107 | with self.table.batch(batch_size=5000) as b: | 113 | with self.table.batch(batch_size=5000) as b: |
108 | for imgname, imgdata in dict_databuf.items(): | 114 | for imgname, imgdata in dict_databuf.items(): |
109 | b.put(imgname, {'cf_pic:data': imgdata}) | 115 | b.put(imgname, {'cf_pic:data': imgdata}) |
110 | - raise ValueError("Something went wrong!") | ||
111 | except ValueError: | 116 | except ValueError: |
117 | + raise | ||
112 | pass | 118 | pass |
113 | 119 | ||
114 | 120 | ||
@@ -127,8 +133,8 @@ class DataCV(DataDumperBase): | @@ -127,8 +133,8 @@ class DataCV(DataDumperBase): | ||
127 | with self.table.batch(batch_size=5000) as b: | 133 | with self.table.batch(batch_size=5000) as b: |
128 | for imgname, imgtag in dict_tagbuf.items(): | 134 | for imgname, imgtag in dict_tagbuf.items(): |
129 | b.put(imgname, {'cf_tag:' + feattype: imgtag}) | 135 | b.put(imgname, {'cf_tag:' + feattype: imgtag}) |
130 | - raise ValueError("Something went wrong!") | ||
131 | except ValueError: | 136 | except ValueError: |
137 | + raise | ||
132 | pass | 138 | pass |
133 | 139 | ||
134 | 140 | ||
@@ -174,7 +180,7 @@ class DataCV(DataDumperBase): | @@ -174,7 +180,7 @@ class DataCV(DataDumperBase): | ||
174 | with self.table.batch(batch_size=5000) as b: | 180 | with self.table.batch(batch_size=5000) as b: |
175 | for imgname, featdesc in dict_featbuf.items(): | 181 | for imgname, featdesc in dict_featbuf.items(): |
176 | b.put(imgname, {'cf_feat:' + feattype: featdesc}) | 182 | b.put(imgname, {'cf_feat:' + feattype: featdesc}) |
177 | - raise ValueError("Something went wrong!") | ||
178 | except ValueError: | 183 | except ValueError: |
184 | + raise | ||
179 | pass | 185 | pass |
180 | 186 |
No preview for this file type
mdata/MSR.py
@@ -51,11 +51,11 @@ class DataMSR(DataDumperBase): | @@ -51,11 +51,11 @@ class DataMSR(DataDumperBase): | ||
51 | dir = self.img_dir + index[:3] + '/' | 51 | dir = self.img_dir + index[:3] + '/' |
52 | if not os.path.exists(dir): | 52 | if not os.path.exists(dir): |
53 | os.makedirs(dir) | 53 | os.makedirs(dir) |
54 | - image = dir + index[3:] + '.jpg' | ||
55 | - print image | 54 | + image_path = dir + index[3:] + '.jpg' |
55 | + print image_path | ||
56 | 56 | ||
57 | - if not os.path.exists(image): | ||
58 | - shutil.copy('res/tmp.jpg', image) | 57 | + if not os.path.exists(image_path): |
58 | + shutil.copy('res/tmp.jpg', image_path) | ||
59 | # or : | 59 | # or : |
60 | # img.save(image, format='JPEG') | 60 | # img.save(image, format='JPEG') |
61 | 61 | ||
@@ -115,12 +115,12 @@ class DataMSR(DataDumperBase): | @@ -115,12 +115,12 @@ class DataMSR(DataDumperBase): | ||
115 | with self.table.batch(batch_size=5000) as b: | 115 | with self.table.batch(batch_size=5000) as b: |
116 | for imgname, imgdata in dict_buffer.items(): | 116 | for imgname, imgdata in dict_buffer.items(): |
117 | b.put(imgname, {'cf_pic:data': imgdata}) | 117 | b.put(imgname, {'cf_pic:data': imgdata}) |
118 | - raise ValueError("Something went wrong!") | ||
119 | except ValueError: | 118 | except ValueError: |
119 | + raise | ||
120 | pass | 120 | pass |
121 | 121 | ||
122 | 122 | ||
123 | - def store_tag(self): | 123 | + def store_tag(self, feattype='retrieve'): |
124 | if self.table == None: | 124 | if self.table == None: |
125 | self.table = self.get_table() | 125 | self.table = self.get_table() |
126 | 126 | ||
@@ -130,19 +130,20 @@ class DataMSR(DataDumperBase): | @@ -130,19 +130,20 @@ class DataMSR(DataDumperBase): | ||
130 | with open(self.map_file, 'rb') as tsvfile: | 130 | with open(self.map_file, 'rb') as tsvfile: |
131 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
132 | for line in tsvfile: | 132 | for line in tsvfile: |
133 | - dict_namebuf[line[0]] = line[2] | 133 | + dict_namebuf[line[2]] = line[0] |
134 | 134 | ||
135 | with open(self.tag_file, 'rb') as tsvfile: | 135 | with open(self.tag_file, 'rb') as tsvfile: |
136 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
137 | for line in tsvfile: | 137 | for line in tsvfile: |
138 | - dict_tagbuf[line[-2]] = (line[:-2], line[-1]) | 138 | + dict_tagbuf[line[-2]] = (line[0].split(), line[-1]) |
139 | 139 | ||
140 | try: | 140 | try: |
141 | with self.table.batch(batch_size=5000) as b: | 141 | with self.table.batch(batch_size=5000) as b: |
142 | - for key, value in dict_tagbuf.items(): | ||
143 | - b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) | ||
144 | - raise ValueError("Something went wrong!") | 142 | + for key, data in self.table.scan(): |
143 | + value = dict_tagbuf[dict_namebuf[key[:-4]]] | ||
144 | + b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]}) | ||
145 | except ValueError: | 145 | except ValueError: |
146 | + raise | ||
146 | pass | 147 | pass |
147 | 148 | ||
148 | def get_feat(self, feattype): | 149 | def get_feat(self, feattype): |
mdata/MSR.pyc
No preview for this file type
mdata/__init__.pyc
No preview for this file type
No preview for this file type
mfeat/__init__.pyc
No preview for this file type
res/tmp.jpg
test_data.py
1 | __author__ = 'chunk' | 1 | __author__ = 'chunk' |
2 | 2 | ||
3 | +from mdata import MSR, CV | ||
3 | 4 | ||
4 | -from mdata import MSR | 5 | +def test_MSR(): |
6 | + dmsr = MSR.DataMSR() | ||
7 | + # msrd.format() | ||
8 | + # msrd.build_list() | ||
5 | 9 | ||
6 | -msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv') | ||
7 | -# msrd.format() | ||
8 | -# msrd.build_list() | 10 | + dmsr.store_image() |
11 | + dmsr.store_tag() | ||
9 | 12 | ||
10 | -print 'helllo' | ||
11 | \ No newline at end of file | 13 | \ No newline at end of file |
14 | +def test_CV(): | ||
15 | + dcv = CV.DataCV() | ||
16 | + dcv.format() | ||
17 | + dcv.build_list() | ||
18 | + | ||
19 | +if __name__ == '__main__': | ||
20 | + test_CV() | ||
21 | + print 'helllo ' | ||
12 | \ No newline at end of file | 22 | \ No newline at end of file |