Commit 24b3b616fd755a7830933426906f0049f1a68408
1 parent: 0d9a20ea
Exists in master and in 2 other branches
staged.
Showing 13 changed files with 44 additions and 29 deletions
Show diff stats
.idea/ImageR.iml
... | ... | @@ -2,7 +2,7 @@ |
2 | 2 | <module type="PYTHON_MODULE" version="4"> |
3 | 3 | <component name="NewModuleRootManager"> |
4 | 4 | <content url="file://$MODULE_DIR$" /> |
5 | - <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" /> | |
5 | + <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" /> | |
6 | 6 | <orderEntry type="sourceFolder" forTests="false" /> |
7 | 7 | </component> |
8 | 8 | </module> |
9 | 9 | \ No newline at end of file | ... | ... |
.idea/misc.xml
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | <component name="ProjectModuleManager"> |
21 | 21 | <modules /> |
22 | 22 | </component> |
23 | - <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" /> | |
23 | + <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" /> | |
24 | 24 | <component name="RunManager"> |
25 | 25 | <list size="0" /> |
26 | 26 | </component> | ... | ... |
.idea/workspace.xml
... | ... | @@ -30,9 +30,7 @@ |
30 | 30 | <component name="ShelveChangesManager" show_recycled="false" /> |
31 | 31 | <component name="TaskManager"> |
32 | 32 | <task active="true" id="Default" summary="Default task"> |
33 | - <created>1425541040516</created> | |
34 | 33 | <option name="number" value="Default" /> |
35 | - <updated>1425541040516</updated> | |
36 | 34 | </task> |
37 | 35 | <servers /> |
38 | 36 | </component> | ... | ... |
common.pyc
No preview for this file type
mdata/CV.py
... | ... | @@ -20,6 +20,8 @@ class DataCV(DataDumperBase): |
20 | 20 | self.data_dir = self.base_dir + self.category + '/' |
21 | 21 | self.dict_data = {} |
22 | 22 | |
23 | + self.table_name = self.base_dir.split('/')[-2] + '-' + self.category | |
24 | + | |
23 | 25 | def format(self): |
24 | 26 | self.extract() |
25 | 27 | |
... | ... | @@ -29,7 +31,7 @@ class DataCV(DataDumperBase): |
29 | 31 | img.save('res/tmp.jpg', format='JPEG') |
30 | 32 | image = 'res/tmp.jpg' |
31 | 33 | |
32 | - with open(image, 'rb') as f: | |
34 | + with open('res/tmp.jpg', 'rb') as f: | |
33 | 35 | index = md5(f.read()).hexdigest() |
34 | 36 | |
35 | 37 | self.dict_data[index] = ispos |
... | ... | @@ -39,11 +41,11 @@ class DataCV(DataDumperBase): |
39 | 41 | dir = self.img_dir + index[:3] + '/' |
40 | 42 | if not os.path.exists(dir): |
41 | 43 | os.makedirs(dir) |
42 | - image = dir + index[3:] + '.jpg' | |
43 | - print image | |
44 | + image_path = dir + index[3:] + '.jpg' | |
45 | + # print image_path | |
44 | 46 | |
45 | - if not os.path.exists(image): | |
46 | - shutil.copy(image, image) | |
47 | + if not os.path.exists(image_path): | |
48 | + shutil.copy(image, image_path) | |
47 | 49 | else: |
48 | 50 | pass |
49 | 51 | |
... | ... | @@ -58,6 +60,10 @@ class DataCV(DataDumperBase): |
58 | 60 | else: |
59 | 61 | self._hash_copy(imagepath, False) |
60 | 62 | |
63 | + | |
64 | + def build_list(self): | |
65 | + assert self.list_file != None | |
66 | + | |
61 | 67 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) |
62 | 68 | |
63 | 69 | with open(self.list_file, 'w') as f: |
... | ... | @@ -98,7 +104,7 @@ class DataCV(DataDumperBase): |
98 | 104 | with open(self.list_file, 'rb') as tsvfile: |
99 | 105 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
100 | 106 | for line in tsvfile: |
101 | - path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg' | |
107 | + path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg' | |
102 | 108 | if path_img: |
103 | 109 | with open(path_img, 'rb') as fpic: |
104 | 110 | dict_databuf[line[0] + '.jpg'] = fpic.read() |
... | ... | @@ -107,8 +113,8 @@ class DataCV(DataDumperBase): |
107 | 113 | with self.table.batch(batch_size=5000) as b: |
108 | 114 | for imgname, imgdata in dict_databuf.items(): |
109 | 115 | b.put(imgname, {'cf_pic:data': imgdata}) |
110 | - raise ValueError("Something went wrong!") | |
111 | 116 | except ValueError: |
117 | + raise | |
112 | 118 | pass |
113 | 119 | |
114 | 120 | |
... | ... | @@ -127,8 +133,8 @@ class DataCV(DataDumperBase): |
127 | 133 | with self.table.batch(batch_size=5000) as b: |
128 | 134 | for imgname, imgtag in dict_tagbuf.items(): |
129 | 135 | b.put(imgname, {'cf_tag:' + feattype: imgtag}) |
130 | - raise ValueError("Something went wrong!") | |
131 | 136 | except ValueError: |
137 | + raise | |
132 | 138 | pass |
133 | 139 | |
134 | 140 | |
... | ... | @@ -174,7 +180,7 @@ class DataCV(DataDumperBase): |
174 | 180 | with self.table.batch(batch_size=5000) as b: |
175 | 181 | for imgname, featdesc in dict_featbuf.items(): |
176 | 182 | b.put(imgname, {'cf_feat:' + feattype: featdesc}) |
177 | - raise ValueError("Something went wrong!") | |
178 | 183 | except ValueError: |
184 | + raise | |
179 | 185 | pass |
180 | 186 | ... | ... |
No preview for this file type
mdata/MSR.py
... | ... | @@ -51,11 +51,11 @@ class DataMSR(DataDumperBase): |
51 | 51 | dir = self.img_dir + index[:3] + '/' |
52 | 52 | if not os.path.exists(dir): |
53 | 53 | os.makedirs(dir) |
54 | - image = dir + index[3:] + '.jpg' | |
55 | - print image | |
54 | + image_path = dir + index[3:] + '.jpg' | |
55 | + print image_path | |
56 | 56 | |
57 | - if not os.path.exists(image): | |
58 | - shutil.copy('res/tmp.jpg', image) | |
57 | + if not os.path.exists(image_path): | |
58 | + shutil.copy('res/tmp.jpg', image_path) | |
59 | 59 | # or : |
60 | 60 | # img.save(image, format='JPEG') |
61 | 61 | |
... | ... | @@ -115,12 +115,12 @@ class DataMSR(DataDumperBase): |
115 | 115 | with self.table.batch(batch_size=5000) as b: |
116 | 116 | for imgname, imgdata in dict_buffer.items(): |
117 | 117 | b.put(imgname, {'cf_pic:data': imgdata}) |
118 | - raise ValueError("Something went wrong!") | |
119 | 118 | except ValueError: |
119 | + raise | |
120 | 120 | pass |
121 | 121 | |
122 | 122 | |
123 | - def store_tag(self): | |
123 | + def store_tag(self, feattype='retrieve'): | |
124 | 124 | if self.table == None: |
125 | 125 | self.table = self.get_table() |
126 | 126 | |
... | ... | @@ -130,19 +130,20 @@ class DataMSR(DataDumperBase): |
130 | 130 | with open(self.map_file, 'rb') as tsvfile: |
131 | 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
132 | 132 | for line in tsvfile: |
133 | - dict_namebuf[line[0]] = line[2] | |
133 | + dict_namebuf[line[2]] = line[0] | |
134 | 134 | |
135 | 135 | with open(self.tag_file, 'rb') as tsvfile: |
136 | 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
137 | 137 | for line in tsvfile: |
138 | - dict_tagbuf[line[-2]] = (line[:-2], line[-1]) | |
138 | + dict_tagbuf[line[-2]] = (line[0].split(), line[-1]) | |
139 | 139 | |
140 | 140 | try: |
141 | 141 | with self.table.batch(batch_size=5000) as b: |
142 | - for key, value in dict_tagbuf.items(): | |
143 | - b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) | |
144 | - raise ValueError("Something went wrong!") | |
142 | + for key, data in self.table.scan(): | |
143 | + value = dict_tagbuf[dict_namebuf[key[:-4]]] | |
144 | + b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]}) | |
145 | 145 | except ValueError: |
146 | + raise | |
146 | 147 | pass |
147 | 148 | |
148 | 149 | def get_feat(self, feattype): | ... | ... |
mdata/MSR.pyc
No preview for this file type
mdata/__init__.pyc
No preview for this file type
No preview for this file type
mfeat/__init__.pyc
No preview for this file type
res/tmp.jpg
test_data.py
1 | 1 | __author__ = 'chunk' |
2 | 2 | |
3 | +from mdata import MSR, CV | |
3 | 4 | |
4 | -from mdata import MSR | |
5 | +def test_MSR(): | |
6 | + dmsr = MSR.DataMSR() | |
7 | + # msrd.format() | |
8 | + # msrd.build_list() | |
5 | 9 | |
6 | -msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv') | |
7 | -# msrd.format() | |
8 | -# msrd.build_list() | |
10 | + dmsr.store_image() | |
11 | + dmsr.store_tag() | |
9 | 12 | |
10 | -print 'helllo' | |
11 | 13 | \ No newline at end of file |
14 | +def test_CV(): | |
15 | + dcv = CV.DataCV() | |
16 | + dcv.format() | |
17 | + dcv.build_list() | |
18 | + | |
19 | +if __name__ == '__main__': | |
20 | + test_CV() | |
21 | + print 'helllo ' | |
12 | 22 | \ No newline at end of file | ... | ... |