Commit 24b3b616fd755a7830933426906f0049f1a68408
1 parent
0d9a20ea
Exists in
master
and in
2 other branches
staged.
Showing
13 changed files
with
44 additions
and
29 deletions
Show diff stats
.idea/ImageR.iml
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | <module type="PYTHON_MODULE" version="4"> | 2 | <module type="PYTHON_MODULE" version="4"> |
| 3 | <component name="NewModuleRootManager"> | 3 | <component name="NewModuleRootManager"> |
| 4 | <content url="file://$MODULE_DIR$" /> | 4 | <content url="file://$MODULE_DIR$" /> |
| 5 | - <orderEntry type="jdk" jdkName="Python 2.7.6 virtualenv at ~/.virtualenvs/env0" jdkType="Python SDK" /> | 5 | + <orderEntry type="jdk" jdkName="Python 2.7.8 virtualenv at ~/.virtualenvs/env1" jdkType="Python SDK" /> |
| 6 | <orderEntry type="sourceFolder" forTests="false" /> | 6 | <orderEntry type="sourceFolder" forTests="false" /> |
| 7 | </component> | 7 | </component> |
| 8 | </module> | 8 | </module> |
| 9 | \ No newline at end of file | 9 | \ No newline at end of file |
.idea/misc.xml
| @@ -20,7 +20,7 @@ | @@ -20,7 +20,7 @@ | ||
| 20 | <component name="ProjectModuleManager"> | 20 | <component name="ProjectModuleManager"> |
| 21 | <modules /> | 21 | <modules /> |
| 22 | </component> | 22 | </component> |
| 23 | - <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python2.7)" project-jdk-type="Python SDK" /> | 23 | + <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.6.6 (/usr/bin/python)" project-jdk-type="Python SDK" /> |
| 24 | <component name="RunManager"> | 24 | <component name="RunManager"> |
| 25 | <list size="0" /> | 25 | <list size="0" /> |
| 26 | </component> | 26 | </component> |
.idea/workspace.xml
| @@ -30,9 +30,7 @@ | @@ -30,9 +30,7 @@ | ||
| 30 | <component name="ShelveChangesManager" show_recycled="false" /> | 30 | <component name="ShelveChangesManager" show_recycled="false" /> |
| 31 | <component name="TaskManager"> | 31 | <component name="TaskManager"> |
| 32 | <task active="true" id="Default" summary="Default task"> | 32 | <task active="true" id="Default" summary="Default task"> |
| 33 | - <created>1425541040516</created> | ||
| 34 | <option name="number" value="Default" /> | 33 | <option name="number" value="Default" /> |
| 35 | - <updated>1425541040516</updated> | ||
| 36 | </task> | 34 | </task> |
| 37 | <servers /> | 35 | <servers /> |
| 38 | </component> | 36 | </component> |
common.pyc
No preview for this file type
mdata/CV.py
| @@ -20,6 +20,8 @@ class DataCV(DataDumperBase): | @@ -20,6 +20,8 @@ class DataCV(DataDumperBase): | ||
| 20 | self.data_dir = self.base_dir + self.category + '/' | 20 | self.data_dir = self.base_dir + self.category + '/' |
| 21 | self.dict_data = {} | 21 | self.dict_data = {} |
| 22 | 22 | ||
| 23 | + self.table_name = self.base_dir.split('/')[-2] + '-' + self.category | ||
| 24 | + | ||
| 23 | def format(self): | 25 | def format(self): |
| 24 | self.extract() | 26 | self.extract() |
| 25 | 27 | ||
| @@ -29,7 +31,7 @@ class DataCV(DataDumperBase): | @@ -29,7 +31,7 @@ class DataCV(DataDumperBase): | ||
| 29 | img.save('res/tmp.jpg', format='JPEG') | 31 | img.save('res/tmp.jpg', format='JPEG') |
| 30 | image = 'res/tmp.jpg' | 32 | image = 'res/tmp.jpg' |
| 31 | 33 | ||
| 32 | - with open(image, 'rb') as f: | 34 | + with open('res/tmp.jpg', 'rb') as f: |
| 33 | index = md5(f.read()).hexdigest() | 35 | index = md5(f.read()).hexdigest() |
| 34 | 36 | ||
| 35 | self.dict_data[index] = ispos | 37 | self.dict_data[index] = ispos |
| @@ -39,11 +41,11 @@ class DataCV(DataDumperBase): | @@ -39,11 +41,11 @@ class DataCV(DataDumperBase): | ||
| 39 | dir = self.img_dir + index[:3] + '/' | 41 | dir = self.img_dir + index[:3] + '/' |
| 40 | if not os.path.exists(dir): | 42 | if not os.path.exists(dir): |
| 41 | os.makedirs(dir) | 43 | os.makedirs(dir) |
| 42 | - image = dir + index[3:] + '.jpg' | ||
| 43 | - print image | 44 | + image_path = dir + index[3:] + '.jpg' |
| 45 | + # print image_path | ||
| 44 | 46 | ||
| 45 | - if not os.path.exists(image): | ||
| 46 | - shutil.copy(image, image) | 47 | + if not os.path.exists(image_path): |
| 48 | + shutil.copy(image, image_path) | ||
| 47 | else: | 49 | else: |
| 48 | pass | 50 | pass |
| 49 | 51 | ||
| @@ -58,6 +60,10 @@ class DataCV(DataDumperBase): | @@ -58,6 +60,10 @@ class DataCV(DataDumperBase): | ||
| 58 | else: | 60 | else: |
| 59 | self._hash_copy(imagepath, False) | 61 | self._hash_copy(imagepath, False) |
| 60 | 62 | ||
| 63 | + | ||
| 64 | + def build_list(self): | ||
| 65 | + assert self.list_file != None | ||
| 66 | + | ||
| 61 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) | 67 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) |
| 62 | 68 | ||
| 63 | with open(self.list_file, 'w') as f: | 69 | with open(self.list_file, 'w') as f: |
| @@ -98,7 +104,7 @@ class DataCV(DataDumperBase): | @@ -98,7 +104,7 @@ class DataCV(DataDumperBase): | ||
| 98 | with open(self.list_file, 'rb') as tsvfile: | 104 | with open(self.list_file, 'rb') as tsvfile: |
| 99 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 105 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 100 | for line in tsvfile: | 106 | for line in tsvfile: |
| 101 | - path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg' | 107 | + path_img = self.img_dir + line[0][:3] + '/' + line[0][3:] + '.jpg' |
| 102 | if path_img: | 108 | if path_img: |
| 103 | with open(path_img, 'rb') as fpic: | 109 | with open(path_img, 'rb') as fpic: |
| 104 | dict_databuf[line[0] + '.jpg'] = fpic.read() | 110 | dict_databuf[line[0] + '.jpg'] = fpic.read() |
| @@ -107,8 +113,8 @@ class DataCV(DataDumperBase): | @@ -107,8 +113,8 @@ class DataCV(DataDumperBase): | ||
| 107 | with self.table.batch(batch_size=5000) as b: | 113 | with self.table.batch(batch_size=5000) as b: |
| 108 | for imgname, imgdata in dict_databuf.items(): | 114 | for imgname, imgdata in dict_databuf.items(): |
| 109 | b.put(imgname, {'cf_pic:data': imgdata}) | 115 | b.put(imgname, {'cf_pic:data': imgdata}) |
| 110 | - raise ValueError("Something went wrong!") | ||
| 111 | except ValueError: | 116 | except ValueError: |
| 117 | + raise | ||
| 112 | pass | 118 | pass |
| 113 | 119 | ||
| 114 | 120 | ||
| @@ -127,8 +133,8 @@ class DataCV(DataDumperBase): | @@ -127,8 +133,8 @@ class DataCV(DataDumperBase): | ||
| 127 | with self.table.batch(batch_size=5000) as b: | 133 | with self.table.batch(batch_size=5000) as b: |
| 128 | for imgname, imgtag in dict_tagbuf.items(): | 134 | for imgname, imgtag in dict_tagbuf.items(): |
| 129 | b.put(imgname, {'cf_tag:' + feattype: imgtag}) | 135 | b.put(imgname, {'cf_tag:' + feattype: imgtag}) |
| 130 | - raise ValueError("Something went wrong!") | ||
| 131 | except ValueError: | 136 | except ValueError: |
| 137 | + raise | ||
| 132 | pass | 138 | pass |
| 133 | 139 | ||
| 134 | 140 | ||
| @@ -174,7 +180,7 @@ class DataCV(DataDumperBase): | @@ -174,7 +180,7 @@ class DataCV(DataDumperBase): | ||
| 174 | with self.table.batch(batch_size=5000) as b: | 180 | with self.table.batch(batch_size=5000) as b: |
| 175 | for imgname, featdesc in dict_featbuf.items(): | 181 | for imgname, featdesc in dict_featbuf.items(): |
| 176 | b.put(imgname, {'cf_feat:' + feattype: featdesc}) | 182 | b.put(imgname, {'cf_feat:' + feattype: featdesc}) |
| 177 | - raise ValueError("Something went wrong!") | ||
| 178 | except ValueError: | 183 | except ValueError: |
| 184 | + raise | ||
| 179 | pass | 185 | pass |
| 180 | 186 |
No preview for this file type
mdata/MSR.py
| @@ -51,11 +51,11 @@ class DataMSR(DataDumperBase): | @@ -51,11 +51,11 @@ class DataMSR(DataDumperBase): | ||
| 51 | dir = self.img_dir + index[:3] + '/' | 51 | dir = self.img_dir + index[:3] + '/' |
| 52 | if not os.path.exists(dir): | 52 | if not os.path.exists(dir): |
| 53 | os.makedirs(dir) | 53 | os.makedirs(dir) |
| 54 | - image = dir + index[3:] + '.jpg' | ||
| 55 | - print image | 54 | + image_path = dir + index[3:] + '.jpg' |
| 55 | + print image_path | ||
| 56 | 56 | ||
| 57 | - if not os.path.exists(image): | ||
| 58 | - shutil.copy('res/tmp.jpg', image) | 57 | + if not os.path.exists(image_path): |
| 58 | + shutil.copy('res/tmp.jpg', image_path) | ||
| 59 | # or : | 59 | # or : |
| 60 | # img.save(image, format='JPEG') | 60 | # img.save(image, format='JPEG') |
| 61 | 61 | ||
| @@ -115,12 +115,12 @@ class DataMSR(DataDumperBase): | @@ -115,12 +115,12 @@ class DataMSR(DataDumperBase): | ||
| 115 | with self.table.batch(batch_size=5000) as b: | 115 | with self.table.batch(batch_size=5000) as b: |
| 116 | for imgname, imgdata in dict_buffer.items(): | 116 | for imgname, imgdata in dict_buffer.items(): |
| 117 | b.put(imgname, {'cf_pic:data': imgdata}) | 117 | b.put(imgname, {'cf_pic:data': imgdata}) |
| 118 | - raise ValueError("Something went wrong!") | ||
| 119 | except ValueError: | 118 | except ValueError: |
| 119 | + raise | ||
| 120 | pass | 120 | pass |
| 121 | 121 | ||
| 122 | 122 | ||
| 123 | - def store_tag(self): | 123 | + def store_tag(self, feattype='retrieve'): |
| 124 | if self.table == None: | 124 | if self.table == None: |
| 125 | self.table = self.get_table() | 125 | self.table = self.get_table() |
| 126 | 126 | ||
| @@ -130,19 +130,20 @@ class DataMSR(DataDumperBase): | @@ -130,19 +130,20 @@ class DataMSR(DataDumperBase): | ||
| 130 | with open(self.map_file, 'rb') as tsvfile: | 130 | with open(self.map_file, 'rb') as tsvfile: |
| 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 132 | for line in tsvfile: | 132 | for line in tsvfile: |
| 133 | - dict_namebuf[line[0]] = line[2] | 133 | + dict_namebuf[line[2]] = line[0] |
| 134 | 134 | ||
| 135 | with open(self.tag_file, 'rb') as tsvfile: | 135 | with open(self.tag_file, 'rb') as tsvfile: |
| 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 137 | for line in tsvfile: | 137 | for line in tsvfile: |
| 138 | - dict_tagbuf[line[-2]] = (line[:-2], line[-1]) | 138 | + dict_tagbuf[line[-2]] = (line[0].split(), line[-1]) |
| 139 | 139 | ||
| 140 | try: | 140 | try: |
| 141 | with self.table.batch(batch_size=5000) as b: | 141 | with self.table.batch(batch_size=5000) as b: |
| 142 | - for key, value in dict_tagbuf.items(): | ||
| 143 | - b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) | ||
| 144 | - raise ValueError("Something went wrong!") | 142 | + for key, data in self.table.scan(): |
| 143 | + value = dict_tagbuf[dict_namebuf[key[:-4]]] | ||
| 144 | + b.put(key, {'cf_tag:' + feattype: json.dumps(value[0]), 'cf_tag:eval': value[1]}) | ||
| 145 | except ValueError: | 145 | except ValueError: |
| 146 | + raise | ||
| 146 | pass | 147 | pass |
| 147 | 148 | ||
| 148 | def get_feat(self, feattype): | 149 | def get_feat(self, feattype): |
mdata/MSR.pyc
No preview for this file type
mdata/__init__.pyc
No preview for this file type
No preview for this file type
mfeat/__init__.pyc
No preview for this file type
res/tmp.jpg
test_data.py
| 1 | __author__ = 'chunk' | 1 | __author__ = 'chunk' |
| 2 | 2 | ||
| 3 | +from mdata import MSR, CV | ||
| 3 | 4 | ||
| 4 | -from mdata import MSR | 5 | +def test_MSR(): |
| 6 | + dmsr = MSR.DataMSR() | ||
| 7 | + # msrd.format() | ||
| 8 | + # msrd.build_list() | ||
| 5 | 9 | ||
| 6 | -msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv') | ||
| 7 | -# msrd.format() | ||
| 8 | -# msrd.build_list() | 10 | + dmsr.store_image() |
| 11 | + dmsr.store_tag() | ||
| 9 | 12 | ||
| 10 | -print 'helllo' | ||
| 11 | \ No newline at end of file | 13 | \ No newline at end of file |
| 14 | +def test_CV(): | ||
| 15 | + dcv = CV.DataCV() | ||
| 16 | + dcv.format() | ||
| 17 | + dcv.build_list() | ||
| 18 | + | ||
| 19 | +if __name__ == '__main__': | ||
| 20 | + test_CV() | ||
| 21 | + print 'helllo ' | ||
| 12 | \ No newline at end of file | 22 | \ No newline at end of file |