Commit 0d9a20eae7c4bfe5b95f8af39b182ec53e01b979
1 parent
c7fa1d60
Exists in
master
and in
2 other branches
staged.
Showing
10 changed files
with
166 additions
and
143 deletions
Show diff stats
common.pyc
No preview for this file type
mdata/CV.py
| 1 | __author__ = 'chunk' | 1 | __author__ = 'chunk' |
| 2 | 2 | ||
| 3 | from mdata import * | 3 | from mdata import * |
| 4 | -from mfeat import * | 4 | +from mfeat import HOG |
| 5 | 5 | ||
| 6 | import os, sys | 6 | import os, sys |
| 7 | from PIL import Image | 7 | from PIL import Image |
| @@ -15,11 +15,9 @@ import happybase | @@ -15,11 +15,9 @@ import happybase | ||
| 15 | 15 | ||
| 16 | 16 | ||
| 17 | class DataCV(DataDumperBase): | 17 | class DataCV(DataDumperBase): |
| 18 | - def __init__(self, base_dir='/home/hadoop/data/HeadShoulder/', sub_dir='Train/'): | ||
| 19 | - DataDumperBase.__init__(self) | ||
| 20 | - self.base_dir = base_dir | ||
| 21 | - self.sub_dir = sub_dir | ||
| 22 | - | 18 | + def __init__(self, base_dir='/home/hadoop/data/HeadShoulder/', category='Train'): |
| 19 | + DataDumperBase.__init__(self, base_dir, category) | ||
| 20 | + self.data_dir = self.base_dir + self.category + '/' | ||
| 23 | self.dict_data = {} | 21 | self.dict_data = {} |
| 24 | 22 | ||
| 25 | def format(self): | 23 | def format(self): |
| @@ -38,20 +36,20 @@ class DataCV(DataDumperBase): | @@ -38,20 +36,20 @@ class DataCV(DataDumperBase): | ||
| 38 | 36 | ||
| 39 | # original: | 37 | # original: |
| 40 | # dir = base_dir + 'Img/Train/' + index[:3] | 38 | # dir = base_dir + 'Img/Train/' + index[:3] |
| 41 | - dir = self.base_dir + self.sub_dir + 'Img/' + index[:3] | 39 | + dir = self.img_dir + index[:3] + '/' |
| 42 | if not os.path.exists(dir): | 40 | if not os.path.exists(dir): |
| 43 | os.makedirs(dir) | 41 | os.makedirs(dir) |
| 44 | - path = dir + '/' + index[3:] + '.jpg' | ||
| 45 | - print path | 42 | + image = dir + index[3:] + '.jpg' |
| 43 | + print image | ||
| 46 | 44 | ||
| 47 | - if not os.path.exists(path): | ||
| 48 | - shutil.copy(image, path) | 45 | + if not os.path.exists(image): |
| 46 | + shutil.copy(image, image) | ||
| 49 | else: | 47 | else: |
| 50 | pass | 48 | pass |
| 51 | 49 | ||
| 52 | 50 | ||
| 53 | def extract(self): | 51 | def extract(self): |
| 54 | - for path, subdirs, files in os.walk(self.base_dir + 'Orig/'): | 52 | + for path, subdirs, files in os.walk(self.data_dir): |
| 55 | for name in files: | 53 | for name in files: |
| 56 | imagepath = os.path.join(path, name) | 54 | imagepath = os.path.join(path, name) |
| 57 | print imagepath | 55 | print imagepath |
| @@ -62,136 +60,121 @@ class DataCV(DataDumperBase): | @@ -62,136 +60,121 @@ class DataCV(DataDumperBase): | ||
| 62 | 60 | ||
| 63 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) | 61 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) |
| 64 | 62 | ||
| 65 | - lstfile = self.base_dir + self.sub_dir + 'Img/Image.tsv' | ||
| 66 | - with open(lstfile, 'w') as f: | 63 | + with open(self.list_file, 'w') as f: |
| 67 | tsvfile = csv.writer(f, delimiter='\t') | 64 | tsvfile = csv.writer(f, delimiter='\t') |
| 68 | for key, value in ordict_img.items(): | 65 | for key, value in ordict_img.items(): |
| 69 | tsvfile.writerow([key] + [value]) | 66 | tsvfile.writerow([key] + [value]) |
| 70 | 67 | ||
| 68 | + def get_table(self): | ||
| 69 | + if self.table != None: | ||
| 70 | + return self.table | ||
| 71 | 71 | ||
| 72 | - def get_table(self, tablename, connection=None): | ||
| 73 | - if connection is not None: | ||
| 74 | - c = connection | ||
| 75 | - else: | 72 | + if self.connection is None: |
| 76 | c = happybase.Connection('HPC-server') | 73 | c = happybase.Connection('HPC-server') |
| 77 | - tables = c.tables() | ||
| 78 | - if tablename not in tables: | 74 | + self.connection = c |
| 75 | + | ||
| 76 | + tables = self.connection.tables() | ||
| 77 | + if self.table_name not in tables: | ||
| 79 | families = {'cf_pic': dict(), | 78 | families = {'cf_pic': dict(), |
| 80 | 'cf_info': dict(max_versions=10), | 79 | 'cf_info': dict(max_versions=10), |
| 81 | 'cf_tag': dict(), | 80 | 'cf_tag': dict(), |
| 82 | 'cf_feat': dict(), | 81 | 'cf_feat': dict(), |
| 83 | } | 82 | } |
| 84 | - c.create_table(name=tablename, families=families) | 83 | + self.connection.create_table(name=self.table_name, families=families) |
| 84 | + | ||
| 85 | + table = self.connection.table(name=self.table_name) | ||
| 86 | + | ||
| 87 | + self.table = table | ||
| 85 | 88 | ||
| 86 | - tb = c.table(name=tablename) | ||
| 87 | - return tb | 89 | + return table |
| 88 | 90 | ||
| 89 | 91 | ||
| 90 | - def store_image(self, table): | ||
| 91 | - timer.mark() | ||
| 92 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
| 93 | - maplst = dir + 'Image.tsv' | 92 | + def store_image(self): |
| 93 | + if self.table == None: | ||
| 94 | + self.table = self.get_table() | ||
| 94 | 95 | ||
| 95 | dict_databuf = {} | 96 | dict_databuf = {} |
| 96 | 97 | ||
| 97 | - with open(maplst, 'rb') as tsvfile: | 98 | + with open(self.list_file, 'rb') as tsvfile: |
| 98 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 99 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 99 | for line in tsvfile: | 100 | for line in tsvfile: |
| 100 | - path_img = self.base_dir + self.sub_dir + 'Img/Train/' + line[0][:3] + '/' + line[0][3:] + '.jpg' | 101 | + path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg' |
| 101 | if path_img: | 102 | if path_img: |
| 102 | with open(path_img, 'rb') as fpic: | 103 | with open(path_img, 'rb') as fpic: |
| 103 | dict_databuf[line[0] + '.jpg'] = fpic.read() | 104 | dict_databuf[line[0] + '.jpg'] = fpic.read() |
| 104 | 105 | ||
| 105 | - timer.report() # 58.761801s | ||
| 106 | - timer.mark() | ||
| 107 | try: | 106 | try: |
| 108 | - with table.batch(batch_size=5000) as b: | 107 | + with self.table.batch(batch_size=5000) as b: |
| 109 | for imgname, imgdata in dict_databuf.items(): | 108 | for imgname, imgdata in dict_databuf.items(): |
| 110 | b.put(imgname, {'cf_pic:data': imgdata}) | 109 | b.put(imgname, {'cf_pic:data': imgdata}) |
| 111 | raise ValueError("Something went wrong!") | 110 | raise ValueError("Something went wrong!") |
| 112 | except ValueError: | 111 | except ValueError: |
| 113 | pass | 112 | pass |
| 114 | - timer.report() # 15.570524s | ||
| 115 | 113 | ||
| 116 | 114 | ||
| 117 | - def store_tag(self, table): | ||
| 118 | - timer.mark() | ||
| 119 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
| 120 | - maplst = dir + 'Image.tsv' | 115 | + def store_tag(self, feattype='hog'): |
| 116 | + if self.table == None: | ||
| 117 | + self.table = self.get_table() | ||
| 121 | 118 | ||
| 122 | dict_tagbuf = {} | 119 | dict_tagbuf = {} |
| 123 | 120 | ||
| 124 | - with open(maplst, 'rb') as tsvfile: | 121 | + with open(self.list_file, 'rb') as tsvfile: |
| 125 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 122 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 126 | for line in tsvfile: | 123 | for line in tsvfile: |
| 127 | dict_tagbuf[line[0] + '.jpg'] = line[1] | 124 | dict_tagbuf[line[0] + '.jpg'] = line[1] |
| 128 | 125 | ||
| 129 | - timer.report() # 0.009741s | ||
| 130 | - timer.mark() | ||
| 131 | try: | 126 | try: |
| 132 | - with table.batch(batch_size=5000) as b: | 127 | + with self.table.batch(batch_size=5000) as b: |
| 133 | for imgname, imgtag in dict_tagbuf.items(): | 128 | for imgname, imgtag in dict_tagbuf.items(): |
| 134 | - b.put(imgname, {'cf_tag:class': imgtag}) | 129 | + b.put(imgname, {'cf_tag:' + feattype: imgtag}) |
| 135 | raise ValueError("Something went wrong!") | 130 | raise ValueError("Something went wrong!") |
| 136 | except ValueError: | 131 | except ValueError: |
| 137 | pass | 132 | pass |
| 138 | - timer.report() # 0.509696s | ||
| 139 | 133 | ||
| 140 | 134 | ||
| 141 | - def get_feat(self, category='hog'): | ||
| 142 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
| 143 | - maplst = dir + 'images_map_Train.tsv' | 135 | + def get_feat(self, feattype='hog'): |
| 144 | 136 | ||
| 145 | dict_tagbuf = {} | 137 | dict_tagbuf = {} |
| 146 | - | ||
| 147 | - with open(maplst, 'rb') as tsvfile: | 138 | + with open(self.list_file, 'rb') as tsvfile: |
| 148 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 139 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 149 | for line in tsvfile: | 140 | for line in tsvfile: |
| 150 | dict_tagbuf[line[0] + '.jpg'] = line[1] | 141 | dict_tagbuf[line[0] + '.jpg'] = line[1] |
| 151 | 142 | ||
| 152 | dict_featbuf = {} | 143 | dict_featbuf = {} |
| 153 | - | ||
| 154 | - timer.mark() | ||
| 155 | for imgname, imgtag in dict_tagbuf.items(): | 144 | for imgname, imgtag in dict_tagbuf.items(): |
| 156 | # if imgtag == 'True': | 145 | # if imgtag == 'True': |
| 157 | - path_img = self.base_dir + self.sub_dir + 'Img/Train/' + imgname[:3] + '/' + imgname[3:] | ||
| 158 | - desc = FeatHOG.feat(path_img, size=(48, 48)) | 146 | + image = self.img_dir + imgname[:3] + '/' + imgname[3:] |
| 147 | + desc = HOG.FeatHOG.feat(image, size=(48, 48)) | ||
| 159 | dict_featbuf[imgname] = desc | 148 | dict_featbuf[imgname] = desc |
| 160 | - timer.report() # 4.337425s | ||
| 161 | 149 | ||
| 162 | - timer.mark() | ||
| 163 | for imgname, desc in dict_featbuf.items(): | 150 | for imgname, desc in dict_featbuf.items(): |
| 164 | # print imgname, desc | 151 | # print imgname, desc |
| 165 | - dir = self.base_dir + self.sub_dir + 'Feat/Train/' + imgname[:3] + '/' | 152 | + dir = self.feat_dir + imgname[:3] + '/' |
| 166 | if not os.path.exists(dir): | 153 | if not os.path.exists(dir): |
| 167 | os.makedirs(dir) | 154 | os.makedirs(dir) |
| 168 | - featpath = dir + imgname[3:].split('.')[0] + '.' + category | 155 | + featpath = dir + imgname[3:].split('.')[0] + '.' + feattype |
| 169 | with open(featpath, 'wb') as featfile: | 156 | with open(featpath, 'wb') as featfile: |
| 170 | featfile.write(json.dumps(desc.tolist())) | 157 | featfile.write(json.dumps(desc.tolist())) |
| 171 | 158 | ||
| 172 | - timer.report() # 14.862485s | ||
| 173 | 159 | ||
| 160 | + def store_feat(self, feattype='hog'): | ||
| 161 | + if self.table == None: | ||
| 162 | + self.table = self.get_table() | ||
| 174 | 163 | ||
| 175 | - def store_feat(self, table): | ||
| 176 | - timer.mark() | ||
| 177 | - dir = self.base_dir + self.sub_dir + 'Feat/' | ||
| 178 | dict_featbuf = {} | 164 | dict_featbuf = {} |
| 179 | - for path, subdirs, files in os.walk(dir + 'Train/'): | 165 | + for path, subdirs, files in os.walk(self.feat_dir): |
| 180 | for name in files: | 166 | for name in files: |
| 181 | featpath = os.path.join(path, name) | 167 | featpath = os.path.join(path, name) |
| 182 | # print featpath | 168 | # print featpath |
| 183 | with open(featpath, 'rb') as featfile: | 169 | with open(featpath, 'rb') as featfile: |
| 184 | - imgname = path.split('/')[-1] + name.replace('.hog', '.jpg') | 170 | + imgname = path.split('/')[-1] + name.replace('.' + feattype, '.jpg') |
| 185 | dict_featbuf[imgname] = featfile.read() | 171 | dict_featbuf[imgname] = featfile.read() |
| 186 | 172 | ||
| 187 | - timer.report() # 0.577940s | ||
| 188 | - | ||
| 189 | - timer.mark() | ||
| 190 | try: | 173 | try: |
| 191 | - with table.batch(batch_size=5000) as b: | 174 | + with self.table.batch(batch_size=5000) as b: |
| 192 | for imgname, featdesc in dict_featbuf.items(): | 175 | for imgname, featdesc in dict_featbuf.items(): |
| 193 | - b.put(imgname, {'cf_feat:hog': featdesc}) | 176 | + b.put(imgname, {'cf_feat:' + feattype: featdesc}) |
| 194 | raise ValueError("Something went wrong!") | 177 | raise ValueError("Something went wrong!") |
| 195 | except ValueError: | 178 | except ValueError: |
| 196 | pass | 179 | pass |
| 197 | - timer.report() # 76.075477s | ||
| 198 | \ No newline at end of file | 180 | \ No newline at end of file |
| 181 | + |
mdata/MSR.py
| @@ -19,11 +19,15 @@ import happybase | @@ -19,11 +19,15 @@ import happybase | ||
| 19 | 19 | ||
| 20 | 20 | ||
| 21 | class DataMSR(DataDumperBase): | 21 | class DataMSR(DataDumperBase): |
| 22 | - def __init__(self, base_dir='/home/hadoop/data/MSR-IRC2014/', sub_dir='Dev/', data_file='DevSetImage.tsv'): | ||
| 23 | - DataDumperBase.__init__(self) | ||
| 24 | - self.base_dir = base_dir | ||
| 25 | - self.sub_dir = sub_dir | ||
| 26 | - self.data_file = self.base_dir + self.sub_dir + data_file | 22 | + def __init__(self, base_dir='/home/hadoop/data/MSR-IRC2014/', category='Dev', |
| 23 | + data_file='DevSetImage.tsv', tag_file='DevSetLabel.tsv'): | ||
| 24 | + DataDumperBase.__init__(self, base_dir, category) | ||
| 25 | + | ||
| 26 | + self.data_file = self.base_dir + self.category + '/' + data_file | ||
| 27 | + self.tag_file = self.base_dir + self.category + '/' + tag_file | ||
| 28 | + self.map_file = self.base_dir + self.category + '/' + 'images_map.tsv' | ||
| 29 | + | ||
| 30 | + self.table_name = self.base_dir.split('/')[-2] + '-' + self.category | ||
| 27 | 31 | ||
| 28 | def format(self): | 32 | def format(self): |
| 29 | self.extract() | 33 | self.extract() |
| @@ -44,105 +48,105 @@ class DataMSR(DataDumperBase): | @@ -44,105 +48,105 @@ class DataMSR(DataDumperBase): | ||
| 44 | with open('res/tmp.jpg', 'rb') as f: | 48 | with open('res/tmp.jpg', 'rb') as f: |
| 45 | index = md5(f.read()).hexdigest() | 49 | index = md5(f.read()).hexdigest() |
| 46 | 50 | ||
| 47 | - dir = self.base_dir + self.sub_dir + 'Img/' + index[:3] | 51 | + dir = self.img_dir + index[:3] + '/' |
| 48 | if not os.path.exists(dir): | 52 | if not os.path.exists(dir): |
| 49 | os.makedirs(dir) | 53 | os.makedirs(dir) |
| 50 | - path = dir + '/' + index[3:] + '.jpg' | ||
| 51 | - print path | 54 | + image = dir + index[3:] + '.jpg' |
| 55 | + print image | ||
| 52 | 56 | ||
| 53 | - if not os.path.exists(path): | ||
| 54 | - shutil.copy('res/tmp.jpg', path) | 57 | + if not os.path.exists(image): |
| 58 | + shutil.copy('res/tmp.jpg', image) | ||
| 55 | # or : | 59 | # or : |
| 56 | - # img.save(path, format='JPEG') | 60 | + # img.save(image, format='JPEG') |
| 57 | 61 | ||
| 58 | 62 | ||
| 59 | def extract(self): | 63 | def extract(self): |
| 60 | - for name, data in self.load_base64(): | ||
| 61 | - self.hash_dump(data) | 64 | + for name, data in self._load_base64(): |
| 65 | + self._hash_dump(data) | ||
| 62 | 66 | ||
| 63 | 67 | ||
| 64 | def build_list(self): | 68 | def build_list(self): |
| 65 | - dir = self.base_dir + self.sub_dir | ||
| 66 | - lst = dir + 'Image.lst' | ||
| 67 | - with open(lst, 'wb') as f: | ||
| 68 | - for path, subdirs, files in os.walk(dir): | 69 | + assert self.list_file != None |
| 70 | + with open(self.list_file, 'wb') as f: | ||
| 71 | + for path, subdirs, files in os.walk(self.img_dir): | ||
| 69 | for name in files: | 72 | for name in files: |
| 70 | entry = path.split('/')[-1] + '/' + name | 73 | entry = path.split('/')[-1] + '/' + name |
| 71 | print entry | 74 | print entry |
| 72 | f.write(entry + '\n') | 75 | f.write(entry + '\n') |
| 73 | 76 | ||
| 74 | 77 | ||
| 75 | - def get_table(self, tablename, connection=None): | ||
| 76 | - if connection is not None: | ||
| 77 | - c = connection | ||
| 78 | - else: | 78 | + def get_table(self): |
| 79 | + if self.table != None: | ||
| 80 | + return self.table | ||
| 81 | + | ||
| 82 | + if self.connection is None: | ||
| 79 | c = happybase.Connection('HPC-server') | 83 | c = happybase.Connection('HPC-server') |
| 80 | - tables = c.tables() | ||
| 81 | - if tablename not in tables: | 84 | + self.connection = c |
| 85 | + | ||
| 86 | + tables = self.connection.tables() | ||
| 87 | + if self.table_name not in tables: | ||
| 82 | families = {'cf_pic': dict(), | 88 | families = {'cf_pic': dict(), |
| 83 | 'cf_info': dict(max_versions=10), | 89 | 'cf_info': dict(max_versions=10), |
| 84 | 'cf_tag': dict(), | 90 | 'cf_tag': dict(), |
| 91 | + 'cf_feat': dict(), | ||
| 85 | } | 92 | } |
| 86 | - c.create_table(name=tablename, families=families) | 93 | + self.connection.create_table(name=self.table_name, families=families) |
| 94 | + | ||
| 95 | + table = self.connection.table(name=self.table_name) | ||
| 87 | 96 | ||
| 88 | - tb = c.table(name=tablename) | ||
| 89 | - return tb | 97 | + self.table = table |
| 90 | 98 | ||
| 99 | + return table | ||
| 100 | + | ||
| 101 | + | ||
| 102 | + def store_image(self): | ||
| 103 | + if self.table == None: | ||
| 104 | + self.table = self.get_table() | ||
| 91 | 105 | ||
| 92 | - def store_image(self, table): | ||
| 93 | - timer.mark() | ||
| 94 | - dir = self.base_dir + self.sub_dir + 'Img2/' | ||
| 95 | - lst = dir + 'Image.lst' | ||
| 96 | dict_buffer = {} | 106 | dict_buffer = {} |
| 97 | - with open(lst, 'rb') as f: | 107 | + with open(self.list_file, 'rb') as f: |
| 98 | for line in f: | 108 | for line in f: |
| 99 | path_img = line.strip('\n') | 109 | path_img = line.strip('\n') |
| 100 | if path_img: | 110 | if path_img: |
| 101 | - with open(dir + 'Dev/' + path_img, 'rb') as fpic: | 111 | + with open(self.img_dir + path_img, 'rb') as fpic: |
| 102 | dict_buffer[path_img.replace('/', '')] = fpic.read() | 112 | dict_buffer[path_img.replace('/', '')] = fpic.read() |
| 103 | - timer.report() # 1.507566s | ||
| 104 | - timer.mark() | 113 | + |
| 105 | try: | 114 | try: |
| 106 | - with table.batch(batch_size=5000) as b: | 115 | + with self.table.batch(batch_size=5000) as b: |
| 107 | for imgname, imgdata in dict_buffer.items(): | 116 | for imgname, imgdata in dict_buffer.items(): |
| 108 | b.put(imgname, {'cf_pic:data': imgdata}) | 117 | b.put(imgname, {'cf_pic:data': imgdata}) |
| 109 | raise ValueError("Something went wrong!") | 118 | raise ValueError("Something went wrong!") |
| 110 | except ValueError: | 119 | except ValueError: |
| 111 | pass | 120 | pass |
| 112 | - timer.report() # 228.003684s | ||
| 113 | 121 | ||
| 114 | 122 | ||
| 115 | - def store_tag(self, table): | ||
| 116 | - timer.mark() | ||
| 117 | - dir = self.base_dir + self.sub_dir + 'Img2/' | ||
| 118 | - maplst = dir + 'Image.tsv' | ||
| 119 | - taglist = self.base_dir + self.sub_dir + 'Dev/DevSetLabel.tsv' | 123 | + def store_tag(self): |
| 124 | + if self.table == None: | ||
| 125 | + self.table = self.get_table() | ||
| 126 | + | ||
| 120 | dict_namebuf = {} | 127 | dict_namebuf = {} |
| 121 | dict_tagbuf = {} | 128 | dict_tagbuf = {} |
| 122 | 129 | ||
| 123 | - with open(maplst, 'rb') as tsvfile: | 130 | + with open(self.map_file, 'rb') as tsvfile: |
| 124 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 125 | for line in tsvfile: | 132 | for line in tsvfile: |
| 126 | dict_namebuf[line[0]] = line[2] | 133 | dict_namebuf[line[0]] = line[2] |
| 127 | 134 | ||
| 128 | - with open(taglist, 'rb') as tsvfile: | 135 | + with open(self.tag_file, 'rb') as tsvfile: |
| 129 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
| 130 | for line in tsvfile: | 137 | for line in tsvfile: |
| 131 | dict_tagbuf[line[-2]] = (line[:-2], line[-1]) | 138 | dict_tagbuf[line[-2]] = (line[:-2], line[-1]) |
| 132 | 139 | ||
| 133 | - timer.report() # 0.148540s | ||
| 134 | - timer.mark() | ||
| 135 | try: | 140 | try: |
| 136 | - with table.batch(batch_size=5000) as b: | 141 | + with self.table.batch(batch_size=5000) as b: |
| 137 | for key, value in dict_tagbuf.items(): | 142 | for key, value in dict_tagbuf.items(): |
| 138 | b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) | 143 | b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) |
| 139 | raise ValueError("Something went wrong!") | 144 | raise ValueError("Something went wrong!") |
| 140 | except ValueError: | 145 | except ValueError: |
| 141 | pass | 146 | pass |
| 142 | - timer.report() # 3.280105s | ||
| 143 | 147 | ||
| 144 | - def get_feat(self, category): | 148 | + def get_feat(self, feattype): |
| 145 | pass | 149 | pass |
| 146 | 150 | ||
| 147 | - def store_feat(self, table, category): | 151 | + def store_feat(self, feattype): |
| 148 | pass | 152 | pass |
No preview for this file type
mdata/__init__.py
| 1 | +# -*- coding: utf-8 -*- | ||
| 1 | __author__ = 'chunk' | 2 | __author__ = 'chunk' |
| 2 | 3 | ||
| 3 | -__all__ = ['DataDumperBase', ] | 4 | +__all__ = ['DataDumperBase'] |
| 4 | 5 | ||
| 5 | 6 | ||
| 6 | class DataDumperBase(object): | 7 | class DataDumperBase(object): |
| @@ -8,17 +9,33 @@ class DataDumperBase(object): | @@ -8,17 +9,33 @@ class DataDumperBase(object): | ||
| 8 | Base class for image data dumping & retrieving. | 9 | Base class for image data dumping & retrieving. |
| 9 | A regular directory pattern would be like this: | 10 | A regular directory pattern would be like this: |
| 10 | 11 | ||
| 11 | - ├── file-tag-list.tsv | ||
| 12 | - │ | ||
| 13 | - ├── Feat | ||
| 14 | - │ ├── 0a1 | ||
| 15 | - │ └── 53e | ||
| 16 | - │ └── ... | ||
| 17 | - | | ||
| 18 | - └── Img | ||
| 19 | - ├── 0a1 | ||
| 20 | - └── 53e | ||
| 21 | - └── ... | 12 | + ├── Dev (category) |
| 13 | + ├── file-tag.tsv (list_file) | ||
| 14 | + │ | ||
| 15 | + ├── Feat (feat_dir) | ||
| 16 | + │ ├── 0a1 | ||
| 17 | + │ └── 53e | ||
| 18 | + │ └── ... | ||
| 19 | + | | ||
| 20 | + └── Img (img_dir) | ||
| 21 | + ├── 0a1 | ||
| 22 | + └── 53e | ||
| 23 | + └── ... | ||
| 24 | + ├── Train (category) | ||
| 25 | + ├── file-tag.tsv (list_file) | ||
| 26 | + │ | ||
| 27 | + ├── Feat | ||
| 28 | + │ ├── 032 | ||
| 29 | + │ └── a21 | ||
| 30 | + │ └── ... | ||
| 31 | + | | ||
| 32 | + └── Img | ||
| 33 | + ├── 032 | ||
| 34 | + └── a21 | ||
| 35 | + └── ... | ||
| 36 | + . | ||
| 37 | + . | ||
| 38 | + . | ||
| 22 | 39 | ||
| 23 | It can be refactored from the original pattern which is supposed to be generated from web crawlers: | 40 | It can be refactored from the original pattern which is supposed to be generated from web crawlers: |
| 24 | 41 | ||
| @@ -32,38 +49,47 @@ class DataDumperBase(object): | @@ -32,38 +49,47 @@ class DataDumperBase(object): | ||
| 32 | └── ddd.jpg | 49 | └── ddd.jpg |
| 33 | └── ... | 50 | └── ... |
| 34 | 51 | ||
| 52 | + | ||
| 35 | convention: | 53 | convention: |
| 36 | 'img' for image file data while 'image' for file path; | 54 | 'img' for image file data while 'image' for file path; |
| 37 | 55 | ||
| 38 | """ | 56 | """ |
| 39 | 57 | ||
| 40 | - def __init__(self): | ||
| 41 | - self.base_dir = None | ||
| 42 | - self.list_file = None | ||
| 43 | - self.dict_data = None | 58 | + def __init__(self, base_dir, category): |
| 59 | + """ | ||
| 60 | + base_dir: e.g. '/home/hadoop/data/MSR-IRC2014/' | ||
| 61 | + list_file: not data_file! e.g. 'file-tag.tsv' | ||
| 62 | + dict_data: e.g. {'filename':rawdata} or {'filename':tag} | ||
| 63 | + """ | ||
| 64 | + self.base_dir = base_dir | ||
| 65 | + self.category = category | ||
| 66 | + self.dst_dir = self.base_dir + 'dst/' + self.category + '/' | ||
| 67 | + | ||
| 68 | + self.list_file = self.dst_dir + 'file-tag.tsv' | ||
| 69 | + self.feat_dir = self.dst_dir + 'Feat/' | ||
| 70 | + self.img_dir = self.dst_dir + 'Img/' | ||
| 44 | 71 | ||
| 45 | - # self.table_name = None | ||
| 46 | - # self.table = None | ||
| 47 | - # self.connection = None | 72 | + self.table_name = None |
| 73 | + self.table = None | ||
| 74 | + self.connection = None | ||
| 48 | 75 | ||
| 49 | def format(self): | 76 | def format(self): |
| 50 | pass | 77 | pass |
| 51 | 78 | ||
| 52 | 79 | ||
| 53 | - | ||
| 54 | - def get_table(self, tablename, connection=None): | 80 | + def get_table(self): |
| 55 | pass | 81 | pass |
| 56 | 82 | ||
| 57 | - def store_img(self, table): | 83 | + def store_img(self): |
| 58 | pass | 84 | pass |
| 59 | 85 | ||
| 60 | - def store_tag(self, table, category): | 86 | + def store_tag(self, feattype): |
| 61 | pass | 87 | pass |
| 62 | 88 | ||
| 63 | - def get_feat(self, category): | 89 | + def get_feat(self, feattype): |
| 64 | pass | 90 | pass |
| 65 | 91 | ||
| 66 | - def store_feat(self, table, category): | 92 | + def store_feat(self, feattype): |
| 67 | pass | 93 | pass |
| 68 | 94 | ||
| 69 | 95 |
No preview for this file type
mfeat/__init__.py
| @@ -7,7 +7,7 @@ import cv2 | @@ -7,7 +7,7 @@ import cv2 | ||
| 7 | from skimage.feature import hog | 7 | from skimage.feature import hog |
| 8 | from skimage import io, color, transform, exposure | 8 | from skimage import io, color, transform, exposure |
| 9 | 9 | ||
| 10 | -__all__ = ['FeatureBase', 'FeatHOG', 'timer'] | 10 | +__all__ = ['FeatureBase'] |
| 11 | 11 | ||
| 12 | timer = ctimer() | 12 | timer = ctimer() |
| 13 | 13 |
No preview for this file type
res/tmp.jpg
| @@ -0,0 +1,10 @@ | @@ -0,0 +1,10 @@ | ||
| 1 | +__author__ = 'chunk' | ||
| 2 | + | ||
| 3 | + | ||
| 4 | +from mdata import MSR | ||
| 5 | + | ||
| 6 | +msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv') | ||
| 7 | +# msrd.format() | ||
| 8 | +# msrd.build_list() | ||
| 9 | + | ||
| 10 | +print 'helllo' | ||
| 0 | \ No newline at end of file | 11 | \ No newline at end of file |