Commit 0d9a20eae7c4bfe5b95f8af39b182ec53e01b979
1 parent
c7fa1d60
Exists in
master
and in
2 other branches
staged.
Showing
10 changed files
with
166 additions
and
143 deletions
Show diff stats
common.pyc
No preview for this file type
mdata/CV.py
1 | __author__ = 'chunk' | 1 | __author__ = 'chunk' |
2 | 2 | ||
3 | from mdata import * | 3 | from mdata import * |
4 | -from mfeat import * | 4 | +from mfeat import HOG |
5 | 5 | ||
6 | import os, sys | 6 | import os, sys |
7 | from PIL import Image | 7 | from PIL import Image |
@@ -15,11 +15,9 @@ import happybase | @@ -15,11 +15,9 @@ import happybase | ||
15 | 15 | ||
16 | 16 | ||
17 | class DataCV(DataDumperBase): | 17 | class DataCV(DataDumperBase): |
18 | - def __init__(self, base_dir='/home/hadoop/data/HeadShoulder/', sub_dir='Train/'): | ||
19 | - DataDumperBase.__init__(self) | ||
20 | - self.base_dir = base_dir | ||
21 | - self.sub_dir = sub_dir | ||
22 | - | 18 | + def __init__(self, base_dir='/home/hadoop/data/HeadShoulder/', category='Train'): |
19 | + DataDumperBase.__init__(self, base_dir, category) | ||
20 | + self.data_dir = self.base_dir + self.category + '/' | ||
23 | self.dict_data = {} | 21 | self.dict_data = {} |
24 | 22 | ||
25 | def format(self): | 23 | def format(self): |
@@ -38,20 +36,20 @@ class DataCV(DataDumperBase): | @@ -38,20 +36,20 @@ class DataCV(DataDumperBase): | ||
38 | 36 | ||
39 | # original: | 37 | # original: |
40 | # dir = base_dir + 'Img/Train/' + index[:3] | 38 | # dir = base_dir + 'Img/Train/' + index[:3] |
41 | - dir = self.base_dir + self.sub_dir + 'Img/' + index[:3] | 39 | + dir = self.img_dir + index[:3] + '/' |
42 | if not os.path.exists(dir): | 40 | if not os.path.exists(dir): |
43 | os.makedirs(dir) | 41 | os.makedirs(dir) |
44 | - path = dir + '/' + index[3:] + '.jpg' | ||
45 | - print path | 42 | + image = dir + index[3:] + '.jpg' |
43 | + print image | ||
46 | 44 | ||
47 | - if not os.path.exists(path): | ||
48 | - shutil.copy(image, path) | 45 | + if not os.path.exists(image): |
46 | + shutil.copy(image, image) | ||
49 | else: | 47 | else: |
50 | pass | 48 | pass |
51 | 49 | ||
52 | 50 | ||
53 | def extract(self): | 51 | def extract(self): |
54 | - for path, subdirs, files in os.walk(self.base_dir + 'Orig/'): | 52 | + for path, subdirs, files in os.walk(self.data_dir): |
55 | for name in files: | 53 | for name in files: |
56 | imagepath = os.path.join(path, name) | 54 | imagepath = os.path.join(path, name) |
57 | print imagepath | 55 | print imagepath |
@@ -62,136 +60,121 @@ class DataCV(DataDumperBase): | @@ -62,136 +60,121 @@ class DataCV(DataDumperBase): | ||
62 | 60 | ||
63 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) | 61 | ordict_img = collections.OrderedDict(sorted(self.dict_data.items(), key=lambda d: d[0])) |
64 | 62 | ||
65 | - lstfile = self.base_dir + self.sub_dir + 'Img/Image.tsv' | ||
66 | - with open(lstfile, 'w') as f: | 63 | + with open(self.list_file, 'w') as f: |
67 | tsvfile = csv.writer(f, delimiter='\t') | 64 | tsvfile = csv.writer(f, delimiter='\t') |
68 | for key, value in ordict_img.items(): | 65 | for key, value in ordict_img.items(): |
69 | tsvfile.writerow([key] + [value]) | 66 | tsvfile.writerow([key] + [value]) |
70 | 67 | ||
68 | + def get_table(self): | ||
69 | + if self.table != None: | ||
70 | + return self.table | ||
71 | 71 | ||
72 | - def get_table(self, tablename, connection=None): | ||
73 | - if connection is not None: | ||
74 | - c = connection | ||
75 | - else: | 72 | + if self.connection is None: |
76 | c = happybase.Connection('HPC-server') | 73 | c = happybase.Connection('HPC-server') |
77 | - tables = c.tables() | ||
78 | - if tablename not in tables: | 74 | + self.connection = c |
75 | + | ||
76 | + tables = self.connection.tables() | ||
77 | + if self.table_name not in tables: | ||
79 | families = {'cf_pic': dict(), | 78 | families = {'cf_pic': dict(), |
80 | 'cf_info': dict(max_versions=10), | 79 | 'cf_info': dict(max_versions=10), |
81 | 'cf_tag': dict(), | 80 | 'cf_tag': dict(), |
82 | 'cf_feat': dict(), | 81 | 'cf_feat': dict(), |
83 | } | 82 | } |
84 | - c.create_table(name=tablename, families=families) | 83 | + self.connection.create_table(name=self.table_name, families=families) |
84 | + | ||
85 | + table = self.connection.table(name=self.table_name) | ||
86 | + | ||
87 | + self.table = table | ||
85 | 88 | ||
86 | - tb = c.table(name=tablename) | ||
87 | - return tb | 89 | + return table |
88 | 90 | ||
89 | 91 | ||
90 | - def store_image(self, table): | ||
91 | - timer.mark() | ||
92 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
93 | - maplst = dir + 'Image.tsv' | 92 | + def store_image(self): |
93 | + if self.table == None: | ||
94 | + self.table = self.get_table() | ||
94 | 95 | ||
95 | dict_databuf = {} | 96 | dict_databuf = {} |
96 | 97 | ||
97 | - with open(maplst, 'rb') as tsvfile: | 98 | + with open(self.list_file, 'rb') as tsvfile: |
98 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 99 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
99 | for line in tsvfile: | 100 | for line in tsvfile: |
100 | - path_img = self.base_dir + self.sub_dir + 'Img/Train/' + line[0][:3] + '/' + line[0][3:] + '.jpg' | 101 | + path_img = self.img_dir + + line[0][:3] + '/' + line[0][3:] + '.jpg' |
101 | if path_img: | 102 | if path_img: |
102 | with open(path_img, 'rb') as fpic: | 103 | with open(path_img, 'rb') as fpic: |
103 | dict_databuf[line[0] + '.jpg'] = fpic.read() | 104 | dict_databuf[line[0] + '.jpg'] = fpic.read() |
104 | 105 | ||
105 | - timer.report() # 58.761801s | ||
106 | - timer.mark() | ||
107 | try: | 106 | try: |
108 | - with table.batch(batch_size=5000) as b: | 107 | + with self.table.batch(batch_size=5000) as b: |
109 | for imgname, imgdata in dict_databuf.items(): | 108 | for imgname, imgdata in dict_databuf.items(): |
110 | b.put(imgname, {'cf_pic:data': imgdata}) | 109 | b.put(imgname, {'cf_pic:data': imgdata}) |
111 | raise ValueError("Something went wrong!") | 110 | raise ValueError("Something went wrong!") |
112 | except ValueError: | 111 | except ValueError: |
113 | pass | 112 | pass |
114 | - timer.report() # 15.570524s | ||
115 | 113 | ||
116 | 114 | ||
117 | - def store_tag(self, table): | ||
118 | - timer.mark() | ||
119 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
120 | - maplst = dir + 'Image.tsv' | 115 | + def store_tag(self, feattype='hog'): |
116 | + if self.table == None: | ||
117 | + self.table = self.get_table() | ||
121 | 118 | ||
122 | dict_tagbuf = {} | 119 | dict_tagbuf = {} |
123 | 120 | ||
124 | - with open(maplst, 'rb') as tsvfile: | 121 | + with open(self.list_file, 'rb') as tsvfile: |
125 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 122 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
126 | for line in tsvfile: | 123 | for line in tsvfile: |
127 | dict_tagbuf[line[0] + '.jpg'] = line[1] | 124 | dict_tagbuf[line[0] + '.jpg'] = line[1] |
128 | 125 | ||
129 | - timer.report() # 0.009741s | ||
130 | - timer.mark() | ||
131 | try: | 126 | try: |
132 | - with table.batch(batch_size=5000) as b: | 127 | + with self.table.batch(batch_size=5000) as b: |
133 | for imgname, imgtag in dict_tagbuf.items(): | 128 | for imgname, imgtag in dict_tagbuf.items(): |
134 | - b.put(imgname, {'cf_tag:class': imgtag}) | 129 | + b.put(imgname, {'cf_tag:' + feattype: imgtag}) |
135 | raise ValueError("Something went wrong!") | 130 | raise ValueError("Something went wrong!") |
136 | except ValueError: | 131 | except ValueError: |
137 | pass | 132 | pass |
138 | - timer.report() # 0.509696s | ||
139 | 133 | ||
140 | 134 | ||
141 | - def get_feat(self, category='hog'): | ||
142 | - dir = self.base_dir + self.sub_dir + 'Img/' | ||
143 | - maplst = dir + 'images_map_Train.tsv' | 135 | + def get_feat(self, feattype='hog'): |
144 | 136 | ||
145 | dict_tagbuf = {} | 137 | dict_tagbuf = {} |
146 | - | ||
147 | - with open(maplst, 'rb') as tsvfile: | 138 | + with open(self.list_file, 'rb') as tsvfile: |
148 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 139 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
149 | for line in tsvfile: | 140 | for line in tsvfile: |
150 | dict_tagbuf[line[0] + '.jpg'] = line[1] | 141 | dict_tagbuf[line[0] + '.jpg'] = line[1] |
151 | 142 | ||
152 | dict_featbuf = {} | 143 | dict_featbuf = {} |
153 | - | ||
154 | - timer.mark() | ||
155 | for imgname, imgtag in dict_tagbuf.items(): | 144 | for imgname, imgtag in dict_tagbuf.items(): |
156 | # if imgtag == 'True': | 145 | # if imgtag == 'True': |
157 | - path_img = self.base_dir + self.sub_dir + 'Img/Train/' + imgname[:3] + '/' + imgname[3:] | ||
158 | - desc = FeatHOG.feat(path_img, size=(48, 48)) | 146 | + image = self.img_dir + imgname[:3] + '/' + imgname[3:] |
147 | + desc = HOG.FeatHOG.feat(image, size=(48, 48)) | ||
159 | dict_featbuf[imgname] = desc | 148 | dict_featbuf[imgname] = desc |
160 | - timer.report() # 4.337425s | ||
161 | 149 | ||
162 | - timer.mark() | ||
163 | for imgname, desc in dict_featbuf.items(): | 150 | for imgname, desc in dict_featbuf.items(): |
164 | # print imgname, desc | 151 | # print imgname, desc |
165 | - dir = self.base_dir + self.sub_dir + 'Feat/Train/' + imgname[:3] + '/' | 152 | + dir = self.feat_dir + imgname[:3] + '/' |
166 | if not os.path.exists(dir): | 153 | if not os.path.exists(dir): |
167 | os.makedirs(dir) | 154 | os.makedirs(dir) |
168 | - featpath = dir + imgname[3:].split('.')[0] + '.' + category | 155 | + featpath = dir + imgname[3:].split('.')[0] + '.' + feattype |
169 | with open(featpath, 'wb') as featfile: | 156 | with open(featpath, 'wb') as featfile: |
170 | featfile.write(json.dumps(desc.tolist())) | 157 | featfile.write(json.dumps(desc.tolist())) |
171 | 158 | ||
172 | - timer.report() # 14.862485s | ||
173 | 159 | ||
160 | + def store_feat(self, feattype='hog'): | ||
161 | + if self.table == None: | ||
162 | + self.table = self.get_table() | ||
174 | 163 | ||
175 | - def store_feat(self, table): | ||
176 | - timer.mark() | ||
177 | - dir = self.base_dir + self.sub_dir + 'Feat/' | ||
178 | dict_featbuf = {} | 164 | dict_featbuf = {} |
179 | - for path, subdirs, files in os.walk(dir + 'Train/'): | 165 | + for path, subdirs, files in os.walk(self.feat_dir): |
180 | for name in files: | 166 | for name in files: |
181 | featpath = os.path.join(path, name) | 167 | featpath = os.path.join(path, name) |
182 | # print featpath | 168 | # print featpath |
183 | with open(featpath, 'rb') as featfile: | 169 | with open(featpath, 'rb') as featfile: |
184 | - imgname = path.split('/')[-1] + name.replace('.hog', '.jpg') | 170 | + imgname = path.split('/')[-1] + name.replace('.' + feattype, '.jpg') |
185 | dict_featbuf[imgname] = featfile.read() | 171 | dict_featbuf[imgname] = featfile.read() |
186 | 172 | ||
187 | - timer.report() # 0.577940s | ||
188 | - | ||
189 | - timer.mark() | ||
190 | try: | 173 | try: |
191 | - with table.batch(batch_size=5000) as b: | 174 | + with self.table.batch(batch_size=5000) as b: |
192 | for imgname, featdesc in dict_featbuf.items(): | 175 | for imgname, featdesc in dict_featbuf.items(): |
193 | - b.put(imgname, {'cf_feat:hog': featdesc}) | 176 | + b.put(imgname, {'cf_feat:' + feattype: featdesc}) |
194 | raise ValueError("Something went wrong!") | 177 | raise ValueError("Something went wrong!") |
195 | except ValueError: | 178 | except ValueError: |
196 | pass | 179 | pass |
197 | - timer.report() # 76.075477s | ||
198 | \ No newline at end of file | 180 | \ No newline at end of file |
181 | + |
mdata/MSR.py
@@ -19,11 +19,15 @@ import happybase | @@ -19,11 +19,15 @@ import happybase | ||
19 | 19 | ||
20 | 20 | ||
21 | class DataMSR(DataDumperBase): | 21 | class DataMSR(DataDumperBase): |
22 | - def __init__(self, base_dir='/home/hadoop/data/MSR-IRC2014/', sub_dir='Dev/', data_file='DevSetImage.tsv'): | ||
23 | - DataDumperBase.__init__(self) | ||
24 | - self.base_dir = base_dir | ||
25 | - self.sub_dir = sub_dir | ||
26 | - self.data_file = self.base_dir + self.sub_dir + data_file | 22 | + def __init__(self, base_dir='/home/hadoop/data/MSR-IRC2014/', category='Dev', |
23 | + data_file='DevSetImage.tsv', tag_file='DevSetLabel.tsv'): | ||
24 | + DataDumperBase.__init__(self, base_dir, category) | ||
25 | + | ||
26 | + self.data_file = self.base_dir + self.category + '/' + data_file | ||
27 | + self.tag_file = self.base_dir + self.category + '/' + tag_file | ||
28 | + self.map_file = self.base_dir + self.category + '/' + 'images_map.tsv' | ||
29 | + | ||
30 | + self.table_name = self.base_dir.split('/')[-2] + '-' + self.category | ||
27 | 31 | ||
28 | def format(self): | 32 | def format(self): |
29 | self.extract() | 33 | self.extract() |
@@ -44,105 +48,105 @@ class DataMSR(DataDumperBase): | @@ -44,105 +48,105 @@ class DataMSR(DataDumperBase): | ||
44 | with open('res/tmp.jpg', 'rb') as f: | 48 | with open('res/tmp.jpg', 'rb') as f: |
45 | index = md5(f.read()).hexdigest() | 49 | index = md5(f.read()).hexdigest() |
46 | 50 | ||
47 | - dir = self.base_dir + self.sub_dir + 'Img/' + index[:3] | 51 | + dir = self.img_dir + index[:3] + '/' |
48 | if not os.path.exists(dir): | 52 | if not os.path.exists(dir): |
49 | os.makedirs(dir) | 53 | os.makedirs(dir) |
50 | - path = dir + '/' + index[3:] + '.jpg' | ||
51 | - print path | 54 | + image = dir + index[3:] + '.jpg' |
55 | + print image | ||
52 | 56 | ||
53 | - if not os.path.exists(path): | ||
54 | - shutil.copy('res/tmp.jpg', path) | 57 | + if not os.path.exists(image): |
58 | + shutil.copy('res/tmp.jpg', image) | ||
55 | # or : | 59 | # or : |
56 | - # img.save(path, format='JPEG') | 60 | + # img.save(image, format='JPEG') |
57 | 61 | ||
58 | 62 | ||
59 | def extract(self): | 63 | def extract(self): |
60 | - for name, data in self.load_base64(): | ||
61 | - self.hash_dump(data) | 64 | + for name, data in self._load_base64(): |
65 | + self._hash_dump(data) | ||
62 | 66 | ||
63 | 67 | ||
64 | def build_list(self): | 68 | def build_list(self): |
65 | - dir = self.base_dir + self.sub_dir | ||
66 | - lst = dir + 'Image.lst' | ||
67 | - with open(lst, 'wb') as f: | ||
68 | - for path, subdirs, files in os.walk(dir): | 69 | + assert self.list_file != None |
70 | + with open(self.list_file, 'wb') as f: | ||
71 | + for path, subdirs, files in os.walk(self.img_dir): | ||
69 | for name in files: | 72 | for name in files: |
70 | entry = path.split('/')[-1] + '/' + name | 73 | entry = path.split('/')[-1] + '/' + name |
71 | print entry | 74 | print entry |
72 | f.write(entry + '\n') | 75 | f.write(entry + '\n') |
73 | 76 | ||
74 | 77 | ||
75 | - def get_table(self, tablename, connection=None): | ||
76 | - if connection is not None: | ||
77 | - c = connection | ||
78 | - else: | 78 | + def get_table(self): |
79 | + if self.table != None: | ||
80 | + return self.table | ||
81 | + | ||
82 | + if self.connection is None: | ||
79 | c = happybase.Connection('HPC-server') | 83 | c = happybase.Connection('HPC-server') |
80 | - tables = c.tables() | ||
81 | - if tablename not in tables: | 84 | + self.connection = c |
85 | + | ||
86 | + tables = self.connection.tables() | ||
87 | + if self.table_name not in tables: | ||
82 | families = {'cf_pic': dict(), | 88 | families = {'cf_pic': dict(), |
83 | 'cf_info': dict(max_versions=10), | 89 | 'cf_info': dict(max_versions=10), |
84 | 'cf_tag': dict(), | 90 | 'cf_tag': dict(), |
91 | + 'cf_feat': dict(), | ||
85 | } | 92 | } |
86 | - c.create_table(name=tablename, families=families) | 93 | + self.connection.create_table(name=self.table_name, families=families) |
94 | + | ||
95 | + table = self.connection.table(name=self.table_name) | ||
87 | 96 | ||
88 | - tb = c.table(name=tablename) | ||
89 | - return tb | 97 | + self.table = table |
90 | 98 | ||
99 | + return table | ||
100 | + | ||
101 | + | ||
102 | + def store_image(self): | ||
103 | + if self.table == None: | ||
104 | + self.table = self.get_table() | ||
91 | 105 | ||
92 | - def store_image(self, table): | ||
93 | - timer.mark() | ||
94 | - dir = self.base_dir + self.sub_dir + 'Img2/' | ||
95 | - lst = dir + 'Image.lst' | ||
96 | dict_buffer = {} | 106 | dict_buffer = {} |
97 | - with open(lst, 'rb') as f: | 107 | + with open(self.list_file, 'rb') as f: |
98 | for line in f: | 108 | for line in f: |
99 | path_img = line.strip('\n') | 109 | path_img = line.strip('\n') |
100 | if path_img: | 110 | if path_img: |
101 | - with open(dir + 'Dev/' + path_img, 'rb') as fpic: | 111 | + with open(self.img_dir + path_img, 'rb') as fpic: |
102 | dict_buffer[path_img.replace('/', '')] = fpic.read() | 112 | dict_buffer[path_img.replace('/', '')] = fpic.read() |
103 | - timer.report() # 1.507566s | ||
104 | - timer.mark() | 113 | + |
105 | try: | 114 | try: |
106 | - with table.batch(batch_size=5000) as b: | 115 | + with self.table.batch(batch_size=5000) as b: |
107 | for imgname, imgdata in dict_buffer.items(): | 116 | for imgname, imgdata in dict_buffer.items(): |
108 | b.put(imgname, {'cf_pic:data': imgdata}) | 117 | b.put(imgname, {'cf_pic:data': imgdata}) |
109 | raise ValueError("Something went wrong!") | 118 | raise ValueError("Something went wrong!") |
110 | except ValueError: | 119 | except ValueError: |
111 | pass | 120 | pass |
112 | - timer.report() # 228.003684s | ||
113 | 121 | ||
114 | 122 | ||
115 | - def store_tag(self, table): | ||
116 | - timer.mark() | ||
117 | - dir = self.base_dir + self.sub_dir + 'Img2/' | ||
118 | - maplst = dir + 'Image.tsv' | ||
119 | - taglist = self.base_dir + self.sub_dir + 'Dev/DevSetLabel.tsv' | 123 | + def store_tag(self): |
124 | + if self.table == None: | ||
125 | + self.table = self.get_table() | ||
126 | + | ||
120 | dict_namebuf = {} | 127 | dict_namebuf = {} |
121 | dict_tagbuf = {} | 128 | dict_tagbuf = {} |
122 | 129 | ||
123 | - with open(maplst, 'rb') as tsvfile: | 130 | + with open(self.map_file, 'rb') as tsvfile: |
124 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 131 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
125 | for line in tsvfile: | 132 | for line in tsvfile: |
126 | dict_namebuf[line[0]] = line[2] | 133 | dict_namebuf[line[0]] = line[2] |
127 | 134 | ||
128 | - with open(taglist, 'rb') as tsvfile: | 135 | + with open(self.tag_file, 'rb') as tsvfile: |
129 | tsvfile = csv.reader(tsvfile, delimiter='\t') | 136 | tsvfile = csv.reader(tsvfile, delimiter='\t') |
130 | for line in tsvfile: | 137 | for line in tsvfile: |
131 | dict_tagbuf[line[-2]] = (line[:-2], line[-1]) | 138 | dict_tagbuf[line[-2]] = (line[:-2], line[-1]) |
132 | 139 | ||
133 | - timer.report() # 0.148540s | ||
134 | - timer.mark() | ||
135 | try: | 140 | try: |
136 | - with table.batch(batch_size=5000) as b: | 141 | + with self.table.batch(batch_size=5000) as b: |
137 | for key, value in dict_tagbuf.items(): | 142 | for key, value in dict_tagbuf.items(): |
138 | b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) | 143 | b.put(dict_namebuf[key] + '.jpg', {'cf_tag:' + ''.join(value[0]): value[1]}) |
139 | raise ValueError("Something went wrong!") | 144 | raise ValueError("Something went wrong!") |
140 | except ValueError: | 145 | except ValueError: |
141 | pass | 146 | pass |
142 | - timer.report() # 3.280105s | ||
143 | 147 | ||
144 | - def get_feat(self, category): | 148 | + def get_feat(self, feattype): |
145 | pass | 149 | pass |
146 | 150 | ||
147 | - def store_feat(self, table, category): | 151 | + def store_feat(self, feattype): |
148 | pass | 152 | pass |
No preview for this file type
mdata/__init__.py
1 | +# -*- coding: utf-8 -*- | ||
1 | __author__ = 'chunk' | 2 | __author__ = 'chunk' |
2 | 3 | ||
3 | -__all__ = ['DataDumperBase', ] | 4 | +__all__ = ['DataDumperBase'] |
4 | 5 | ||
5 | 6 | ||
6 | class DataDumperBase(object): | 7 | class DataDumperBase(object): |
@@ -8,17 +9,33 @@ class DataDumperBase(object): | @@ -8,17 +9,33 @@ class DataDumperBase(object): | ||
8 | Base class for image data dumping & retrieving. | 9 | Base class for image data dumping & retrieving. |
9 | A regular directory pattern would be like this: | 10 | A regular directory pattern would be like this: |
10 | 11 | ||
11 | - ├── file-tag-list.tsv | ||
12 | - │ | ||
13 | - ├── Feat | ||
14 | - │ ├── 0a1 | ||
15 | - │ └── 53e | ||
16 | - │ └── ... | ||
17 | - | | ||
18 | - └── Img | ||
19 | - ├── 0a1 | ||
20 | - └── 53e | ||
21 | - └── ... | 12 | + ├── Dev (category) |
13 | + ├── file-tag.tsv (list_file) | ||
14 | + │ | ||
15 | + ├── Feat (feat_dir) | ||
16 | + │ ├── 0a1 | ||
17 | + │ └── 53e | ||
18 | + │ └── ... | ||
19 | + | | ||
20 | + └── Img (img_dir) | ||
21 | + ├── 0a1 | ||
22 | + └── 53e | ||
23 | + └── ... | ||
24 | + ├── Train (category) | ||
25 | + ├── file-tag.tsv (list_file) | ||
26 | + │ | ||
27 | + ├── Feat | ||
28 | + │ ├── 032 | ||
29 | + │ └── a21 | ||
30 | + │ └── ... | ||
31 | + | | ||
32 | + └── Img | ||
33 | + ├── 032 | ||
34 | + └── a21 | ||
35 | + └── ... | ||
36 | + . | ||
37 | + . | ||
38 | + . | ||
22 | 39 | ||
23 | It can be refactored from the original pattern which is supposed to be generated from web crawlers: | 40 | It can be refactored from the original pattern which is supposed to be generated from web crawlers: |
24 | 41 | ||
@@ -32,38 +49,47 @@ class DataDumperBase(object): | @@ -32,38 +49,47 @@ class DataDumperBase(object): | ||
32 | └── ddd.jpg | 49 | └── ddd.jpg |
33 | └── ... | 50 | └── ... |
34 | 51 | ||
52 | + | ||
35 | convention: | 53 | convention: |
36 | 'img' for image file data while 'image' for file path; | 54 | 'img' for image file data while 'image' for file path; |
37 | 55 | ||
38 | """ | 56 | """ |
39 | 57 | ||
40 | - def __init__(self): | ||
41 | - self.base_dir = None | ||
42 | - self.list_file = None | ||
43 | - self.dict_data = None | 58 | + def __init__(self, base_dir, category): |
59 | + """ | ||
60 | + base_dir: e.g. '/home/hadoop/data/MSR-IRC2014/' | ||
61 | + list_file: not data_file! e.g. 'file-tag.tsv' | ||
62 | + dict_data: e.g. {'filename':rawdata} or {'filename':tag} | ||
63 | + """ | ||
64 | + self.base_dir = base_dir | ||
65 | + self.category = category | ||
66 | + self.dst_dir = self.base_dir + 'dst/' + self.category + '/' | ||
67 | + | ||
68 | + self.list_file = self.dst_dir + 'file-tag.tsv' | ||
69 | + self.feat_dir = self.dst_dir + 'Feat/' | ||
70 | + self.img_dir = self.dst_dir + 'Img/' | ||
44 | 71 | ||
45 | - # self.table_name = None | ||
46 | - # self.table = None | ||
47 | - # self.connection = None | 72 | + self.table_name = None |
73 | + self.table = None | ||
74 | + self.connection = None | ||
48 | 75 | ||
49 | def format(self): | 76 | def format(self): |
50 | pass | 77 | pass |
51 | 78 | ||
52 | 79 | ||
53 | - | ||
54 | - def get_table(self, tablename, connection=None): | 80 | + def get_table(self): |
55 | pass | 81 | pass |
56 | 82 | ||
57 | - def store_img(self, table): | 83 | + def store_img(self): |
58 | pass | 84 | pass |
59 | 85 | ||
60 | - def store_tag(self, table, category): | 86 | + def store_tag(self, feattype): |
61 | pass | 87 | pass |
62 | 88 | ||
63 | - def get_feat(self, category): | 89 | + def get_feat(self, feattype): |
64 | pass | 90 | pass |
65 | 91 | ||
66 | - def store_feat(self, table, category): | 92 | + def store_feat(self, feattype): |
67 | pass | 93 | pass |
68 | 94 | ||
69 | 95 |
No preview for this file type
mfeat/__init__.py
@@ -7,7 +7,7 @@ import cv2 | @@ -7,7 +7,7 @@ import cv2 | ||
7 | from skimage.feature import hog | 7 | from skimage.feature import hog |
8 | from skimage import io, color, transform, exposure | 8 | from skimage import io, color, transform, exposure |
9 | 9 | ||
10 | -__all__ = ['FeatureBase', 'FeatHOG', 'timer'] | 10 | +__all__ = ['FeatureBase'] |
11 | 11 | ||
12 | timer = ctimer() | 12 | timer = ctimer() |
13 | 13 |
No preview for this file type
res/tmp.jpg
@@ -0,0 +1,10 @@ | @@ -0,0 +1,10 @@ | ||
1 | +__author__ = 'chunk' | ||
2 | + | ||
3 | + | ||
4 | +from mdata import MSR | ||
5 | + | ||
6 | +msrd = MSR.DataMSR(base_dir='/media/chunk/Elements/D/data/MSR-IRC2014/',category='Train',data_file='TrainImageSet.tsv', tag_file='TrainSetLabel.tsv') | ||
7 | +# msrd.format() | ||
8 | +# msrd.build_list() | ||
9 | + | ||
10 | +print 'helllo' | ||
0 | \ No newline at end of file | 11 | \ No newline at end of file |