# -*- coding: utf-8 -*-
__author__ = 'chunk'

__all__ = ['DataDumperBase']


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.

    Every path handled by this class lives under ``<base_dir>/dst/<category>/``.
    A regular directory pattern would be like this:

        ├── Dev                   (category)
        │   ├── file-tag.tsv      (list_file)
        │   ├── Feat              (feat_dir)
        │   │   ├── 0a1
        │   │   ├── 53e
        │   │   └── ...
        │   └── Img               (img_dir)
        │       ├── 0a1
        │       ├── 53e
        │       └── ...
        ├── Train                 (category)
        │   ├── file-tag.tsv      (list_file)
        │   ├── Feat
        │   │   ├── 032
        │   │   ├── a21
        │   │   └── ...
        │   └── Img
        │       ├── 032
        │       ├── a21
        │       └── ...
        .
        .
        .

    It can be refactored from the original pattern, which is supposed to be
    generated from web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   ├── bbb.jpg
        │   └── ...
        └── Pos
            ├── ccc.jpg
            ├── ddd.jpg
            └── ...

    Convention: 'img' stands for image file data while 'image' stands for a
    file path.

    All operations (``format``, ``get_table``, ``store_img``, ``store_tag``,
    ``get_feat``, ``store_feat``) are no-op stubs in this base class.
    """

    def __init__(self, base_dir, category):
        """
        Record the data root and category and derive the working paths.

        base_dir: root directory of the data set,
                  e.g. '/home/hadoop/data/MSR-IRC2014/'. A missing trailing
                  '/' is tolerated when the derived paths are built.
        category: name of the sub-directory under '<base_dir>/dst/',
                  e.g. 'Dev' or 'Train'.

        Derived attributes:
        dst_dir:   '<base_dir>/dst/<category>/'
        list_file: '<dst_dir>file-tag.tsv' (the list file, not a data file!)
        feat_dir:  '<dst_dir>Feat/'
        img_dir:   '<dst_dir>Img/'
        """
        # Stored exactly as given so callers reading them back see their input.
        self.base_dir = base_dir
        self.category = category

        # Plain concatenation used to glue a slash-less base_dir straight onto
        # 'dst/' ('/data/base' -> '/data/basedst/...'). Add the separator only
        # when it is missing; an empty base_dir stays empty so the derived
        # paths remain relative instead of jumping to the filesystem root.
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'

        self.dst_dir = root + 'dst/' + self.category + '/'
        self.list_file = self.dst_dir + 'file-tag.tsv'
        self.feat_dir = self.dst_dir + 'Feat/'
        self.img_dir = self.dst_dir + 'Img/'

        # Storage handles start out unset; nothing in this base class fills
        # them in (presumably concrete subclasses do -- TODO confirm).
        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """
        No-op in the base class; returns None.

        NOTE(review): presumably overridden to rearrange the crawler layout
        into the regular layout described on the class -- confirm in subclasses.
        """
        pass

    def get_table(self):
        """
        No-op in the base class; returns None.

        NOTE(review): presumably overridden to obtain ``self.table`` -- confirm
        in subclasses.
        """
        pass

    def store_img(self):
        """
        No-op in the base class; returns None.

        NOTE(review): presumably overridden to persist image data -- confirm
        in subclasses.
        """
        pass

    def store_tag(self, feattype):
        """
        No-op in the base class; returns None.

        feattype: accepted but unused here; its meaning is defined by
                  subclasses.
        """
        pass

    def get_feat(self, feattype):
        """
        No-op in the base class; returns None.

        feattype: accepted but unused here; presumably names the kind of
                  feature to retrieve -- confirm in subclasses.
        """
        pass

    def store_feat(self, feattype):
        """
        No-op in the base class; returns None.

        feattype: accepted but unused here; presumably names the kind of
                  feature to persist -- confirm in subclasses.
        """
        pass