Blame view

ImageR/imager/mdata/__init__.py 2.44 KB
1f1943eb   qijun   initial commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -*- coding: utf-8 -*-
# Package initializer for imager.mdata: defines the base class shared by the
# image data dumping & retrieving helpers.
__author__ = 'chunk'

# Names exported by `from <package> import *`.
__all__ = ['DataDumperBase']


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.
    A regular directory pattern would be like this:

        dst
        ├── Dev (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat (feat_dir)
            │   ├── 0a1
            │   └── 53e
            │   └── ...
            |
            └── Img (img_dir)
                ├── 0a1
                └── 53e
                └── ...
        ├── Train (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat
            │   ├── 032
            │   └── a21
            │   └── ...
            |
            └── Img
                ├── 032
                └── a21
                └── ...
        .
        .
        .

    It can be refactored from the original pattern which is supposed to be generated from web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   └── bbb.jpg
        │   └── ...
        |
        └── Pos
            ├── ccc.jpg
            └── ddd.jpg
            └── ...


    Convention:

        'im' or 'img' is for image file data while 'image' or 'image_path' for file path;

    The base class only computes the paths of the layout above and declares
    placeholder methods; every placeholder is a no-op that returns None.
    """

    def __init__(self, base_dir, category):
        """
        Compute the paths of the `dst` layout for one category.

        base_dir: root data directory, e.g. '/home/hadoop/data/MSR-IRC2014/'.
            A missing trailing '/' is tolerated when building the derived
            paths; an empty string keeps the derived paths relative.
        category: sub-directory of 'dst/' (e.g. 'Dev', 'Train'), or None to
            place list_file / Feat / Img directly under 'dst/'.

        Attributes set:
            base       -- base_dir exactly as passed in
            category   -- category exactly as passed in
            dst_dir    -- '<base>/dst/' or '<base>/dst/<category>/'
            list_file  -- dst_dir + 'file-tag.tsv' (the list file, not a data file)
            feat_dir   -- dst_dir + 'Feat/'
            img_dir    -- dst_dir + 'Img/'
            table_name, table, connection -- initialised to None

        Note kept from the original author:
            dict_data: e.g. {'filename':rawdata} or {'filename':tag}
        """
        self.base = base_dir
        self.category = category

        # Plain string concatenation would fuse '/data' + 'dst/' into
        # '/datadst/'; insert the separator when the caller left it out.
        # An empty base is left alone so the result stays a relative path.
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'

        self.dst_dir = root + 'dst/'
        if self.category is not None:
            self.dst_dir += (self.category + '/')

        self.list_file = self.dst_dir + 'file-tag.tsv'
        self.feat_dir = self.dst_dir + 'Feat/'
        self.img_dir = self.dst_dir + 'Img/'

        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """
        Placeholder; does nothing in the base class.

        NOTE(review): presumably overridden to arrange raw data into the
        `dst` layout described in the class docstring -- confirm.
        """
        pass

    def get_table(self):
        """
        Placeholder; does nothing in the base class.

        NOTE(review): presumably overridden to populate `self.table` /
        `self.connection` -- confirm against subclasses.
        """
        pass

    def store_img(self):
        """
        Placeholder; does nothing in the base class.

        NOTE(review): presumably overridden to persist image data -- confirm.
        """
        pass

    def store_tag(self, tagtype):
        """
        Placeholder; does nothing in the base class.

        tagtype: unused here; meaning is defined by subclasses (TODO confirm).
        """
        pass

    def store_feat(self, feattype):
        """
        Placeholder; does nothing in the base class.

        feattype: unused here; meaning is defined by subclasses (TODO confirm).
        """
        pass

    def get_feat(self, image, feattype):
        """
        Placeholder; does nothing in the base class.

        image: unused here; by the class convention 'image' denotes a file
            path rather than image data.
        feattype: unused here; meaning is defined by subclasses (TODO confirm).
        """
        pass

    def extract_feat(self, feattype):
        """
        Placeholder; does nothing in the base class.

        feattype: unused here; meaning is defined by subclasses (TODO confirm).
        """
        pass

    def load_data(self, mode):
        """
        Placeholder; does nothing in the base class.

        mode: unused here; meaning is defined by subclasses (TODO confirm).
        """
        pass