Blame view

mdata/__init__.py 2.39 KB
0d9a20ea   Chunk   staged.
1
# -*- coding: utf-8 -*-
c7fa1d60   Chunk   refractoration st...
2
3
# Author metadata for this package.
__author__ = 'chunk'

0d9a20ea   Chunk   staged.
4
# Public API: the only name exported by `from mdata import *`.
__all__ = ['DataDumperBase']
c7fa1d60   Chunk   refractoration st...
5
6
7
8
9
10
11


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.

    This class only computes the directory layout and declares the hooks
    below; every hook is a no-op here.

    A regular directory pattern would be like this:

        dst
        ├── Dev (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat (feat_dir)
            │   ├── 0a1
            │   └── 53e
            │   └── ...
            │
            └── Img (img_dir)
                ├── 0a1
                └── 53e
                └── ...
        ├── Train (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat
            │   ├── 032
            │   └── a21
            │   └── ...
            │
            └── Img
                ├── 032
                └── a21
                └── ...
        .
        .
        .

    It can be refactored from the original pattern which is supposed to be
    generated from web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   └── bbb.jpg
        │   └── ...
        │
        └── Pos
            ├── ccc.jpg
            └── ddd.jpg
            └── ...

    Convention:

        'im' or 'img' is for image file data while 'image' or 'image_path'
        is for a file path.

    """

    def __init__(self, base_dir, category):
        """
        Compute the directory layout for one category.

        Parameters:
            base_dir: root data directory,
                e.g. '/home/hadoop/data/MSR-IRC2014/'. A trailing '/' is
                optional; it is added when missing while building the
                derived paths. An empty string yields relative paths.
            category: name of the sub-directory under 'dst/',
                e.g. 'Dev' or 'Train'.

        Attributes set:
            base_dir:   `base_dir` exactly as passed in.
            category:   `category` exactly as passed in.
            dst_dir:    '<base_dir>/dst/<category>/'
            list_file:  path of the list file (not a data file!),
                        i.e. '<dst_dir>file-tag.tsv'
            feat_dir:   '<dst_dir>Feat/'
            img_dir:    '<dst_dir>Img/'
            table_name, table, connection: initialised to None.
                NOTE(review): presumably filled in by subclasses that talk
                to a storage backend -- not visible in this file.

        The original notes also mention `dict_data`
        (e.g. {'filename': rawdata} or {'filename': tag}); it is neither a
        parameter nor an attribute of this base class.
        """
        self.base_dir = base_dir
        self.category = category

        # Paths are built by plain string concatenation, so the root must end
        # with '/'. An empty root is left untouched so the resulting paths
        # stay relative instead of becoming absolute ('/dst/...').
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'

        self.dst_dir = root + 'dst/' + self.category + '/'

        self.list_file = self.dst_dir + 'file-tag.tsv'
        self.feat_dir = self.dst_dir + 'Feat/'
        self.img_dir = self.dst_dir + 'Img/'

        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """
        No-op in the base class.

        NOTE(review): presumably the hook that reorganises crawler output
        into the 'dst' layout described on the class -- confirm in subclasses.
        """
        pass

    def get_table(self):
        """
        No-op in the base class; returns None.

        NOTE(review): presumably related to `table_name` / `table` /
        `connection` -- confirm in subclasses.
        """
        pass

    def store_img(self):
        """
        No-op in the base class.

        NOTE(review): presumably stores image data -- confirm in subclasses.
        """
        pass

    def store_tag(self, tagtype):
        """
        No-op in the base class.

        Parameters:
            tagtype: unused here; its meaning is defined by subclasses.
        """
        pass

    def store_feat(self, feattype):
        """
        No-op in the base class.

        Parameters:
            feattype: unused here; its meaning is defined by subclasses.
        """
        pass

    def get_feat(self, image, feattype):
        """
        No-op in the base class; returns None.

        Parameters:
            image: unused here; by the class naming convention this is an
                image file path (not image data).
            feattype: unused here; its meaning is defined by subclasses.
        """
        pass

    def extract_feat(self, feattype):
        """
        No-op in the base class.

        Parameters:
            feattype: unused here; its meaning is defined by subclasses.
        """
        pass

    def load_data(self, mode):
        """
        No-op in the base class; returns None.

        Parameters:
            mode: unused here; its meaning is defined by subclasses.
        """
        pass
be12257b   Chunk   data-feat-model f...