Blame view

mdata/__init__.py 2.44 KB
0d9a20ea   Chunk   staged.
1
# -*- coding: utf-8 -*-
c7fa1d60   Chunk   refractoration st...
2
3
# Module metadata: author handle recorded for this package.
__author__ = 'chunk'

0d9a20ea   Chunk   staged.
4
# Public API: the only name exported by a star-import of this package.
__all__ = ['DataDumperBase']
c7fa1d60   Chunk   refractoration st...
5
6
7
8
9
10
11


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.

    A regular directory pattern would be like this:

        dst
        ├── Dev (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat (feat_dir)
            │   ├── 0a1
            │   ├── 53e
            │   └── ...
            │
            └── Img (img_dir)
                ├── 0a1
                ├── 53e
                └── ...
        ├── Train (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat
            │   ├── 032
            │   ├── a21
            │   └── ...
            │
            └── Img
                ├── 032
                ├── a21
                └── ...
        .
        .
        .

    It can be refactored from the original pattern which is supposed to be
    generated from web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   ├── bbb.jpg
        │   └── ...
        │
        └── Pos
            ├── ccc.jpg
            ├── ddd.jpg
            └── ...

    Convention:

        'im' or 'img' is for image file data while 'image' or 'image_path'
        is for a file path.

    Every method other than __init__ is a no-op placeholder in this base
    class and returns None.
    """

    def __init__(self, base_dir, category):
        """
        Derive the dump directory layout from a root directory and a category.

        base_dir: root data directory, e.g. '/home/hadoop/data/MSR-IRC2014/'.
            A missing trailing '/' is tolerated when the derived paths are
            built; an empty string keeps the derived paths relative.
        category: sub-directory of 'dst/' (e.g. 'Dev' or 'Train'). When it is
            None the derived paths live directly under 'dst/'.

        Attributes set:
            base, category: the two arguments, stored exactly as given.
            dst_dir: '<base_dir>/dst/' plus '<category>/' when one is given.
            list_file: dst_dir + 'file-tag.tsv' (a listing file, not a data
                file).
            feat_dir: dst_dir + 'Feat/'.
            img_dir: dst_dir + 'Img/'.
            table_name, table, connection: initialised to None.

        Note: the original documentation also mentions 'dict_data', e.g.
        {'filename': rawdata} or {'filename': tag}; this constructor does not
        use it.
        """
        self.base = base_dir
        self.category = category

        # The paths below are built by plain concatenation, so the root must
        # end with a separator. An empty root is left alone so that the result
        # stays relative ('dst/...') instead of becoming absolute ('/dst/...').
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'

        dst_dir = root + 'dst/'
        if category is not None:
            dst_dir += category + '/'
        self.dst_dir = dst_dir

        self.list_file = dst_dir + 'file-tag.tsv'
        self.feat_dir = dst_dir + 'Feat/'
        self.img_dir = dst_dir + 'Img/'

        # Storage handles start unset; nothing in this class assigns them.
        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """Placeholder: does nothing in the base class and returns None."""

    def get_table(self):
        """
        Placeholder: does nothing in the base class and returns None.

        NOTE(review): presumably subclasses use this to populate
        self.table / self.connection - confirm against the implementations.
        """

    def store_img(self):
        """Placeholder: does nothing in the base class and returns None."""

    def store_tag(self, tagtype):
        """
        Placeholder: does nothing in the base class and returns None.

        tagtype: ignored here; its accepted values are not defined by this
            class.
        """

    def store_feat(self, feattype):
        """
        Placeholder: does nothing in the base class and returns None.

        feattype: ignored here; its accepted values are not defined by this
            class.
        """

    def get_feat(self, image, feattype):
        """
        Placeholder: does nothing in the base class and returns None.

        image: ignored here; by the class naming convention 'image' denotes a
            file path rather than image data.
        feattype: ignored here; its accepted values are not defined by this
            class.
        """

    def extract_feat(self, feattype):
        """
        Placeholder: does nothing in the base class and returns None.

        feattype: ignored here; its accepted values are not defined by this
            class.
        """

    def load_data(self, mode):
        """
        Placeholder: does nothing in the base class and returns None.

        mode: ignored here; its accepted values are not defined by this class.
        """