Blame view

mdata/__init__.py 2.24 KB
0d9a20ea   Chunk   staged.
1
# -*- coding: utf-8 -*-
c7fa1d60   Chunk   refractoration st...
2
3
# Module author metadata.
__author__ = 'chunk'

0d9a20ea   Chunk   staged.
4
# Names exported by ``from mdata import *``; only the base class is listed.
__all__ = ['DataDumperBase']
c7fa1d60   Chunk   refractoration st...
5
6
7
8
9
10
11


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.

    A regular directory pattern (rooted at '<base_dir>dst/') would be like
    this:

        ├── Dev (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat (feat_dir)
            │   ├── 0a1
            │   └── 53e
            │   └── ...
            │
            └── Img (img_dir)
                ├── 0a1
                └── 53e
                └── ...
        ├── Train (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat
            │   ├── 032
            │   └── a21
            │   └── ...
            │
            └── Img
                ├── 032
                └── a21
                └── ...
        .
        .
        .

    It can be refactored from the original pattern which is supposed to be
    generated from web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   └── bbb.jpg
        │   └── ...
        │
        └── Pos
            ├── ccc.jpg
            └── ddd.jpg
            └── ...


    convention:
        'img' for image file data while 'image' for file path;

    """

    def __init__(self, base_dir, category):
        """
        Record the directory layout for one data category.

        base_dir: root data directory, e.g. '/home/hadoop/data/MSR-IRC2014/'.
            A missing trailing '/' is tolerated when the derived paths are
            built; the value itself is stored unchanged in ``self.base_dir``.
        category: name of the category sub-directory, e.g. 'Train' or 'Dev'.

        Attributes set:
            dst_dir:   '<base_dir>/dst/<category>/'
            list_file: '<dst_dir>file-tag.tsv' (the list file, not data_file!)
            feat_dir:  '<dst_dir>Feat/'
            img_dir:   '<dst_dir>Img/'
            table_name, table, connection: initialised to None.

        Note carried over from the original docstring (not used by this base
        class): dict_data, e.g. {'filename': rawdata} or {'filename': tag}.
        """
        self.base_dir = base_dir
        self.category = category

        # Plain concatenation silently produced '/datadst/...' when base_dir
        # had no trailing slash. An empty base_dir is left alone so relative
        # paths ('dst/<category>/') keep working as before.
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'
        self.dst_dir = root + 'dst/' + category + '/'

        self.list_file = self.dst_dir + 'file-tag.tsv'
        self.feat_dir = self.dst_dir + 'Feat/'
        self.img_dir = self.dst_dir + 'Img/'

        # Storage handles start unset; nothing in this class assigns them.
        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """
        Placeholder; does nothing in the base class.

        NOTE(review): presumably overridden by subclasses to reorganise raw
        data into the layout described on the class -- confirm.
        """

    def get_table(self):
        """
        Placeholder; does nothing in the base class.

        NOTE(review): presumably meant to populate ``self.table`` /
        ``self.connection`` in subclasses -- confirm.
        """

    def store_img(self):
        """Placeholder; does nothing in the base class."""

    def store_tag(self, feattype):
        """
        Placeholder; does nothing in the base class.

        feattype: accepted but unused here.
        """

    def get_feat(self, feattype):
        """
        Placeholder; does nothing in the base class.

        feattype: accepted but unused here.
        """

    def store_feat(self, feattype):
        """
        Placeholder; does nothing in the base class.

        feattype: accepted but unused here.
        """
c7fa1d60   Chunk   refractoration st...

be12257b   Chunk   data-feat-model f...

c7fa1d60   Chunk   refractoration st...

be12257b   Chunk   data-feat-model f...