84648488
Chunk
reverted.
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
__author__ = 'hadoop'
import lmdb
import re, fileinput, math
import numpy as np
import os, sys
import caffe
from caffe.proto import caffe_pb2
# Hard-coded, machine-specific Caffe directory; read_lmdb() joins it with
# 'examples/mnist/mnist_train_lmdb' to build its default database path.
caffe_root = '/home/hadoop/Programs/lib/caffe'
# Absolute path of the directory that contains this file.
package_dir = os.path.dirname(os.path.abspath(__file__))
# Command line to check created files:
# python -mlmdb stat --env=./Downloads/caffe-master/data/liris-accede/train_score_lmdb/
def read_lmdb(lmdb_name=os.path.join(caffe_root, 'examples/mnist/mnist_train_lmdb')):
    """Print the label and data array of every record stored in an LMDB.

    Each value in the database is parsed as a serialized ``caffe_pb2.Datum``
    and converted to an array with ``caffe.io.datum_to_array``. One line of
    the form ``<label> <data>`` is printed per record.

    lmdb_name - path of the LMDB environment to read. Defaults to the MNIST
                training database below ``caffe_root``.
    """
    lmdb_env = lmdb.open(lmdb_name)
    try:
        # The transaction is finished when the ``with`` block exits.
        with lmdb_env.begin() as lmdb_txn:
            # A single Datum is reused; ParseFromString overwrites its
            # contents for every record.
            datum = caffe_pb2.Datum()
            for _key, value in lmdb_txn.cursor():
                datum.ParseFromString(value)
                data = caffe.io.datum_to_array(datum)
                # Single-argument print(...) emits the same text under
                # Python 2 (print statement) and Python 3 (print function).
                print('%s %s' % (datum.label, data))
    finally:
        # Always release the environment handle, even if parsing fails.
        lmdb_env.close()
def _write_datum_batches(lmdb_name, items, to_datum, batch_size):
    """Store ``to_datum(item)`` for every item in the LMDB at ``lmdb_name``.

    Items are written in batches of ``batch_size``, one write transaction per
    batch, under zero-padded 10-digit keys equal to the item's index. A
    progress line ``<written> / <total>`` is printed after every item.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    total = len(items)
    if total == 0:
        # Nothing to store: do not open (and thereby create) the database.
        return
    env = lmdb.open(lmdb_name, map_size=int(1e12))
    try:
        for start in range(0, total, batch_size):
            # One transaction per batch; it is committed when the ``with``
            # block exits without an exception.
            with env.begin(write=True) as txn:
                for offset, item in enumerate(items[start:start + batch_size]):
                    index = start + offset
                    # Encoded explicitly: py-lmdb requires bytes keys on
                    # Python 3; on Python 2 this is a no-op for ASCII digits.
                    key = '{:0>10d}'.format(index).encode('ascii')
                    txn.put(key, to_datum(item).SerializeToString())
                    print(str(index + 1) + ' / ' + str(total))
    finally:
        # Release the environment even if conversion or a write fails.
        env.close()


def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb',
                    batch_size=1000, image_shape=(1, 200, 200)):
    """
    Write data and labels into two separate LMDB databases of Caffe Datums.

    X - numpy array of data. Every element is cast to ``int`` and reshaped to
        ``image_shape`` before being converted to a Datum.
    Y - numpy array of labels. Every label is stored as a 1x1x1 uint8 array.
        NOTE(review): the uint8 cast cannot represent labels outside 0-255;
        confirm that is acceptable for the intended label range.
    lmdb_name_data - path of the LMDB that receives the entries of X.
    lmdb_name_label - path of the LMDB that receives the entries of Y.
    batch_size - number of records written per transaction (default 1000).
    image_shape - shape each element of X is reshaped to; the default matches
        the previously hard-coded (1, 200, 200).

    Records in both databases are keyed by their zero-padded index, so entry
    i of X and entry i of Y share the same key.
    """
    def label_to_datum(label_):
        return caffe.io.array_to_datum(np.array(label_, dtype=np.uint8).reshape(1, 1, 1))

    def image_to_datum(in_):
        return caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(image_shape))

    print('writing labels...')
    _write_datum_batches(lmdb_name_label, Y, label_to_datum, batch_size)

    print('writing image data...')
    _write_datum_batches(lmdb_name_data, X, image_to_datum, batch_size)
def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb',
               batch_size=1000, image_shape=(1, 200, 200)):
    """
    Write data together with its labels into a single LMDB of Caffe Datums.

    X - numpy array of data. Every element is cast to ``int`` and reshaped to
        ``image_shape`` before being converted to a Datum.
    Y - numpy array of labels; Y[i] is stored as the Datum label of X[i].
    lmdb_name_data - path of the LMDB that receives the labelled Datums.
    lmdb_name_label - unused by this function; kept so the signature matches
        existing callers.
    batch_size - number of records written per transaction (default 1000).
    image_shape - shape each element of X is reshaped to; the default matches
        the previously hard-coded (1, 200, 200).

    X and Y are paired element by element; if their lengths differ, only the
    first ``min(len(X), len(Y))`` pairs are written. Records are keyed by
    their zero-padded 10-digit index, and a progress line
    ``<written> / <len(X)>`` is printed after every record.
    """
    print('writing image data...')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    # zip() stops at the shorter input, so size the batches from the number of
    # pairs that actually exist instead of from len(Y) alone.
    total = min(len(X), len(Y))
    if total == 0:
        # Nothing to store: do not open (and thereby create) the database.
        return
    in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
    try:
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            # One transaction per batch; it is committed when the ``with``
            # block exits without an exception.
            with in_db_data.begin(write=True) as in_txn:
                for offset, (in_, label_) in enumerate(zip(X[start:stop], Y[start:stop])):
                    index = start + offset
                    im_dat = caffe.io.array_to_datum(
                        np.array(in_, dtype=int).reshape(image_shape), label_)
                    # Encoded explicitly: py-lmdb requires bytes keys on
                    # Python 3; on Python 2 this is a no-op for ASCII digits.
                    key = '{:0>10d}'.format(index).encode('ascii')
                    in_txn.put(key, im_dat.SerializeToString())
                    print(str(index + 1) + ' / ' + str(len(X)))
    finally:
        # Release the environment even if conversion or a write fails.
        in_db_data.close()
if __name__ == '__main__':
    # Script entry point: print every record of read_lmdb()'s default database.
    read_lmdb()
|