# Contents
import numpy as np
# Directory containing the dataset batch files. File names are appended to
# this string directly, so the trailing slash is required.
data_path = "/home/nova/Data/cifar-10-batches-py/"

# Image geometry: square images of img_size x img_size pixels with
# num_channels colour channels each.
img_size = 32
num_channels = 3

# Number of values in one flattened image.
img_size_flat = num_channels * img_size * img_size

# Number of distinct class labels.
num_classes = 10

# Training-set layout: number of batch files and images stored per file.
num_file_train = 5
images_per_file = 10000

# Total number of training images across all training batch files.
num_images_train = images_per_file * num_file_train
def unpickle(file):
    """Load and return the pickled object stored in ``data_path + file``.

    Works on both Python 2 (``cPickle``) and Python 3 (``pickle``). On
    Python 3 the file is loaded with ``encoding='bytes'`` so that 8-bit
    strings written by a Python 2 pickler come back as ``bytes``; this
    keeps the result compatible with the ``b'...'`` key lookups used by
    the loader functions in this module.

    Args:
        file: File name relative to ``data_path`` (plain string
            concatenation, no separator is inserted).

    Returns:
        The unpickled object (the callers in this module index it like
        a dictionary).

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only use this
    # on dataset files obtained from a trusted source.
    try:
        import cPickle as pickler  # Python 2
    except ImportError:
        import pickle as pickler  # Python 3: cPickle no longer exists
        load_kwargs = {'encoding': 'bytes'}
    else:
        load_kwargs = {}

    with open(data_path + file, 'rb') as fo:
        return pickler.load(fo, **load_kwargs)
def convert_images(raw):
    """Turn raw pixel data into a 4-D floating-point image array.

    The input is converted to float, divided by 255.0, reshaped to
    ``[-1, num_channels, img_size, img_size]`` and then transposed so the
    channel axis comes last.

    Args:
        raw: Array-like of pixel values. Presumably integers in 0..255 with
            one flattened, channel-major image per row -- confirm against
            the dataset format.

    Returns:
        numpy array with axes ``[image, img_size, img_size, num_channels]``.
    """
    scaled = np.array(raw, dtype=float) / 255.0
    # Split each flat row into channel planes first, then move the channel
    # axis to the end.
    channel_first = scaled.reshape(-1, num_channels, img_size, img_size)
    return channel_first.transpose(0, 2, 3, 1)
def load_data(file):
    """Load one batch file and return its images and class numbers.

    Args:
        file: Batch file name, passed straight to ``unpickle``.

    Returns:
        Tuple ``(images, clazz)``: the ``b'data'`` entry run through
        ``convert_images``, and the ``b'labels'`` entry as a numpy array.
    """
    batch = unpickle(file)

    pixels = batch[b'data']
    labels = np.array(batch[b'labels'])

    return convert_images(pixels), labels
def one_hot_encoded(clazz):
    """Return one-hot rows for the given class numbers.

    Each entry of ``clazz`` selects the corresponding row of a
    ``num_classes`` x ``num_classes`` identity matrix.

    Args:
        clazz: Integer class index or array of indices. Assumed to lie in
            ``[0, num_classes)`` -- TODO confirm with callers.

    Returns:
        Float numpy array whose last axis has length ``num_classes``.
    """
    identity = np.eye(num_classes)
    return identity[clazz]
def load_class_names():
    """Return the class names stored in the ``batches.meta`` file.

    Returns:
        List of names, each decoded from its raw form as UTF-8, in the
        order they appear under the ``b'label_names'`` entry.
    """
    meta = unpickle(file="batches.meta")

    names = []
    for raw_name in meta[b'label_names']:
        names.append(raw_name.decode('utf-8'))
    return names
def load_training_data():
    """Load every training batch file into a single set of arrays.

    Files ``data_batch_1`` through ``data_batch_<num_file_train>`` are read
    in order and copied back-to-back into pre-allocated arrays.

    Returns:
        Tuple ``(images, clazz, one_hot)``: the image array, the integer
        class numbers, and the one-hot encoding of those class numbers.
    """
    # Allocate the full-size outputs up front. This assumes the batch files
    # together hold num_images_train images -- TODO confirm.
    all_images = np.zeros(
        shape=[num_images_train, img_size, img_size, num_channels],
        dtype=float,
    )
    all_classes = np.zeros(shape=[num_images_train], dtype=int)

    offset = 0
    for file_no in range(1, num_file_train + 1):
        batch_images, batch_classes = load_data(file='data_batch_' + str(file_no))

        # Place this batch directly after the previous one.
        stop = offset + len(batch_images)
        all_images[offset:stop] = batch_images
        all_classes[offset:stop] = batch_classes
        offset = stop

    return all_images, all_classes, one_hot_encoded(all_classes)
def load_test_data():
    """Load the ``test_batch`` file.

    Returns:
        Tuple ``(images, clazz, one_hot)``: the image array, the class
        numbers, and the one-hot encoding of those class numbers.
    """
    test_images, test_classes = load_data(file="test_batch")
    test_one_hot = one_hot_encoded(test_classes)
    return test_images, test_classes, test_one_hot