
Running AlexNet in CPU mode (Part 2: using a self-downloaded CIFAR-10)

The data loaded from the original CIFAR-10 pickles apparently comes as flat (3072,) vectors.
So the first step is to reshape each row to (3, 1024); each 1024-value chunk holds one channel, in r, g, b order.
Reference:
[Python]CIFAR-10, CIFAR-100のデータを読み込む方法 - Qiita
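
As a quick illustration, here is that layout on a dummy row (a minimal sketch, not the actual loader):

import numpy as np

# A fake CIFAR-10 row: 3072 uint8 values, stored as 1024 r, then 1024 g, then 1024 b.
row = np.arange(3072, dtype=np.uint8)
channels = row.reshape(3, 1024)
print(channels.shape)   # (3, 1024) -- channels[0] is r, channels[1] is g, channels[2] is b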

Running with that shape, Chainer still complained that the dimensions were wrong, so I took a careful look at what chainer.datasets.get_cifar10() actually returns and compared:

# From chainer.datasets.get_cifar10()
(array([[[ 0.23137257,  0.16862746,  0.19607845, ...,  0.61960787,
           0.59607846,  0.58039218],
         [ 0.0627451 ,  0.        ,  0.07058824, ...,  0.48235297,
           0.4666667 ,  0.4784314 ],
         [ 0.09803922,  0.0627451 ,  0.19215688, ...,  0.46274513,
           0.47058827,  0.42745101],
         ...,
         [ 0.81568635,  0.78823537,  0.77647066, ...,  0.627451  ,
           0.21960786,  0.20784315],
         [ 0.70588237,  0.67843139,  0.72941178, ...,  0.72156864,
           0.38039219,  0.32549021],
         [ 0.69411767,  0.65882355,  0.7019608 , ...,  0.84705889,
           0.59215689,  0.48235297]],

        [[ 0.24313727,  0.18039216,  0.18823531, ...,  0.51764709,
           0.49019611,  0.48627454],
         [ 0.07843138,  0.        ,  0.03137255, ...,  0.34509805,
           0.32549021,  0.34117648],
         [ 0.09411766,  0.02745098,  0.10588236, ...,  0.32941177,
           0.32941177,  0.28627452],
         ...,
         [ 0.66666669,  0.60000002,  0.63137257, ...,  0.52156866,
           0.12156864,  0.13333334],
         [ 0.54509807,  0.48235297,  0.56470591, ...,  0.58039218,
           0.24313727,  0.20784315],
         [ 0.56470591,  0.50588238,  0.55686277, ...,  0.72156864,
           0.46274513,  0.36078432]],

        [[ 0.24705884,  0.17647059,  0.16862746, ...,  0.42352945,
           0.40000004,  0.4039216 ],
         [ 0.07843138,  0.        ,  0.        , ...,  0.21568629,
           0.19607845,  0.22352943],
         [ 0.08235294,  0.        ,  0.03137255, ...,  0.19607845,
           0.19607845,  0.16470589],
         ...,
         [ 0.37647063,  0.13333334,  0.10196079, ...,  0.27450982,
           0.02745098,  0.07843138],
         [ 0.37647063,  0.16470589,  0.11764707, ...,  0.36862746,
           0.13333334,  0.13333334],
         [ 0.45490199,  0.36862746,  0.34117648, ...,  0.54901963,
           0.32941177,  0.28235295]]], dtype=float32), 6)

Hmm.
Next, the same image after my (3, 1024) reshape:

(array([[ 0.23137255,  0.16862746,  0.19607843, ...,  0.84705883,
          0.59215689,  0.48235294],
        [ 0.24313726,  0.18039216,  0.1882353 , ...,  0.72156864,
          0.4627451 ,  0.36078432],
        [ 0.24705882,  0.17647059,  0.16862746, ...,  0.54901963,
          0.32941177,  0.28235295]], dtype=float32), 6)

That looked like far too few values... and on closer inspection, get_cifar10() apparently feeds the network data shaped (3, 32, 32), not (3, 1024).
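
You can check the expected shape directly (a minimal sanity check, assuming Chainer can fetch CIFAR-10):

import chainer

train, test = chainer.datasets.get_cifar10()
img, label = train[0]
print(img.shape, img.dtype)   # (3, 32, 32) float32, values scaled to [0, 1]
print(label)                  # 6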
So I changed the code:

def divive_l_t():
    # Flatten the five pickled batches into parallel lists of rows and labels.
    ary = unpickle()
    data_ary = []
    label_ary = []
    for batch in ary:
        for data, label in zip(batch['data'], batch['labels']):
            # Each row is a flat (3072,) uint8 vector; split it into r/g/b planes.
            rsh_data = data.reshape(3, 1024)
            data_ary.append(rsh_data)
            label_ary.append(label)

    return data_ary, label_ary

def divive_tr_t():
    # Turn each (3, 1024) row into a (3, 32, 32) float32 image scaled to [0, 1].
    data_ary, label_ary = divive_l_t()
    imageData = []
    labelData = []
    for data, label in zip(data_ary, label_ary):
        r, g, b = data[0], data[1], data[2]
        rImg = np.asarray(np.float32(r) / 255.0).reshape(32, 32)
        gImg = np.asarray(np.float32(g) / 255.0).reshape(32, 32)
        bImg = np.asarray(np.float32(b) / 255.0).reshape(32, 32)
        img = np.asarray([rImg, gImg, bImg])
        imageData.append(img)
        labelData.append(np.int32(label))

    return imageData, labelData
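
For reference, the whole conversion can also be done in one vectorized step (a sketch under the same data layout; to_images_vectorized is a made-up name, not part of the script below):

import numpy as np

def to_images_vectorized(ary):
    # (N, 3072) uint8 rows -> (N, 3, 32, 32) float32 images in [0, 1], all batches at once.
    data = np.concatenate([batch['data'] for batch in ary])
    labels = np.concatenate([np.asarray(batch['labels'], dtype=np.int32) for batch in ary])
    return data.reshape(-1, 3, 32, 32).astype(np.float32) / 255.0, labels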

With the data finally in the shape I wanted, all that's left is:

train = tuple_dataset.TupleDataset(imageData[0:threshold], labelData[0:threshold])
test = tuple_dataset.TupleDataset(imageData[threshold:], labelData[threshold:])

and that's it. threshold is the index where the data is split into train and test sets; in the full code below it is set to 7/8 of the data, i.e. 43,750 training images and 6,250 test images out of the 50,000 in the five batches.
The full code follows. I'll tackle GPU mode once I get home.

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from chainer.datasets import tuple_dataset
import numpy as np
import _pickle as cpickle
import os

def unpickle():
    # Load the five training batches (data_batch_1 .. data_batch_5) from disk.
    base_path = os.path.dirname(os.path.abspath(__file__))
    cifar_path = os.path.normpath(os.path.join(base_path, '../cifar-10-batches-py'))
    ary = []
    for i in range(1, 6):
        file_path = cifar_path + '/data_batch_' + str(i)
        with open(file_path, 'rb') as fo:
            tmp_dic = cpickle.load(fo, encoding='latin1')
        ary.append(tmp_dic)

    return ary

def divive_l_t():
    # Flatten the five pickled batches into parallel lists of rows and labels.
    ary = unpickle()
    data_ary = []
    label_ary = []
    for batch in ary:
        for data, label in zip(batch['data'], batch['labels']):
            # Each row is a flat (3072,) uint8 vector; split it into r/g/b planes.
            rsh_data = data.reshape(3, 1024)
            data_ary.append(rsh_data)
            label_ary.append(label)

    return data_ary, label_ary

def divive_tr_t():
    # Turn each (3, 1024) row into a (3, 32, 32) float32 image scaled to [0, 1].
    data_ary, label_ary = divive_l_t()
    imageData = []
    labelData = []
    for data, label in zip(data_ary, label_ary):
        r, g, b = data[0], data[1], data[2]
        rImg = np.asarray(np.float32(r) / 255.0).reshape(32, 32)
        gImg = np.asarray(np.float32(g) / 255.0).reshape(32, 32)
        bImg = np.asarray(np.float32(b) / 255.0).reshape(32, 32)
        img = np.asarray([rImg, gImg, bImg])
        imageData.append(img)
        labelData.append(np.int32(label))

    return imageData, labelData

class AlexNet(chainer.Chain):

    # Input size of the original ImageNet AlexNet; unused here, since the
    # CIFAR-10 images are 32x32.
    input_size = 227

    def __init__(self):
        super(AlexNet, self).__init__(
            conv1=L.Convolution2D(None, 96, 11, stride=4),
            conv2=L.Convolution2D(None, 256, 3, pad=2),
            conv3=L.Convolution2D(None, 384, 3, pad=1),
            conv4=L.Convolution2D(None, 384, 3, pad=1),
            conv5=L.Convolution2D(None, 256, 3, pad=1),
            fc6=L.Linear(None, 4096),
            fc7=L.Linear(None, 4096),
            fc8=L.Linear(None, 10))

    def __call__(self, x):
        # conv -> ReLU -> local response normalization -> max pooling, as in AlexNet.
        h = F.max_pooling_2d(F.local_response_normalization(F.relu(self.conv1(x))), 3, stride=2)
        h = F.max_pooling_2d(F.local_response_normalization(F.relu(self.conv2(h))), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.max_pooling_2d(F.relu(self.conv5(h)), 3, stride=2)
        h = F.dropout(F.relu(self.fc6(h)))
        h = F.dropout(F.relu(self.fc7(h)))
        # Return raw logits: L.Classifier applies softmax cross entropy itself,
        # and a ReLU here would clamp all negative logits to zero.

        return self.fc8(h)

# Instantiate the model and set up the optimizer
model = L.Classifier(AlexNet())
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

# Split into training and test data
# train, test = chainer.datasets.get_cifar10()
# chainer.datasets.get_cifar10() yields items shaped (3, 32, 32)
# the data I downloaded myself starts out as (3, 1024) rows
imageData, labelData = divive_tr_t()
threshold = np.int32(len(imageData) / 8 * 7)  # 7/8 of the data for training, 1/8 for testing
train = tuple_dataset.TupleDataset(imageData[0:threshold], labelData[0:threshold])
test = tuple_dataset.TupleDataset(imageData[threshold:], labelData[threshold:])

train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)

updater = training.StandardUpdater(train_iter, optimizer, device=-1)  # device=-1: run on the CPU
trainer = training.Trainer(updater, (100, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())

trainer.run()
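
Not part of the script above, but since GPU mode is up next, it might be handy to save the trained weights once trainer.run() finishes. A minimal sketch using Chainer's standard serializer (the filename is made up):

from chainer import serializers

# Persist the trained parameters; load them later with serializers.load_npz.
serializers.save_npz('alexnet_cifar10.npz', model)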