내용

글번호 1003
작성자 허진경
작성일 2019-12-13 10:32:13
제목 [Re]딥러닝을 이용한 와인데이터 분류
내용
import tensorflow
import tensorflow.compat.v1 as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# The rest of the script uses the TF 1.x graph/session API (placeholders,
# Session), so TF 2.x behaviour is switched off through the compat layer.
# On a TensorFlow 1.x install a plain `import tensorflow as tf` would do.
tf.disable_v2_behavior()

# Load the red-wine quality data; the file is semicolon-delimited.
redwine = pd.read_csv("winequality-red.csv", sep=";")

# Every column except the last is an input feature; the last column is the
# target that gets one-hot encoded below.
X = redwine.iloc[:, :-1]
y = redwine.iloc[:, -1]

# OneHotEncoder expects a 2-D array, so present the labels as a single
# column. This is computed once and reused for both fitting and transforming.
y_column = y.to_numpy().reshape(-1, 1)

# `enc` is kept at module level because its fitted categories describe which
# original label each one-hot column stands for.
enc = OneHotEncoder()
# The result is a SciPy sparse matrix; consumers call `.toarray()` on it.
y_onehot = enc.fit_transform(y_column)

# Hold out 30% of the rows for validation/testing.
train_X, test_X, train_y, test_y = train_test_split(X, y_onehot,
                                                    test_size=0.3)

# Size the graph from the prepared data instead of hard-coding 11 input
# features and 6 classes, so the layer shapes always agree with what is fed.
n_features = int(X.shape[1])
n_classes = int(y_onehot.shape[1])
n_hidden = 20

# Placeholders for a batch of feature rows and their one-hot labels.
X_ = tf.placeholder(tf.float32, [None, n_features])
y_ = tf.placeholder(tf.float32, [None, n_classes])

# Hidden layer 1: fully connected, sigmoid activation.
W_h1 = tf.Variable(tf.truncated_normal([n_features, n_hidden]))
b_h1 = tf.Variable(tf.truncated_normal([n_hidden]))
y_h1 = tf.nn.sigmoid(tf.matmul(X_, W_h1) + b_h1)

# Output layer: class logits and, for prediction, their softmax.
# NOTE(review): unlike the hidden layer, the output layer has no bias term;
# left as in the original -- confirm whether that is intentional.
W_o = tf.Variable(tf.truncated_normal([n_hidden, n_classes]))
logits_o = tf.matmul(y_h1, W_o)
y_o = tf.nn.softmax(logits_o)

# Loss function and optimizer.
# The cross-entropy is computed from the raw logits by the fused TF op.
# Taking tf.log() of the softmax output by hand yields -inf/NaN as soon as
# any probability underflows to 0, which then poisons the whole training run.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_,
                                                           logits=logits_o)
loss = tf.reduce_mean(cross_entropy)
train = tf.train.AdamOptimizer(0.0001).minimize(loss)

# Create the session and initialise every variable in the graph.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# The one-hot labels are stored as sparse matrices. Densify them once here
# rather than on every one of the 100000 steps, since they never change.
train_feed = {X_: train_X, y_: train_y.toarray()}
val_feed = {X_: test_X, y_: test_y.toarray()}

# Training: full-batch updates, reporting the loss on the training and
# held-out data every 1000 steps.
for i in range(100000):
    sess.run(train, feed_dict=train_feed)
    if i % 1000 == 0:
        train_loss = sess.run(loss, feed_dict=train_feed)
        val_loss = sess.run(loss, feed_dict=val_feed)
        print(train_loss, val_loss)

from sklearn.metrics import accuracy_score

# Softmax outputs of the network for the held-out rows.
pred = sess.run(y_o, feed_dict={X_: test_X})

# Map each arg-max column index back to the original label through the
# encoder's fitted categories. This replaces a hard-coded "+ 3" offset, which
# is only right when the labels are contiguous integers starting at 3.
class_labels = enc.categories_[0]
pred_val = class_labels[np.argmax(pred, axis=1)]
y_true = class_labels[np.argmax(test_y.toarray(), axis=1)]

# Confusion table (rows: true label, columns: predicted label) and accuracy.
print(pd.crosstab(y_true, pred_val))
print(accuracy_score(y_true, pred_val))