Learning Rate

Learning Rate Decay

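Exponential decay shrinks the learning rate as training progresses according to decayed_lr = initial_lr * decay_rate ^ (step / decay_steps). With staircase=True the exponent is floored to an integer, so the rate drops in discrete steps every decay_steps rather than continuously.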

learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=starter_learning_rate,
    decay_steps=50,       # decay every 50 steps
    decay_rate=0.96,
    staircase=True)
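
The schedule object is callable, so the rate it will produce at any step can be checked directly. A minimal standalone sketch, using the same parameters as above with an initial rate of 0.1:

import tensorflow as tf

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=50, decay_rate=0.96, staircase=True)

# with staircase=True the rate only changes every 50 steps
for step in [0, 49, 50, 100, 150]:
    print(step, float(lr_schedule(step)))
# -> 0.1, 0.1, 0.096, 0.09216, 0.0884736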

Data Preprocessing

Feature Scaling

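Feature scaling brings every column onto a comparable range so that no single feature dominates the gradient updates. A minimal standalone sketch (toy data, not from these notes) contrasting the two most common schemes, min-max normalization and standardization:

import numpy as np

x = np.array([[1.0, 200.0],
              [2.0, 400.0],
              [3.0, 600.0]])

# min-max normalization: rescale each column to [0, 1]
min_max = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))

# standardization: rescale each column to mean 0, standard deviation 1
standard = (x - x.mean(axis=0)) / x.std(axis=0)

print(min_max)
print(standard)

The normalization helper defined in the code below implements the min-max variant.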

Overfitting

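An overfitted model tracks its training data too closely and generalizes poorly to new data. One common remedy, implemented by the l2_loss helper in the code below, is L2 regularization: a penalty beta * sum(W ** 2) / 2 is added to the cost so that large weights are discouraged.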

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from mpl_toolkits.mplot3d import Axes3D

# daily stock data; column layout (inferred from the values):
# [open, high, volume, low, close], with the last column (close) as the target
xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])

def normalization(data):
    # column-wise min-max scaling: rescales each feature to the [0, 1] range
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    return numerator / denominator

xy = normalization(xy)  # scale every column to [0, 1]

x_train = xy[:, 0:-1]  # first four columns as features
y_train = xy[:, [-1]]  # last column as the label

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))  # single full batch (8 rows)

W = tf.Variable(tf.random.normal([4, 1]), dtype=tf.float32)  # one weight per feature
b = tf.Variable(tf.random.normal([1]), dtype=tf.float32)

def linearReg_fn(features):
    # linear hypothesis: H(X) = XW + b
    hypothesis = tf.matmul(features, W) + b
    return hypothesis

# Regularization
def l2_loss(loss, beta=0.01):
    W_reg = tf.nn.l2_loss(W)  # sum(W ** 2) / 2
    return loss + W_reg * beta  # add the weighted L2 penalty to the base loss
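
# Example: for W = [1.0, 2.0], tf.nn.l2_loss(W) = (1**2 + 2**2) / 2 = 2.5,
# so with beta = 0.01 the penalty added to the cost is 0.025.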

def loss_fn(hypothesis, features, labels, flag=False):
    cost = tf.reduce_mean(tf.square(hypothesis - labels))  # mean squared error
    if flag:
        cost = l2_loss(cost)
    return cost

is_decay = True

starter_learning_rate = 0.1

if is_decay:
    # TF2 schedules track the step through optimizer.iterations,
    # so no separate global_step variable is needed
    learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=starter_learning_rate,
                                                                  decay_steps=50,
                                                                  decay_rate=0.96,
                                                                  staircase=True)
    optimizer = tf.keras.optimizers.SGD(learning_rate)
else:
    optimizer = tf.keras.optimizers.SGD(learning_rate=starter_learning_rate)

def grad(features, labels, l2_flag):
    # compute the forward pass inside the tape so gradients flow to W and b
    with tf.GradientTape() as tape:
        loss_value = loss_fn(linearReg_fn(features), features, labels, l2_flag)
    return tape.gradient(loss_value, [W, b]), loss_value

EPOCHS = 101

for step in range(EPOCHS):
    for features, labels in dataset:
        features = tf.cast(features, tf.float32)
        labels = tf.cast(labels, tf.float32)
        grads, loss_value = grad(features, labels, False)  # pass True to enable L2 regularization
        optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
    if step % 10 == 0:
        print("Iter: {}, Loss: {:.4f}".format(step, loss_value))
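
If the normalization(xy) step is skipped, the raw volume column (on the order of 10^6) dwarfs the price columns (around 800) and, at a 0.1 learning rate, the loss quickly diverges to inf/nan; after min-max scaling the same loop converges.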