Lab4

neonii 2025. 3. 16. 13:58

728x90

University of Georgia, Network Data Analysis and Graphical Models 수업을 들으며 작성한 내용입니다.

import numpy as np
from PIL import Image
from matplotlib import transforms, pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

%matplotlib inline

# Shared matplotlib settings (serif font, thin axes frame) and the gray
# color constants.
plt.rcParams.update({
    'font.family': 'serif',
    'axes.linewidth': 0.5,
})
GRAY1 = '#231F20'
GRAY4 = '#646369'
GRAY7 = '#929497'

# Read the monochrome bitmap sheet of digits and convert it to an int array.
img = np.array(Image.open('zip_numerals.bmp')).astype(int)

# Storage for 480 single-channel 16x16 examples and their digit labels.
X = np.zeros(shape=(480, 16, 16, 1))
y = np.zeros(shape=(480, 1))
example_idx = 0

# The sheet contains many digits at once, so every glyph has to be cut out
# into its own image. Glyph cells are 16 rows by 13 columns with a 1-pixel
# gap between them, i.e. a pitch of 17 pixels down and 14 pixels across.
for digit in range(10):
    top = 17 * digit
    for col in range(12):
        left = 14 * col
        # Invert the cropped cell so that 0 and 1 swap roles.
        glyph = 1 - img[top:top + 16, left:left + 13]
        # The glyph is only 13 columns wide while an example is 16 columns
        # wide, so it is written at horizontal offsets 0..3. This yields four
        # copies in which the shape is unchanged and only its position moves.
        for shift in range(4):
            X[example_idx, :, shift:shift + 13, 0] = glyph
            y[example_idx, 0] = digit  # the sheet row index is the label
            example_idx += 1  # advance so each copy lands in its own slot

# Stratified split: 320 examples for training, the remainder for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=320,
    random_state=10,
    stratify=y,
)

# Turn the class labels into one-hot vectors.
# A network outputs a probability distribution (e.g. [0.1, 0.05, 0.02, 0.6]
# means "class 3"), so the targets need the same layout to be compared with
# it (e.g. [0, 0, 0, 1] for the true class 3).
ohe = OneHotEncoder(categories='auto')
ohe.fit(y)
Y_train, Y_test = (
    ohe.transform(labels).toarray() for labels in (y_train, y_test)
)

# Show some training examples: one column per digit class, with five randomly
# chosen training samples stacked in each column.
w = 20  # pitch in pixels between neighbouring digit tiles
tile = 16  # side length of one digit image
pad = w - tile  # blank margin in front of every tile
# Canvas large enough for 5 rows and 10 columns of tiles plus the margin.
result = np.zeros(shape=(pad + w*5, pad + w*10))

for d in range(10):
    # Row indices of the training samples labelled d. argwhere on the
    # (n, 1) label array returns (row, column) pairs; column 0 is the row.
    # No squeeze here: squeezing would collapse a single match to 1-D and
    # make the column selection fail.
    d_idx = np.argwhere(y_train == d)[:, 0]
    d_idx = np.random.choice(d_idx, 5, replace=False)  # 5 distinct samples per digit
    for i, idx in enumerate(d_idx):
        top, left = i*w + pad, d*w + pad
        # Negate the pixel values before placing the tile on the canvas.
        result[top:top + tile, left:left + tile] = -X_train[idx].reshape((tile, tile))

fig, ax = plt.subplots(figsize=(5.5, 3.3), dpi=110)
ax.imshow(result, cmap="gray")
ax.set_aspect('equal', 'datalim')
# Hide ticks, tick labels and the frame so only the digits remain visible.
ax.tick_params(bottom=False, left=False, labelleft=False, labelbottom=False)
for spine in ax.spines.values():
    spine.set_visible(False)

import numpy as np
from PIL import Image

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Stratified train/test split.
# X and y are assumed to be the feature and label arrays built from the image.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=320,
    random_state=10,
    stratify=y,
)

# One-hot encode the labels; each label array is passed as a single column.
ohe = OneHotEncoder(categories='auto')
ohe.fit(y.reshape(-1, 1))
Y_train = ohe.transform(y_train.reshape(-1, 1)).toarray()
Y_test = ohe.transform(y_test.reshape(-1, 1)).toarray()

# Run on CUDA when it is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('Using CPU or GPU? \n', device)
print('-----------------------')

# Convert everything to float32 tensors on the chosen device. The image
# arrays are permuted to [batch_size, channels, height, width].
X_train = torch.tensor(X_train, dtype=torch.float32)
X_train = X_train.permute(0, 3, 1, 2).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32)
X_test = X_test.permute(0, 3, 1, 2).to(device)
Y_train = torch.tensor(Y_train, dtype=torch.float32).to(device)
Y_test = torch.tensor(Y_test, dtype=torch.float32).to(device)

# Define the training procedure
# 각 샘플마다 가중치 업데이트 수행, 한 에포크가 끝나면 정확도 계산
def train_model(model, criterion, optimizer, n_epochs, X_train, Y_train, X_test, Y_test):
    """Train ``model`` one sample at a time and record test accuracy per epoch.

    Every epoch walks through the training samples in order and performs one
    optimizer step per sample. After each epoch the model is evaluated on the
    whole test set.

    Args:
        model: Module called on input batches; its output is compared with
            one-hot targets and arg-maxed along dim 1 for prediction.
        criterion: Callable ``criterion(output, target)`` returning a loss
            tensor that supports ``backward()``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        n_epochs: Number of passes over the training samples.
        X_train: Training inputs, indexed along dim 0.
        Y_train: One-hot training targets aligned with ``X_train``.
        X_test: Test inputs, passed to the model as a single batch.
        Y_test: One-hot test targets aligned with ``X_test``.

    Returns:
        A list with one accuracy value (fraction of correctly classified test
        samples) per epoch. An entry is NaN if the test set is empty.
    """
    # The true classes never change, so decode the one-hot targets only once.
    y_test_labels = torch.argmax(Y_test, dim=1)
    n_test = y_test_labels.numel()

    accuracy_test = []
    for _ in range(n_epochs):
        model.train()  # switch to training mode
        for i in range(X_train.shape[0]):
            optimizer.zero_grad()  # clear gradients left from the previous step
            output = model(X_train[i:i+1])  # forward pass on a single sample
            loss = criterion(output, Y_train[i:i+1])
            loss.backward()  # backpropagation
            optimizer.step()  # weight update

        # Evaluate on the test data without tracking gradients.
        model.eval()  # switch to evaluation mode
        with torch.no_grad():
            y_test_hat = torch.argmax(model(X_test), dim=1)  # predicted class
        # Count matches directly on the tensors; dividing the integer count
        # keeps the result an exact Python float.
        n_correct = (y_test_hat == y_test_labels).sum().item()
        accuracy_test.append(n_correct / n_test if n_test else float('nan'))
    return accuracy_test

Net1: 딥러닝에서 가장 간단한 형태의 신경망

목표: 16x16 픽셀 크기의 숫자 이미지를 보고, 0~9 중 어떤 숫자인지 맞히는 것
과정
1. 이미지(16x16)를 1 dimension으로 변환
2. 256개의 각 픽셀 값을 신경망에 넣어 계산
3. 결과로 0~9까지 10개의 숫자에 대한 확률을 출력
4. 가장 높은 확률을 가진 숫자가 정답

# Model Definitions
# Net-1: Single layer network (i.e. logistic regression)
class Net1(nn.Module):
    """Single-layer network (softmax regression) on flattened 16x16 images.

    Each input is flattened to 256 values and mapped by one linear layer to
    10 outputs, which are normalised with softmax.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(256, 10)  # 256 inputs -> 10 class scores
        nn.init.xavier_uniform_(self.fc.weight)  # Xavier weight initialisation

    def forward(self, x):
        """Return a (batch, 10) tensor whose rows sum to 1."""
        # reshape (unlike view) also accepts non-contiguous input, e.g. a
        # transposed or permuted batch, and copies only when it has to.
        x = x.reshape(-1, 256)
        return torch.softmax(self.fc(x), dim=1)

Net2: 조금 더 발전된 형태의 신경망

목표: 16x16 픽셀 크기의 숫자 이미지를 보고, 0~9 중 어떤 숫자인지 맞히는 것
과정
1. 이미지(16x16)를 1 dimension으로 변환
2. 256개의 각 픽셀 값을 신경망에 넣어 계산
3. 은닉층에서 더 복잡한 패턴을 학습
4. 결과로 0~9까지 10개의 숫자에 대한 확률을 출력
5. 가장 높은 확률을 가진 숫자가 정답

# Net2: Two layer network (with 1 hidden layer)
class Net2(nn.Module):
    """Two-layer fully connected network with one tanh hidden layer.

    Each input is flattened to 256 values, passed through a hidden layer of
    ``n_hidden`` tanh units and then mapped to 10 softmax outputs.

    Args:
        n_hidden: Number of hidden units (12 by default).
    """

    def __init__(self, n_hidden=12):
        super().__init__()
        self.fc1 = nn.Linear(256, n_hidden)  # 256 inputs -> hidden units
        self.fc2 = nn.Linear(n_hidden, 10)  # hidden units -> 10 class scores
        # Xavier weight initialisation for both layers.
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Return a (batch, 10) tensor whose rows sum to 1."""
        # reshape (unlike view) also accepts non-contiguous input, e.g. a
        # transposed or permuted batch, and copies only when it has to.
        x = x.reshape(-1, 256)
        x = torch.tanh(self.fc1(x))  # hidden activations lie in (-1, 1)
        return torch.softmax(self.fc2(x), dim=1)

Net5: Constrained 신경망

목표: 16x16 픽셀 크기의 숫자 이미지를 보고, 0~9 중 어떤 숫자인지 맞히는 것
과정
1. 이미지(16x16)를 입력
2. 합성곱 신경망(CNN convolutional neural network)을 사용하여 이미지 특징 추출
  - 이미지는 픽셀 하나하나보다 "전체적인 패턴"이 더 중요하기 때문!
  - 합성곱 연산을 통해 숫자의 모양을 인식할 수 있음, 즉 사람이 숫자를 구별할 때와 비슷해지는 것!
3. 완전연결층(FC Layer)에서 최종 예측 수행
4. 결과로 0~9까지 10개의 숫자에 대한 확률을 출력
5. 가장 높은 확률을 가진 숫자가 정답

# Net-5: Constrained network 2
class Net5(nn.Module):
    """Small convolutional network: two conv layers and a linear classifier.

    For a (batch, 1, 16, 16) input the feature maps have shape (2, 8, 8)
    after the first convolution and (4, 4, 4) after the second; the latter
    is flattened to 64 values and mapped to 10 softmax outputs.
    """

    def __init__(self):
        super().__init__()
        # One grayscale channel in, two feature maps out. A 3x3 window slides
        # over the image in steps of 2, so (1, 16, 16) becomes (2, 8, 8).
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=2, kernel_size=3, stride=2, padding=1
        )
        # Two feature maps in, four out. A 5x5 window moves one step at a
        # time without padding, giving (4, 4, 4).
        self.conv2 = nn.Conv2d(
            in_channels=2, out_channels=4, kernel_size=5, stride=1, padding=0
        )
        # Classifier on the flattened (4, 4, 4) convolution output.
        self.fc = nn.Linear(4 * 4 * 4, 10)

        # Xavier weight initialisation, applied layer by layer in order.
        for layer in (self.conv1, self.conv2, self.fc):
            nn.init.xavier_uniform_(layer.weight)

    def forward(self, x):
        """Return a (batch, 10) tensor whose rows sum to 1."""
        hidden = self.conv1(x).tanh()
        hidden = self.conv2(hidden).tanh()
        flat = hidden.reshape(-1, 4 * 4 * 4)
        scores = self.fc(flat)
        return scores.softmax(dim=1)

728x90