[Deep Learning | C] Convolution Neural Network

목적

평소에 Python으로 인공지능 공부하면서 GPU 가속부터 최적화 과정에서 항상 아쉬움이 남았다. Low-level에서 메모리 관리하고 싶은 생각이 있어서 C로 구현해봐야겠다고 생각하였다.

수학 함수

이 프로젝트가 끝나고 검색해보면서 GSL 문서를 참고해보니 자료구조를 다음과 같이 설계했었다.

matrix_block 구조체에서 데이터 저장을 담당하고, matrix 에서 matrix_block 구조체 데이터 참조하는 방식이다. 다음과 같이 설계하면, Python의 slice 기능을 쉽게 구현할 수 있다.

// Define a struct to represent a block
typedef struct {
    size_t size;
    double *data;
} matrix_block;

// Define a struct to represent a matrix
typedef struct {
    size_t rows;  // Number of rows
    size_t cols;  // Number of columns
    double *data; // Pointer to the matrix data
    matrix_block *block;
    int owner;
} matrix;

하지만 잘 모르고 개발한 필자는 수학 함수를 다음과 같이 구현하였다.

// matmul.h
#ifndef MATH_MATMUL_H
#define MATH_MATMUL_H

#include <stdio.h>

// Define a struct to represent a matrix
typedef struct {
    int rows;     // Number of rows
    int cols;     // Number of columns
    double *data; // Pointer to the matrix data
} Matrix;

/**
 * @brief Performs matrix multiplication.
 * Multiplies a matrix of size r1 x r2 with another matrix of size r2 x c2.
 *
 * @param m1 The first matrix to be multiplied.
 * @param m2 The second matrix to be multiplied.
 * @param output The output matrix to store the result.
 */
void matmul(Matrix *m1, Matrix *m2, Matrix *output);

/**
 * @brief Function to create a matrix with given rows and columns.
 * Allocates memory for the matrix.
 *
 * @param rows Number of rows
 * @param cols Number of cols
 * @return A matrix struct with allocated memory
 */
Matrix create_matrix(int rows, int cols);

/**
 * @brief Initializes a dynamically allocated matrix by copying data from a 1D
 * array.
 *
 * @param matrix The matrix to be initialized (dynamically allocated).
 * @param data The 1D array whose elements will be copied into the matrix.
 */
void initialize_matrix(Matrix *matrix, const double *data);

/**
 * @brief Function to free the memory of a matrix.
 *
 * @param m The matrix to free
 */
void free_matrix(Matrix *m);

/**
 * @brief Transposes a given matrix.
 *
 * This function creates a new matrix that is the transpose of the input matrix.
 * In the transposed matrix, rows become columns and columns become rows.
 *
 * @param matrix The input matrix to be transposed.
 * @return A new matrix that is the transpose of the input matrix.
 */
Matrix transpose_matrix(Matrix *matrix);

/**
 * @brief Print a matrix.
 *
 * @param matrix The matrix to be printed.
 */
void print_matrix(Matrix *matrix);

#endif // MATH_MATMUL_H

// matmul.c
#include "matrix/matmul.h"
#include <stdio.h>
#include <stdlib.h>

// Function to perform matrix multiplication
void matmul(Matrix *m1, Matrix *m2, Matrix *output) {
    // Verify if matrix multiplication is possible
    if (m1->cols != m2->rows) {
        printf("Matrix dimensions do not match for multiplication: A(%d, %d) * B(%d, %d)\n", m1->rows, m1->cols,
               m2->rows, m2->cols);
        return;
    }

    // Check if output matrix has correct size
    if (output->rows != m1->rows || output->cols != m2->cols) {
        printf("Output matrix size does not match multiplication result size.\n");
        return;
    }

    // Perform matrix multiplication
    for (int i = 0; i < m1->rows; ++i) {
        for (int j = 0; j < m2->cols; ++j) {
            output->data[i * output->cols + j] = 0; // Correct 1D indexing
            for (int k = 0; k < m1->cols; ++k) {
                output->data[i * output->cols + j] += m1->data[i * m1->cols + k] * m2->data[k * m2->cols + j];
            }
        }
    }
}

// Function to create a matrix with given rows and columns
Matrix create_matrix(int rows, int cols) {
    Matrix m;
    m.rows = rows;
    m.cols = cols;

    // Allocate memory for the matrix
    m.data = (double *)malloc(rows * cols * sizeof(double)); // Allocate a single contiguous block of memory
    if (!m.data) {
        printf("Failed to allocate memory for matrix data.\n");
        exit(EXIT_FAILURE);
    }
    return m;
}

// Function to initialize a dynamically allocated matrix
void initialize_matrix(Matrix *matrix, const double *data) {
    int idx = 0;
    for (int i = 0; i < matrix->rows; ++i) {
        for (int j = 0; j < matrix->cols; ++j) {
            matrix->data[i * matrix->cols + j] = data[idx++]; // Correct 1D indexing
        }
    }
}

// Function to free the memory of a matrix
void free_matrix(Matrix *m) {
    if (!m || !m->data)
        return;

    free(m->data);  // Free the single contiguous block of memory
    m->data = NULL; // Prevent dangling pointer
}

// Transposes a given matrix
Matrix transpose_matrix(Matrix *m) {
    Matrix t = create_matrix(m->cols, m->rows);
    for (int i = 0; i < m->rows; ++i) {
        for (int j = 0; j < m->cols; ++j) {
            t.data[j * t.cols + i] = m->data[i * m->cols + j]; // Correct 1D indexing
        }
    }
    return t;
}

// Print a matrix
void print_matrix(Matrix *matrix) {
    for (int i = 0; i < matrix->rows; ++i) {
        for (int j = 0; j < matrix->cols; ++j) {
            printf("%+8.2f ", matrix->data[i * matrix->cols + j]); // Correct 1D indexing
        }
        printf("\n");
    }
}

1. ReLU (Rectified Linear Unit)

정의: ReLU는 신경망에서 자주 사용되는 활성화 함수이다.
공식:

\text{ReLU}(x) = \max(0, x)

\text{ReLU}'(x) = \begin{cases} 1 & \text{if } x > 0 \\ 0 & \text{if } x \leq 0 \end{cases}

역할: ReLU는 음수를 0으로 변환하고 양수는 그대로 유지하여 비선형성을 추가한다. 이를 통해 모델이 더 복잡한 패턴을 학습할 수 있도록 한다.
구현:

#define relu(x) ((x) > 0 ? (x) : 0)
#define relu_derivative(x) ((x) > 0 ? 1 : 0)

2. 손실 함수: MSE (Mean Squared Error)

정의: MSE는 예측값과 실제값 사이의 오차를 제곱하여 평균을 구하는 손실 함수이다.
공식:

\text{MSE Loss} = \frac{1}{N} \sum_{i=1}^{N} (\hat{y}_i - y_i)^2

y^i: 예측값
y_i: 실제값
N: 데이터의 개수
구현:

double mse_loss(Matrix *output, Matrix *target) {
    double loss = 0;
    for (int i = 0; i < output->rows; ++i) {
        for (int j = 0; j < output->cols; ++j) {
            double diff = output->data[i * output->cols + j] - target->data[i * target->cols + j];
            loss += diff * diff;
        }
    }
    return loss / (output->rows * output->cols);
}

출력 행렬과 목표 행렬 간의 차이를 계산한 뒤, 이를 제곱해 합산하고 평균을 구하는 방식으로 손실을 계산한다.

3. Forward Convolution

정의: 합성곱 연산은 CNN의 핵심 작업으로, 입력 행렬과 필터(커널)를 사용해 출력 행렬을 생성하는 연산이다.
공식:

\text{Output}[i, j] = \sum_{p,q} \text{Input}[i+p, j+q] \cdot \text{Filter}[p, q]

구현:

void forward_convolution(Matrix *input, Matrix *filter, Matrix *output) {
...
for (int i = 0; i < output_size; ++i) {
    for (int j = 0; j < output_size; ++j) {
            ...
            output->data[i * output_size + j] = relu(output_matrix.data[i * output_size + j]);
        }
    }
}

입력 행렬을 슬라이딩 윈도우 방식으로 필터와 합성곱을 수행한다.
결과 값에 ReLU 활성화 함수를 적용하여 비선형성을 추가한다.

4. Backward Convolution

정의: 역전파 과정에서 합성곱 연산의 그래디언트를 계산하는 작업이다. 입력과 필터에 대해 손실의 그래디언트를 계산한다.
역할:

\frac{\partial \text{Loss}}{\partial \text{Filter}}: \text{필터에 대한 손실의 변화율}

\frac{\partial \text{Loss}}{\partial \text{Input}}: \text{입력에 대한 손실의 변화율}

구현:

void backward_convolution(Matrix *d_output, Matrix *filter, Matrix *d_input, Matrix *d_filter) {
    ...
    // 필터에 대한 그래디언트 계산
    d_filter->data[fi * d_filter->cols + fj] += grad * d_input->data[(i + fi) * d_input->cols + (j + fj)];
    ...
    // 입력에 대한 그래디언트 계산
    d_input->data[(i + fi) * d_input->cols + (j + fj)] += grad * filter->data[fi * filter->cols + fj];
}

손실의 그래디언트를 역으로 전파하여 필터와 입력의 업데이트 방향을 결정한다.

5. Gradient Descent

정의: 필터 값을 학습률에 따라 업데이트하는 알고리즘이다.
공식:

\text{Filter}_{\text{new}} = \text{Filter}_{\text{old}} - \eta \cdot \frac{\partial \text{Loss}}{\partial \text{Filter}}

η: 학습률
구현:

void update_filter(Matrix *filter, Matrix *d_filter, const double learning_rate) {
    for (int i = 0; i < filter->rows; ++i) {
        for (int j = 0; j < filter->cols; ++j) {
            filter->data[i * filter->cols + j] -= learning_rate * d_filter->data[i * d_filter->cols + j];
        }
    }
}

6. Train 함수

정의: CNN 모델의 학습 과정을 반복하는 함수이다.
과정:

Forward Pass: 입력 데이터와 필터를 사용해 출력값 계산
Loss 계산: MSE 함수를 사용해 손실 값 계산
Backward Pass: 출력 그래디언트를 사용해 필터와 입력의 그래디언트를 계산
필터 업데이트: 그래디언트를 사용해 필터 값을 조정
위 과정을 설정된 epochs 횟수만큼 반복

구현:

void train(Matrix *input, Matrix *target, Matrix *filter, int epochs, double learning_rate) {
    ...
    for (int epoch = 0; epoch < epochs; ++epoch) {
        forward_convolution(input, filter, &output);
        double loss = mse_loss(&output, target);
        mse_loss_derivative(&output, target, &d_output);
        backward_convolution(&d_output, filter, &d_input, &d_filter);
        update_filter(filter, &d_filter, learning_rate);
        printf("Epoch %d, Loss: %.4f\n", epoch + 1, loss);
    }
    ...
}

7. Main 함수

입력 데이터: 5x5 크기의 랜덤 행렬을 생성
srand(time(NULL))를 사용해 난수를 초기화

필터:

초기값으로 3x3 필터 설정

타겟 데이터:

3x3의 목표 행렬 설정

학습:

train 함수를 사용해 필터를 학습시키며 손실 값을 출력

전체 코드

#include "matrix.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define EPSILON 1e-7

// Define ReLU functions
#define relu(x) ((x) > 0 ? (x) : 0)
#define relu_derivative(x) ((x) > 0 ? 1 : 0)

/**
 * @brief Applies Mean Squared Error operation.
 *
 * @param output The output matrix of past data.
 * @param target The target matrix of post data.
 * @return The computed Mean Squared Error loss as a double value.
 */
double mse_loss(Matrix *output, Matrix *target) {
    double loss = 0;
    for (int i = 0; i < output->rows; ++i) {
        for (int j = 0; j < output->cols; ++j) {
            double diff = output->data[i * output->cols + j] - target->data[i * target->cols + j];
            loss += diff * diff;
        }
    }
    return loss / (output->rows * output->cols);
}

/**
 * @brief Computes the derivative of the Mean Squared Error (MSE) loss.
 *
 * dL/d(output) = 2 * (output - target) / N
 *
 * @param output The output matrix of past data, which contains predictions or calculated values.
 * @param target The target matrix of post data, which contains ground truth values.
 * @param d_output The matrix to store the computed derivatives, same size as the output matrix.
 */
void mse_loss_derivative(Matrix *output, Matrix *target, Matrix *d_output) {
    for (int i = 0; i < output->rows; ++i) {
        for (int j = 0; j < output->cols; ++j) {
            d_output->data[i * d_output->cols + j] =
                2 * (output->data[i * output->cols + j] - target->data[i * target->cols + j]) /
                (output->rows * output->cols);
        }
    }
}

/**
 * @brief Applies a forward_convolution operation.
 *
 * @param input The input matrix with dimentsions.
 * @param filter The filter matrix with dimentions.
 * @param output The output matrix to store the forward_convolution result.
 */
void forward_convolution(Matrix *input, Matrix *filter, Matrix *output) {
    // Validate the matrix format
    if (input->rows != input->cols) {
        printf("Invalid input matrix format!\n");
        return;
    }
    if (filter->rows != filter->cols) {
        printf("Invalid filter matrix format!\n");
        return;
    }

    // Define matrices size
    int input_size = input->rows;
    int filter_size = filter->rows;
    int output_size = input_size - filter_size + 1;

    // Create intermediate matrices
    Matrix input_matrix = create_matrix(output_size * output_size, filter_size * filter_size);
    Matrix filter_matrix = create_matrix(filter_size * filter_size, 1);

    // Reallocate the input matrix to flatten matrix
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < output_size; ++j) {
            for (int fi = 0; fi < filter_size; ++fi) {
                for (int fj = 0; fj < filter_size; ++fj) {
                    input_matrix.data[i * output_size + j] = input->data[(i + fi) * input->cols + (j + fj)];
                }
            }
        }
    }

    // Reallocate the filter matrix to flatten matrix
    for (int fi = 0; fi < filter_size; ++fi) {
        for (int fj = 0; fj < filter_size; ++fj) {
            filter_matrix.data[fi * filter_size + fj] = filter->data[fi * filter->cols + fj];
        }
    }

    // Perform matrix multiplication
    Matrix output_matrix = create_matrix(output_size * output_size, 1);
    matmul(&input_matrix, &filter_matrix, &output_matrix);

    // Reshape output_matrix back into output and apply ReLU
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < output_size; ++j) {
            output->data[i * output_size + j] = relu(output_matrix.data[i * output_size + j]);
        }
    }

    // Free intermediate matrices
    free_matrix(&input_matrix);
    free_matrix(&filter_matrix);
    free_matrix(&output_matrix);
}

/**
 * @brief Updates a filter matrix using the gradient descent algorithm.
 *
 * @param filter The filter matrix to be updated, typically representing model parameters.
 * @param d_filter The gradient matrix containing the partial derivatives of the loss
 *                 function with respect to the filter matrix elements.
 * @param learning_rate A constant value controlling the step size of the gradient descent update.
 */
void update_filter(Matrix *filter, Matrix *d_filter, const double learning_rate) {
    for (int i = 0; i < filter->rows; ++i) {
        for (int j = 0; j < filter->cols; ++j) {
            filter->data[i * filter->cols + j] -= learning_rate * d_filter->data[i * d_filter->cols + j];
        }
    }
}

/**
 * @brief Performs the backward pass for convolution, computing gradients with respect to inputs and filters.
 *
 * @param d_output The gradient matrix of the output, representing the loss gradient with respect to the output.
 * @param filter The filter matrix used during the forward pass.
 * @param d_input The gradient matrix of the input to be computed and updated.
 * @param d_filter The gradient matrix of the filter to be computed and updated.
 */
void backward_convolution(Matrix *d_output, Matrix *filter, Matrix *d_input, Matrix *d_filter) {
    int filter_size = filter->rows;
    int output_size = d_output->rows;

    // Initialize d_filter
    for (int i = 0; i < d_filter->rows; ++i) {
        for (int j = 0; j < d_filter->cols; ++j) {
            d_filter->data[i * d_filter->cols + j] = 0;
        }
    }

    // Compute d_filter using matmul (flattened_input is not needed)
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < output_size; ++j) {
            double grad = d_output->data[i * d_output->cols + j];
            for (int fi = 0; fi < filter_size; ++fi) {
                for (int fj = 0; fj < filter_size; ++fj) {
                    d_filter->data[fi * d_filter->cols + fj] +=
                        grad * d_input->data[(i + fi) * d_input->cols + (j + fj)];
                }
            }
        }
    }

    // Compute d_input (no need for flattened_input)
    for (int i = 0; i < d_input->rows; ++i) {
        for (int j = 0; j < d_input->cols; ++j) {
            d_input->data[i * d_input->cols + j] = 0; // Reset d_input to zero before accumulation
        }
    }

    // Backpropagate gradient to input using filter
    for (int i = 0; i < output_size; ++i) {
        for (int j = 0; j < output_size; ++j) {
            double grad = d_output->data[i * d_output->cols + j];
            for (int fi = 0; fi < filter_size; ++fi) {
                for (int fj = 0; fj < filter_size; ++fj) {
                    d_input->data[(i + fi) * d_input->cols + (j + fj)] += grad * filter->data[fi * filter->cols + fj];
                }
            }
        }
    }
}

/**
 * @brief Trains a filter using convolutional forward and backward passes with gradient descent.
 *
 * @param input The input matrix containing training data.
 * @param target The target matrix containing ground truth data.
 * @param filter The filter matrix to be trained (model parameters).
 * @param epochs The number of training iterations.
 * @param learning_rate The learning rate used for gradient descent updates.
 */
void train(Matrix *input, Matrix *target, Matrix *filter, int epochs, double learning_rate) {
    int input_size = input->rows;
    int filter_size = filter->rows;
    int output_size = input_size - filter_size + 1;

    // Create necessary matrices
    Matrix output = create_matrix(output_size, output_size);
    Matrix d_output = create_matrix(output_size, output_size);
    Matrix d_input = create_matrix(input_size, input_size);
    Matrix d_filter = create_matrix(filter_size, filter_size);

    // Training loop
    for (int epoch = 0; epoch < epochs; ++epoch) {
        // Forward pass
        forward_convolution(input, filter, &output);

        // Compute loss
        double loss = mse_loss(&output, target);

        // Backward pass
        mse_loss_derivative(&output, target, &d_output);
        backward_convolution(&d_output, filter, &d_input, &d_filter);

        // Update filter
        update_filter(filter, &d_filter, learning_rate);

        // Print loss for this epoch
        printf("Epoch %d, Loss: %.4f\n", epoch + 1, loss);
    }

    // Free allocated memory
    free_matrix(&output);
    free_matrix(&d_output);
    free_matrix(&d_input);
    free_matrix(&d_filter);
}

int main() {
    srand(0);

    int input_size = 5;
    int filter_size = 3;
    int output_size = input_size - filter_size + 1;

    int epochs = 50;
    double learning_rate = 0.0001;

    // Create input matrix
    Matrix input = create_matrix(input_size, input_size);

    // Initialize random seed
    srand(time(NULL));

    // Fill input matrix with random values
    double input_data[input_size * input_size];
    for (int i = 0; i < input_size * input_size; i++) {
        input_data[i] = (double)(rand() % 100); // Generates random values between 0 and 99
    }
    initialize_matrix(&input, input_data);

    // Create target matrix
    Matrix target = create_matrix(output_size, output_size);
    double target_data[] = {10, 20, 30, 20, 10, 5, 15, 25, 35};
    initialize_matrix(&target, target_data);

    // Create filter matrix
    Matrix filter = create_matrix(filter_size, filter_size);
    double filter_data[] = {0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5};
    initialize_matrix(&filter, filter_data);

    // Train the CNN
    train(&input, &target, &filter, epochs, learning_rate);

    // Print the final filter
    printf("\nTrained Filter:\n");
    for (int i = 0; i < filter.rows; ++i) {
        for (int j = 0; j < filter.cols; ++j) {
            printf("%8.4f ", filter.data[i * filter.cols + j]);
        }
        printf("\n");
    }

    // Clean up
    free_matrix(&input);
    free_matrix(&target);
    free_matrix(&filter);

    return 0;
}