feat: update hcp testing
This commit is contained in:
commit
54468c2db7
7 changed files with 202 additions and 0 deletions
91
matrix multiplication/CUDA/matrix_mul.cu
Normal file
91
matrix multiplication/CUDA/matrix_mul.cu
Normal file
|
@ -0,0 +1,91 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#define N 4096
|
||||
#define FILE_A "matrix_A.bin"
|
||||
#define FILE_B "matrix_B.bin"
|
||||
|
||||
// Host-side loader: reads an N x N matrix of doubles from a raw binary file.
//
// filename: path of the file to read (opened in binary mode).
// matrix:   destination buffer; must have room for N * N doubles.
//
// Terminates the process with EXIT_FAILURE when the file cannot be opened or
// when it contains fewer than N * N doubles, so the caller never computes on
// a partially filled buffer.
void load_matrix(const char *filename, double *matrix) {
    FILE *file = fopen(filename, "rb");
    if (!file) {
        perror("無法讀取檔案");
        exit(EXIT_FAILURE);
    }

    // fread reports how many whole elements it actually delivered; anything
    // short of the full matrix means the file is truncated or unreadable.
    const size_t expected = (size_t)N * N;
    const size_t received = fread(matrix, sizeof(double), expected, file);
    fclose(file);

    if (received != expected) {
        fprintf(stderr, "檔案 %s 內容不足:需要 %zu 個元素,只讀到 %zu 個\n",
                filename, expected, received);
        exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
// CUDA kernel: each thread computes one element of C = A * B.
//
// A, B: input matrices, n x n doubles, indexed row-major (element (r, c) is
//       at offset r * n + c).
// C:    output matrix, same layout; every in-range element is overwritten.
// n:    matrix dimension.
//
// Launch layout: a 2D grid of 2D blocks. The x dimension selects the column
// and the y dimension selects the row. Threads whose (row, col) falls outside
// the n x n matrix return without touching memory, so the grid does not have
// to divide n evenly.
__global__ void matrixMultiply(const double *A, const double *B, double *C, int n) {
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    if (row >= n || col >= n) {
        return;
    }

    // Offsets are computed in size_t: with 32-bit int arithmetic, row * n
    // overflows as soon as n exceeds 46340.
    const size_t dim = (size_t)n;
    const size_t row_base = (size_t)row * dim;

    double sum = 0;
    for (size_t k = 0; k < dim; k++) {
        sum += A[row_base + k] * B[k * dim + (size_t)col];
    }
    C[row_base + (size_t)col] = sum;
}
|
||||
|
||||
// Aborts the program with a diagnostic when a CUDA runtime call failed.
// `what` names the failing step so the message points at the right call.
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Loads A and B from disk, multiplies them on the GPU with matrixMultiply,
// prints the kernel time measured with CUDA events, and copies the result
// back to the host. Any allocation, I/O, or CUDA failure ends the program
// with EXIT_FAILURE.
int main() {
    const size_t bytes = (size_t)N * N * sizeof(double);

    double *h_A = (double *)malloc(bytes);
    double *h_B = (double *)malloc(bytes);
    double *h_C = (double *)malloc(bytes);

    if (!h_A || !h_B || !h_C) {
        perror("記憶體配置失敗");
        exit(EXIT_FAILURE);
    }

    // Load the input matrices from files.
    load_matrix(FILE_A, h_A);
    load_matrix(FILE_B, h_B);

    // Allocate device memory.
    double *d_A = NULL, *d_B = NULL, *d_C = NULL;
    check_cuda(cudaMalloc((void **)&d_A, bytes), "cudaMalloc d_A");
    check_cuda(cudaMalloc((void **)&d_B, bytes), "cudaMalloc d_B");
    check_cuda(cudaMalloc((void **)&d_C, bytes), "cudaMalloc d_C");

    // Copy the host inputs to the device.
    check_cuda(cudaMemcpy(d_A, h_A, bytes, cudaMemcpyHostToDevice), "cudaMemcpy A host->device");
    check_cuda(cudaMemcpy(d_B, h_B, bytes, cudaMemcpyHostToDevice), "cudaMemcpy B host->device");

    // 16x16 threads per block; the grid is rounded up so it covers all of N.
    dim3 threadsPerBlock(16, 16);
    dim3 blocksPerGrid((N + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (N + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Time the kernel with CUDA events.
    cudaEvent_t start, stop;
    check_cuda(cudaEventCreate(&start), "cudaEventCreate start");
    check_cuda(cudaEventCreate(&stop), "cudaEventCreate stop");
    check_cuda(cudaEventRecord(start), "cudaEventRecord start");

    matrixMultiply<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
    // A kernel launch returns nothing; an invalid launch configuration is
    // only visible through cudaGetLastError().
    check_cuda(cudaGetLastError(), "matrixMultiply launch");

    // The stop event is queued right behind the kernel, so waiting on it
    // also waits for the kernel and reports any error raised while it ran.
    // No separate device-wide synchronization is needed in the timed region.
    check_cuda(cudaEventRecord(stop), "cudaEventRecord stop");
    check_cuda(cudaEventSynchronize(stop), "matrixMultiply execution");

    float milliseconds = 0;
    check_cuda(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
    printf("矩陣乘法完成,花費時間: %f 秒\n", milliseconds / 1000.0);

    // Copy the result back to the host.
    check_cuda(cudaMemcpy(h_C, d_C, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy C device->host");

    // Release events, device memory, and host memory.
    check_cuda(cudaEventDestroy(start), "cudaEventDestroy start");
    check_cuda(cudaEventDestroy(stop), "cudaEventDestroy stop");
    check_cuda(cudaFree(d_A), "cudaFree d_A");
    check_cuda(cudaFree(d_B), "cudaFree d_B");
    check_cuda(cudaFree(d_C), "cudaFree d_C");
    free(h_A);
    free(h_B);
    free(h_C);

    return 0;
}
|
50
matrix multiplication/OpenMP/main.c
Normal file
50
matrix multiplication/OpenMP/main.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
|
||||
#define N 4096
|
||||
#define FILE_A "matrix_A.bin"
|
||||
#define FILE_B "matrix_B.bin"
|
||||
|
||||
void load_matrix(const char *filename, double *matrix) {
|
||||
FILE *file = fopen(filename, "rb");
|
||||
if (!file) {
|
||||
perror("無法讀取檔案");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
fread(matrix, sizeof(double), N * N, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
int main() {
|
||||
double *A = (double *)malloc(N * N * sizeof(double));
|
||||
double *B = (double *)malloc(N * N * sizeof(double));
|
||||
double *C = (double *)calloc(N * N, sizeof(double));
|
||||
|
||||
if (!A || !B || !C) {
|
||||
perror("記憶體配置失敗");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
load_matrix(FILE_A, A);
|
||||
load_matrix(FILE_B, B);
|
||||
|
||||
double start = omp_get_wtime();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < N; j++) {
|
||||
for (int k = 0; k < N; k++) {
|
||||
C[i * N + j] += A[i * N + k] * B[k * N + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double end = omp_get_wtime();
|
||||
printf("矩陣乘法完成,花費時間: %f 秒\n", end - start);
|
||||
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
return 0;
|
||||
}
|
12
matrix multiplication/OpenMP/makefile
Normal file
12
matrix multiplication/OpenMP/makefile
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Builds the OpenMP matrix-multiplication program from main.c.
CC = gcc
# -fopenmp turns on the "#pragma omp" directive used in main.c.
CFLAGS = -fopenmp -O2
TARGET = matrix_mul
SRC = main.c

# "all" and "clean" are actions, not files. Declaring them phony keeps a
# stray file with either name from suppressing the rule.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(SRC)
	$(CC) $(CFLAGS) -o $(TARGET) $(SRC)

clean:
	rm -f $(TARGET)
|
BIN
matrix multiplication/OpenMP/matrix_mul
Executable file
BIN
matrix multiplication/OpenMP/matrix_mul
Executable file
Binary file not shown.
BIN
matrix multiplication/generate_matrix
Executable file
BIN
matrix multiplication/generate_matrix
Executable file
Binary file not shown.
37
matrix multiplication/generate_matrix.c
Normal file
37
matrix multiplication/generate_matrix.c
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define N 8192
|
||||
#define FILE_A "matrix_A.bin"
|
||||
#define FILE_B "matrix_B.bin"
|
||||
|
||||
void generate_and_save_matrix(const char *filename) {
|
||||
FILE *file = fopen(filename, "wb");
|
||||
if (!file) {
|
||||
perror("無法打開檔案");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
double *row = (double *)malloc(N * sizeof(double));
|
||||
if (!row) {
|
||||
perror("記憶體配置失敗");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
for (size_t j = 0; j < N; j++) {
|
||||
row[j] = (double)(rand() % 100) / 10.0;
|
||||
}
|
||||
fwrite(row, sizeof(double), N, file);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
free(row);
|
||||
}
|
||||
|
||||
int main() {
|
||||
generate_and_save_matrix(FILE_A);
|
||||
generate_and_save_matrix(FILE_B);
|
||||
printf("矩陣 A 和 B 已成功產生並存入 %s 和 %s\n", FILE_A, FILE_B);
|
||||
return 0;
|
||||
}
|
12
matrix multiplication/makefile
Normal file
12
matrix multiplication/makefile
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Builds the matrix generator from generate_matrix.c.
CC = gcc
# generate_matrix.c includes only stdio.h and stdlib.h and has no OpenMP
# directives, so -fopenmp is not needed here.
CFLAGS = -O2
TARGET = generate_matrix
SRC = generate_matrix.c

# "all" and "clean" are actions, not files. Declaring them phony keeps a
# stray file with either name from suppressing the rule.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(SRC)
	$(CC) $(CFLAGS) -o $(TARGET) $(SRC)

clean:
	rm -f $(TARGET)
|
Loading…
Reference in a new issue