題目描述
請用 CUDA 改寫下段的計算:
main.c
#include <stdio.h>
#include <assert.h>
#include <omp.h>
#include <inttypes.h>
#include <stdint.h>
#include "utils.h"
/* Not referenced anywhere in the visible code. */
#define MAXGPU 8
/* Not referenced anywhere in the visible code. */
#define MAXCODESZ 32767
/* Capacity of each of the arrays A, B and C (16777216 = 2^24 elements). */
#define MAXN 16777216
/*
 * Work arrays filled by main for each test case: A and B receive the
 * encrypt() outputs for the two keys, C their element-wise products.
 */
uint32_t A[MAXN], B[MAXN], C[MAXN];
int main( int argc, char *argv[]) {
omp_set_num_threads(4);
int N;
uint32_t key1, key2;
while ( scanf ( "%d %" PRIu32 " %" PRIu32, &N, &key1, &key2) == 3) {
int chunk = N / 4;
for ( int i = 0; i < N; i++) {
A[i] = encrypt(i, key1);
B[i] = encrypt(i, key2);
}
for ( int i = 0; i < N; i++)
C[i] = A[i] * B[i];
uint32_t sum = 0;
for ( int i = 0; i < N; i++)
sum += C[i];
printf ( "%" PRIu32 "\n" , sum);
}
return 0;
}
|
utils.h
#ifndef _UTILS_H
#define _UTILS_H
#include <stdint.h>
/**
 * Rotate a 32-bit value left by n bit positions.
 *
 * Only the low five bits of n are used, so n == 0 (or any multiple of 32)
 * returns x unchanged. Both shift counts are masked because shifting a
 * 32-bit value by 32 or more is undefined behavior in C; the unmasked form
 * x >> (32 - n) hit that case whenever n was 0.
 *
 * @param x value to rotate
 * @param n rotation count, taken modulo 32
 * @return x rotated left by (n mod 32) bits
 */
static inline uint32_t rotate_left(uint32_t x, uint32_t n) {
    n &= 31;
    return (x << n) | (x >> ((32 - n) & 31));
}
/**
 * Scramble m with key.
 *
 * m is rotated left by the low five bits of key, key is added to the
 * rotated value, and the sum is XORed with key.
 *
 * @param m   value to scramble
 * @param key key used for the rotation count, the addition and the XOR
 * @return the scrambled 32-bit value
 */
static inline uint32_t encrypt(uint32_t m, uint32_t key) {
    const uint32_t shift = key & 31;
    const uint32_t mixed = rotate_left(m, shift) + key;
    return mixed ^ key;
}
#endif
|
範例輸入
範例輸出
編譯參數
$ nvcc -Xcompiler "-O2 -fopenmp" main.cu -o main
$ ./main
|
Discussion