10099. Dot Product (CUDA)

I'm a slow walker, but I never walk backwards.

題目描述

請用 CUDA 改寫下段的計算:

main.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <stdio.h>
#include <assert.h>
#include <omp.h>
#include <inttypes.h>
#include <stdint.h>
#include "utils.h"
 
#define MAXGPU 8
#define MAXCODESZ 32767
#define MAXN 16777216
uint32_t A[MAXN], B[MAXN], C[MAXN];
int main(int argc, char *argv[]) {
    omp_set_num_threads(4);
    int N;
    uint32_t key1, key2;
    while (scanf("%d %" PRIu32 " %" PRIu32, &N, &key1, &key2) == 3) {
        int chunk = N / 4;
        for (int i = 0; i < N; i++) {
            A[i] = encrypt(i, key1);
            B[i] = encrypt(i, key2);
        }
 
        for (int i = 0; i < N; i++)
            C[i] = A[i] * B[i];
 
        uint32_t sum = 0;
        for (int i = 0; i < N; i++)
            sum += C[i];
        printf("%" PRIu32 "\n", sum);
    }
    return 0;
}

utils.h

#ifndef _UTILS_H
#define _UTILS_H
#include <stdint.h>
static inline uint32_t rotate_left(uint32_t x, uint32_t n) {
    return  (x << n) | (x >> (32-n));
}
static inline uint32_t encrypt(uint32_t m, uint32_t key) {
    return (rotate_left(m, key&31) + key)^key;
}
#endif

範例輸入

16777216 1 2
16777216 3 5

範例輸出

2885681152
2147483648

編譯參數

1
2
$ nvcc -Xcompiler "-O2 -fopenmp" main.cu -o main
$ ./main

Discussion