// Copy three device arrays back to the host, issuing each copy on its own
// stream, then wait for the device to finish and release the streams.
//
// Every CUDA runtime call returns a cudaError_t. Failures are reported on
// stderr instead of being silently dropped; control flow is otherwise
// unchanged, so the code always runs through to the stream cleanup.
// Requires <cstdio> for fprintf.
//
// NOTE(review): the copies can only run asynchronously with respect to the
// host if h_array_* are page-locked (cudaMallocHost / cudaHostAlloc) --
// confirm how they are allocated.
// NOTE(review): byte counts use sizeof(int); presumably the arrays hold int
// elements -- verify against their declarations.

// Returns true on success; otherwise prints `what` together with the
// runtime's description of the error and returns false.
auto cudaOk = [](cudaError_t status, const char* what) -> bool {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
};

// Zero-initialized so that no indeterminate handle is ever used.
cudaStream_t streams[3] = {};

for (int i = 0; i < 3; i++) {
    if (!cudaOk(cudaStreamCreate(&streams[i]), "cudaStreamCreate")) {
        // A null handle denotes the default stream, so the matching copy
        // below is still issued; it just loses its dedicated stream.
        streams[i] = nullptr;
    }
}

cudaOk(cudaMemcpyAsync(h_array_1, d_array_1, size_1 * sizeof(int),
                       cudaMemcpyDeviceToHost, streams[0]),
       "cudaMemcpyAsync(h_array_1)");
cudaOk(cudaMemcpyAsync(h_array_2, d_array_2, size_2 * sizeof(int),
                       cudaMemcpyDeviceToHost, streams[1]),
       "cudaMemcpyAsync(h_array_2)");
cudaOk(cudaMemcpyAsync(h_array_3, d_array_3, size_3 * sizeof(int),
                       cudaMemcpyDeviceToHost, streams[2]),
       "cudaMemcpyAsync(h_array_3)");

// Wait for all outstanding device work. Errors raised while the copies
// execute asynchronously may only be reported by this call.
cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

for (int i = 0; i < 3; i++) {
    // Only destroy streams that were actually created.
    if (streams[i] != nullptr) {
        cudaOk(cudaStreamDestroy(streams[i]), "cudaStreamDestroy");
    }
}