cuda - CUDA 中的三线性插值

我需要在 CUDA 中执行三线性插值。这是问题定义:

给定三个点向量:x[nx]、y[ny]、z[nz],以及一个函数值矩阵 func[nx][ny][nz],我想在 x、y 和 z 的取值范围之内的一些随机点处求出函数值。

我可以在 CUDA 中编写自己的插值内核,但我想知道是否已经存在一个可以完成这项工作的内核。

谢谢!

1 个回答

如 @Farzad 所述,您可以使用纹理过滤在 CUDA 中执行三线性插值。simpleTexture3D 示例提供了有关如何使用它的完整示例。然而,该示例可能无法直接拿来使用,因为它依赖 OpenGL 和 glut 等库,以及 cutil.h 等其他外部依赖项。

因此,我发现将上述代码缩减为显示概念的“最小尺寸”示例很有用。正如您将看到的,代码加载位于名为 Bucky.raw 的文件中的外部数据,我从上面链接的 github 页面“借用”了该文件。


代码如下:



// Headers required by the code below: C stdio/stdlib for fprintf, getchar,
// malloc, fopen and exit; <fstream> for std::ofstream; the CUDA runtime API.
#include <stdio.h>
#include <stdlib.h>

#include <fstream>

#include <cuda_runtime.h>

typedef unsigned char uchar;

// Thread-block edge length; the kernel is launched with BLOCKSIZE x BLOCKSIZE threads.
#define BLOCKSIZE 16

float w = 0.5f; // texture coordinate in z

// Wraps a CUDA runtime call and reports the failing file/line on error.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Checks the status code returned by a CUDA runtime call.
//   code  - status returned by the call
//   file  - source file of the call site (string literal from __FILE__, hence const)
//   line  - source line of the call site
//   abort - when true (default), wait for a key press and exit with `code`
// On success this is a no-op; on failure the error string is printed to stderr.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // getchar() keeps a console window open until the message has been read.
        if (abort) { getchar(); exit(code); }
    }
}

typedef unsigned int uint;
typedef unsigned char uchar;

// 3D texture reference over 8-bit voxels. cudaReadModeNormalizedFloat makes
// tex3D() return a float, which is what linear filtering of 8-bit integer
// data requires and what the kernel multiplies by 255.
// NOTE(review): texture references are the legacy API (removed in CUDA 12);
// building this example as written needs an older toolkit.
texture<uchar, 3, cudaReadModeNormalizedFloat> tex; // 3D texture

cudaArray *d_volumeArray = 0; // device array backing the 3D texture

uint *d_output = NULL; // device buffer receiving the interpolated slice
uint *h_output = NULL; // host copy of the interpolated slice

// Samples one z-slice of the 3D texture `tex` onto an imageW x imageH grid.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel. Threads
// that fall outside the image return without touching memory, so the grid
// may overshoot the image size.
//
//   d_output - device buffer of at least imageW * imageH elements, row-major
//   imageW   - output width in pixels
//   imageH   - output height in pixels
//   w        - z texture coordinate of the slice to sample
//
// Assumes `tex` is declared with a float read mode so that tex3D() yields a
// float; the sample is scaled by 255 before being stored.
__global__ void
d_render(uint *d_output, uint imageW, uint imageH, float w)
{
    // Plain 32-bit multiplies replace the legacy __umul24 intrinsic.
    const uint x = blockIdx.x * blockDim.x + threadIdx.x;
    const uint y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard first so out-of-range threads skip the texture fetch as well.
    if ((x >= imageW) || (y >= imageH)) return;

    // Pixel position expressed as fractions of the image size.
    const float u = x / (float) imageW;
    const float v = y / (float) imageH;

    // read from 3D texture
    const float voxel = tex3D(tex, u, v, w);

    // write output color
    const uint i = y * imageW + x;
    d_output[i] = (uint)(voxel * 255.0f);
}

// Minimal texture-based trilinear interpolation example.
//
// Loads an N x N x N volume of 8-bit voxels from "Bucky.raw", uploads it to a
// 3D CUDA array bound to the texture `tex`, samples the slice at z-coordinate
// `w` on an imageW x imageH grid with d_render, and writes the result as raw
// unsigned ints to "out_texture.dat".
//
// Returns 0 on success and 1 on a host-side failure; CUDA failures terminate
// the process through gpuErrchk.
int main() {

    const int N = 32;        // volume edge length in voxels
    const int imageH = 512;  // output height
    const int imageW = 512;  // output width

    const size_t volumeBytes = (size_t)N * N * N * sizeof(uchar);
    const size_t imageBytes = (size_t)imageW * imageH * sizeof(uint);

    const char* filename = "Bucky.raw";

    // --- Loading data from file
    FILE *fp = fopen(filename, "rb");
    if (!fp) { fprintf(stderr, "Error opening file '%s'\n", filename); getchar(); return 1; }

    uchar *data = (uchar*)malloc(volumeBytes);
    if (!data) {
        fprintf(stderr, "Error allocating %lu bytes of host memory\n", (unsigned long)volumeBytes);
        fclose(fp);
        return 1;
    }
    size_t read = fread(data, 1, volumeBytes, fp);
    fclose(fp);

    // Cast so the argument matches %lu on platforms where size_t is not unsigned long.
    printf("Read '%s', %lu bytes\n", filename, (unsigned long)read);

    // A short read would leave part of the volume uninitialized.
    if (read != volumeBytes) {
        fprintf(stderr, "Error: expected %lu bytes in '%s'\n", (unsigned long)volumeBytes, filename);
        free(data);
        return 1;
    }

    gpuErrchk(cudaMalloc((void**)&d_output, imageBytes));

    // --- Create 3D array
    const cudaExtent volumeSize = make_cudaExtent(N, N, N);

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
    gpuErrchk(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));

    // --- Copy data to 3D array (host to device)
    cudaMemcpy3DParms copyParams = {0};
    copyParams.srcPtr = make_cudaPitchedPtr((void*)data, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyHostToDevice;
    gpuErrchk(cudaMemcpy3D(&copyParams));

    // --- Set texture parameters
    tex.normalized = true; // access with normalized texture coordinates
    tex.filterMode = cudaFilterModeLinear; // linear interpolation
    tex.addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.addressMode[2] = cudaAddressModeWrap;

    // --- Bind array to 3D texture
    gpuErrchk(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));

    // --- Launch the interpolation kernel
    const dim3 blockSize(BLOCKSIZE, BLOCKSIZE, 1);
    // Round up so the whole image is covered even when its size is not a
    // multiple of the block size; the kernel discards out-of-range threads.
    const dim3 gridSize((imageW + blockSize.x - 1) / blockSize.x,
                        (imageH + blockSize.y - 1) / blockSize.y);
    d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, w);
    gpuErrchk(cudaGetLastError());       // launch-configuration errors
    gpuErrchk(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // --- Copy the interpolated data to host
    int status = 0;
    h_output = (uint*)malloc(imageBytes);
    if (!h_output) {
        fprintf(stderr, "Error allocating %lu bytes of host memory\n", (unsigned long)imageBytes);
        status = 1;
    } else {
        gpuErrchk(cudaMemcpy(h_output, d_output, imageBytes, cudaMemcpyDeviceToHost));

        std::ofstream outfile;
        outfile.open("out_texture.dat", std::ios::out | std::ios::binary);
        if (!outfile) {
            fprintf(stderr, "Error opening file '%s'\n", "out_texture.dat");
            status = 1;
        } else {
            outfile.write((char*)h_output, imageBytes);
            outfile.close();
        }
    }

    // --- Release host and device resources
    gpuErrchk(cudaUnbindTexture(tex));
    gpuErrchk(cudaFreeArray(d_volumeArray));
    gpuErrchk(cudaFree(d_output));
    free(h_output);
    free(data);

    return status;
}



代码将结果以二进制格式保存在 out_texture.dat 中。您可以按如下方式在 Matlab 中加载该二进制数据:

% Load the slice written by the CUDA program (raw 32-bit unsigned integers).
imageH = 512;   % must match imageH in the CUDA program
imageW = 512;   % must match imageW in the CUDA program

fd = fopen('out_texture.dat','r');
U = fread(fd,imageH*imageW,'uint32');
fclose(fd);

% The file is stored row by row (index = y*imageW + x), so reshape to
% imageW-by-imageH and transpose to obtain an imageH-by-imageW image.
img = reshape(U, imageW, imageH).';

