CUDA and OpenGL Interop

I've been reading through the CUDA documentation, and it seems to me that every buffer that needs to interface with OpenGL has to be created as an OpenGL buffer object. According to the NVIDIA programming guide, this has to be done like this:

// Excerpt: sharing an OpenGL vertex buffer object (VBO) with CUDA.
// The VBO is created by OpenGL, registered with CUDA once, and then
// mapped / written by a kernel / unmapped on every frame before drawing.
// NOTE: width, height and time are not declared in this excerpt, and the
// return codes of the CUDA calls are not checked.

GLuint positionsVBO;
struct cudaGraphicsResource* positionsVBO_CUDA;

// Forward declarations: both are referenced before their definitions below.
void display();
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height);

int main()
{
    // Explicitly set device
    cudaGLSetGLDevice(0);

    // Initialize OpenGL and GLUT
    // ...
    glutDisplayFunc(display);

    // Create buffer object and register it with CUDA
    glGenBuffers(1, &positionsVBO);               // takes a pointer to the handle
    glBindBuffer(GL_ARRAY_BUFFER, positionsVBO);  // takes the handle itself
    unsigned int size = width * height * 4 * sizeof(float);
    glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    cudaGraphicsGLRegisterBuffer(&positionsVBO_CUDA, positionsVBO,
                                 cudaGraphicsMapFlagsWriteDiscard);

    // Launch rendering loop
    glutMainLoop();
}

// GLUT display callback: lets CUDA write the vertex positions into the
// VBO, then renders the VBO as points.
void display()
{
    // Map buffer object for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &positionsVBO_CUDA, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions, &num_bytes,
                                         positionsVBO_CUDA);

    // Execute kernel
    // Integer division: the grid only covers the whole width x height area
    // when both are multiples of the 16x16 block size.
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, time, width, height);

    // Unmap buffer object
    cudaGraphicsUnmapResources(1, &positionsVBO_CUDA, 0);

    // Render from buffer object
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glBindBuffer(GL_ARRAY_BUFFER, positionsVBO);
    glVertexPointer(4, GL_FLOAT, 0, 0);
    glEnableClientState(GL_VERTEX_ARRAY);
    glDrawArrays(GL_POINTS, 0, width * height);
    glDisableClientState(GL_VERTEX_ARRAY);

    // Swap buffers
    glutSwapBuffers();
    glutPostRedisplay();
}

// Unregisters the VBO from CUDA, then deletes the OpenGL buffer object.
void deleteVBO()
{
    cudaGraphicsUnregisterResource(positionsVBO_CUDA);
    glDeleteBuffers(1, &positionsVBO);
}

// Kernel launched from display() on a 2D grid of 16x16 blocks; it receives
// the mapped VBO pointer. The body is elided in this excerpt.
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height)
{
    // [...]
}

Is there a way to give memory allocated with cudaMalloc directly to OpenGL? I already have working code written in CUDA, and I want to put my float4 array directly into OpenGL. Say I already have code like:

// cudaMalloc returns a cudaError_t and hands back the device pointer through
// its first argument, so its return value cannot be cast to float4*.
float4* cd;
cudaMalloc((void**)&cd, elements * sizeof(float4));
// The launch configuration was lost in the original paste; numBlocks and
// threadsPerBlock are placeholders for it.
do_something<<<numBlocks, threadsPerBlock>>>(cd);

And I want to display the output of do_something through OpenGL. Side note: why is the cudaGraphicsResourceGetMappedPointer function called on every time step?