android-GPU与CPU编程:处理时间不一致
内容导读
互联网集市收集整理的这篇技术教程文章主要介绍了android-GPU与CPU编程:处理时间不一致,小编现在分享给大家,供广大互联网技能从业者学习和参考。文章包含8318字,纯文字阅读大概需要12分钟。
内容图文
我目前正在进行图像跟踪:由于有了摄像头,我可以跟踪与Android系统交互的手指触摸.图像处理是在带有OpenCL的GPU上完成的:我将相机输出转换为黑白帧,以便获得白色斑点.该方法的处理时间为65ms.
由于我的目标是使程序更流畅,因此我使用OpenCV方法在CPU上执行了相同的操作.这样处理时间为115ms.问题在于,使用OpenCV方法时,程序感觉更加灵敏,速度更快,而且我不了解在这种情况下处理时间如何更长:这似乎与我矛盾.
对于测量,我这样进行:
// Timing pattern used for the measurements quoted above.
// NOTE(review): clock() returns CPU time charged to this process, NOT
// wall-clock time. Time the CPU spends blocked while the GPU does the work is
// largely invisible to clock(), so the GPU path can report a smaller number
// while actually taking longer end-to-end -- which matches the "feels slower
// but measures faster" symptom described here. Use
// std::chrono::steady_clock::now() for wall-clock measurements instead.
start= clock();
finish = clock();
double time =((double)finish -start)/CLOCKS_PER_SEC;
std::cout<<"process time : "<< time<<std::endl;
这是我的代码:
// NOTE(review): this fragment is the body of a capture/processing member
// function whose signature is not shown; the final '}' below closes that
// enclosing function.
static cv::Mat original_Right,binary_Right;
static cv::Mat original_Left, binary_Left;
int width, height;
// NOTE(review): clock() measures CPU time consumed by this process, not
// wall-clock time; see the timing discussion above. Work offloaded to the GPU
// is mostly invisible to it. std::chrono::steady_clock would give wall time.
clock_t start,finish;
double time = 0.0;
// Query the right camera's frame size and allocate a 3-channel 8-bit frame.
width = (int) this->camera_Right.getCapture().get(cv::CAP_PROP_FRAME_WIDTH);
height = (int) this->camera_Right.getCapture().get(cv::CAP_PROP_FRAME_HEIGHT);
original_Right.create(height, width, CV_8UC3);
//--------------------------- Camera 2 ---------------------------------
int width_2 = (int) this->camera_Left.getCapture().get(cv::CAP_PROP_FRAME_WIDTH);
int height_2 = (int) this->camera_Left.getCapture().get(cv::CAP_PROP_FRAME_HEIGHT);
original_Left.create(height_2, width_2, CV_8UC3);
// GPU path writes one float per pixel (matches the sizeof(float)*data_size/3
// output buffer below); the commented-out CPU path uses 8-bit masks instead.
binary_Right.create(height, width, CV_32F); // FOR GPU
binary_Left.create(height_2, width_2, CV_32F); // FOR GPU
//binary_Right.create(height, width, CV_8UC1); // FOR CPU
//binary_Left.create(height_2, width_2, CV_8UC1); // FOR CPU
Core::running_ = true;
//------------------------------------ SET UP THE GPU -----------------------------------------
cl_context context;
cl_context_properties properties [3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms=0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices=0;
cl_mem input, output;
size_t global;
// Bytes per input frame: height x width x 3 channels (CV_8UC3).
// NOTE(review): sized from the RIGHT camera only -- assumes both cameras
// deliver identical resolutions; confirm, since the left frame reuses it.
int data_size =height*width*3;
//load opencl source
FILE *fp;
char fileName[] = "./helloTedKrissV2.cl";
char *source_str;
//Load the source code containing the kernel
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
// NOTE(review): MAX_SOURCE_SIZE is defined elsewhere in the project, and
// source_str is never free()d -- this allocation leaks.
source_str = (char*)malloc(MAX_SOURCE_SIZE);
// NOTE(review): 'global' temporarily holds the byte count read from the file;
// it is overwritten with the NDRange size inside the loop below. A separate
// variable would be clearer.
global = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);
// Retrieve the first available OpenCL platform.
// NOTE(review): on failure this only logs and falls through with an
// uninitialized platform_id; the later CL calls would then misbehave.
if(clGetPlatformIDs(1,&platform_id, &num_of_platforms)!=CL_SUCCESS){
std::cout<<"unable to get a platform_id"<<std::endl;
};
// Get a GPU device on that platform (same fall-through caveat as above).
if(clGetDeviceIDs(platform_id,CL_DEVICE_TYPE_GPU,1,&device_id, &num_of_devices)!= CL_SUCCESS){
std::cout<<"unable to get a device_id"<<std::endl;
};
//context properties list - must be terminated with 0
properties[0]=CL_CONTEXT_PLATFORM;
properties[1]=(cl_context_properties) platform_id;
properties[2]=0;
// create a context with the gpu device
context = clCreateContext(properties,1,&device_id,NULL,NULL,&err);
// Create an in-order command queue (no profiling flags, so CL event-based
// timing is unavailable -- measurement falls back to host-side clock()).
command_queue = clCreateCommandQueue(context,device_id,0,&err);
//create a program from the kernel source code
program= clCreateProgramWithSource(context,1,(const char **) &source_str, NULL,&err);
// Compile the program; on failure dump the build log for diagnosis.
if(clBuildProgram(program,0,NULL,NULL,NULL,NULL)!=CL_SUCCESS){
size_t length;
std::cout<<"Error building program"<<std::endl;
char buffer[4096];
clGetProgramBuildInfo(program,device_id,CL_PROGRAM_BUILD_LOG, sizeof(buffer),buffer,&length);
std::cout<< buffer <<std::endl;
}
//specify which kernel from the program to execute
kernel = clCreateKernel(program,"imageProcessing",&err);
// Main per-frame loop: grab both camera frames, run the CL kernel on each,
// track finger contours, then report coordinates over UART.
while (this->isRunning() == true) {
start= clock(); //--------------------- START----------------------
//----------------------FRAME---------------------
this->camera_Right.readFrame(original_Right);
if (original_Right.empty() == true ) {
std::cerr << "[Core/Error] Original frame is empty." << std::endl;
break;
}
this->camera_Left.readFrame(original_Left);
if (original_Left.empty() == true ) {
std::cerr << "[Core/Error] Original 2 frame is empty." << std::endl;
break;
}
//----------------------FRAME---------------------
//------------------------------------------------IMP GPU ------------------------------------------------------
// NOTE(review): both CL buffers are created and released on EVERY frame for
// each camera; allocating them once outside the loop would cut per-frame
// overhead, which likely contributes to the GPU path feeling slower.
input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR , sizeof(unsigned char)*data_size,NULL,NULL);
output =clCreateBuffer(context,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(float)*data_size/3,NULL,NULL);
// NOTE(review): CL_TRUE makes this a blocking upload, and the empty {} body
// silently discards any error result -- same pattern on every transfer below.
if(clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0,sizeof(unsigned char)*data_size, original_Right.data ,0,NULL,NULL )!= CL_SUCCESS){};
//set the argument list for the kernel command
clSetKernelArg(kernel,0,sizeof(cl_mem), &input);
clSetKernelArg(kernel,1,sizeof(cl_mem), &output);
// One work-item per input byte (3 per pixel) -- presumably the kernel
// indexes accordingly; confirm against helloTedKrissV2.cl.
global = data_size ;
//enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL,0,NULL,NULL);
clFinish(command_queue);
// Blocking read of the per-pixel float result into the right binary image.
if(clEnqueueReadBuffer(command_queue,output,CL_TRUE ,0,sizeof(float)*data_size/3,binary_Right.data,0,NULL,NULL )!= CL_SUCCESS){};
clReleaseMemObject(input);
clReleaseMemObject(output);
//------------------------------------------------IMP GPU ------------------------------------------------------
// Same upload / launch / download sequence for the left camera frame.
// NOTE(review): data_size is derived from the right camera's dimensions;
// this silently assumes the left frame has the same size.
input = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR , sizeof(unsigned char)*data_size,NULL,NULL);
output =clCreateBuffer(context,CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(float)*data_size/3,NULL,NULL);
if(clEnqueueWriteBuffer(command_queue,input,CL_TRUE,0,sizeof(unsigned char)*data_size, original_Left.data ,0,NULL,NULL )!= CL_SUCCESS){};
//set the argument list for the kernel command
clSetKernelArg(kernel,0,sizeof(cl_mem), &input);
clSetKernelArg(kernel,1,sizeof(cl_mem), &output);
global = data_size ;
//enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL,0,NULL,NULL);
clFinish(command_queue);
//copy the results from out of the output buffer
if(clEnqueueReadBuffer(command_queue,output,CL_TRUE ,0,sizeof(float)*data_size/3,binary_Left.data,0,NULL,NULL )!= CL_SUCCESS){};
clReleaseMemObject(input);
clReleaseMemObject(output);
//------------------------------------------------IMP GPU ------------------------------------------------------
// Alternative CPU implementation (OpenCV-based) kept for comparison; note it
// requires the CV_8UC1 binary mats commented out near the top.
// adok::processing::doImageProcessing(original_Right, binary_Right);
// adok::processing::doImageProcessing(original_Left, binary_Left);
//-------------------------------------------------------------- TRACKING ------------------------------------------------------
adok::tracking::doFingerContoursTracking(binary_Right,binary_Left, this->fingerContours, this->perspective_Right,this->perspective_Left, this->distortion_Right,this->distortion_Left, this);
//------------------------------------------- TRACKING -----------------------------------------
//------------------------------SEND COORDINATES TO ANDROID BOARD--------------------
// Report which side(s) registered a touch this frame over UART.
if (getSideRight() && !getSideLeft() ) {
std::cout<<"RIGHT : "<<std::endl;
this->uart_.sendAll(this->fingerContours, this->perspective_Right.getPerspectiveMatrix(), RIGHT);
}else if (!getSideRight() && getSideLeft() ){
std::cout<<"LEFT : "<<std::endl;
this->uart_.sendAll(this->fingerContours, this->perspective_Left.getPerspectiveMatrix(), LEFT);
}else if (getSideRight() && getSideLeft() ){
std::cout<<"RIGHT & LEFT : "<<std::endl;
this->uart_.sendAll(this->fingerContours, this->perspective_Right.getPerspectiveMatrix(), this->perspective_Left.getPerspectiveMatrix());
}
// Reset the per-frame side flags for the next iteration.
this->setSideRight(0);
this->setSideLeft(0);
finish = clock();
// NOTE(review): CPU-time measurement, not wall time -- see comment at top.
time =(double)(finish - start)/CLOCKS_PER_SEC;
std::cout << "Time: " << time << std::endl; // ------------END-----------
}
// Tear down the OpenCL objects created above (source_str is still leaked).
clReleaseCommandQueue(command_queue);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseContext(context);
this->stop();
}
还有一点很奇怪:使用CPU方法时抓取一帧只需要5毫秒,而改用GPU方法后抓帧时间却增加到了15毫秒,我不明白抓帧时间为什么会变长.
而且我正在研究android xu4.
解决方法:
GPU计算有时可能比CPU计算花费更多时间。因为在GPU计算中,主进程要先把数据发送到GPU内存,完成数学计算之后,GPU再把结果传回CPU,这一来一回的数据传输和接收都需要时间。如果参与计算的缓冲区较大、传输时间较长,GPU计算就可能反而花费更多时间。CUDNN库配合GPU处理器可以使运算快很多倍,因此如果您的程序没有使用CUDNN,速度可能会更慢。
内容总结
以上是互联网集市为您收集整理的android-GPU与CPU编程:处理时间不一致全部内容,希望文章能够帮你解决android-GPU与CPU编程:处理时间不一致所遇到的程序开发问题。 如果觉得互联网集市技术教程内容还不错,欢迎将互联网集市网站推荐给程序员好友。
内容备注
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 gblab@vip.qq.com 举报,一经查实,本站将立刻删除。
内容手机端
扫描二维码推送至手机访问。