I am trying to make a image filter in Cuda c and i don't think i understand exactly how does the thread assignment for each pixel work.This is my code so far: imageFilter.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "Filtering_Functions.h"
__global__ void Grayscale_image(int h, int w, unsigned char* Image) {
int x = (blockIdx.x * blockDim.x) threadIdx.x;
int y = (blockIdx.y * blockDim.y) threadIdx.y;
unsigned int tid = threadIdx.y * blockDim.y threadIdx.x;
if (x > 0 && x < w - 1 && y > 0 && y < h - 1)
{
Image[tid] = 0.299 * Image[tid] 0.587 * Image[tid 1] 0.114 * Image[tid 2];
Image[tid 1] = Image[tid];
Image[tid 2] = Image[tid];
}
}
void Image_Grayscale(unsigned char* Image, int Height, int Width) {
unsigned char* Uploaded_Image = NULL;
dim3 blocks(Width / 16, Height / 16);
dim3 threads(16, 16);
cudaMalloc((void**)&Uploaded_Image, Height * Width * 3);
cudaMemcpy(Uploaded_Image, Image, Height * Width * 3, cudaMemcpyHostToDevice);
Grayscale_image << <blocks, threads >> > (Height, Width, Uploaded_Image);
cudaMemcpy(Image, Uploaded_Image, Height * Width * 3, cudaMemcpyDeviceToHost);
cudaFree(Uploaded_Image);
}
filtering_functions.h
#ifndef filtering_functions
#define filtering_functions
void Image_Grayscale(unsigned char* Image, int Height, int Width);
#endif
ImageFiltering.cpp
#include <iostream>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include "Filtering_Functions.h"
using namespace std;
using namespace cv;
int main() {
Mat Image = imread("Example.png");
cout << "The uploaded image has Height: " << Image.rows << ", Width: " << Image.cols << endl;
Image_Grayscale(Image.data, Image.rows, Image.cols);
imwrite("Grayscale_Filter.png", Image);
system("pause");
return 0;
}
At the end i do not see any changes. Can someone tell me what am i doing wrong, or at least what i don't understand?
CodePudding user response:
Cuda operations are asynchonous relative to the host processor. I suspect you're attempting to write the data to disk before it's been touched by the GPU. Consider calling cudaStreamSynchronize(0) before trying to examine the results.
CodePudding user response:
The code you provided does not compile - you used an undeclared identifier Image2 in this line:
imwrite("Grayscale_Filter.png", Image2);
Maybe that's the issue? Are you writing something else to the file?
