Today I was trying to create a program that copied an image using the GPU. I created a simple program that does this. To load the image I am using lodepng. The problem isn't with copying via cudaMemcpy because when I copy the image to GPU and back it stays intact, but when I try to copy it in the kernel, it doesn't. Feel free to ask any questions that you have about my problem.
The code:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
#include <Windows.h>
#include <math.h>
#include <LodePNG\lodepng.h>
const int BLOCK_WIDTH = 32;
using namespace std;
__global__ void expousure(unsigned char *in, unsigned char *out)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int pitch = blockDim.x * gridDim.x;
    int absIdx = x + y * pitch;
    out[absIdx] = in[absIdx];
}
void decode(std::vector<unsigned char>& image, const char* filename, int& width, int& height)
{
    unsigned widthU, heightU;
        //decode
    unsigned error = lodepng::decode(image, widthU, heightU, filename);
    width = int(widthU);
    height = int(heightU);
    //if there's an error, display it
    if (error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
    //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
}
void encodeAndSave(const std::vector<unsigned char>& inPixels, const char* filename, int width, int height)
{
    std::vector<unsigned char> outEncoded;
    unsigned error = lodepng::encode(outEncoded, inPixels, unsigned(width), unsigned(height));
    if (error){
        std::cout << "encoder error" << error << ": " << lodepng_error_text(error) << std::endl;
        return;
    }
    lodepng::save_file(outEncoded, filename);
}
void encodeAndSave(unsigned char* inPixels, const char* filename, int width, int height)
{
    std::vector<unsigned char> outEncoded;
    unsigned error = lodepng::encode(outEncoded, inPixels, unsigned(width), unsigned(height));
    if (error){
        std::cout << "encoder error" << error << ": " << lodepng_error_text(error) << std::endl;
        return;
    }
    lodepng::save_file(outEncoded, filename);
}
int main(int argc, char *argv[])
{
    // decode the image to image from filename
    int width, height;
    const char* filename = argc > 1 ? argv[1] : "C:/Users/Russell/Documents/Visual Studio 2013/Projects/Hello CUDA/Release/test.png";
    vector <unsigned char> h_image;
    decode(h_image, filename, width, height);
    unsigned char *d_in;
    unsigned char *d_out;
    cudaMalloc(&d_in, sizeof(unsigned char) * width * height * 4);
    cudaMalloc(&d_out, sizeof(unsigned char) * width * height * 4);
    cudaMemcpy(d_in, &h_image[0], sizeof(unsigned char) * width * height * 4, cudaMemcpyHostToDevice);
    expousure<<<dim3(width / BLOCK_WIDTH, height / BLOCK_WIDTH, 1), dim3(BLOCK_WIDTH, BLOCK_WIDTH, 1) >>>(d_in, d_out);
    unsigned char h_out[256 * 256 * 4];
    cudaMemcpy(h_out, d_out, sizeof(unsigned char) * width * height * 4, cudaMemcpyDeviceToHost);
    // encode and save image from image to filename
    vector <unsigned char> imageOUT;
    const char* outname = "C:/Users/Russell/Documents/Visual Studio 2013/Projects/Hello CUDA/Release/testOUT.png";
    encodeAndSave(h_out, outname, width, height);
}
The input image: https://i.stack.imgur.com/Rx0mF.png
The output image: https://i.stack.imgur.com/HLmPQ.png
 
     
    