I am trying to parallelize a C program that draws the Mandelbrot set. I assign each process one section of the image, in the form of equal-sized blocks, as shown in this image:
I have tried to use the solution here, which addresses almost the same problem. However, I only get a partial image in the output:
Also, for high image resolutions (such as 8000x8000 pixels), the application crashes with a segmentation fault (signal 11). Here is my code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "mpi.h"
// Main program
int main(int argc, char* argv[])
 {
    /* screen ( integer) coordinate */
    int iX,iY,i,j;
    const int iXmax = 8000; // default
    const int iYmax = 8000; // default
    /* world ( double) coordinate = parameter plane*/
    double Cx, Cy;
    const double CxMin = -2.5;
    const double CxMax = 1.5;
    const double CyMin = -2.0;
    const double CyMax = 2.0; 
    /* */
    double PixelWidth = (CxMax - CxMin)/iXmax;
    double PixelHeight = (CyMax - CyMin)/iYmax;
    int linePerProcess, remainingLines, processMinY,  processMaxY, lastProcessMaxY, result_offset;
    int my_rank, processors, iXmaxHalf;
    int startAlert = 1;
    int receivedAlert;
    unsigned char (*resultBuffer)[3] = NULL;
    unsigned char (*resultBufferTwo)[3] = NULL;
    unsigned char (*finalResultBuffer)[3] = NULL;
    MPI_Status stat;
    /* color component ( R or G or B) is coded from 0 to 255 */
    /* it is 24 bit color RGB file */
    const int MaxColorComponentValue = 255; 
    FILE * fp;
    char *filename = "Mandelbrot.ppm";
    char *comment = "# ";   /* comment should start with # */
    // RGB color array
    unsigned char color[3];
    /* Z = Zx + Zy*i;   Z0 = 0 */
    double Zx, Zy;
    double Zx2, Zy2; /* Zx2 = Zx*Zx;  Zy2 = Zy*Zy  */
    /*  */
    int Iteration;
    const int IterationMax = 2000; // default
    /* bail-out value , radius of circle ;  */
    const double EscapeRadius = 400;
    double ER2 = EscapeRadius * EscapeRadius;
    double startTime, endTime;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &processors);
    linePerProcess = iYmax / (processors/2);
    iXmaxHalf = iXmax / 2;
    if (my_rank % 2  == 0) {
        processMinY = (my_rank/2) * linePerProcess;
    } else {
        processMinY = ((my_rank - 1)/2) * linePerProcess;
    }
    processMaxY = processMinY + linePerProcess;
    int Rows = iYmax;          // Global array rows
    int Columns = iXmax; // Global array columns
    int sizes[2];                     // No of elements in each dimension of the whole array
    int subSizes[2];                  // No of elements in each dimension of the subarray
    int startCoords[2];               // Starting coordinates of each subarray
    MPI_Datatype recvBlock, recvMagicBlock;
        // Create a subarray (a rectangular block) datatype from a regular, 2d array
    sizes[0] = Rows;
    sizes[1] = Columns;
    subSizes[0] = linePerProcess;
    subSizes[1] = iXmaxHalf;
    startCoords[0] = 0;
    startCoords[1] = 0;
    MPI_Type_create_subarray(2, sizes, subSizes, startCoords, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &recvBlock);
    MPI_Type_create_resized(recvBlock, 0, iXmaxHalf * sizeof(color), &recvMagicBlock);
    MPI_Type_commit(&recvMagicBlock);
    if (my_rank == 0) {
        // startTime = MPI_Wtime();
        // for(i=1; i<processors; i++){
        //  MPI_Send(&startAlert, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
        // }
        // printf("rank; %d\n", my_rank);
        finalResultBuffer = malloc(iXmax * iYmax * sizeof(color));
        for(iY = processMinY; iY < processMaxY; iY++) {
            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }
            for(iX = 0; iX < iXmaxHalf; iX++)
            {
                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;
            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };
            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }
                finalResultBuffer[(iY*iXmaxHalf)+iX][0] = color[0];
                finalResultBuffer[(iY*iXmaxHalf)+iX][1] = color[1];
                finalResultBuffer[(iY*iXmaxHalf)+iX][2] = color[2];
            }
        }
        result_offset = 1;
        for(i=1; i<processors; i++){
            MPI_Recv(finalResultBuffer, 1, recvMagicBlock, i, 0, MPI_COMM_WORLD, &stat);
            result_offset += 1;
        }
    } else if ((my_rank % 2 == 0) && (my_rank != 0)) {
        // MPI_Recv(&receivedAlert, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        // printf("rank; %d\n", my_rank);
        resultBuffer = malloc(linePerProcess * iXmaxHalf * sizeof(color));
        for(iY = processMinY; iY < processMaxY; iY++) {
            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }
            for(iX = 0; iX < iXmaxHalf; iX++)
            {
                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;
            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };
            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }
                resultBuffer[((iY-processMinY)*iXmaxHalf)+iX][0] = color[0];
                resultBuffer[((iY-processMinY)*iXmaxHalf)+iX][1] = color[1];
                resultBuffer[((iY-processMinY)*iXmaxHalf)+iX][2] = color[2];
            }
        }
        MPI_Send(resultBuffer, linePerProcess * iXmaxHalf, MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD);
        free(resultBuffer);
    } else {
        // MPI_Recv(&receivedAlert, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        // printf("rank; %d\n", my_rank);
        resultBufferTwo = malloc(linePerProcess * iXmaxHalf * sizeof(color));
        for(iY = processMinY; iY < processMaxY; iY++) {
            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }
            for(iX = iXmaxHalf; iX < iXmax; iX++)
            {
                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;
            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };
            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }
                resultBufferTwo[((iY-processMinY)*iXmaxHalf)+(iX - iXmaxHalf)][0] = color[0];
                resultBufferTwo[((iY-processMinY)*iXmaxHalf)+(iX - iXmaxHalf)][1] = color[1];
                resultBufferTwo[((iY-processMinY)*iXmaxHalf)+(iX - iXmaxHalf)][2] = color[2];
                // printf("rank: %d - value: %u%u%u\n", my_rank,resultBufferTwo[((iY-processMinY)*iXmax)+iX][0],resultBufferTwo[((iY-processMinY)*iXmax)+iX][1],resultBufferTwo[((iY-processMinY)*iXmax)+iX][2]);
            }
        }
        MPI_Send(resultBufferTwo, iXmaxHalf * linePerProcess, MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD);
        free(resultBufferTwo);
    }
    if (my_rank == 0) {
        endTime = MPI_Wtime();
        printf("Process time (s): %lf\n", endTime - startTime);
        /*create new file,give it a name and open it in binary mode  */
        fp = fopen(filename, "wb"); /* b -  binary mode */
        /*write ASCII header to the file (PPM file format)*/
        fprintf(fp,"P6\n %s\n %d\n %d\n %d\n", comment, iXmax, iYmax, MaxColorComponentValue);
        for(iY = 0; iY < iYmax; iY++)
        {
            for(iX = 0; iX < iXmax; iX++)
                {
                    fwrite(finalResultBuffer[(iY*iXmax)+iX], 1, 3, fp);
                }
        }
        fclose(fp);
        free(finalResultBuffer);
    }
    MPI_Finalize();
    return 0;
 }
I would appreciate it if someone could help me understand what I am doing wrong here.

