I do not know if my answer is perfectly relevant to the question you put, but hereafter I propose my C version of the Flood-Fill algorithm, which does not use recursive calls.
1-11-2017: NEW-VERSION; SUCCESFULLY TESTED WITH TWO BITMAPS.
It uses only a queue of the offsets of the new points, it works on the window: WinnOffs-(WinDimX,WinDimY) of the double-buffer: *VBuffer (copy of the screen or image) and, optionally, it write a mask of the flood-fill's result (*ExtraVBuff). 
ExtraVBuff must be filled it with 0 before the call (if you don't need a mask you may set ExtraVBuff= NULL); using it after call you can do gradient floodfill or other painting effects. NewFloodFill works with 32 Bit per Pixel and it is a C function. I've reinvented this algorithm in 1991 (I wrote his in Pascal), but now it works in C with 32 Bit per Pixel; also not uses any functions calls, does only a division after each "pop" from queue, and never overflows the queue, that, if it is sized in the right way (about 1/4 of the pixels of the image), it allows always to fill correctly any area; I show before the c-function (FFILL.C), after the test program (TEST.C):
#define IMAGE_WIDTH 1024
#define IMAGE_HEIGHT 768
#define IMAGE_SIZE IMAGE_WIDTH*IMAGE_HEIGHT
#define QUEUE_MAX IMAGE_SIZE/4
typedef int T_Queue[QUEUE_MAX];
typedef int T_Image[IMAGE_SIZE];
void NewFloodFill(int X,
                  int Y,
                  int Color,
                  int BuffDimX,
                  int WinOffS,
                  int WinDimX,
                  int WinDimY,
                  T_Image VBuffer,
                  T_Image ExtraVBuff,
                  T_Queue MyQueue)
/* Replaces all pixels adjacent to the first pixel and equal to this;   */
/* if ExtraVBuff == NULL writes to *VBuffer (eg BUFFER of 786432 Pixel),*/
/* otherwise prepare a mask by writing on *ExtraVBuff (such BUFFER must */
/* always have the same size as *VBuffer (it must be initialized to 0)).*/
/*         X,Y: Point coordinates' of origin of the flood-fill.         */
/*     WinOffS: Writing start offset on *VBuffer and *ExtraVBuff.       */
/*    BuffDimX: Width, in number of Pixel (int), of each buffer.        */
/*     WinDimX: Width, in number of Pixel (int), of the window.         */
/*       Color: New color that replace all_Pixel == origin's_point.     */
/*     WinDimY: Height, in number of Pixel (int), of the window.        */
/*     VBuffer: Pointer to the primary buffer.                          */
/*  ExtraVBuff: Pointer to the mask buffer (can be = NULL).             */
/*     MyQueue: Pointer to the queue, containing the new-points' offsets*/
{
 int VBuffCurrOffs=WinOffS+X+Y*BuffDimX;
 int PixelIn=VBuffer[VBuffCurrOffs];
 int QueuePnt=0;
 int *TempAddr=((ExtraVBuff) ? ExtraVBuff : VBuffer);
 int TempOffs1;
 int TempX1;
 int TempX2;
 char FLAG;
 if (0<=X && X<WinDimX && 0<=Y && Y<WinDimY) do
  {
   /* Fill to left the current line */
   TempX2=X;
   while (X>=0 && PixelIn==VBuffer[VBuffCurrOffs])
    {
     TempAddr[VBuffCurrOffs--]=Color;
     --X;
    }
   TempOffs1=VBuffCurrOffs+1;
   TempX1=X+1;
   /* Fill to right the current line */
   VBuffCurrOffs+=TempX2-X;
   X=TempX2;
   while (X+1<WinDimX && PixelIn==VBuffer[VBuffCurrOffs+1])
    {
     ++X;
     TempAddr[++VBuffCurrOffs]=Color;
    }
   TempX2=X;
   /* Backward scan of the previous line; puts new points offset in Queue[] */
   if (Y>0)
    {
     FLAG=1;
     VBuffCurrOffs-=BuffDimX;
     while (X-->=TempX1)
      {
       if (PixelIn!=VBuffer[VBuffCurrOffs] ||
           ExtraVBuff && Color==ExtraVBuff[VBuffCurrOffs])
        FLAG=1;
       else
       if (FLAG)
        {
         FLAG=0;
         if (QueuePnt<QUEUE_MAX)
          MyQueue[QueuePnt++]=VBuffCurrOffs;
        } 
       --VBuffCurrOffs;
      }
    }
   /* Forward scan of the next line; puts new points offset in Queue[] */
   if (Y<WinDimY-1)
    {
     FLAG=1;
     VBuffCurrOffs=TempOffs1+BuffDimX;
     X=TempX1;
     while (X++<=TempX2)
      {
       if (PixelIn!=VBuffer[VBuffCurrOffs] ||
           ExtraVBuff && Color==ExtraVBuff[VBuffCurrOffs])
        FLAG=1;
       else
       if (FLAG)
        {
         FLAG=0;
         if (QueuePnt<QUEUE_MAX)
          MyQueue[QueuePnt++]=VBuffCurrOffs;
        }
       ++VBuffCurrOffs;
      }
    }
   /* Gets a new point offset from Queue[] */ 
   if (--QueuePnt>=0)
    {
     VBuffCurrOffs=MyQueue[QueuePnt];
     TempOffs1=VBuffCurrOffs-WinOffS;
     X=TempOffs1%BuffDimX;
     Y=TempOffs1/BuffDimX;
    }
  /* Repeat the main cycle until the Queue[] is not empty */
  } while (QueuePnt>=0);
}
Here there is the test program:
#include <stdio.h>
#include <malloc.h>
#include "ffill.c"
#define RED_COL 0xFFFF0000
#define WIN_LEFT 52
#define WIN_TOP 48
#define WIN_WIDTH 920
#define WIN_HEIGHT 672
#define START_LEFT 0
#define START_TOP 671
#define BMP_HEADER_SIZE 54
typedef char T_Image_Header[BMP_HEADER_SIZE];
void main(void)
{
 T_Image_Header bmpheader;
 T_Image *image;
 T_Image *mask;
 T_Queue *MyQueue;
 FILE *stream;
 char *filename1="ffill1.bmp";
 char *filename2="ffill2.bmp";
 char *filename3="ffill3.bmp";
 int bwritten;
 int bread;
 image=malloc(sizeof(*image));
 mask=malloc(sizeof(*mask));
 MyQueue=malloc(sizeof(*MyQueue));
 stream=fopen(filename1,"rb");
 bread=fread(&bmpheader, 1, BMP_HEADER_SIZE, stream);
 bread=fread((char *)image, 1, IMAGE_SIZE<<2, stream);
 fclose(stream);
 memset(mask,0,IMAGE_SIZE<<2);
 NewFloodFill(START_LEFT,
              START_TOP,
              RED_COL,
              IMAGE_WIDTH,
              IMAGE_WIDTH*WIN_TOP+WIN_LEFT,
              WIN_WIDTH,
              WIN_HEIGHT,
              *image,
              NULL,
              *MyQueue);
 stream=fopen(filename2,"wb+");
 bwritten=fwrite(&bmpheader, 1, BMP_HEADER_SIZE, stream);
 bwritten=fwrite((char *)image, 1, IMAGE_SIZE<<2, stream);
 fclose(stream);
 stream=fopen(filename3,"wb+");
 bwritten=fwrite(&bmpheader, 1, BMP_HEADER_SIZE, stream);
 bwritten=fwrite((char *)mask, 1, IMAGE_SIZE<<2, stream);
 fclose(stream);
 free(MyQueue);
 free(mask);
 free(image);
}
I've used, for the input of the test program shown, the follow Windows uncompressed .BMP image (ffill1.bmp):

Filled, by the test program shown, as follows (ffill2.bmp):

Using "mask" instead of NULL, the output bitmap is (ffill3.bmp):
