I have a project for my data mining class, in which I have to code the reservoir sampling algorithm for files. The program takes as input a number k, the name of the input file, and the name of the output file to be created. The output file must contain k random lines from the input. I tried some stuff but the output is wrong.
This is the code I use:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
int countLines(FILE* file)
{
    char ch,lines=0;
    while ((ch=fgetc(file)) != EOF)
        if (ch=='\n')
            lines++;
    return(lines);
}
void itemSelection(FILE* fp1, FILE* fp2, int k)
{
    int i,j,n,test=0;
    char line[256];
    char** buffer;
    srand((unsigned int) time(NULL));
    buffer = (char**)malloc(sizeof(char*));
    for(i=0;i<k;i++)
        buffer[i] = (char*)malloc(256*sizeof(char));
    n = countLines(fp1);
    if(k>n)
    {
        rewind(fp1);
        while(fgets(line, 256, fp1)!=NULL)
        {
            printf("%s test\n",line); 
            fprintf(fp2,"%s",line); 
        }
    }
    else
    {
        rewind(fp1);    
        for(i=0;i<k;i++)
        {
            fgets(line, 256, fp1);
            buffer[i]=line;
            printf("first k lines:\t%s\n",buffer[i]);
        }
        for(i=k;i<n;i++)
        {
            fgets(line,256,fp1);
            printf("line is:\t%s.\n", line);
            j = rand() % (i+1);
            if(j<k)
            {
                buffer[j]=line;
                printf("later parts are:\t%s. J is:%d.\n",buffer[j],j);
            }
        }
    }
    for(i=0;i<k;i++)
        printf("buffer test:\t%s.\n", buffer[i]);
}
void printFunc(FILE* fp2,int k)
{
    char line[256];
    int i;
    rewind(fp2);
    for(i=0;i<k;i++)
    {
        fgets(line, 256, fp2);
        printf("print test is:\t%s.\n",line);
    }   
}
void main(int args, char** argv)
{
    FILE* fp1;
    FILE* fp2;
    int k;
    if(args<4)
    {
        printf("Expected more arguments!\n");
        exit(-1);
    }
    fp1 = fopen(argv[2],"r");
    if(fp1 == NULL)
    {
        printf("Could not open input file!\n");
        perror("Error: ");
        exit(-1);
    }
    fp2 = fopen(argv[3],"w");
    if(fp2 == NULL)
    {
        printf("Could not open output file!\n");
        perror("Error: ");
        exit(-1);
    }
    k = atoi(argv[1]);
    itemSelection(fp1, fp2, k);
    printFunc(fp2,k);
    fclose(fp1);
    fclose(fp2);
}
What this program tries to do is read the k first lines from the file and store it in a 2-dimensional string array of (k,256) size. Then for every next line generate a random number j, and if that number is smaller than k, replace the buffer[j] with the most recently line taken from the file.
However the output I get consists of k lines of }, which is the last character of the input.
Like this(e.g k=5):
}
}
}
}
}
When I print the buffer to see its content, it shows correctly. But when I write to the file it writes the wrong output.
Any help would be highly appreciated! Thank you in advance!
 
     
     
    