Consider a L x L size matrix M, whose entries can be either 0 or 1. Each element is 1 with probability p and 0 with probability 1 - p. I will call the elements labelled 1 as black elements and elements labelled 0 as white elements. I'm trying to write a code which:
Generates a random matrix with entries 0's and 1's. I need to input size of matrix L and the probability p.
Labels all the black elements belonging to the same cluster with the same number. (Define a cluster of black element as a maximal connected component in the graph of cells with the value of 1, where edges connect cells whose rows and columns both differ by at most 1 (so up to eight neighbours for each cell). In other words if two black elements of the matrix share an edge or a vertex consider them as belonging to the same black cluster. That is, think of the matrix as a large square and the elements as small squares in the large square.)
- Within a loop that runs from p = 0 to p = 100 (I'm referring to probability p as a percentage): the number of black clusters of each size is written onto a file corresponding to that value of p.
For example:
Input: p = 30, L = 50
Output (which is written to a data file for each p; thus there are 101 data files created by this program, from p = 0 to p = 100):
1 100 (i.e. there are 100 black clusters of size 1)
2 20 (i.e. there are 20 black clusters of size 2)
3 15 (i.e. there are 15 black clusters of size 3)
and so on...
#include "stdafx.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <string.h>
int *labels;
int n_labels = 0;
int uf_find(int x) {
int y = x;
while (labels[y] != y)
y = labels[y];
while (labels[x] != x) {
int z = labels[x];
labels[x] = y;
x = z;
}
return y;
}
int uf_union(int x, int y) {
return labels[uf_find(x)] = uf_find(y);
}
int uf_make_set(void) {
labels[0] ++;
assert(labels[0] < n_labels);
labels[labels[0]] = labels[0];
return labels[0];
}
void uf_initialize(int max_labels) {
n_labels = max_labels;
labels = (int*)calloc(sizeof(int), n_labels);
labels[0] = 0;
}
void uf_done(void) {
free(labels);
labels = 0;
}
#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)
int hoshen_kopelman(int **matrix, int m, int n) {
uf_initialize(m * n / 2);
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
int up = (i == 0 ? 0 : matrix[i - 1][j]);
int left = (j == 0 ? 0 : matrix[i][j - 1]);
switch (!!up + !!left) {
case 0:
matrix[i][j] = uf_make_set();
break;
case 1:
matrix[i][j] = max(up, left);
break;
case 2:
matrix[i][j] = uf_union(up, left);
break;
}
int north_west;
if (i == 0 || j == 0)
north_west = 0;
else
north_west = matrix[i - 1][j - 1];
int north_east;
if (i == 0 || j == (n - 1))
north_east = 0;
else
north_east = matrix[i - 1][j + 1];
if (!!north_west == 1)
{
if (i != 0 && j != 0)
{
//if (rand() % 2 == 1)
//{
if (matrix[i][j - 1] == 0 && matrix[i - 1][j] == 0)
{
if (!!matrix[i][j] == 0)
matrix[i][j] = north_west;
else
uf_union(north_west, matrix[i][j]);
}
//}
}
else if (i == 0 || j == 0)
{
}
}
if (!!north_east == 1)
{
if (i != 0 && j != n - 1)
{
//if (rand() % 2 == 1)
//{
if (matrix[i - 1][j] == 0 && matrix[i][j + 1] == 0)
{
if (!!matrix[i][j] == 0)
matrix[i][j] = north_east;
else
uf_union(north_east, matrix[i][j]);
}
//}
}
else if (i == 0 || j == n - 1)
{
}
}
}
int *new_labels = (int*)calloc(sizeof(int), n_labels);
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0) {
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
void check_labelling(int **matrix, int m, int n) {
int N, S, E, W;
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
N = (i == 0 ? 0 : matrix[i - 1][j]);
S = (i == m - 1 ? 0 : matrix[i + 1][j]);
E = (j == n - 1 ? 0 : matrix[i][j + 1]);
W = (j == 0 ? 0 : matrix[i][j - 1]);
assert(N == 0 || matrix[i][j] == N);
assert(S == 0 || matrix[i][j] == S);
assert(E == 0 || matrix[i][j] == E);
assert(W == 0 || matrix[i][j] == W);
}
}
int cluster_count(int **matrix, int size, int N)
{
int i;
int j;
int count = 0;
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
if (matrix[i][j] == N)
count++;
}
}
return count;
}
int main(int argc, char **argv)
{
srand((unsigned int)time(0));
int p = 0;
printf("Enter number of rows/columns: ");
int size = 0;
scanf("%d", &size);
printf("\n");
FILE *fp;
printf("Enter number of averaging iterations: ");
int iterations = 0;
scanf("%d", &iterations);
for (int p = 0; p <= 100; p++)
{
char str[100];
sprintf(str, "BlackSizeDistribution%03i.txt", p);
fp = fopen(str, "w");
int **matrix;
matrix = (int**)calloc(10, sizeof(int*));
int** matrix_new = (int**)calloc(10, sizeof(int*));
matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
matrix = matrix_new;
for (int i = 0; i < size; i++)
{
matrix[i] = (int *)calloc(size, sizeof(int));
for (int j = 0; j < size; j++)
{
int z = rand() % 100;
z = z + 1;
if (p == 0)
matrix[i][j] = 0;
if (z <= p)
matrix[i][j] = 1;
else
matrix[i][j] = 0;
}
}
hoshen_kopelman(matrix, size, size);
int highest = matrix[0][0];
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
if (highest < matrix[i][j])
highest = matrix[i][j];
int* counter = (int*)calloc(sizeof(int*), highest + 1);
int high = 0;
for (int k = 1; k <= highest; k++)
{
counter[k] = cluster_count(matrix, size, k);
if (counter[k] > high)
high = counter[k];
}
int* size_distribution = (int*)calloc(sizeof(int*), high + 1);
for (int y = 1; y <= high; y++)
{
int count = 0;
for (int z = 1; z <= highest; z++)
if (counter[z] == y)
count++;
size_distribution[y] = count;
}
for (int k = 1; k <= high; k++)
{
fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
printf("\n%d\t%d", k, size_distribution[k]);
}
printf("\n");
check_labelling(matrix, size, size);
for (int i = 0; i < size; i++)
free(matrix[i]);
free(matrix);
}
}
I used the data files outputted by this program to create bar graphs for each p ranging from 0 to 100, using Gnuplot.
Some sample graphs:
This one corresponds to input p = 3 and L = 100
This one corresponds to p = 90 and L = 100
Okay, so I suppose that was enough context for the actual question I had.
The program I wrote only outputs value for one 1 iteration per value of p. Since this program is for a scientific computing purpose I need more accurate values and for that I need to output "averaged" size distribution value; over say 50 or 100 iterations. I'm not exactly sure how to do the averaging.
To be more clear this is what I want:
Say the output for three different runs of the program gives me (for say p = 3, L = 100)
Size Iteration 1 Iteration 2 Iteration 3 Averaged Value (Over 3 iterations)
1 150 170 180 167
2 10 20 15 18
3 1 2 1 1
4 0 0 1 0
.
.
.
I want to do something similar, except that I would need to perform the averaging over 50 or 100 iterations to get accurate values for my mathematical model. By the way, I don't really need to output the values for each iteration i.e. Iteration 1, Iteration 2, Iteration 3, ... in the data file. All I need to "print" is the first column and the last column (which has the averaged values) in the above table.
Now, I would probably need to modify the main function for that to do the averaging.
Here's the main function:
int main(int argc, char **argv)
{
srand((unsigned int)time(0));
int p = 0;
printf("Enter number of rows/columns: ");
int size = 0;
scanf("%d", &size);
printf("\n");
FILE *fp;
printf("Enter number of averaging iterations: ");
int iterations = 0;
scanf("%d", &iterations);
for (int p = 0; p <= 100; p++)
{
char str[100];
sprintf(str, "BlackSizeDistribution%03i.txt", p);
fp = fopen(str, "w");
int **matrix;
matrix = (int**)calloc(10, sizeof(int*));
int** matrix_new = (int**)calloc(10, sizeof(int*));
matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
matrix = matrix_new;
for (int i = 0; i < size; i++)
{
matrix[i] = (int *)calloc(size, sizeof(int));
for (int j = 0; j < size; j++)
{
int z = rand() % 100;
z = z + 1;
if (p == 0)
matrix[i][j] = 0;
if (z <= p)
matrix[i][j] = 1;
else
matrix[i][j] = 0;
}
}
hoshen_kopelman(matrix, size, size);
int highest = matrix[0][0];
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
if (highest < matrix[i][j])
highest = matrix[i][j];
int* counter = (int*)calloc(sizeof(int*), highest + 1);
int high = 0;
for (int k = 1; k <= highest; k++)
{
counter[k] = cluster_count(matrix, size, k);
if (counter[k] > high)
high = counter[k];
}
int* size_distribution = (int*)calloc(sizeof(int*), high + 1);
for (int y = 1; y <= high; y++)
{
int count = 0;
for (int z = 1; z <= highest; z++)
if (counter[z] == y)
count++;
size_distribution[y] = count;
}
for (int k = 1; k <= high; k++)
{
fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
printf("\n%d\t%d", k, size_distribution[k]);
}
printf("\n");
check_labelling(matrix, size, size);
for (int i = 0; i < size; i++)
free(matrix[i]);
free(matrix);
}
}
One of the ways I thought of was declaring a 2D array of size (largest size of black cluster possible for that) x (number of averaging iterations I want), for each run of the p-loop, and at the end of the program extract the average size distribution for each p from the 2D array. The largest size of black cluster possible in a matrix of size 100 x 100 (say) is 10,000. But for say smaller values of p (say p = 1, p = 20, ...) such large size clusters are never even produced! So creating such a large 2D array in the beginning would be a terrible wastage of memory space and it would take days to execute! (Please keep in mind that I need to run this program for L = 1000 and L = 5000 and if possible even L = 10000 for my purpose)
There must be some other more efficient way to do this. But I don't know what. Any help is appreciated.



Note that both axes have logarithmic scaling, so it is a log-log plot. It was generated by running the above program a few times, once for each unique 
As you can see, because nonzero clusters can sometimes connect diagonally too, there are more larger nonzero clusters than zero clusters. The computation took about 37 seconds on an i5-7200U laptop. Because the Xorshift64* seed is given explicitly, this is a repeatable test.