#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/* Short aliases for the unsigned types used by the histogram code. */
typedef unsigned char uchar;
typedef unsigned int uint;

/* Number of histogram bins; valid bin indices are 0 .. HISTOGRAM_BIN_COUNT-1. */
#define HISTOGRAM_BIN_COUNT 4
/* Number of input samples that main() allocates for Color. */
#define N 32

/* Input samples; main() allocates N entries and stores one bin index in each. */
uchar *Color;
/* Per-bin totals; main() allocates HISTOGRAM_BIN_COUNT entries. */
uint *Histogram;

/*
 * Counts the values in color[0 .. size-1] and adds the counts to histogram.
 *
 * histogram: array of HISTOGRAM_BIN_COUNT counters; the counts are added to
 *            whatever the array already holds.
 * color:     input values; each value is used directly as a bin index.
 * size:      number of elements in color.
 *
 * Every thread of the parallel region counts into its own private array and
 * then merges that array into histogram inside a critical section. Values
 * that are not a valid bin index (>= HISTOGRAM_BIN_COUNT) are skipped, and
 * produce no trace line, instead of being written out of bounds.
 *
 * Side effect: prints one trace line to stdout per counted element and one
 * per merged bin.
 */
void histo_omp33(uint *histogram, uchar *color, uint size) {
    #pragma omp parallel
    {
        /* Declared inside the parallel region, so each thread has its own copy. */
        uint histogram_private[HISTOGRAM_BIN_COUNT] = {0};
        int tid = omp_get_thread_num();

        /* Unsigned index matches the type of size (needs OpenMP 3.0 or later). */
        #pragma omp for
        for (uint i = 0; i < size; i++) {
            uchar bin = color[i];
            if (bin >= HISTOGRAM_BIN_COUNT) {
                continue;   /* not a bin index: counting it would overrun the array */
            }
            histogram_private[bin]++;
            printf("t%03d  i=%-3u  c=%-3d  d=%-u\n", tid, i, bin, histogram_private[bin]); fflush(stdout);
        }

        /*
         * Merge under a single critical section per thread: the shared
         * histogram is updated by one thread at a time, and that thread's
         * trace lines stay together in the output.
         */
        #pragma omp critical
        {
            for (int b = 0; b < HISTOGRAM_BIN_COUNT; b++) {
                /* Each trace line starts with a dump of the thread's private counts. */
                for (int k = 0; k < HISTOGRAM_BIN_COUNT; ++k) printf("%3u ", histogram_private[k]);
                fflush(stdout);
                histogram[b] += histogram_private[b];
                printf("t%03d  i=%-3d  d=%-u\n", tid, b, histogram[b]); fflush(stdout);
            }
        }
    }
}

int main() {
    Color = (uchar*)malloc(N * sizeof(uchar));
    Histogram = (uint*)malloc(HISTOGRAM_BIN_COUNT * sizeof(uint));
    srand(2017);
    for (uint i = 0; i < N; ++i) Color[i] = (uchar)(rand() % HISTOGRAM_BIN_COUNT);
    for (uint i = 0; i < HISTOGRAM_BIN_COUNT; ++i) Histogram[i] = 0;

    omp_set_dynamic(0);     // Explicitly disable dynamic teams
    omp_set_num_threads(4); // Use 4 threads for all consecutive parallel regions

    for(int i=0; i<N; i++) printf("%3d ", Color[i]); printf("\n"); fflush(stdout);

    histo_omp33(Histogram, Color, N);

    printf("---\n"); fflush(stdout);
    for(int c=0; c<HISTOGRAM_BIN_COUNT; c++)
        printf("c=%-3d  d=%-d\n", c, Histogram[c]); fflush(stdout);

    free(Color); free(Histogram);
}
