/******************************************************************************
 *        CILK Histogram                                                      *
 *                                                                            *
 * COPYRIGHT: University of Siena                                             *
 *   AUTHORS: Roberto Giorgi, Marco Procaccini                                *
 *   VERSION: 0.0.2                                                           *
 *      DATE: 19/12/23 (dd/mm/yy)                                             *
 *                                                                            *
 *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <pthread.h>
#include <cilk/cilk.h>
#include <cilk/cilk_api.h>
#include <sys/time.h>
using namespace std;
//#define DEBUG

// HISTOGRAM RELATED DEFINITIONS AND DECLARATIONS
// DSIZE: number of input elements allocated for `color` in prepare().
#define DSIZE 100000000
// BSIZE: number of histogram bins (and of per-bin mutexes).
#define BSIZE 4
// DATA: element type of the input array.
#define DATA  unsigned char
// BIN: counter type of one histogram bin.
#define BIN   uint64_t
// Input array of DSIZE elements; each element is used as a bin index.
DATA *color; 
// Output array of BSIZE counters filled by the kernels.
BIN  *histogram;

// CILK RELATED DEFINITIONS AND DECLARATIONS
// Cilk-style lock names implemented on top of pthread mutexes.
#define Cilk_lockvar pthread_mutex_t
#define Cilk_lock pthread_mutex_lock
#define Cilk_unlock pthread_mutex_unlock
// Array of BSIZE mutexes allocated in prepare(); lock[b] guards histogram[b].
Cilk_lockvar *lock;

// PERFORMANCE MEASUREMENTS DEFINITIONS AND DECLARATIONS
// Scratch buffer filled by gettimeofday() in xzonestart()/xzonestop().
struct timeval  tv;
// Timestamp (milliseconds) recorded by xzonestart(), read by xzonestop().
long long start_time;

// XZONESTART FUNCTION --------------------------------------------------------
// Announces the start of kernel `z` and stores the current wall-clock time,
// expressed in milliseconds, in the global `start_time`.
void xzonestart(int z) {
    printf("[INFO]: starting execution of KERNEL%d\n", z);

    gettimeofday(&tv, NULL);

    // seconds -> ms, microseconds -> ms
    long long whole_ms = (tv.tv_sec) * 1000;
    long long frac_ms  = (tv.tv_usec) / 1000;
    start_time = whole_ms + frac_ms;
}

// XZONESTOP FUNCTION ---------------------------------------------------------
// Samples the wall clock again and prints the time elapsed, in milliseconds,
// since the value stored in the global `start_time`, tagged with kernel `z`.
void xzonestop(int z) {
    gettimeofday(&tv, NULL);

    // seconds -> ms, microseconds -> ms
    long long now_ms = (tv.tv_sec) * 1000;
    now_ms += (tv.tv_usec) / 1000;

    long long elapsed_ms = now_ms - start_time;
    printf("[INFO]: EXECUTION TIME = %lld ms (KERNEL%d)\n", elapsed_ms, z);
}

// PREPARE FUNCTION -----------------------------------------------------------
// Allocates and initializes the global state used by one kernel run:
//   - color:     DSIZE input elements, color[i] = i % BSIZE
//   - histogram: BSIZE counters, all zero
//   - lock:      BSIZE mutexes, one per histogram bin
// Terminates the program with EXIT_FAILURE if an allocation or a mutex
// initialization fails. The buffers are released by report().
void prepare() {
    printf("[INFO]: histogram size %d, colors %d - preparing...\n",BSIZE, DSIZE);

    color = (DATA*) malloc(DSIZE * sizeof(DATA));
    histogram = (BIN*) calloc(BSIZE, sizeof(BIN));   // zero-filled counters
    lock  = (pthread_mutex_t *) calloc(BSIZE,sizeof(pthread_mutex_t));
    if (color == NULL || histogram == NULL || lock == NULL) {
        fprintf(stderr, "[ERROR]: prepare: out of memory\n");
        exit(EXIT_FAILURE);
    }

    // A zero-filled pthread_mutex_t is not guaranteed to be a valid mutex;
    // initialize each one explicitly before any kernel locks it.
    for (int i = 0; i < BSIZE; ++i) {
        if (pthread_mutex_init(&lock[i], NULL) != 0) {
            fprintf(stderr, "[ERROR]: prepare: mutex initialization failed\n");
            exit(EXIT_FAILURE);
        }
    }

    // Color data initialization. The modulo is taken BEFORE narrowing to
    // DATA so every value is a valid bin index for any BSIZE that fits DATA.
    for (int i = 0; i < DSIZE; ++i) color[i] = (DATA)(i % BSIZE);
#ifdef DEBUG
    for(int i=0; i< DSIZE; i++) {
        printf("[DBUG] colors[%d]\t%u\n",i, (unsigned)color[i]);
    }
#endif
}

// KERNEL1 FUNCTION -----------------------------------------------------------
// KERNEL1: recursive divide-and-conquer histogram.
// Splits [color, color + size) in two halves until a single element is left,
// then increments that element's bin while holding the bin's mutex.
//   histogram: array of counters indexed by element value
//   color:     first element of the range to process
//   size:      number of elements in the range (0 is a no-op)
// Each element value must be a valid index into both `histogram` and the
// global `lock` array.
void histo_cilk1(BIN *histogram, DATA *color, uint64_t size) {
    // An empty range would never reach the size == 1 base case and would
    // recurse forever on (0, 0) halves.
    if (size == 0) return;

    if (size == 1) {
        Cilk_lock(&lock[*color]);
        histogram[*color]++;
        Cilk_unlock(&lock[*color]);
    } else {
        cilk_spawn histo_cilk1(histogram, color, size/2);
        cilk_spawn histo_cilk1(histogram, color + size/2, size - size/2);
        cilk_sync;
    }
}

// KERNEL2 FUNCTION -----------------------------------------------------------
// KERNEL2: flat parallel loop histogram.
// Every iteration of the cilk_for handles one element and increments its bin
// while holding that bin's mutex from the global `lock` array.
//   histogram: array of counters indexed by element value
//   color:     input elements
//   size:      number of input elements
void histo_cilk2(BIN *histogram, DATA *color, uint64_t size) {
    // The index has the same type as `size`: an int index would be compared
    // as unsigned and overflow once size exceeds INT_MAX.
    cilk_for (uint64_t i = 0; i < size; ++i) {
        BIN b = color[i];
        Cilk_lock(&lock[b]);
        histogram[b]++;
        Cilk_unlock(&lock[b]);
    }
}

// KERNEL3 FUNCTION -----------------------------------------------------------
// KERNEL3: privatized histogram.
// The input is cut into slices of BSIZE consecutive elements. Each slice
// accumulates into its own private group of BSIZE counters, so the parallel
// phase needs no locking; the private groups are then summed sequentially
// into `histogram`.
//   histogram: BSIZE counters, incremented (not reset) by this function
//   color:     input elements, each expected to be < BSIZE
//   size:      number of input elements (0 is a no-op)
// Terminates the program with EXIT_FAILURE if the scratch buffer cannot be
// allocated.
void histo_cilk3(BIN *histogram, DATA *color, uint64_t size) {
    if (size == 0) return;

    // ceil(size / BSIZE), kept in 64 bits so large inputs are not truncated
    uint64_t nslices = (size - 1) / BSIZE + 1;

    // One zero-initialized group of BSIZE counters per slice.
    BIN *local_histogram = (BIN*) calloc(nslices, BSIZE * sizeof(BIN));
    if (local_histogram == NULL) {
        fprintf(stderr, "[ERROR]: histo_cilk3: out of memory\n");
        exit(EXIT_FAILURE);
    }

    cilk_for (uint64_t s = 0; s < nslices; s++) {
        uint64_t start = s * BSIZE;
        // The last slice may be shorter than BSIZE elements.
        uint64_t end = start + BSIZE;
        if (end > size) end = size;

        for (uint64_t i = start; i < end; ++i) {
            BIN b = color[i];
            // Slice s only ever writes counters [start, start + BSIZE).
            local_histogram[start + b] += 1;
        }
    }

    // Combine local histograms into the global histogram
    for (uint64_t s = 0; s < nslices; s++)
        for (uint64_t i = 0; i < BSIZE; i++)
            histogram[i] += local_histogram[s * BSIZE + i];

    free(local_histogram);
}

// COMPUTE FUNCTION ----------------------------------------------------------
// Runs histogram kernel number `z` (1, 2 or 3) over the global `color` array
// into the global `histogram`, bracketed by xzonestart()/xzonestop().
// For any other `z` no kernel runs; only the two timing calls are made.
void compute(int z) {
    xzonestart(z);

    if (z == 1) {
        histo_cilk1(histogram, color, DSIZE);
    } else if (z == 2) {
        histo_cilk2(histogram, color, DSIZE);
    } else if (z == 3) {
        histo_cilk3(histogram, color, DSIZE);
    }

    xzonestop(z);
}

// REPORT FUNCTION ------------------------------------------------------------
void report() {
    printf("[INFO]: computation completed - starting verification...\n");

    BIN *histo_check = (BIN*) calloc (BSIZE,sizeof(BIN));
    for(int i=0;i<DSIZE;i++) histo_check[color[i]/BSIZE]+=1;

    uint64_t histo_sum=0; 
    uint64_t histocheck_sum=0; 

    for (int i=0; i<BSIZE;i++) {
        histo_sum+=histogram[i];
        histocheck_sum+=histo_check[i];
    }
#ifdef DEBUG
    printf("[DBUG]: histogram checksum %ld histocheck %ld\n",
        histo_sum, histocheck_sum);
#endif

    if(histo_sum !=  histocheck_sum) {
        printf("FAILED\n");
     } else{
        printf("***SUCCESS****\n");
    }
    free(histo_check); free(histogram); free(color); free(lock);
}


// MAIN FUNCTION -----------------------------------------------------------
// Runs the three histogram kernels in order; each run gets freshly prepared
// data and is verified by report(). Command-line arguments are not used.
int main(int argc, char* argv[]){
    (void)argc;
    (void)argv;

    for (int kernel = 1; kernel <= 3; ++kernel) {
        prepare();
        compute(kernel);
        report();
    }

    return 0;
}
