/*
 * Copyright 2012-2017 Intel Corporation.
 * 
 * This software is supplied under the terms of a license agreement or
 * nondisclosure agreement with Intel Corporation and may not be copied
 * or disclosed except in accordance with the terms of that agreement.
*/

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include <strings.h>

/* MSRs Needed
 * Register addresses are written in decimal; the hexadecimal equivalent is
 * noted next to each one. They are used as the offset argument of read_msr().
 */
#define MSR_PLATFORM_INFO 206 /* 0xCE */
#define MSR_RAPL_POWER_UNIT 1542 /* 0x606 */
#define MSR_IA32_TEMPERATURE_TARGET 418 /* 0x1A2 */
#define MSR_IA32_APERF 232 /* 0xE8 */
#define MSR_IA32_MPERF 231 /* 0xE7 */
#define MSR_IA32_THERM_STATUS 412 /* 0x19C */
#define MSR_PKG_ENERGY_STATUS 1553 /* 0x611 */
#define MSR_PP0_ENERGY_STATUS 1593 /* 0x639 */
#define MSR_DRAM_ENERGY_STATUS 1561 /* 0x619 */
#define MSR_IA32_PACKAGE_THERM_STATUS 433 /* 0x1B1 */

/* Processor Info Structure Type */
typedef struct {
    /* CPU variables */
    unsigned int       cpu_id;
    unsigned int       core_id;
    unsigned int       pack_id;
    unsigned long long cpu_freq;
    /* Core variables - Available only for 1st cpu in core */
    unsigned int       core_temp;
    /* Package variables - Available only for 1st cpu in package */
    unsigned int       pack_temp;
    double             pack_ener;
    double             ram_ener;
    double             core_ener;
    /* Flags for cpu num */
    unsigned char      first_cpu_in_package;
    unsigned char      first_cpu_in_core;
} cpu_info_t;

/* MSR Info Structure Type
 * Holds the values sampled from the MSRs of one CPU by read_cpu_msr_data().
 * After calculate_delta_values() the "first" sample array is overwritten with
 * the differences between two samples (temperatures take the second sample).
 */
typedef struct {
    unsigned long long aperf;   /* MSR_IA32_APERF value, multiplied by 1024 after reading */
    unsigned long long mperf;   /* MSR_IA32_MPERF value, multiplied by 1024 after reading */
    unsigned int core_temp;     /* Only read for the first CPU of each core */
    unsigned int pack_temp;     /* Only read for the first CPU of each package */
    unsigned int pack_energy;   /* Low 32 bits of MSR_PKG_ENERGY_STATUS */
    unsigned int dram_energy;   /* Low 32 bits of MSR_DRAM_ENERGY_STATUS */
    unsigned int core_energy;   /* Low 32 bits of MSR_PP0_ENERGY_STATUS */
} msr_values_t;

/* Functions */

/* qsort() comparator for the CPU Info array.
 * Entries are ordered by Package ID, then Core ID, then CPU ID (ascending).
 * Returns -1, 0 or 1 when a sorts before, equal to or after b. */
static int comp_func(const void *a, const void *b)
{
    const cpu_info_t *lhs = a;
    const cpu_info_t *rhs = b;

    /* The first key that differs decides the order */
    if (lhs->pack_id != rhs->pack_id)
        return (lhs->pack_id > rhs->pack_id) ? 1 : -1;

    if (lhs->core_id != rhs->core_id)
        return (lhs->core_id > rhs->core_id) ? 1 : -1;

    if (lhs->cpu_id != rhs->cpu_id)
        return (lhs->cpu_id > rhs->cpu_id) ? 1 : -1;

    return 0;
}

#ifdef DEBUG
/* DEBUG helper: writes a table to stdout with a header line, one line for the
 * summary record and one line per entry of the CPU Info array. */
static void print_cpu_info(cpu_info_t *cpu_info, int num_cpus, cpu_info_t *summary)
{
    int row = 0;

    /* Header */
    printf("CPU_ID   CORE_ID   PACK_ID   1st_PACK   1st_CORE   CPU_Freq   Pack_Temp   Core_Temp   Pack_Energy   Core_Energy   RAM_Energy\n");

    /* Summary line: ID and flag columns are not applicable, so print dashes */
    printf("%6c   %7c   %7c   %8c   %8c   %8llu   %9u   %9u   %11.2f   %11.2f   %10.2f\n",
           '-', '-', '-', '-', '-',
           summary->cpu_freq,
           summary->pack_temp,
           summary->core_temp,
           summary->pack_ener,
           summary->core_ener,
           summary->ram_ener);

    /* One line per CPU; a '*' marks the first CPU of a package / core */
    while (row < num_cpus) {
        const cpu_info_t *cpu = &cpu_info[row];
        const char pack_mark = cpu->first_cpu_in_package ? '*' : ' ';
        const char core_mark = cpu->first_cpu_in_core ? '*' : ' ';

        printf("%6u   %7u   %7u   %8c   %8c   %8llu   %9u   %9u   %11.2f   %11.2f   %10.2f\n",
               cpu->cpu_id,
               cpu->core_id,
               cpu->pack_id,
               pack_mark,
               core_mark,
               cpu->cpu_freq,
               cpu->pack_temp,
               cpu->core_temp,
               cpu->pack_ener,
               cpu->core_ener,
               cpu->ram_ener);
        row++;
    }
}
#endif

/* Helper: reads the leading decimal integer of a per-CPU sysfs file.
 * path_fmt must contain exactly one %d, which is replaced by cpu_id.
 * Returns 0 on success, 1 if the path does not fit or the file cannot be
 * opened, and 2 if the file does not start with an integer. */
static int read_topology_value(const char *path_fmt, int cpu_id, int *value)
{
    char file_name[256];
    FILE *fp = NULL;
    int len = 0;
    int matched = 0;

    len = snprintf(file_name, sizeof(file_name), path_fmt, cpu_id);
    if (len < 0 || (size_t)len >= sizeof(file_name))
        return 1;

    fp = fopen(file_name, "r");
    if (!fp)
        return 1;

    matched = fscanf(fp, "%d", value);
    fclose(fp);

    return (matched == 1) ? 0 : 2;
}

/* Function that creates and initializes the CPU Info array.
 *
 * The CPUs are enumerated from the "cpuN" lines of /proc/stat and, for each of
 * them, the core ID, package ID and the "first in core"/"first in package"
 * flags are taken from the topology files of that same CPU number in sysfs.
 * The resulting array is sorted by Package, Core and CPU ID.
 *
 * cpu_info - Output. Receives the newly allocated array on success only; the
 *            caller releases it with free_cpu_info().
 * num_cpus - Output. Number of "cpuN" lines found (set even on failure).
 * summary  - Output. Zeroed once the array has been allocated.
 *
 * Returns 0 on success, 1 if /proc/stat cannot be opened, 2 if scanning its
 * aggregate "cpu" line fails, and 3 on allocation failure or when any
 * per-CPU information cannot be read.
 */
static int init_cpu_info(cpu_info_t **cpu_info, unsigned int *num_cpus, cpu_info_t *summary)
{
    const char *STAT_FILE      = "/proc/stat";
    const char *CORE_FILE      = "/sys/devices/system/cpu/cpu%d/topology/core_id";
    const char *PACK_FILE      = "/sys/devices/system/cpu/cpu%d/topology/physical_package_id";
    const char *THREAD_FILE    = "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list";
    const char *CORE_PACK_FILE = "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list";

    int cpu_id = 0;
    int value = 0;
    int rc = 0;
    unsigned int i = 0;
    FILE *stat_fp = NULL;
    cpu_info_t *cpu_info_arr = NULL;

    *num_cpus = 0;

    /* Get the number of cpus from the /proc/stat file */
    stat_fp = fopen(STAT_FILE, "r");
    if (!stat_fp)
        return 1;

    /* Skip the aggregate "cpu" line. Every field is suppressed, so a
     * successful scan reports 0 converted items. */
    if (fscanf(stat_fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n") != 0) {
        fclose(stat_fp);
        return 2;
    }

    while (fscanf(stat_fp, "cpu%d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_id) == 1)
        *num_cpus += 1;

    /* Reserve the memory for all the cpus found */
    cpu_info_arr = calloc((size_t)*num_cpus, sizeof(*cpu_info_arr));
    if (!cpu_info_arr)
        goto error;

    bzero(summary, sizeof(cpu_info_t));

    /* Second pass: for every CPU ID reported on /proc/stat */
    if (fseek(stat_fp, 0, SEEK_SET) != 0)
        goto clean;
    if (fscanf(stat_fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n") == EOF)
        goto clean;

    for (i = 0; i < *num_cpus; i++) {
        /* Fewer lines than in the first pass would leave zeroed entries in
         * the array, so treat it as a failure instead of returning them. */
        if (fscanf(stat_fp, "cpu%d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_id) != 1)
            goto clean;

        /* Get the CPU ID */
        cpu_info_arr[i].cpu_id = cpu_id;

        /* The sysfs directory is selected with the CPU number parsed from
         * /proc/stat, not with the array index: both differ as soon as the
         * listed CPU numbers are not contiguous. */

        /* Get Core ID */
        if (read_topology_value(CORE_FILE, cpu_id, &value))
            goto clean;
        cpu_info_arr[i].core_id = value;

        /* Get Package ID */
        if (read_topology_value(PACK_FILE, cpu_id, &value))
            goto clean;
        cpu_info_arr[i].pack_id = value;

        /* Mark the flag if this CPU is the first entry in the list of threads
         * of its core. A list without a leading number leaves the flag unset. */
        rc = read_topology_value(THREAD_FILE, cpu_id, &value);
        if (rc == 1)
            goto clean;
        if (rc == 0 && value == cpu_id)
            cpu_info_arr[i].first_cpu_in_core = 1;

        /* Find out if CPU is the first in the Package */
        if (read_topology_value(CORE_PACK_FILE, cpu_id, &value))
            goto clean;
        if (value == cpu_id)
            cpu_info_arr[i].first_cpu_in_package = 1;
    }

    fclose(stat_fp);

    /* Sort the information by Package, Core, CPU, in that order */
    qsort(cpu_info_arr, (size_t)*num_cpus, sizeof(cpu_info_t), comp_func);

    *cpu_info = cpu_info_arr;
    return 0;

clean:
    free(cpu_info_arr);

error:
    fclose(stat_fp);
    return 3;
}

/* Function that reads one MSR through the /dev/cpu/<cpu_id>/msr device.
 *
 * cpu_id - CPU whose MSR device file is opened.
 * offset - MSR address, used as the file offset of the read.
 * msr    - Output. Receives the register value.
 *
 * Returns 0 on success, 1 if the device file cannot be opened (or its path
 * cannot be built) and 2 if the full value could not be read.
 */
static int read_msr(unsigned int cpu_id, off_t offset, unsigned long long *msr)
{
    char file_name[256];
    int fd = -1;
    int len = 0;
    ssize_t nread = 0;

    /* cpu_id is unsigned, so it is formatted with %u */
    len = snprintf(file_name, sizeof(file_name), "/dev/cpu/%u/msr", cpu_id);
    if (len < 0 || (size_t)len >= sizeof(file_name))
        return 1;

    fd = open(file_name, O_RDONLY);
    if (fd == -1)
        return 1;

    nread = pread(fd, msr, sizeof(*msr), offset);
    close(fd);

    /* A short read would leave part of *msr unset, so only a complete read
     * counts as success. */
    if (nread != (ssize_t)sizeof(*msr))
        return 2;

    return 0;
}

/* Function to get the CPU cycle count.
 * Executes the x86 RDTSC instruction, which returns the counter split in two
 * 32-bit halves (EAX = low, EDX = high), and joins them into a 64-bit value.
 * The __asm__/__volatile__ spellings are used so that the code also compiles
 * in strict ISO modes (-std=c99/-std=c11), where plain "asm" is not a keyword.
 */
static unsigned long long get_cpu_cycle_count(void)
{
    unsigned int lo = 0;
    unsigned int hi = 0;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

    return ((unsigned long long)hi << 32) | (unsigned long long)lo;
}

/* Function to move the execution of the program to another CPU ID.
 * The given set is cleared, cpu_id is added as its only member and the set is
 * applied as the affinity mask of the calling thread (pid 0).
 * Returns 0 on success and 1 if sched_setaffinity() fails. */
static int change_executing_cpu(unsigned int cpu_id, cpu_set_t *cpu_set, size_t cpu_set_size)
{
    int rc = 0;

    /* Rebuild the mask from scratch so it contains cpu_id only */
    CPU_ZERO_S(cpu_set_size, cpu_set);
    CPU_SET_S(cpu_id, cpu_set_size, cpu_set);

    rc = sched_setaffinity(0, cpu_set_size, cpu_set);

    return (rc == -1) ? 1 : 0;
}

/* Function that reads all the MSR information per CPU.
 *
 * For every entry of cpu_data the calling thread is pinned to that CPU and the
 * APERF/MPERF counters are sampled. Core temperature is read only for entries
 * flagged first_cpu_in_core; package temperature and the package/DRAM/core
 * energy counters are read only for entries that are flagged both
 * first_cpu_in_core and first_cpu_in_package. Fields that are not read are
 * left as they were in cpu_msr_info.
 *
 * cpu_data     - CPU Info array (IDs and flags are used).
 * num_cpus     - Number of entries in cpu_data and cpu_msr_info.
 * target_temp  - Value the temperature readouts are subtracted from.
 * cpu_msr_info - Output array, one entry per CPU, same order as cpu_data.
 *
 * Returns 0 on success and 1 on any failure (CPU set allocation, affinity
 * change, MSR read, or no acceptable APERF/MPERF sample within the retries).
 * Note that the thread is left pinned to the last CPU that was visited.
 */
static int read_cpu_msr_data(const cpu_info_t *cpu_data,
                             int num_cpus,
                             unsigned int target_temp,
                             msr_values_t *cpu_msr_info)
{
    const int PERF_RETRIES = 5;
    unsigned long long cpu_cycle_start = 0;
    unsigned long long cpu_cycle_middle = 0;
    unsigned long long cpu_cycle_end = 0;
    unsigned long long cpu_cycle_aperf = 0;
    unsigned long long cpu_cycle_mperf = 0;
    unsigned long long msr_data;
    cpu_set_t *cpu_set = NULL;
    size_t cpu_set_size = 0;
    int i = 0;
    int j = 0;

    /* Dynamically sized CPU set, reused for every affinity change below */
    cpu_set = CPU_ALLOC(num_cpus);
    if (cpu_set == NULL)
        return 1;
    cpu_set_size = CPU_ALLOC_SIZE(num_cpus);
    CPU_ZERO_S(cpu_set_size, cpu_set);

    for (i = 0; i < num_cpus; i++) {
        /* Read APERF and MPERF MSR for each CPU <<Bzy_MHz>> */
        for (j = 0; j < PERF_RETRIES;  j++) {
            if (change_executing_cpu(cpu_data[i].cpu_id, cpu_set, cpu_set_size))
                goto error;

            /* Dummy read */
            if (read_msr(cpu_data[i].cpu_id, MSR_IA32_APERF, &cpu_msr_info[i].aperf))
                goto error;

            /* Actual reading: the cycle counter is sampled before, between
             * and after the two MSR reads to time each of them */
            cpu_cycle_start = get_cpu_cycle_count();
            if (read_msr(cpu_data[i].cpu_id, MSR_IA32_APERF, &cpu_msr_info[i].aperf))
                goto error;
            cpu_cycle_middle = get_cpu_cycle_count();
            if (read_msr(cpu_data[i].cpu_id, MSR_IA32_MPERF, &cpu_msr_info[i].mperf))
                goto error;
            cpu_cycle_end = get_cpu_cycle_count();

            cpu_cycle_aperf = cpu_cycle_middle - cpu_cycle_start;
            cpu_cycle_mperf = cpu_cycle_end - cpu_cycle_middle;

            /* Accept the sample only when neither read took twice as long
             * (or more) as the other one; otherwise retry.
             * NOTE(review): presumably this rejects samples disturbed by an
             * interruption between the two reads - confirm. */
            if (cpu_cycle_aperf < (2 * cpu_cycle_mperf) &&
                cpu_cycle_mperf < (2 * cpu_cycle_aperf))
                break;
        }
        /* All the retries were used without getting an acceptable sample */
        if (j >= PERF_RETRIES)
                goto error;
        /* Each APERF and MPERF increment is equivalent to 1024 for Intel(R) Xeon Phi(TM) architecture*/
        cpu_msr_info[i].aperf *= 1024;
        cpu_msr_info[i].mperf *= 1024;

        /* Core level values are read through the first CPU of the core only */
        if(!cpu_data[i].first_cpu_in_core)
            continue;

        /* Read Core Temp from MSR <<CoreTmp>>
         * The 7-bit field at bits 22:16 is subtracted from target_temp */
        if (read_msr(cpu_data[i].cpu_id, MSR_IA32_THERM_STATUS, &msr_data))
            goto error;
        cpu_msr_info[i].core_temp = target_temp - ((msr_data >> 16) & 0x7F);

        /* Package level values are read through the first CPU of the package only */
        if(!cpu_data[i].first_cpu_in_package)
            continue;

        /* Read Package Temp from MSR <<PkgTmp>> */
        if (read_msr(cpu_data[i].cpu_id, MSR_IA32_PACKAGE_THERM_STATUS, &msr_data))
            goto error;
        cpu_msr_info[i].pack_temp = target_temp - ((msr_data >> 16) & 0x7F);

        /* Read Package Energy from MSR <<PkgWatt>> (low 32 bits are kept) */
        if (read_msr(cpu_data[i].cpu_id, MSR_PKG_ENERGY_STATUS, &msr_data))
            goto error;
        cpu_msr_info[i].pack_energy = msr_data & 0xFFFFFFFF;

        /* Read DRAM Energy from MSR <<RAMWatt>> (low 32 bits are kept) */
        if (read_msr(cpu_data[i].cpu_id, MSR_DRAM_ENERGY_STATUS, &msr_data))
            goto error;
        cpu_msr_info[i].dram_energy = msr_data & 0xFFFFFFFF;

        /* Read Core Energy from MSR <<CorWatt>> (low 32 bits are kept) */
        if (read_msr(cpu_data[i].cpu_id, MSR_PP0_ENERGY_STATUS, &msr_data))
            goto error;
        cpu_msr_info[i].core_energy = msr_data & 0xFFFFFFFF;
    }

    CPU_FREE(cpu_set);
    return 0;

error:
    CPU_FREE(cpu_set);
    return 1;
}

/* Replaces 'first' with the distance from 'first' to 'second' for a 32-bit
 * counter that may have wrapped around once between both readings (equal
 * readings give 0). Both arguments must be unsigned int expressions without
 * side effects, since each one is evaluated more than once. The do/while(0)
 * wrapper makes the macro behave as a single statement. */
#define DELTA_WRAP(first, second)                        \
    do {                                                 \
        if ((second) > (first)) {                        \
            (first) = (second) - (first);                \
        } else {                                         \
            (first) = 0x100000000 + (second) - (first);  \
        }                                                \
    } while (0)

/* Function that calculates delta values from MSRs value obtained from two different readings.
 *
 * num_cpus - Number of entries in both arrays.
 * first    - In: earlier reading. Out: per-CPU deltas (APERF, MPERF and the
 *            energy counters) plus the temperatures of the later reading.
 * second   - Later reading; not modified.
 *
 * Returns 0 on success and 1 if APERF or MPERF went backwards for some CPU; in
 * that case the entries before the failing one have already been converted.
 */
static int calculate_delta_values(unsigned int num_cpus, msr_values_t *first, msr_values_t *second)
{
    unsigned int i = 0;

    for (i = 0; i < num_cpus; i++) {
        msr_values_t *prev = &first[i];
        const msr_values_t *curr = &second[i];

        /* APERF & MPERF Deltas: these 64-bit values are not expected to wrap */
        if (prev->aperf > curr->aperf || prev->mperf > curr->mperf)
            return 1;
        prev->aperf = curr->aperf - prev->aperf;
        prev->mperf = curr->mperf - prev->mperf;
        if (prev->mperf == 0)
            prev->mperf = 1; /* Divide by zero protection */

        /* Core Temp: no delta, the latest reading is reported */
        prev->core_temp = curr->core_temp;

        /* Package Temp: no delta, the latest reading is reported */
        prev->pack_temp = curr->pack_temp;

        /* Package Energy Delta */
        DELTA_WRAP(prev->pack_energy, curr->pack_energy);

        /* DRAM Energy Delta */
        DELTA_WRAP(prev->dram_energy, curr->dram_energy);

        /* Core Energy Delta: skipped when the counter read 0 both times */
        if (prev->core_energy != 0 || curr->core_energy != 0) {
            DELTA_WRAP(prev->core_energy, curr->core_energy);
        }
    }

    return 0;
}

/* Function that calculates the summary information from all of the CPUs.
 *
 * APERF and MPERF are averaged over the CPUs, temperatures take the maximum
 * value found and the energy values are added up.
 *
 * num_cpus - Number of entries in cpu_info.
 * summary  - Output. Always zeroed first.
 * cpu_info - Per-CPU values to aggregate.
 *
 * Returns 0 on success and 1 when num_cpus is 0, as the averages cannot be
 * computed (summary is left zeroed).
 */
static int calculate_summary(unsigned int num_cpus, msr_values_t *summary, const msr_values_t *cpu_info)
{
    unsigned int i = 0;

    bzero(summary, sizeof(msr_values_t));

    /* Guard the divisions at the end of the function */
    if (num_cpus == 0)
        return 1;

    for (i = 0; i < num_cpus; i++) {
        const msr_values_t *cpu = &cpu_info[i];

        summary->aperf += cpu->aperf;
        summary->mperf += cpu->mperf;

        if (cpu->core_temp > summary->core_temp)
            summary->core_temp = cpu->core_temp;
        if (cpu->pack_temp > summary->pack_temp)
            summary->pack_temp = cpu->pack_temp;

        summary->pack_energy += cpu->pack_energy;
        summary->dram_energy += cpu->dram_energy;
        summary->core_energy += cpu->core_energy;
    }

    summary->aperf /= num_cpus;
    summary->mperf /= num_cpus;

    return 0;
}

/* Function that converts MSR data to CPU Output data.
 *
 * msr_data   - MSR deltas/values to convert.
 * cpu_data   - Output. Only the measurement fields are written.
 * base_fq    - Base frequency that is scaled by the APERF/MPERF ratio.
 * interval   - Time between the two MSR readings.
 * ener_units - Factor applied to the package and core energy counters.
 * dram_units - Factor applied to the DRAM energy counter.
 *
 * Always returns 0. msr_data->mperf and interval are used as divisors and are
 * not checked here.
 */
static int msr_to_cpu(const msr_values_t *msr_data, cpu_info_t *cpu_data, double base_fq, double interval, double ener_units, double dram_units)
{
    const double aperf = (double)msr_data->aperf;
    const double mperf = (double)msr_data->mperf;

    /* Temperatures are copied as they are */
    cpu_data->core_temp = msr_data->core_temp;
    cpu_data->pack_temp = msr_data->pack_temp;

    /* Frequency: base frequency scaled by the APERF/MPERF ratio */
    cpu_data->cpu_freq = (unsigned long long)(base_fq * aperf / mperf);

    /* Energy counters: scaled by their units and divided by the interval */
    cpu_data->pack_ener = (double)msr_data->pack_energy * ener_units / interval;
    cpu_data->ram_ener = (double)msr_data->dram_energy * dram_units / interval;
    cpu_data->core_ener = (double)msr_data->core_energy * ener_units / interval;

    return 0;
}

/* Function that fills CPU Info array from MSR data.
 *
 * Reads the base frequency ratio, the energy units and the temperature target
 * from the CPU the program is currently running on, takes two MSR samples of
 * every CPU separated by a 5 second sleep and converts the differences into
 * the measurement fields of cpu_data and summary_cpu.
 *
 * cpu_data    - In/Out. CPU Info array created by init_cpu_info().
 * num_cpus    - Number of entries in cpu_data.
 * summary_cpu - Output. Aggregated values of all the CPUs.
 *
 * Returns 0 on success, or:
 *    1 - the current CPU could not be determined
 *    2 - MSR_PLATFORM_INFO could not be read
 *    3 - MSR_RAPL_POWER_UNIT could not be read
 *    5 - MSR_IA32_TEMPERATURE_TARGET could not be read
 *   -1 - out of memory
 *    6 - first MSR sampling failed
 *    7 - second MSR sampling failed
 *    8 - delta calculation failed
 *    9 - summary calculation failed
 */
static int fill_cpu_data(cpu_info_t *cpu_data, int num_cpus, cpu_info_t *summary_cpu)
{
    const int BASE_FREQ_UNITS = 100; /* KNL ratio freq units are 100MHz */
    msr_values_t *first_read = NULL;
    msr_values_t *second_read = NULL;
    msr_values_t summary_msr;
    unsigned long long msr_data = 0;
    int base_cpu_id = 0;
    unsigned int base_freq_ratio = 0;
    double base_freq_mhz = 0;
    double base_rapl_energy_units = 0;
    double base_rapl_dram_ener_units = 0;
    double interval_time = 0;
    unsigned int target_temp = 0;
    struct timeval first_time;
    struct timeval second_time;
    struct timeval delta_time;
    struct timespec sleep_reads = {5, 0};
    int rc = 0;
    int i = 0;

    base_cpu_id = sched_getcpu(); /* Get CPU ID of CPU currently running the app */
    if (base_cpu_id < 0)
        return 1;

    /* Read base values for CPU freq */
    if (read_msr(base_cpu_id, MSR_PLATFORM_INFO, &msr_data))
        return 2;
    base_freq_ratio = (msr_data >> 8) & 0xFF;
    base_freq_mhz = base_freq_ratio * BASE_FREQ_UNITS;

    /* Read units values for energy for CPU and DRAM.
     * The exponent is a 5-bit field (0..31), so the shift is done on a 64-bit
     * unsigned value: shifting a signed int by 31 would be undefined. */
    if (read_msr(base_cpu_id, MSR_RAPL_POWER_UNIT, &msr_data))
        return 3;
    base_rapl_energy_units = 1.0 / (double)(1ULL << ((msr_data >> 8) & 0x1F));
    base_rapl_dram_ener_units = 15.3 / 1000000; /* DRAM units are fixed, they are not taken from the MSR */

    /* Read Temp Target */
    if (read_msr(base_cpu_id, MSR_IA32_TEMPERATURE_TARGET, &msr_data))
        return 5;
    target_temp = (msr_data >> 16) & 0xFF; /* Celsius */

    /* From this point on every exit goes through the "out" label, which
     * releases both sample arrays. */
    first_read = calloc((size_t)num_cpus, sizeof(*first_read));
    second_read = calloc((size_t)num_cpus, sizeof(*second_read));
    if (!first_read || !second_read) {
        rc = -1;
        goto out;
    }

    bzero(&summary_msr, sizeof(msr_values_t));

    /* First sample; its timestamp is taken once the reading is complete */
    if (read_cpu_msr_data(cpu_data, num_cpus, target_temp, first_read)) {
        rc = 6;
        goto out;
    }
    gettimeofday(&first_time, (struct timezone *)NULL);

    /* Second sample. The interval is measured rather than assumed to be the
     * requested sleep time. */
    nanosleep(&sleep_reads, NULL);
    if (read_cpu_msr_data(cpu_data, num_cpus, target_temp, second_read)) {
        rc = 7;
        goto out;
    }
    gettimeofday(&second_time, (struct timezone *)NULL);
    timersub(&second_time, &first_time, &delta_time);
    interval_time = delta_time.tv_sec + (delta_time.tv_usec / 1000000.0);

    /* first_read holds the deltas after this call */
    if (calculate_delta_values(num_cpus, first_read, second_read)) {
        rc = 8;
        goto out;
    }

    if (calculate_summary(num_cpus, &summary_msr, first_read)) {
        rc = 9;
        goto out;
    }

    msr_to_cpu(&summary_msr, summary_cpu, base_freq_mhz, interval_time, base_rapl_energy_units, base_rapl_dram_ener_units);

    for (i = 0; i < num_cpus; i++) {
        msr_to_cpu(&first_read[i], &cpu_data[i], base_freq_mhz, interval_time, base_rapl_energy_units, base_rapl_dram_ener_units);
    }

    rc = 0;

out:
    free(first_read);
    free(second_read);
    return rc;
}

/* "Public" function that releases the CPU Info array returned by get_cpu_info.
 * A NULL pointer is accepted. The parameter is passed by value, so the
 * caller's own pointer is not reset and must not be used afterwards.
 * Always returns 0. */
int free_cpu_info(cpu_info_t *cpu_info)
{
    free(cpu_info);

    return 0;
}

/* "Public" function that will be used to retrieve CPU Information.
 *
 * num_of_cpus - Output. Number of entries of the returned array.
 * cpu_info    - Output. Newly allocated array, to be released by the caller
 *               with free_cpu_info(). It is set to NULL on every failure, so
 *               nothing has to be released in that case.
 * summary     - Output. Aggregated values of all the CPUs.
 *
 * Returns:
 *   0 - Success
 *   1 - Invalid arguments (any of the pointers is NULL)
 *   2 - Error while creating and initializing the CPU Info array
 *   3 - Error while getting the data from the MSRs
 *   4 - No CPUs were found
 */
int get_cpu_info(unsigned int *num_of_cpus, cpu_info_t **cpu_info, cpu_info_t *summary)
{
    /* summary is written by the helpers below, so it is mandatory as well */
    if (!num_of_cpus || !cpu_info || !summary)
        return 1; /* Invalid Arguments */

    *cpu_info = NULL;

    if (init_cpu_info(cpu_info, num_of_cpus, summary))
        return 2; /* Error while creating and initializing the CPU Info array */

    /* Make sure there are more than 0 CPUs, very unlikely, but just to be sure.
     * The (empty) array may still have been allocated, so release it. */
    if (*num_of_cpus == 0) {
        free_cpu_info(*cpu_info);
        *cpu_info = NULL;
        return 4;
    }

    if (fill_cpu_data(*cpu_info, *num_of_cpus, summary)) {
        free_cpu_info(*cpu_info);
        *cpu_info = NULL; /* Do not hand a dangling pointer back to the caller */
        return 3; /* Error while getting the data from the MSRs */
    }

    return 0;
}

#ifdef DEBUG
/* DEBUG Main Function for testing and debugging this program. */
int main()
{
    cpu_info_t *cpu_info;
    cpu_info_t summary;
    unsigned int num_cpus = 0;
    if (!init_cpu_info(&cpu_info, &num_cpus, &summary)) {
        if (num_cpus == 0) {
            return 4; /* Make sure there are more than 0 CPUs, very unlikely, but just to be sure. */
        }
        if (fill_cpu_data(cpu_info, num_cpus, &summary)) {
            free_cpu_info(cpu_info);
            return 3;
        }
        print_cpu_info(cpu_info, num_cpus, &summary);
        free_cpu_info(cpu_info);
        return 0;
    }
    return 2;
}
#endif
