# Copyright 2010-2017 Intel Corporation.
# 
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, version 2.1.
# 
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# 
# Disclaimer: The codes contained in these modules may be specific
# to the Intel Software Development Platform codenamed Knights Ferry,
# and the Intel product codenamed Knights Corner, and are not backward
# compatible with other Intel products. Additionally, Intel will NOT
# support the codes or instruction set in future products.
# 
# Intel offers no warranty of any kind regarding the code. This code is
# licensed on an "AS IS" basis and Intel is not obligated to provide
# any support, assistance, installation, training, or other services
# of any kind. Intel is also not obligated to provide any updates,
# enhancements or extensions. Intel specifically disclaims any warranty
# of merchantability, non-infringement, fitness for any particular
# purpose, and any other warranty.
# 
# Further, Intel disclaims all liability of any kind, including but
# not limited to liability for infringement of any proprietary rights,
# relating to the use of the code, even if Intel is notified of the
# possibility of such liability. Except as expressly stated in an Intel
# license agreement provided with this code and agreed upon with Intel,
# no license, express or implied, by estoppel or otherwise, to any
# intellectual property rights is granted herein.

"""Unit tests for micp/kernels/sgemm.py"""

import pytest

import common_utils

import micp.info as micp_info
import micp.common as micp_common
import micp.version as micp_version
import micp.kernels.sgemm as micp_kernels_sgemm

# Multiplier the tests use to turn a core count into a thread count
THREADS_PER_CORE = 1

# Memory sizes picked so that the different branches
# in micp/kernels/sgemm.py get exercised
MEM_2GB = 2 * 10**9
MEM_4GB = 42 * 10**8
MEM_8GB = 8 * 10**9

# Simulated "cards" available for testing, each one a
# (memory size, number of cores) tuple
_CORE_COUNTS = (61, 52, 40)
MEM_2GB_61_CORES, MEM_2GB_52_CORES, MEM_2GB_40_CORES = (
    (MEM_2GB, count) for count in _CORE_COUNTS)
MEM_4GB_61_CORES, MEM_4GB_52_CORES, MEM_4GB_40_CORES = (
    (MEM_4GB, count) for count in _CORE_COUNTS)
MEM_8GB_61_CORES, MEM_8GB_52_CORES, MEM_8GB_40_CORES = (
    (MEM_8GB, count) for count in _CORE_COUNTS)

# Typical sgemm output, used as the input text for the parsing tests.
# IMPORTANT: the 'timer: xxxx' line was replaced by the placeholder {0};
# each test must format this field as needed before using the text.
# NOTE(review): the first line reads 'benchmarking: dgemm' even though this
# sample is used by the sgemm tests -- confirm whether that is intentional.
SGEMM_OUTPUT = '''benchmarking: dgemm
{0}
num_threads : 0
min_niters  : 3
min_t       : 3.000000
first index : -1
last  index : -1
step        : -1
fixed M     : 512
fixed N     : 512
fixed K     : 512
data transf.: maybe (depends on MKL AO setting)
MKL         : 11.1.1 build 20131010 (Product)
processor   : Intel(R) Advanced Vector Extensions (Intel(R) AVX) Enabled Processor
CPU freq.   : 3.49 (may float due to scaling)
max threads : 16
# of co-proc: 1
threads used: 32 (autodetected)
affinity    : KMP_AFFINITY (if any)

#0: NN

testing SGEMM( 'N', 'N', n, n, ... )

          n        min        avg        max     stddev
        512     124.68     330.50     334.29  4.196e-05
*       512     124.68     330.50     334.29  4.196e-05

'''


@pytest.fixture(params=[
    MEM_2GB_61_CORES, MEM_2GB_52_CORES, MEM_2GB_40_CORES,
    MEM_4GB_61_CORES, MEM_4GB_52_CORES, MEM_4GB_40_CORES,
    MEM_8GB_61_CORES, MEM_8GB_52_CORES, MEM_8GB_40_CORES
])
def info_mock(monkeypatch, request, init_basic_knxlb_object):
    """Simulate a 'device' with a given memory size and number of cores.

    The fixture is parametrized with (mem_size, cores) tuples. For each one
    it monkeypatches micp_info.Info so that get_device_index() returns 0,
    num_cores() returns the parametrized core count and mic_memory_size()
    returns the parametrized memory size.

    The init_basic_knxlb_object fixture is requested as a prerequisite; it
    is defined outside this file (presumably it prepares the InfoKNXLB
    objects -- verify against its definition).

    Returns the tuple (mem_size, cores) so tests can know which
    configuration is being used."""
    memory_bytes, core_count = request.param

    def _fake_device_index(self):
        return 0

    def _fake_num_cores(self):
        return core_count

    def _fake_memory_size(self):
        return memory_bytes

    info_class = micp_info.Info
    monkeypatch.setattr(info_class, 'get_device_index', _fake_device_index)
    monkeypatch.setattr(info_class, 'num_cores', _fake_num_cores)
    monkeypatch.setattr(info_class, 'mic_memory_size', _fake_memory_size)

    return memory_bytes, core_count


def test_sgemm_init(info_mock):
    """check that the sgemm kernel object can be instantiated
    and that it reports 'sgemm' as its name"""
    kernel_name = micp_kernels_sgemm.sgemm().name
    assert kernel_name == 'sgemm'


def test_parameters_values(info_mock):
    """check the kernel declares exactly the expected parameter
    names (the comparison ignores ordering)"""
    kernel = micp_kernels_sgemm.sgemm()

    wanted_names = sorted(['i_num_rep', 'n_num_thread', 'm_mode',
                           'M_size', 'N_size', 'K_size'])
    declared_names = sorted(kernel._paramNames)

    assert declared_names == wanted_names


def test_optimal_quick_parameters(info_mock):
    """check the SGEMM 'optimal_quick' parameters; the same values are
    expected for every memory size / core count provided by info_mock"""
    quick_size = 5120
    wanted = ['--n_num_thread 0 --M_size {0} '
              '--N_size {0} --K_size {0}'.format(quick_size)]

    actual = micp_kernels_sgemm.sgemm()._categoryParams['optimal_quick']
    assert actual == wanted


def test_optimal_parameters(info_mock):
    """check the 'optimal' category parameters for the mocked device.

    The optimal matrix size depends on the memory size and the number of
    cores. The expected sizes below were precomputed following the formulas
    in micp/kernels/sgemm.py:
      - 2GB of memory (any core count)      -> 8704
      - more than 2GB and 52 cores          -> 13312
      - every other mocked configuration    -> 15872
    """
    mem_size, cores = info_mock

    # the memory check takes precedence over the core count check
    if mem_size == MEM_2GB:
        matrix_size = 8704
    elif cores == 52:
        matrix_size = 13312
    else:
        matrix_size = 15872

    wanted = ['--n_num_thread 0 --M_size {0} '
              '--N_size {0} --K_size {0}'.format(matrix_size)]

    kernel = micp_kernels_sgemm.sgemm()
    assert kernel._categoryParams['optimal'] == wanted


def test_scaling_parameters(info_mock):
    """check the 'scaling' and 'scaling_quick' category parameters.

    In these categories the thread parameter stays at 0 while the matrix
    size grows in multiples of 512. The number of steps expected for each
    mocked memory size was precomputed following the formulas in
    micp/kernels/sgemm.py. 'scaling_quick' is expected to contain only the
    first 10 entries of 'scaling'.
    """
    steps_by_memory = {MEM_2GB: 17, MEM_4GB: 31, MEM_8GB: 32}

    mem_size, _unused_cores = info_mock
    steps = steps_by_memory.get(mem_size)
    if steps is None:
        raise ValueError("TEST ERROR: Invalid memory size for test case")

    line = ('--n_num_thread 0 --M_size {0} '
            '--N_size {0} --K_size {0}')
    # 512, 1024, ..., 512*steps
    wanted_scaling = [line.format(size)
                      for size in range(512, 512*steps + 1, 512)]
    wanted_quick = wanted_scaling[:10]

    category_params = micp_kernels_sgemm.sgemm()._categoryParams
    assert category_params['scaling'] == wanted_scaling
    assert category_params['scaling_quick'] == wanted_quick


def test_scaling_core(info_mock):
    """validate the parameters that sgemm uses for the 'scaling_core'
    category, in this case the input matrix is constant (8192) while the
    number of threads is increased gradually.

    The expected core counts go from 'step' up to (but excluding) the total
    number of cores in increments of 'step', where 'step' is a tenth of the
    cores rounded to an integer; the total number of cores is always the
    last entry. Each count is multiplied by THREADS_PER_CORE."""
    __, cores = info_mock
    step = int(round(cores/10.0))

    # list() is required so the sequence can be extended below: on Python 3
    # range() returns an immutable sequence without append(). On Python 2
    # this is just a harmless copy of the list.
    core_scaling = list(range(step, cores, step))
    core_scaling.append(cores)
    template = ('--n_num_thread {0} --M_size 8192 '
                '--N_size 8192 --K_size 8192')
    expected = [template.format(num*THREADS_PER_CORE) for num in core_scaling]

    sgemm_kernel = micp_kernels_sgemm.sgemm()
    assert sgemm_kernel._categoryParams['scaling_core'] == expected


def test_offload_methods(info_mock):
    """check offload_methods() lists the offload methods
    this kernel supports, in the expected order"""
    supported = micp_kernels_sgemm.sgemm().offload_methods()
    assert supported == ['native', 'pragma', 'auto', 'local']


@pytest.mark.parametrize("offload_method, win_binary, linux_binary", [
    ('pragma', 'sgemm_ofl.exe', 'sgemm_ofl.x'),
    ('local', 'sgemm_cpu.exe', 'sgemm_cpu.x'),
    ('auto', 'sgemm_cpu.exe', 'sgemm_cpu.x'),
])
def test_path_host_exec_valid_offload_methods(offload_method, win_binary,
                                              linux_binary, info_mock):
    """check path_host_exec() yields the host side kernel binary path
    for each valid offload method; the reference path comes from
    common_utils.get_workload_binary_path()"""
    reference_path = common_utils.get_workload_binary_path(
        offload_method, win_binary, linux_binary)

    kernel = micp_kernels_sgemm.sgemm()

    assert kernel.path_host_exec(offload_method) == reference_path


def test_path_host_exec_knlsb_linux(monkeypatch, info_mock):
    """ONLY FOR KNLSB LINUX (skipped on windows), check path_host_exec()
    selects the sgemm binary depending on the availability of MCDRAM
    memory, using the 'local' offload method"""

    if micp_common.is_platform_windows():
        pytest.skip()

    kernel = micp_kernels_sgemm.sgemm()
    mcdram_query = 'is_processor_mcdram_available'

    # case 1: mcdram reported as available -> mcdram flavor of the binary
    monkeypatch.setattr(micp_info.Info, mcdram_query, lambda __: True)
    binary_with_mcdram = kernel.path_host_exec('local')
    assert binary_with_mcdram.endswith('sgemm_mcdram_cpu.x')

    # case 2: mcdram reported as unavailable -> regular cpu binary
    monkeypatch.setattr(micp_info.Info, mcdram_query, lambda __: False)
    binary_without_mcdram = kernel.path_host_exec('local')
    assert 'sgemm_cpu.x' in binary_without_mcdram


@pytest.mark.parametrize("offload_method", ['myo', 'coi', 'scif'])
def test_path_host_exec_invalid_offload_methods(offload_method, info_mock):
    """check path_host_exec() gives None instead of a host side
    binary path when the offload method is not supported"""
    kernel = micp_kernels_sgemm.sgemm()
    assert kernel.path_host_exec(offload_method) is None


@pytest.mark.parametrize("offload_method, binary_name", [
    ('native', 'sgemm_mic.x'),
])
def test_path_dev_exec_valid_offload_methods(offload_method, binary_name,
                                             info_mock):
    """check path_dev_exec() yields the card side kernel binary path
    for a valid offload method. Skipped when MIC_PERF_CARD_ARCH is
    'NONE' (no card present)."""

    card_missing = micp_version.MIC_PERF_CARD_ARCH == 'NONE'
    if card_missing:
        pytest.skip()

    # the same binary name is passed for both platforms
    reference_path = common_utils.get_workload_binary_path(
        offload_method, binary_name, binary_name)

    kernel = micp_kernels_sgemm.sgemm()

    assert kernel.path_dev_exec(offload_method) == reference_path


@pytest.mark.parametrize('offload_method', [
    'myo', 'coi', 'scif', 'local', 'pragma', 'auto'
])
def test_path_dev_exec_invalid_offload_methods(offload_method, info_mock):
    """check path_dev_exec() gives None instead of a card side
    binary path when the offload method is not supported"""
    kernel = micp_kernels_sgemm.sgemm()
    assert kernel.path_dev_exec(offload_method) is None


def test_path_aux_data_valid_offload(info_mock):
    """check path_aux_data() for the 'native' offload method matches
    what common_utils.get_path_to_libiomp5() reports"""
    reference = common_utils.get_path_to_libiomp5()

    kernel = micp_kernels_sgemm.sgemm()
    assert kernel.path_aux_data('native') == reference


@pytest.mark.parametrize("offload_method", [
    'myo', 'coi', 'scif', 'local', 'pragma', 'auto'
])
def test_path_aux_data_invalid_offload(offload_method, info_mock):
    """check path_aux_data() gives a falsy value (e.g. an empty
    list) when the offload method is not supported"""
    kernel = micp_kernels_sgemm.sgemm()
    aux_data = kernel.path_aux_data(offload_method)
    assert not aux_data


def test_parse_descritpion():
    """check parse_desc() extracts the kernel description
    from the sample kernel output"""

    kernel_output = SGEMM_OUTPUT.format('timer: native')

    description = micp_kernels_sgemm.sgemm().parse_desc(kernel_output)

    assert description == ('(M=512, N=512, K=512) MKL SGEMM with '
                           '32 (autodetected) threads and 3 iterations')


@pytest.mark.parametrize("timer, expected_tag", [
    ('timer : native', 'Task.Computation.Avg' ),
    ('timer : invoke', 'Device.Computation.Avg'),
    ('timer : full', 'Host.Computation.Avg'),
    ('no_timer', 'Computation.Avg')
])
def test_parse_performance(timer, expected_tag):
    """check parse_perf() extracts the performance results from the
    sample kernel output. The timer line is parametrized so that each
    expected result tag is covered."""

    kernel = micp_kernels_sgemm.sgemm()
    parsed = kernel.parse_perf(SGEMM_OUTPUT.format(timer))

    wanted_entry = dict(value='330.5', units='GFlops', rollup=True)

    assert expected_tag in parsed
    assert parsed[expected_tag] == wanted_entry


@pytest.mark.parametrize("platform", ['selfboot', 'leveragedboot'])
def test_enviroment_host(platform, info_mock, monkeypatch):
    """check the dictionary of host environment variables returned by
    environment_host(). Both the self boot and the leveraged boot cases
    are covered by monkeypatching micp_common.is_selfboot_platform()."""

    if platform not in ('selfboot', 'leveragedboot'):
        raise ValueError('ERROR: Invalid platform type')
    selfboot_requested = (platform == 'selfboot')
    monkeypatch.setattr(micp_common, 'is_selfboot_platform',
                        lambda: selfboot_requested)

    memory_bytes, core_count = info_mock
    # memory size minus 1024**3, expressed in whole units of 1024**3
    max_memory = str(int((memory_bytes - 1024**3) / (1024**3)))
    # one core less than the total is used for the thread count
    thread_count = str((core_count-1)*THREADS_PER_CORE)

    host_lib_path = common_utils.get_host_library_path()
    dev_lib_path = common_utils.get_dev_library_path()

    leveraged_boot_env = {
        'MIC_BUFFERSIZE': '256M',
        'MKL_MIC_ENABLE': '1',
        'MKL_MIC_DISABLE_HOST_FALLBACK': '1',
        'LD_LIBRARY_PATH': host_lib_path,
        'MIC_LD_LIBRARY_PATH': dev_lib_path,
        'MIC_ENV_PREFIX': 'MIC',
        'MIC_OMP_NUM_THREADS': thread_count,
        'KMP_AFFINITY': 'compact,1,0',
        'MIC_KMP_AFFINITY':
            'explicit,granularity=fine,proclist=[1-{0}:1]'.format(thread_count),
        'MIC_USE_2MB_BUFFERS': '16K',
        'MKL_MIC_MAX_MEMORY': '{0}G'.format(max_memory),
    }

    self_boot_env = {
        'KMP_AFFINITY': 'compact,1,0',
        'LD_LIBRARY_PATH': host_lib_path,
        'USE_2MB_BUFFERS': '16K',
    }

    kernel = micp_kernels_sgemm.sgemm()
    actual_env = kernel.environment_host()

    if not micp_common.is_selfboot_platform():
        assert actual_env == leveraged_boot_env
        return

    # self boot: extra variables are expected on windows
    if micp_common.is_platform_windows():
        self_boot_env.update({
            'OMP_NUM_THREADS': str(core_count),
            'MKL_DYNAMIC': 'false',
            'KMP_BLOCKTIME': 'infinite',
            'KMP_LIBRARY': 'turnaround',
        })

    # self boot without mcdram: the fast memory limit is expected to be 0
    if not micp_info.Info().is_processor_mcdram_available():
        self_boot_env['MKL_FAST_MEMORY_LIMIT'] = '0'

    assert actual_env == self_boot_env


@pytest.mark.parametrize("category", ['optimal', 'scaling_core',
    'scaling', 'test', 'scaling_quick', 'optimal_quick'])
def test_independent_variable(category):
    """check independent_var() names the variable expected for each
    parameters category: 'n_num_thread' for 'scaling_core' and
    'f_first_matrix_size' for every other category"""

    wanted_var = ('n_num_thread' if category == 'scaling_core'
                  else 'f_first_matrix_size')

    kernel = micp_kernels_sgemm.sgemm()
    assert kernel.independent_var(category) == wanted_var
