/**
   Kernel module implementing the network lock that is
   needed for accessing LPC and FML on KIRA100 revisions < 2.0.
**/

#include <linux/module.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/arch/ahb_dma.h>
#include <linux/slab.h>
#include <linux/version.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#include <asm/arch-cpe/cpe/peppercon.h>
#else 
#include <asm/arch/platform-a320/peppercon.h>
#endif

/*
 * Bit mask applied to the register at offset 0x88 of each FTMAC block by
 * network_disable() and network_enable().
 * NOTE(review): the meaning of the individual bits is not visible in this
 * file -- presumably they are the MAC/DMA enable bits; confirm against the
 * FTMAC datasheet.
 */
#define MACBITS 0x123

/* to use the MMU hack (instead of the AHB DMA based access), define this */
#define USE_MMU_HACK

/* Nesting depth of ahbfix_lock_net() calls; modified under dma_net_lock. */
static volatile int dma_net_locked;
/* Serialises the nesting counter and the MAC register save/restore. */
static spinlock_t dma_net_lock;
/* MACBITS portion of each MAC register as sampled by network_disable(). */
static volatile u32 mac1_last_reg, mac2_last_reg;
#ifndef USE_MMU_HACK
/* AHB DMA channel obtained in ahbfix_init(). */
static ahb_dma_channel_data_t *dma_data;
/* Bus address of the consistent DMA buffer (kept in a void pointer). */
static void *dma_buf_phys;
/* CPU mapping of the same buffer: 256 bytes of data plus 8 dummy bytes. */
static u32 *dma_buf_virt;
#endif

/**
 * Replace the interrupt controller's IRQ enable mask.
 *
 * Writes @irqmask to the KIRA_INTC_IRQ_ENABLE register and returns the value
 * the register held before, so that a later call with the returned value
 * puts the previous mask back.
 *
 * The register is accessed through a volatile pointer: it is memory-mapped
 * hardware, and without the qualifier the compiler would be free to merge or
 * reorder the read and the write with respect to surrounding code (the
 * asm volatile blocks used elsewhere in this file carry no memory clobber).
 *
 * @irqmask: new value for the IRQ enable register
 * Return: previous value of the IRQ enable register
 */
inline u32 save_all_irq(u32 irqmask) {
    volatile u32 *intc_regs = (volatile u32 *) (CPE_IC_VA_BASE);
    u32 saved_value = intc_regs[KIRA_INTC_IRQ_ENABLE];

    intc_regs[KIRA_INTC_IRQ_ENABLE] = irqmask;
    return saved_value;
}

static inline int network_disable(void)
{
    volatile unsigned int *a = (volatile unsigned int*)(CPE_FTMAC_VA_BASE + 0x88);
    volatile unsigned int *b = (volatile unsigned int*)(CPE_FTMAC2_VA_BASE + 0x88);
    mac1_last_reg = *a & MACBITS;
    mac2_last_reg = *b & MACBITS;
    *a  &= ~MACBITS;
    *b  &= ~MACBITS;
    if (mac1_last_reg || mac2_last_reg) return -1;
    else return 0;
}    

static inline void network_enable(void)
{
    volatile unsigned int *a = (volatile unsigned int*)(CPE_FTMAC_VA_BASE + 0x88);
    volatile unsigned int *b = (volatile unsigned int*)(CPE_FTMAC2_VA_BASE + 0x88);
    *a  |= mac1_last_reg;
    *b  |= mac2_last_reg;
}    

/*
 * Take the (nestable) network lock.
 *
 * The first holder clears the MAC bits via network_disable(); every call
 * increments the nesting counter. Both steps run under dma_net_lock with
 * interrupts disabled. If network_disable() reported that bits had been set,
 * the function waits 1 microsecond after dropping the spinlock.
 */
void ahbfix_lock_net(void)
{
    unsigned long irq_state;
    int disable_result = 0;

    spin_lock_irqsave(&dma_net_lock, irq_state);
    if (dma_net_locked == 0) {
        disable_result = network_disable();
    }
    dma_net_locked++;
    spin_unlock_irqrestore(&dma_net_lock, irq_state);

    if (disable_result < 0) {
        udelay(1);
    }
}

/*
 * Release one level of the network lock.
 *
 * Decrements the nesting counter under dma_net_lock; when it reaches zero
 * the MAC bits saved by network_disable() are set again through
 * network_enable().
 */
void ahbfix_unlock_net(void)
{
    unsigned long irq_state;

    spin_lock_irqsave(&dma_net_lock, irq_state);
    dma_net_locked -= 1;
    if (dma_net_locked == 0) {
        network_enable();
    }
    spin_unlock_irqrestore(&dma_net_lock, irq_state);
}

#ifndef USE_MMU_HACK
/*
 * Callback passed to ahb_dma_request_channel() in ahbfix_init().
 *
 * Writes 0 to CP15 c9,c0,1 and c9,c0,0 -- the two registers into which the
 * DMA variants of ahbfix_read_block()/ahbfix_write_block() write 0x80000000
 * before starting a transfer. Those functions spin until c9,c0,1 no longer
 * reads 0x80000000, so this write is what ends their wait loop.
 *
 * isr_data is not used.
 */
static void dma_handler(void *isr_data)
{
    // unlock data and instruction cache
    asm volatile (
            ".align 5\n" \
            "mov r0, #0\n" \
            "mcr p15, 0, r0, c9, c0, 1\n"
            "mcr p15, 0, r0, c9, c0, 0\n"
    : : : "r0");
}
#endif

int ahbfix_init(void)
{
    int ret = 0;
    spin_lock_init(&dma_net_lock);
    
#ifdef USE_MMU_HACK
# define SRAM_PHYS_BASE 0xb0000000
# define SRAM_READDATA_OFFSET_32 0x0
# define SRAM_WRITEDATA_OFFSET_32 0x40
    // copy stuff into SRAM
    uint32_t read_data, write_data, read_data_end, write_data_end;

    asm volatile (
	"ldr %0, =label_read_data\n"
	"ldr %1, =label_read_data_end\n"
	"ldr pc, =label_read_data_end\n"
        ".align 4\n"
        "label_read_data: nop\n" 
	"nop\n"
	"ldr r3, [r2]\n"
	"ldr r2, [pc]\n"
	"nop\n"
	"nop\n"
	"mrc 15, 0, r0, c1, c0, 0\n"
	"orr r0, r0, #1\n"
	"nop\n"
	"mcr 15, 0, r0, c1, c0, 0\n"
	"nop\n"
	"mov pc, r4\n"
	"label_read_data_end: nop\n"
        : "=r"(read_data), "=r"(read_data_end)
	: 
	);
asm volatile (
	"ldr %0, =label_write_data\n"
	"ldr %1, =label_write_data_end\n"
	"ldr pc, =label_write_data_end\n"
        ".align 4\n"
        "label_write_data: nop\n" 
	"nop\n"
	"str r3, [r2]\n"
	"ldr r2, [pc]\n"
	"nop\n"
	"ldr r2, [pc]\n"
	"nop\n"
	"mrc 15, 0, r0, c1, c0, 0\n"
	"orr r0, r0, #1\n"
	"mcr 15, 0, r0, c1, c0, 0\n"
	"nop\n"
	"mov pc, r4\n"
	"label_write_data_end: nop\n"
        : "=r"(write_data), "=r"(write_data_end)
	: 
	);

    volatile uint32_t* sram;
    
    sram = ioremap_nocache(CPE_INTRAM_BASE, 0x200);
    if (sram == NULL) {
        ret = -EIO;
        goto fail;
    }
    memcpy((void*)&sram[SRAM_READDATA_OFFSET_32], (void*)read_data, read_data_end-read_data);
    memcpy((void*)&sram[SRAM_WRITEDATA_OFFSET_32], (void*)write_data, write_data_end-write_data);
    iounmap((void*)sram);
    sram = NULL;
 
#else /* !USE_MMU_HACK */
    /* init ahb dma */    
    if (ahb_dma_request_channel(dma_handler, NULL, 2, &dma_data) < 0) {
	ret = -EBUSY;
    	goto fail;
    }
    dma_data->llp_master       = AHBDMA_MASTER_0;
    dma_data->hw_handshake     = 0;

    dma_buf_phys = (void*)(~(dma_addr_t)0);
    dma_buf_virt = pci_alloc_consistent(NULL, 256+8, (dma_addr_t*)&dma_buf_phys); //256 bytes data buffer and 8 bytes dummy operation buffer

#endif

fail:
        return ret;
}

/*
 * Undo ahbfix_init().
 *
 * In the DMA variant the channel is released and the consistent buffer is
 * freed if it was allocated. In the USE_MMU_HACK variant there is nothing
 * to release.
 */
void ahbfix_cleanup(void)
{
#ifndef USE_MMU_HACK
    ahb_dma_release_channel(&dma_data);

    if (dma_buf_virt != NULL) {
        pci_free_consistent(NULL, 256+8, dma_buf_virt, (dma_addr_t)dma_buf_phys);
    }
#endif
}

#ifndef USE_MMU_HACK
/*
 * DMA variant: read cnt 32-bit words starting at phys_base + offset * 4
 * into buf.
 *
 * buf       destination for the words read
 * phys_base base address handed to the DMA engine as the source
 * offset    word offset added to phys_base (shifted left by 2)
 * cnt       value programmed as the size of the data transfer; cnt * 4
 *           bytes are copied out of the DMA buffer afterwards
 * fix       non-zero: source address is held fixed; zero: it increments
 *
 * Sequence: mask every interrupt except the DMA controller's, queue three
 * DMA descriptors (dummy, data, dummy with IRQ), take the network lock,
 * write 0x80000000 to CP15 c9,c0,1 and c9,c0,0 and walk over the following
 * code, start the channel, spin until dma_handler() has reset c9,c0,1, then
 * undo the lock and the interrupt mask and copy the result to buf.
 *
 * NOTE(review): cnt is not range-checked here; the data area allocated in
 * ahbfix_init() is 256 bytes, so cnt > 64 would presumably overrun it --
 * confirm that all callers respect this.
 */
void ahbfix_read_block(u32 *buf, uint32_t phys_base, u32 offset, int cnt, int fix)
{
    uint32_t start = phys_base + (offset << 2);
    uint32_t old_irq;
    
    // stop all irqs, but dma irq
    old_irq = save_all_irq(1 << IRQ_DMAC);

    // prepare dma
    {
        ahb_dma_parm_t parm;
        // 1st descriptor: dummy transfer inside the 8 spare bytes of the
        // buffer (byte 256 -> byte 260), both addresses fixed, no irq
        parm.src  = (uint32_t)dma_buf_phys+256;
        parm.dest = (uint32_t)dma_buf_phys+260;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = AHBDMA_CTL_FIX;
        parm.dctl = AHBDMA_CTL_FIX;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = 8;
        parm.irq  = AHBDMA_NO_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
        // 2nd descriptor: the real transfer, device -> start of dma buffer
        parm.src  = start;
        parm.dest = (uint32_t)dma_buf_phys;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = (fix) ? AHBDMA_CTL_FIX : AHBDMA_CTL_INC;
        parm.dctl = AHBDMA_CTL_INC;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = cnt;
        parm.irq  = AHBDMA_NO_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
        // 3rd descriptor: another dummy transfer; this one raises the irq
        // that ends up in dma_handler()
        parm.src  = (uint32_t)dma_buf_phys+256;
        parm.dest = (uint32_t)dma_buf_phys+260;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = AHBDMA_CTL_FIX;
        parm.dctl = AHBDMA_CTL_FIX;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = 1;
        parm.irq  = AHBDMA_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
    }

    // stop network dma
    ahbfix_lock_net();

    // lock data and instruction cache:
    // r1 = pc rounded down to 16 bytes, 0x80000000 is written to
    // c9,c0,1 and c9,c0,0, then the loop issues "mcr c7,c13,1" and a load
    // for r1 in 16-byte steps (loop runs while the counter is <= 0x40).
    // NOTE(review): presumably this pulls the following code into the
    // locked cache lines -- confirm against the CPU manual.
    asm volatile (
            ".align 5\n" \
            "mov r1, pc\n" \
            "and r1, r1, #0xfffffff0\n" \
            "mov r3, #0\n" \
            "mov r2, #0x80000000\n" \
            "mcr p15, 0, r2, c9, c0, 1\n" \
            "mcr p15, 0, r2, c9, c0, 0\n" \
            "block_read_loop1:\n" \
            "mcr p15, 0, r1, c7, c13, 1\n" \
            "ldr r2, [r1]\n" \
            "add r1, r1, #16\n" \
            "add r3, r3, #1\n" \
            "cmp r3, #0x40\n" \
            "ble block_read_loop1\n"
    : : : "r1", "r2", "r3");

    // start dma
    dma_data->base[AHBDMA_CHANNEL_CSR] |= 0x1;

    // busy-wait until c9,c0,1 no longer reads 0x80000000, i.e. until
    // dma_handler() has written 0 to it
    asm volatile (
            ".align 5\n" \
            "block_read_wait_loop:\n" \
            "mrc p15, 0, r1, c9, c0, 1\n" \
            "cmp r1, #0x80000000\n" \
            "beq block_read_wait_loop\n"
    : : : "r1");

    // restart network dma
    ahbfix_unlock_net();

    // re-enable irqs
    save_all_irq(old_irq);

    // get val from dma buf
    consistent_sync(dma_buf_virt, cnt * 4 + 16, DMA_FROM_DEVICE);

    memcpy(buf, dma_buf_virt, cnt * 4);
}


/*
 * DMA variant: write cnt 32-bit words from vals to phys_base + offset * 4.
 *
 * vals      words to write; cnt * 4 bytes are copied into the DMA buffer
 * phys_base base address handed to the DMA engine as the destination
 * offset    word offset added to phys_base (shifted left by 2)
 * cnt       value programmed as the size of the data transfer
 * fix       non-zero: destination address is held fixed; zero: it increments
 *
 * Mirrors the DMA variant of ahbfix_read_block(): mask every interrupt but
 * the DMA controller's, stage the data, queue three descriptors (dummy,
 * data, dummy with IRQ), take the network lock, write 0x80000000 to CP15
 * c9,c0,1 and c9,c0,0, start the channel and spin until dma_handler() has
 * reset c9,c0,1.
 *
 * NOTE(review): cnt is not range-checked here; the data area allocated in
 * ahbfix_init() is 256 bytes, so cnt > 64 would presumably overrun it --
 * confirm that all callers respect this.
 */
void ahbfix_write_block(u32 *vals, uint32_t phys_base, u32 offset, int cnt, int fix)
{
    uint32_t start = phys_base + (offset << 2);
    uint32_t old_irq;
    
    // stop all irqs, but dma irq
    old_irq = save_all_irq(1 << IRQ_DMAC);

    // copy val to dma buf
    memcpy(dma_buf_virt, vals, cnt * 4);
    consistent_sync(dma_buf_virt, cnt * 4 + 16, DMA_TO_DEVICE);

    // prepare dma
    {
        ahb_dma_parm_t parm;
        // 1st descriptor: dummy transfer inside the 8 spare bytes of the
        // buffer (byte 256 -> byte 260), both addresses fixed, no irq
        parm.src  = (uint32_t)dma_buf_phys+256;
        parm.dest = (uint32_t)dma_buf_phys+260;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = AHBDMA_CTL_FIX;
        parm.dctl = AHBDMA_CTL_FIX;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = 8;
        parm.irq  = AHBDMA_NO_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
        // 2nd descriptor: the real transfer, start of dma buffer -> device
        parm.dest = start;
        parm.src  = (uint32_t)dma_buf_phys;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = AHBDMA_CTL_INC;
        parm.dctl = (fix) ? AHBDMA_CTL_FIX : AHBDMA_CTL_INC;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = cnt;
        parm.irq  = AHBDMA_NO_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
        // 3rd descriptor: another dummy transfer; this one raises the irq
        // that ends up in dma_handler()
        parm.src  = (uint32_t)dma_buf_phys+256;
        parm.dest = (uint32_t)dma_buf_phys+260;
        parm.sw   = AHBDMA_WIDTH_32BIT;
        parm.dw   = AHBDMA_WIDTH_32BIT;
        parm.sctl = AHBDMA_CTL_FIX;
        parm.dctl = AHBDMA_CTL_FIX;
        parm.src_data_master  = AHBDMA_MASTER_0;
        parm.dest_data_master = AHBDMA_MASTER_0;
        parm.size = 1;
        parm.irq  = AHBDMA_TRIGGER_IRQ;
        ahb_dma_channel_add(dma_data, &parm);
    }

    // stop network dma
    ahbfix_lock_net();

    // lock data and instruction cache:
    // r1 = pc rounded down to 16 bytes, 0x80000000 is written to
    // c9,c0,1 and c9,c0,0, then the loop issues "mcr c7,c13,1" and a load
    // for r1 in 16-byte steps (loop runs while the counter is <= 0x30).
    // NOTE(review): the read variant loops up to 0x40 instead of 0x30;
    // whether the difference is intentional is not visible here.
    asm volatile (
            ".align 5\n" \
            "mov r1, pc\n" \
            "and r1, r1, #0xfffffff0\n" \
            "mov r3, #0\n" \
            "mov r2, #0x80000000\n" \
            "mcr p15, 0, r2, c9, c0, 1\n" \
            "mcr p15, 0, r2, c9, c0, 0\n" \
            "block_write_loop1:\n" \
            "mcr p15, 0, r1, c7, c13, 1\n" \
            "ldr r2, [r1]\n" \
            "add r1, r1, #16\n" \
            "add r3, r3, #1\n" \
            "cmp r3, #0x30\n" \
            "ble block_write_loop1\n"
    : : : "r1", "r2", "r3");

    // start dma
    dma_data->base[AHBDMA_CHANNEL_CSR] |= 0x1;

    // busy-wait until c9,c0,1 no longer reads 0x80000000, i.e. until
    // dma_handler() has written 0 to it
    asm volatile (
            ".align 5\n" \
            "block_write_wait_loop:\n" \
            "mrc p15, 0, r1, c9, c0, 1\n" \
            "cmp r1, #0x80000000\n" \
            "beq block_write_wait_loop\n"
    : : : "r1");
    
    // restart network dma
    ahbfix_unlock_net();

    // re-enable irqs
    save_all_irq(old_irq);
    
    return;
}
#else /* USE_MMU_HACK */
/**
 * Read one 32-bit word from physical address @phys_base by temporarily
 * clearing bits 0, 2 and 12 of the CP15 c1 control register and running the
 * read stub that ahbfix_init() copied into SRAM.
 *
 * Register protocol with the stub: r2 = address to read, r1 = physical
 * address of the stub, r4 = address to return to (mylabel2), r3 = value the
 * stub loaded, r6 = saved control register.
 *
 * Interrupts are disabled with local_irq_save()/local_irq_restore() so the
 * caller's interrupt state is preserved; a plain cli()/sti() pair would
 * switch interrupts on at exit even when the caller had them disabled.
 *
 * NOTE(review): on the standard ARM CP15 c1 layout bit 0 is the MMU enable,
 * bit 2 the data cache enable and bit 12 the instruction cache enable --
 * confirm for this core.
 *
 * @phys_base: physical address of the word to read
 * Return: the word read by the SRAM stub
 */
u32 mmu_read_reg(uint32_t phys_base) {
    register uint32_t addr = phys_base;
    register uint32_t val;
    register uint32_t nommuaddr;
    unsigned long flags;

    nommuaddr = SRAM_PHYS_BASE + (SRAM_READDATA_OFFSET_32 << 2);
    local_irq_save(flags);
    asm volatile (
         "mov r2, %1\n"                     /* phys addr for access (lpc)       */
         "mov r1, %2\n"                     /* phys addr of nonmmu code         */
         "ldr   r4, =mylabel2\n"            /* return address at mylabel2       */
         "nop\n"
         "mov r0, #0\n"                     /* r0 := 0                          */
         "mcr p15, 0, r0, c7, c10, 0\n"     /* Clean DCache All                 */
         "mrc p15, 0, r6, c1, c0, 0\n"      /* r6 := control reg, restored below */
         "mrc   p15, 0, r0, c1, c0, 0\n"    /* r0 := control reg, working copy  */
         "bic   r0, r0, #4\n"               /* clear bit 2                      */
         "bic   r0, r0, #1\n"               /* clear bit 0                      */
         "bic   r0, r0, #4096\n"            /* clear bit 12                     */
         "mcr   p15, 0, r0, c1, c0, 0\n"    /* write modified control reg       */
         "nop\n"
         "mov   pc, r1\n"                   /* jump to phys addr of nonmmu code */
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         ".align 5\n"
         "mylabel2: nop\n"                  /* return address                   */
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "sub   r4, r4, #16\n"              /* purpose unknown; r4 is unused afterwards */
         "mov %0, r3\n"                     /* retval of read of lpc from nonmmu code   */
         "mcr   p15, 0, r6, c1, c0, 0\n"    /* set saved control reg (CR1-0)            */
        : "=r"(val)
        : "r"(addr), "r"(nommuaddr)
        : "r1", "r2", "r3", "r4","r0","r6"
        );
    local_irq_restore(flags);
    return val;
}

/*
 * MMU-hack variant: read cnt 32-bit words into buf, one mmu_read_reg() call
 * per word, while holding the network lock.
 *
 * The first address is phys_base + offset * 4. With fix == 0 the address
 * advances by 4 bytes per word; with fix != 0 every word is read from the
 * same address.
 */
void ahbfix_read_block(u32 *buf, uint32_t phys_base, u32 offset, int cnt, int fix) {
    const uint32_t first = phys_base + (offset << 2);
    int idx = 0;

    ahbfix_lock_net();
    while (idx < cnt) {
        uint32_t target = first;

        if (!fix) {
            target += (idx << 2);
        }
        buf[idx] = mmu_read_reg(target);
        idx++;
    }
    ahbfix_unlock_net();
}

/**
 * Write the 32-bit word @val2 to physical address @phys_base by temporarily
 * clearing bits 0, 2 and 12 of the CP15 c1 control register and running the
 * write stub that ahbfix_init() copied into SRAM.
 *
 * Register protocol with the stub: r3 = value to store, r2 = address to
 * write, r1 = physical address of the stub, r4 = address to return to
 * (mylabel), r6 = saved control register.
 *
 * Interrupts are disabled with local_irq_save()/local_irq_restore() so the
 * caller's interrupt state is preserved; a plain cli()/sti() pair would
 * switch interrupts on at exit even when the caller had them disabled.
 *
 * All three asm operands are inputs, so the sequence must not write to any
 * of them. The slot after "sub r4, r4, #16" is therefore a nop; it keeps the
 * instruction count of the sequence identical to mmu_read_reg(), which
 * fetches its result there.
 *
 * NOTE(review): on the standard ARM CP15 c1 layout bit 0 is the MMU enable,
 * bit 2 the data cache enable and bit 12 the instruction cache enable --
 * confirm for this core.
 *
 * @phys_base: physical address of the word to write
 * @val2:      value to store
 */
void mmu_write_reg(uint32_t phys_base, uint32_t val2) {
    uint32_t addr = phys_base;
    uint32_t val = val2;
    uint32_t nommuaddr;
    unsigned long flags;

    nommuaddr = SRAM_PHYS_BASE + (SRAM_WRITEDATA_OFFSET_32 << 2);

    local_irq_save(flags);
    asm volatile (
         "mov r3, %0\n"                     /* value to write                   */
         "mov r2, %1\n"                     /* phys addr for access             */
         "mov r1, %2\n"                     /* phys addr of nonmmu code         */
         "ldr   r4, =mylabel\n"             /* return address at mylabel        */
         "nop\n"
         "mov r0, #0\n"                     /* r0 := 0                          */
         "mcr p15, 0, r0, c7, c10, 0\n"     /* c7,c10,0 write (dcache clean)    */
         "mrc p15, 0, r6, c1, c0, 0\n"      /* r6 := control reg, restored below */
         "mrc   p15, 0, r0, c1, c0, 0\n"    /* r0 := control reg, working copy  */
         "bic   r0, r0, #4\n"               /* clear bit 2                      */
         "bic   r0, r0, #1\n"               /* clear bit 0                      */
         "bic   r0, r0, #4096\n"            /* clear bit 12                     */
         "mcr   p15, 0, r0, c1, c0, 0\n"    /* write modified control reg       */
         "nop\n"
         "mov   pc, r1\n"                   /* jump to phys addr of nonmmu code */
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         ".align 5\n"
         "mylabel: nop\n"                   /* return address                   */
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "nop\n"
         "sub   r4, r4, #16\n"              /* purpose unknown; r4 is unused afterwards */
         "nop\n"                            /* placeholder, see header comment  */
         "mcr   p15, 0, r6, c1, c0, 0\n"    /* set saved control reg (CR1-0)    */
        :
        : "r"(val), "r"(addr),"r"(nommuaddr)
        : "r1", "r2", "r3", "r4","r0", "r6"
        );
    local_irq_restore(flags);
}

/*
 * MMU-hack variant: write cnt 32-bit words from vals, one mmu_write_reg()
 * call per word, while holding the network lock.
 *
 * The first address is phys_base + offset * 4. With fix == 0 the address
 * advances by 4 bytes per word; with fix != 0 every word goes to the same
 * address.
 */
void ahbfix_write_block(u32 *vals, uint32_t phys_base, u32 offset, int cnt, int fix) {
    const uint32_t first = phys_base + (offset << 2);
    int idx;

    ahbfix_lock_net();
    for (idx = 0; idx < cnt; idx++) {
        uint32_t target = fix ? first : first + (idx << 2);

        mmu_write_reg(target, vals[idx]);
    }
    ahbfix_unlock_net();
}
#endif


/*
 * Module init: reset the lock state and, on KIRA revisions with a major
 * number below 2, set up the access workaround through ahbfix_init().
 *
 * The spinlock is initialised with spin_lock_init(), the same call
 * ahbfix_init() uses, rather than by assigning SPIN_LOCK_UNLOCKED at run
 * time; the initialiser macro is deprecated for that use.
 *
 * Returns 0 on success or the negative error code from ahbfix_init().
 */
static int net_lock_init(void)
{
    uint32_t kira_rev;

    dma_net_locked = 0;
    spin_lock_init(&dma_net_lock);

    kira_rev = pp_kira_get_revision();
    if (KIRA_MAJOR(kira_rev) < 2) {
        return ahbfix_init();
    }
    return 0;
}

/*
 * Module exit: release what ahbfix_init() set up. Like the init path, this
 * only applies to KIRA revisions with a major number below 2.
 */
static void net_lock_cleanup(void)
{
    uint32_t kira_rev = pp_kira_get_revision();

    if (KIRA_MAJOR(kira_rev) < 2) {
        ahbfix_cleanup();
    }
}

/* Module entry and exit points. */
module_init(net_lock_init);
module_exit(net_lock_cleanup);
/* Lock and block-access functions exported for use by other kernel code. */
EXPORT_SYMBOL(ahbfix_lock_net);
EXPORT_SYMBOL(ahbfix_unlock_net);
EXPORT_SYMBOL(ahbfix_read_block);
EXPORT_SYMBOL(ahbfix_write_block);
