/*****************************************************************************
 * lara_vsc.c
 *
 * Copyright (c) 2001-2004,2005 Peppercon AG, miba@peppercon.de,
 *                                            chkr@peppercon.de
 *
 * Contains the kernel module for LARA devices with the
 * Video Sampling Controller (VSC) video engine
 *****************************************************************************/

/* ------------------------------------------------------------------------- *
 * kernel includes
 * ------------------------------------------------------------------------- */

#include <linux/version.h>
#include <linux/bigphysarea.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/ioctl.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rtc.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>

#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/irq.h>

#ifdef __powerpc__
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#  include <platforms/peppercon.h>
#  include <asm/ppc405_pcimcs.h>
# else
#  include <platforms/4xx/peppercon.h>
#  include <syslib/ppc405_pcimcs.h>
# endif
# include <asm/ppc4xx_dma.h>
# include <asm-ppc/processor.h>
# include <asm/time.h>
#endif

#ifdef __arm__
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#  include <asm/arch-cpe/cpe_bd.h>
#  include <asm/arch-cpe/cpe/a320c.h>
#  include <asm/arch/cpe_int.h>
# else
#  include <asm/arch/pp_bd.h>
#  include <asm/arch/platform/kira100.h>
# endif
# include <asm/arch/cpe_clk.h>
# include <asm/arch/cpe_gettime.h>
# include <asm/irq.h>
#endif

/* ------------------------------------------------------------------------- *
 * own includes
 * ------------------------------------------------------------------------- */

#include "lara.h"
#include "lara_int.h"

#include "lara_common.h"
#include "vsc_regs.h"

/* ------------------------------------------------------------------------- *
 * global constants, macros and datatypes
 * ------------------------------------------------------------------------- */

/* name used for chrdev and DMA channel registration, and the fixed chrdev major */
#define DRV_NAME		"lara_vsc"
#define VSC_MAJOR		((u8)242)

/*
 * Board specific configuration.
 *  - KIM boards: VSC sits behind PCI, its irq goes through an irq multiplexer
 *  - PowerPC boards: KACY uses one shared irq, others a fixed base and irq
 *  - all remaining builds: CPE base address and irq
 * VSC_ACTIVE_INIT is the initial value of fd_data_t.vsc_active.
 */
#if defined(LARA_KIMMSI) || defined(LARA_KIMSMI) || defined (LARA_KIMAMD) || defined (LARA_KIMINTEL)
# undef VSC_MASTER_MODE
# define VSC_USE_PCI
# define VSC_USE_INTMUX
# define VSC_PCI_VENDOR_ID	0x1743	
# define VSC_PCI_DEVICE_ID	0x1003
# define VSC_PCI_BAR_NO		1
# define INTMUX_OFFSET		0x9A0000
# define INTMUX_SIZE		4
# define VSC_INTMUX_BIT		0
# define VSC_IRQ  		27
# define VSC_ACTIVE_INIT	1
# define VSC_FB_OFFSET          0x40000
#else
# ifdef __powerpc__
#  ifdef LARA_KACY
#   define VSC_ACTIVE_INIT	1
#   define VSC_SHARED_IRQ	19
#  else
#   define VSC_ACTIVE_INIT	0
#   define VSC_BASE_ADDR	0xF1000000
#   define VSC_IRQ		30
#  endif
#  define VSC_FB_OFFSET         0x400000
# else
#  define VSC_MASTER_MODE       1
//#undef VSC_MASTER_MODE
#  define VSC_CACHE_LINE_SIZE   16
#  define VSC_BASE_ADDR         CPE_VSCREG_BASE
#  define VSC_ACTIVE_INIT	1
#  define VSC_IRQ  		CPE_IRQ_VSC
#  define VSC_FB_OFFSET         0x40000
# endif
#endif

/* sizes and offsets of the mapped VSC windows (offsets are relative to the VSC base) */
#define VSC_REGS_SIZE		((VSC_LAST_REG + 4) * 4)
#define VSC_FB_SIZE             (4*1024*1024)
#define VSC_MAX_X_RES		1600
#define VSC_MAX_Y_RES		1200
#define VSC_REGS_OFFSET		0
#define VSC_DIFFMAP_OFFSET	0x003ff800
/* retry count and timeouts (timeouts are expressed in jiffies via HZ) */
#define VSC_REG_MAX_RETRIES	10
#define VSC_MEASURE_TIMEOUT_JF	(HZ/10)
#define VSC_MEASURE_SCHEDULE_JF	(HZ/5)
#define VSC_EOP_TIMEOUT		HZ
#define VSC_DMA_TIMEOUT		HZ
#define VSC_FIFO_FILL_THRESHOLD	300 /* 32bit words, max is 512 */

/* DMA channel and irq numbers; the common init data selects channel 0 / irq 0 */
#define VSC_DMA_CHAN_0		0
#define VSC_DMA_CHAN_1		1
#define VSC_DMA_IRQ_0		5
#define VSC_DMA_IRQ_1		6

#define MAX_DMA_SIZE		(512*1024)
/* size in bytes of the per-channel region rect buffer (fd_data_t.reg_buf) */
#define REG_BUF_SIZE		(VSC_MAX_X_RES/PP_FB_TILE_WIDTH * VSC_MAX_Y_RES/PP_FB_TILE_HEIGHT / 2 * sizeof(BoxRec))

/* flags for vsc_sample_and_diff() */
#define VSC_SD_NORMAL		0x00
#define VSC_SD_SAMPLE_ONLY	0x01

/* size of fd_data_t.vsc_irq_name */
#define IRQ_NAME_LEN		16
/* rename to DEBUG_ISR to enable the debug output inside the ISR */
#define noDEBUG_ISR

/*
 * init data (channel specific initialization values)
 *
 * One entry per channel is kept in fd_init_data[]; lara_vsc_init() copies
 * both members into the matching fd_data_t.
 */
typedef struct {
    phys_addr_t		vsc_base;	/* physical VSC base address of the channel */
    /*
     * VSC irq number of the channel, -1 in the KACY table.
     * NOTE(review): plain char may be unsigned on some targets, in which
     * case -1 is not stored as a negative value - confirm nothing tests for it.
     */
    char		vsc_irq_nr;
} fd_init_data_t;

/*
 * data structure per file descriptor (one for each channel)
 *
 * The instances live in fd_data[]; lara_vsc_init() fills each one from
 * fd_common_init_data and the channel's fd_init_data[] entry.
 */
typedef struct {   
    unsigned char	id;				/* global channel id (index into fd_data[]) */
    fb_format_info_t	fb_format;			/* size information about the current fb */
    
    u32 *		vsc_diffmap_cpu;		/* VSC difference bitmap, virtual */
    dma_addr_t		vsc_diffmap_bus;		/* VSC difference bitmap, physical */
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
    ppc_dma_ch_t        dma_chan;			/* DMA channel descriptor */
    u8			dma_error;			/* DMA error flag, pass info from isr */
    wait_queue_head_t	dma_wq;				/* waiting for end of DMA transfer */
    u_long		dma_size;			/* current size, accessible by sgl */
    sgl_handle_t	dma_sgl;			/* sgl handle for 'whole FB DMA reads' */
    unsigned int        dma_chan_nr;			/* DMA channel number */
    unsigned int        dma_irq_nr;			/* DMA irq number */
    int			dma_chan_registered;		/* request_dma() succeeded */
    int			dma_isr_registered;		/* DMA irq handler installed */
#endif
    BoxRec*		reg_buf;			/* buffer in kernel to handle region rects */
    
    phys_addr_t         vsc_base;			/* VSC base address, either static or from PCI */
    u8			vsc_active;			/* is VSC programmed and accessible */
    struct semaphore	vsc_mtx;			/* lock access to vsc (grabber, autoadjust) */
    volatile u32	vsc_shadow[VSC_LAST_REG+1];     /* reg shadow, save space in VSC for reads */
    volatile u32 *	vsc_regs;			/* mapped VSC registers */
    spinlock_t		vsc_regs_lock;			/* lock access to VSC registers and shadow */
    volatile u32 *	vsc_intmux_reg;			/* IRQ multiplex register on KIM for MSI prototype */
    volatile u8 *	vsc_fb;				/* mapped VSC video memory */
    char		vsc_irq_nr;			/* VSC irq number */
    int			vsc_isr_registered;		/* VSC irq registered flag */
    char		vsc_irq_name[IRQ_NAME_LEN];	/* for output in /proc fs */
    u8			local_video;			/* video switch pin on lattice */
    vsc_measures_t	measures;			/* offset/length measurements temp storage */
    
    wait_queue_head_t	vsc_sync_wq;			/* processes waiting for sync irq */
    volatile u8		vsc_sync_irq_state;		/* sync irq occurred */
    spinlock_t		vsc_sync_irq_lock;		/* protect sync irq state */
    volatile u8		vsc_hm_end_irq;			/* line for offset measurement reached */
    spinlock_t		vsc_hm_end_irq_lock;		/* protect vsc_hm_end_irq */
    volatile u8		vsc_hs_slow_irq;		/* horizontal sync too slow */
    volatile u8		vsc_vs_slow_irq;		/* vertical sync too slow */
    spinlock_t		vsc_sync_slow_irq_lock;		/* protect both sync slow flags */
    volatile u8		vsc_eop_irq;			/* end of processing irq occurred */
    volatile u8		vsc_dtr_irq;			/* data ready irq occurred */
    volatile u8		vsc_fifo_err;			/* video fifo error (overflow) signaled */
    volatile u8		vsc_adc_res_ok;			/* adc clock domain reset done */
    spinlock_t		vsc_adc_res_ok_lock;		/* protect vsc_adc_res_ok */
        
    wait_queue_head_t	vsc_grab_wq;			/* process waiting for grab irq */

#ifdef VSC_USE_PCI
    struct pci_dev *	vsc_pci_dev;			/* combined PCI device for VSC & co */
#endif
} fd_data_t;

/*
 * data structure per usage of the module (open/release)
 *
 * Allocated in lara_vsc_open(), stored in file->private_data and freed in
 * lara_vsc_release().
 */
typedef struct {
    fd_data_t *fd_data;	/* points to the appropriate fd data (channel) */
} priv_data_t;

/* ------------------------------------------------------------------------- *
 * global variables
 * ------------------------------------------------------------------------- */

/* driver identity, used when registering the chrdev and the DMA channel */
typedef struct {
    char	name[15];
} lara_dev_t;

static lara_dev_t dev = {
    .name = DRV_NAME,
};

/*
 * Per-channel initialization table: entry i seeds fd_data[i] in
 * lara_vsc_init().
 */
static fd_init_data_t fd_init_data[] = {
#ifdef LARA_KACY
    /* KACY: four channels that all share one IRQ, so no per-channel irq */
    { .vsc_base = 0xF0000000, .vsc_irq_nr = -1 },
    { .vsc_base = 0xF0800000, .vsc_irq_nr = -1 },
    { .vsc_base = 0xF1000000, .vsc_irq_nr = -1 },
    { .vsc_base = 0xF1800000, .vsc_irq_nr = -1 },
#else
    /* single channel; no static base is given when the VSC is reached via PCI */
    {
# if !defined(VSC_USE_PCI)
	.vsc_base	= VSC_BASE_ADDR,
# endif
	.vsc_irq_nr	= VSC_IRQ,
    },
#endif
};

/*
 * Template for the channel independent part of fd_data_t. lara_vsc_init()
 * copies it into every fd_data[] entry before the per-channel values are
 * filled in. Members that are not named here start out zeroed.
 */
static fd_data_t fd_common_init_data = {
    .id				= 0,
    .fb_format			= {},
    .vsc_diffmap_cpu		= NULL,
    .vsc_diffmap_bus		= 0,
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
    .dma_chan			= {},
    .dma_chan_nr		= VSC_DMA_CHAN_0,
    .dma_irq_nr			= VSC_DMA_IRQ_0,
    .dma_error			= 0,
    .dma_wq			= {},
    .dma_size			= 0,
    .dma_sgl			= 0,
    .dma_chan_registered	= 0,
    .dma_isr_registered		= 0,
#endif
    .reg_buf			= NULL,
    .vsc_mtx			= {},
    .vsc_regs_lock		= SPIN_LOCK_UNLOCKED,
    .vsc_regs			= NULL,
    .vsc_intmux_reg		= NULL,
    .vsc_fb			= NULL,
    .vsc_irq_name		= "",
    .vsc_isr_registered		= 0,
    .local_video		= 0,
    .vsc_sync_wq		= {},
    .vsc_sync_irq_state		= 0,
    .vsc_sync_irq_lock		= SPIN_LOCK_UNLOCKED,
    .vsc_hm_end_irq		= 0,
    .vsc_hm_end_irq_lock	= SPIN_LOCK_UNLOCKED,
    .vsc_hs_slow_irq		= 0,
    .vsc_vs_slow_irq		= 0,
    .vsc_sync_slow_irq_lock	= SPIN_LOCK_UNLOCKED,
    .vsc_adc_res_ok		= 0,
    .vsc_adc_res_ok_lock	= SPIN_LOCK_UNLOCKED,
    .vsc_eop_irq		= 0,
    .vsc_dtr_irq		= 0,
    .vsc_fifo_err		= 0,
    .measures			= {},
    .vsc_grab_wq		= {},
    .vsc_active			= VSC_ACTIVE_INIT,
#ifdef VSC_USE_PCI
    .vsc_pci_dev		= NULL,
#endif
};

/*
 * Per-channel state. Every entry is built in lara_vsc_init() from
 * fd_common_init_data plus the channel's fd_init_data[] entry.
 */
static fd_data_t fd_data[PP_FEAT_CHANNEL_COUNT];

/*
 * Memory pool buffer, one instance shared by all channels.
 * TODO(miba): might want to make this channel specific as well
 */
static u8 *mem_pool_cpu = NULL;		/* kernel virtual address of the pool */
static dma_addr_t mem_pool_bus = 0;	/* bus address of the pool */
static u_char *mem_pool_user = NULL;	/* user space address, set by the last mmap */

#ifdef VSC_SHARED_IRQ
/* non-zero once the shared VSC irq handler has been installed */
static u_char vsc_shared_isr_registered = 0;
#endif

/* ------------------------------------------------------------------------- *
 * function prototypes
 * ------------------------------------------------------------------------- */

/* module entry/exit (only defined when MODULE is set) and driver init/cleanup */
int         init_module(void);
void        cleanup_module(void);
int         lara_vsc_init(void);
static void lara_vsc_cleanup(void);
/* file operations, see lara_vsc_ops */
static int  lara_vsc_ioctl(struct inode *, struct file *, uint, ulong);
static int  lara_vsc_open(struct inode *, struct file *);
static int  lara_vsc_release(struct inode *, struct file *);
static int  lara_vsc_mmap(struct file *, struct vm_area_struct *);

/* VSC initialization */
static int  vsc_init(fd_data_t *data);
static void vsc_cleanup(fd_data_t *data);

/* VSC register access functions, they grab vsc_regs_lock */
static void vsc_write_reg(fd_data_t *data, u8 reg, u32 val);
static u32 vsc_read_reg_secure(fd_data_t *data, u8 reg, int *error);
static void vsc_write_reg_masked(fd_data_t *data, u8 reg, u32 val, u32 mask);
static inline void vsc_write_reg_masked_noshadow(fd_data_t *data, u8 reg, u32 val, u32 mask);

/* VSC high level functions */
static int vsc_do_regop(fd_data_t *data, vsc_regop_t *regop);
static int vsc_measure_picture(fd_data_t *data, vsc_measures_t *measures);
static int vsc_autoadjust_step(fd_data_t *data);
static int vsc_set_fb_format(fd_data_t *data, fb_format_info_t *fb_f_info);
static int vsc_sample_and_diff(fd_data_t *data, u_char flags);
static int vsc_fetch_tiles(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc);
static int vsc_get_clock(fd_data_t *data);

/* VSC low level functions */
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
static int vsc_transfer_diffmap_ppcdma(fd_data_t *data);
#else
static int vsc_transfer_diffmap(fd_data_t *data);
#endif
static int vsc_transfer(fd_data_t *data, uint32_t bts, uint32_t btf, uint32_t bco, uint32_t bro, uint32_t bta,
			u_char* buf, u_char* source_buf, u_long *count, u_long* last_valid);
static int vsc_fetch_box(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
			 BoxRec *box, u_char* buf, u_long *count);
static int vsc_fetch_box_raw(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
			     BoxRec *box, u_char* buf, u_long *count);

/* ISRs: vsc_isr is the handler passed to request_irq, vsc_channel_isr serves one channel */
static irqreturn_t vsc_isr(int irq, void * dev_id, struct pt_regs * regs);
static irqreturn_t vsc_channel_isr(fd_data_t *data);
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
static irqreturn_t dma_isr(int irq, void * dev_id, struct pt_regs * regs);
#endif

/* ------------------------------------------------------------------------- *
 * linux kernel module stuff
 * ------------------------------------------------------------------------- */

#if 0
/*
 * Debug helper: hex dump 'size' bytes of 'buf' through printk and start a
 * new output line every 'brk' bytes. A 'brk' of zero or less disables the
 * line breaks instead of dividing by zero.
 */
static void
print_buffer(unsigned char* buf, int size, int brk)
{
    int i;

    /* a pointer has to be printed with %p, not with an int conversion */
    printk("print_buffer @ 0x%p:\n", buf);
    for (i = 0; i < size; i++) {	
	if (brk > 0 && (i % brk) == 0) printk("\n");
	printk("%02x ", buf[i]);
    }
    printk("\n");
}
#endif

#ifdef MODULE
MODULE_AUTHOR("miba@peppercon.de");
MODULE_DESCRIPTION("VSC device driver");

/*
 * Module entry point, the actual work is done by lara_vsc_init()
 */
int init_module(void)
{
    int rc = lara_vsc_init();

    return rc;
}

/*
 * Module exit point - release all driver resources, then unregister the
 * character device
 */
void cleanup_module(void)
{
    int rc;

    lara_vsc_cleanup();

    rc = unregister_chrdev(VSC_MAJOR, dev.name);
    if (rc >= 0) {
	D(D_ALWAYS, "driver unregistered\n");
    } else {
	D(D_ALWAYS, "failed to unregister driver (%d)\n", rc);
    }
}
#endif	/* MODULE */

/* ------------------------------------------------------------------------- *
 * structure with driver operations
 * ------------------------------------------------------------------------- */

/* file operations offered through the character device */
static struct file_operations lara_vsc_ops = {
    .owner   = THIS_MODULE,
    .ioctl   = lara_vsc_ioctl,
    .open    = lara_vsc_open,
    .release = lara_vsc_release,
    .mmap    = lara_vsc_mmap,
};

/* ------------------------------------------------------------------------- *
 * driver initialization/cleanup
 * ------------------------------------------------------------------------- */

/*
 * Driver initialization.
 *
 * For every channel: build the fd_data[] entry from the common and the
 * channel specific init data, set up DMA (PowerPC software encoding builds
 * only), allocate the diffmap and the region rect buffer and call vsc_init().
 * Afterwards the shared memory pool is allocated, the shared VSC irq is
 * requested (VSC_SHARED_IRQ builds only) and the character device is
 * registered.
 *
 * Returns SUCCESS or a negative errno. On any failure lara_vsc_cleanup() is
 * called before returning, so nothing stays allocated.
 */
int __init
lara_vsc_init(void)
{
    int rc = SUCCESS;
    int i, r;
    unsigned long addr, size;

    D(D_ALWAYS, "driver init, debuglevel=%d\n", DEBUGLEVEL);

    /* check available init data */
    if (PP_FEAT_CHANNEL_COUNT > sizeof(fd_init_data)/sizeof(fd_init_data_t)) {
	D(D_ERROR, "Not enough init data provided for %d channels (only for %d)\n",
	  PP_FEAT_CHANNEL_COUNT, sizeof(fd_init_data)/sizeof(fd_init_data_t));
	rc = -ENODEV;
	goto error_out;
    }
    
    /* ----------- init data for each channel ------------------------------ */
    for (i=0; i<PP_FEAT_CHANNEL_COUNT; i++) {
	fd_data_t *data = &fd_data[i];
	fd_init_data_t *init_data = &fd_init_data[i];
	
	/* ------------- init channel specific values/structures ----------- */
	/* start from the common template, then overwrite the per-channel members */
	memcpy(data, &fd_common_init_data, sizeof(fd_data_t));

	data->id = i;
	data->vsc_base = init_data->vsc_base;
	data->vsc_irq_nr = init_data->vsc_irq_nr;
	snprintf(data->vsc_irq_name, sizeof(data->vsc_irq_name), "VSC %d",
		 data->id);
	
	/* the template only holds placeholders for these, initialize them properly */
	init_MUTEX(&data->vsc_mtx);
	init_waitqueue_head(&data->vsc_sync_wq);
	init_waitqueue_head(&data->vsc_grab_wq);
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
	init_waitqueue_head(&data->dma_wq);

	/* ------------- initialize DMA, SGL handle ------------------------ */
	data->dma_chan.buffer_enable = 1;
	data->dma_chan.tce_enable = 1;
	data->dma_chan.etd_output = 1;
	data->dma_chan.pce = 0;	
	data->dma_chan.pwidth = PW_64;
	data->dma_chan.dai = 1;
	data->dma_chan.sai = 1;
	data->dma_chan.cp = PRIORITY_HIGH;
        data->dma_chan.int_enable = 1;
	ppc4xx_init_dma_channel(data->dma_chan_nr, &data->dma_chan);

	if (ppc4xx_alloc_dma_handle(&data->dma_sgl, DMA_MODE_MM, data->dma_chan_nr)) {
	    D(D_ERROR, "failed to allocate DMA sgl handle for channel %d\n", data->id);
	    rc = -ENODEV;
	    goto error_out;
	}
	if (request_irq(data->dma_irq_nr, dma_isr, SA_SHIRQ | SA_SAMPLE_RANDOM,
			"LARA DMA", data)) {
	    D(D_ERROR, "couldn't register DMA irq handler for channel %d\n", data->id);
	    rc = -ENODEV;
	    goto error_out;
	}
	/* remembered so that lara_vsc_cleanup() only frees what was acquired */
	data->dma_isr_registered = 1;
	
	if (request_dma(data->dma_chan_nr, dev.name)) {
	    D(D_ERROR, "unable to get DMA channel for channel %d\n", data->id);
	    rc = -ENODEV;
	    goto error_out;
	}
	data->dma_chan_registered = 1;
	ppc4xx_set_dma_mode(data->dma_chan_nr, DMA_MODE_MM);
#endif /* !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__) */
	
	/* ------------------------------- diffmap ------------------------------- */

	data->vsc_diffmap_cpu = pci_alloc_consistent(NULL, VSC_DIFFMAP_SIZE, &data->vsc_diffmap_bus);
	if (data->vsc_diffmap_cpu == NULL) {
	    D(D_ERROR, "failed to allocate vsc diffmap for channel %d\n", data->id);
	    rc = -ENOMEM;
	    goto error_out;
	}

	/* ------------------------- region rect buffer----------------------------- */
	data->reg_buf = (BoxRec*) vmalloc(REG_BUF_SIZE);
	if (data->reg_buf == NULL) {
	    D(D_ERROR, "failed to allocate region rect buffer for channel %d\n", data->id);
	    rc = -ENOMEM;
	    goto error_out;
	}
	D(D_NOTICE, "got region rect buffer for channel %d (%d on %p virtual)\n", data->id,
	  REG_BUF_SIZE, data->reg_buf);

	if (vsc_init(data) != 0) {
	    D(D_ERROR, "VSC initialization failed for channel %d\n", data->id);
	    rc = -ENODEV;
	    goto error_out;
	}

	/* NOTE(review): success is reported at D_ERROR level - presumably to make it always visible, confirm */
	D(D_ERROR, "VSC initialized for channel %d\n", data->id);
    }

    /* ------------------- alloc memory pool ----------------------------------- */
    size = VSC_MEM_POOL_SIZE;
    mem_pool_cpu = bigphysarea_alloc_pages(size/PAGE_SIZE, 0, GFP_KERNEL);
    if (mem_pool_cpu) {
	mem_pool_bus = virt_to_bus(mem_pool_cpu);
	addr = (unsigned long)mem_pool_cpu;
	/*
	 * mark every page of the pool as reserved; the loop only terminates
	 * properly if VSC_MEM_POOL_SIZE is a multiple of PAGE_SIZE, as size
	 * is unsigned
	 */
	while (size > 0) {
	    SetPageReserved(virt_to_page(addr));
	    addr += PAGE_SIZE;
	    size -= PAGE_SIZE;
	}
	memset(mem_pool_cpu, 0x00, VSC_MEM_POOL_SIZE);
    } else {
	D(D_ERROR, "failed to allocate physical memory pool\n");
	rc = -ENOMEM;
	goto error_out;
    }

    D(D_NOTICE, "got physical memory pool (%d on %p virtual/%08x bus)\n",
      VSC_MEM_POOL_SIZE, mem_pool_cpu, mem_pool_bus);
    
    /* ----- shared IRQ for VSC ---------------------------------------- */
#ifdef VSC_SHARED_IRQ
    /* one handler for all channels, hence no dev_id; vsc_isr() walks fd_data[] itself */
    if (request_irq(VSC_SHARED_IRQ, vsc_isr, SA_SAMPLE_RANDOM, "VSC", NULL)) {
	D(D_ERROR, "couldn't register shared VSC irq handler\n");
	rc = -ENODEV;
	goto error_out;
    }

#ifdef __arm__
    cpe_int_set_irq(VSC_SHARED_IRQ, LEVEL, H_ACTIVE);
    D(D_BLABLA, "enabled shared irq: %d\n", VSC_SHARED_IRQ);
#endif
    
    vsc_shared_isr_registered = 1;
    D(D_NOTICE, "shared VSC ISR registered\n");
#endif
    
    /* ---- register the character device ------------------------------ */
    /* done last, so the device only becomes reachable once everything else is set up */
    if ((r = register_chrdev(VSC_MAJOR, dev.name, &lara_vsc_ops)) < 0) {
	D(D_ERROR, "failed to register driver (%d)\n", r);
	rc = -ENODEV;
	goto error_out;
    }

    D(D_ALWAYS, "kernel module loaded successfully!\n");
    return rc;
    
 error_out:
    /* releases whatever has been acquired up to the point of failure */
    lara_vsc_cleanup();
    D(D_ALWAYS, "kernel module didn't load successfully %d!\n", rc);
    return rc;
}

static void
lara_vsc_cleanup(void)
{
    fd_data_t *data;
    unsigned long addr, size;	
    int i;

#ifdef VSC_SHARED_IRQ
    if (vsc_shared_isr_registered) {
	free_irq(VSC_SHARED_IRQ, NULL);
    }
#endif	
    for (i=0; i<PP_FEAT_CHANNEL_COUNT; i++) {
	data = &fd_data[i];

	vsc_cleanup(data);
	
	if (data->vsc_diffmap_cpu) {
	    pci_free_consistent(NULL, VSC_DIFFMAP_SIZE, data->vsc_diffmap_cpu, data->vsc_diffmap_bus);
	}
#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
	if (data->dma_chan_registered) {
	    free_dma(data->dma_chan_nr);
	}
	
	if (data->dma_isr_registered) {
	    free_irq(data->dma_irq_nr, data);
	}

	if (data->dma_sgl) {
	    ppc4xx_clear_dma_sgl(data->dma_sgl);
	    ppc4xx_free_dma_handle(data->dma_sgl);
	}
#endif
	if (data->reg_buf) {
	    vfree(data->reg_buf);
	}
    }

    if (mem_pool_cpu) {
	size = VSC_MEM_POOL_SIZE;
	addr = (unsigned long) mem_pool_cpu;
	while (size > 0) {
	    ClearPageReserved(virt_to_page(addr));
	    addr += PAGE_SIZE;
	    size -= PAGE_SIZE;
	}
	bigphysarea_free_pages(mem_pool_cpu);
    }    
}

/* ------------------------------------------------------------------------- *
 * VSC initialization
 * ------------------------------------------------------------------------- */

/*
 * Bring up the VSC of one channel: determine the physical base (from the PCI
 * device when VSC_USE_PCI is set), map the register space, the framebuffer
 * and - with VSC_USE_INTMUX - the irq multiplexer register, and install the
 * channel irq handler unless a shared irq is used.
 *
 * Returns 0 on success or a negative errno. Nothing is undone here on
 * failure, partially acquired resources are released by vsc_cleanup().
 */
static int
vsc_init(fd_data_t *data)
{
    /* ------------------------ determine VSC base ----------------------------- */
#ifdef VSC_USE_PCI
    data->vsc_pci_dev = pci_find_device(VSC_PCI_VENDOR_ID, VSC_PCI_DEVICE_ID, NULL);
    if (!data->vsc_pci_dev) {
	D(D_ERROR, "PCI device not detected\n");
	return -ENODEV;
    }
    data->vsc_base = pci_resource_start(data->vsc_pci_dev, VSC_PCI_BAR_NO);
#endif
    D(D_NOTICE, "using physical VSC base 0x%08lx\n", data->vsc_base);

    /* ------------------------ VSC register space ----------------------------- */
    data->vsc_regs = ioremap_nocache(data->vsc_base + VSC_REGS_OFFSET, VSC_REGS_SIZE);
    if (!data->vsc_regs) {
	D(D_ERROR, "failed to map VSC regs for channel %d\n", data->id);
	return -ENOMEM;
    }
    D(D_NOTICE, "mapped VSC Regs from 0x%08lx to 0x%p (%d Byte)\n", data->vsc_base + VSC_REGS_OFFSET,
      data->vsc_regs, VSC_REGS_SIZE);

    /* the version register is only read when the VSC is marked as active */
    if (data->vsc_active) {
	D(D_NOTICE, "VSC version: %x\n", data->vsc_regs[VSC_REG_VR]);
    }

    /* ------------------------ VSC framebuffer -------------------------------- */
    data->vsc_fb = ioremap_nocache(data->vsc_base + VSC_FB_OFFSET, VSC_FB_SIZE);
    if (!data->vsc_fb) {
	D(D_ERROR, "failed to map VSC framebuffer for channel %d\n", data->id);
	return -ENOMEM;
    }
    D(D_NOTICE, "mapped VSC framebuffer to 0x%p (%d Byte)\n",
      data->vsc_fb, VSC_FB_SIZE);

#ifdef VSC_USE_INTMUX
    /* ------------------------ irq multiplexer -------------------------------- */
    data->vsc_intmux_reg = ioremap_nocache(data->vsc_base + INTMUX_OFFSET, INTMUX_SIZE);
    if (!data->vsc_intmux_reg) {
	D(D_ERROR, "failed to map VSC IRQ multiplexer\n");
	return -ENODEV;
    }
    D(D_NOTICE, "mapped VSC IRQ multiplexer to 0x%p (%d Byte)\n",
      data->vsc_intmux_reg, INTMUX_SIZE);
#endif

#if !defined(VSC_SHARED_IRQ)
    /* ------------------------ per-channel VSC irq ---------------------------- */
    if (request_irq(data->vsc_irq_nr, vsc_isr, SA_SHIRQ | SA_SAMPLE_RANDOM,
		    data->vsc_irq_name, data)) {
	D(D_ERROR, "couldn't register VSC %d irq handler\n", data->id);
	return -ENODEV;
    }

#ifdef __arm__
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    cpe_int_set_irq(data->vsc_irq_nr, LEVEL, H_ACTIVE);
# else
    set_irq_type(data->vsc_irq_nr, IRQT_HIGH);  
# endif
    D(D_BLABLA, "enabled irq: %d\n", data->vsc_irq_nr);
#endif

    data->vsc_isr_registered = 1;
    D(D_NOTICE, "VSC %d ISR registered\n", data->id);
#endif

    return 0;
}

/*
 * Counterpart of vsc_init(): mask all VSC irqs, free the channel irq and
 * unmap irq multiplexer, framebuffer and registers. Each released resource
 * is reset, so the function may run on a partially initialized channel and
 * more than once. The channel is marked inactive at the end.
 */
static void
vsc_cleanup(fd_data_t *data)
{
    /* silence the VSC first, but only if it is mapped and accessible */
    if ((data->vsc_regs != NULL) && data->vsc_active) {
	data->vsc_regs[VSC_REG_IMR] = 0;
    }

#if !defined(VSC_SHARED_IRQ)
    if (data->vsc_isr_registered) {
	free_irq(data->vsc_irq_nr, data);
	data->vsc_isr_registered = 0;
    }
#endif

#ifdef VSC_USE_INTMUX
    if (data->vsc_intmux_reg) {
	iounmap((void *)data->vsc_intmux_reg);
	data->vsc_intmux_reg = NULL;
    }
#endif

    if (data->vsc_fb) {
	iounmap((void *)data->vsc_fb);
	data->vsc_fb = NULL;
    }

    if (data->vsc_regs) {
	iounmap((void *)data->vsc_regs);
	data->vsc_regs = NULL;
    }

    data->vsc_active = 0;
}

/* ------------------------------------------------------------------------- *
 * the driver operations
 * ------------------------------------------------------------------------- */

/*
 * ioctl entry point of the VSC device.
 *
 * 'arg' is a user space pointer whose type depends on 'cmd':
 *   PPIOCVSCACTIVATE        - none; marks the VSC as active (on PowerPC only
 *                             after the version register could be read)
 *   PPIOCVSCREGOP           - vsc_regop_t, in/out
 *   PPIOCVSCGETSYNCIRQ      - sync_wait_t, in/out; optionally waits up to
 *                             HZ/2 for a sync irq
 *   PPIOCVSCGETSYNCSPEED    - vsc_sync_speed_t, in/out
 *   PPIOCVSCRSTSYNCSPEED    - none; clears the sync slow flags and re-enables
 *                             their irqs
 *   PPIOCVSCMEASUREPICTURE  - vsc_measures_t, in/out
 *   PPIOCVSCAUTOADJUSTSTEP  - none
 *   PPIOCVSCSAMPLE          - none
 *   PPIOCVSCSAMPLEANDDIFF   - out; receives VSC_DIFFMAP_SIZE bytes of diffmap
 *   PPIOCVSCFETCHTILES      - vsc_fetch_descriptor_t, in/out
 *   PPIOCSETFBFORMATINFO    - fb_format_info_t, in
 *   PPIOCVSCGETCLOCK        - int, out
 *   PPIOCVSCGETADCRESOK     - u_char, out; the flag is cleared once delivered
 *
 * Returns SUCCESS or the result of the invoked VSC function, -EFAULT if user
 * memory could not be accessed, -EIO if the VSC is not active (for the
 * commands that check it), -ERESTARTSYS if a signal is pending after a sync
 * irq wait and -EINVAL for an unknown command.
 */
static int
lara_vsc_ioctl(struct inode * inode, struct file * file, uint cmd, ulong arg)
{
    DECLARE_WAITQUEUE(wait, current);    
    priv_data_t *pdata = file->private_data;
    fd_data_t *data = pdata->fd_data;
    vsc_regop_t regop;
    sync_wait_t sync_wait;
    vsc_sync_speed_t sync_speed;
    vsc_fetch_descriptor_t fetch_desc;
    fb_format_info_t fb_f_info;
    u_long flags;
    u_char adc_res_ok;
    int err = 0, ret, sched_ret;

    switch (cmd) {

      case PPIOCVSCACTIVATE:
#ifdef __powerpc__
	  {	      
	      u32 val;
	      int err;

	      /* read that reports a failure through 'err'; only activate a VSC that answers */
	      val = mcs_in_le32(&data->vsc_regs[VSC_REG_VR], &err);
	      if (err != 0) {
		  D(D_ERROR, "PPIOCVSCACTIVATE: couldn't read VSC version (error=%d)\n", err);
		  return -EIO;
	      }
	      D(D_NOTICE, "PPIOCVSCACTIVATE: REG_VR==0x%08x\n", le32_to_cpu(val));
	  }
#endif
	  data->vsc_active = 1;
	  return SUCCESS;
	  
      case PPIOCVSCREGOP:
	  if (!data->vsc_active) return -EIO;
		  
          if(copy_from_user(&regop, (char *)arg, sizeof(vsc_regop_t))) {
              return -EFAULT;
          }
	  err = vsc_do_regop(data, &regop);
          if (copy_to_user((char *)arg, &regop, sizeof(vsc_regop_t))) {
              return -EFAULT;
          }
	  return err;

      case PPIOCVSCGETSYNCIRQ:
          if(copy_from_user(&sync_wait, (char *)arg, sizeof(sync_wait_t))) {
              return -EFAULT;
          }
	  
	  sync_wait.got_irq = 0;
	  spin_lock_irqsave(&data->vsc_sync_irq_lock, flags);
	  if (data->vsc_sync_irq_state || !sync_wait.do_wait) {
	      /* irq already pending or caller doesn't want to wait: report and consume the state */
	      sync_wait.got_irq = data->vsc_sync_irq_state;
	      data->vsc_sync_irq_state = 0;

	      /* the ISR masks the sync irq after it fired, enable it again */
	      data->vsc_regs[VSC_REG_IMR] |= IMR_SFC;
	      spin_unlock_irqrestore(&data->vsc_sync_irq_lock, flags);

	      if (copy_to_user((char *)arg, &sync_wait, sizeof(sync_wait_t))) {
		  return -EFAULT;
	      }
	      return SUCCESS;
	  }
	  /* queue ourselves while still holding the lock, so a wakeup from the ISR can't be missed */
	  current->state = TASK_INTERRUPTIBLE;
	  add_wait_queue(&data->vsc_sync_wq, &wait);
	  spin_unlock_irqrestore(&data->vsc_sync_irq_lock, flags);	  
	  sched_ret = schedule_timeout(HZ/2); // TODO(miba): configurable timeout
	  remove_wait_queue(&data->vsc_sync_wq, &wait);

	  /* a remaining timeout of 0 means nobody woke us up */
	  sync_wait.got_irq = (sched_ret == 0) ? 0 : 1;
	  if (copy_to_user((char *)arg, &sync_wait, sizeof(sync_wait_t))) {
	      return -EFAULT;
	  }
	  if (signal_pending(current)) return -ERESTARTSYS;
	  return SUCCESS;

      case PPIOCVSCGETSYNCSPEED:
          if(copy_from_user(&sync_speed, (char *)arg, sizeof(vsc_sync_speed_t))) {
              return -EFAULT;
          }

	  spin_lock_irqsave(&data->vsc_sync_slow_irq_lock, flags);
	  sync_speed.hs_slow = data->vsc_hs_slow_irq;
	  sync_speed.vs_slow = data->vsc_vs_slow_irq;
	  spin_unlock_irqrestore(&data->vsc_sync_slow_irq_lock, flags);
	
	  if (copy_to_user((char *)arg, &sync_speed, sizeof(vsc_sync_speed_t))) {
	      return -EFAULT;
	  }	  
	  return SUCCESS;

      case PPIOCVSCRSTSYNCSPEED:	  
	  spin_lock_irqsave(&data->vsc_sync_slow_irq_lock, flags);
	  data->vsc_hs_slow_irq = data->vsc_vs_slow_irq = 0;
	  vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_HS_SLOW | IMR_VS_SLOW, IMR_HS_SLOW | IMR_VS_SLOW);
	  spin_unlock_irqrestore(&data->vsc_sync_slow_irq_lock, flags);
	  return SUCCESS;
	  
      case PPIOCVSCMEASUREPICTURE:
          if(copy_from_user(&data->measures, (char *)arg, sizeof(vsc_measures_t))) {
              return -EFAULT;
          }
	  err = vsc_measure_picture(data, &data->measures);
	  if (copy_to_user((char *)arg, &data->measures, sizeof(vsc_measures_t))) {
	      return -EFAULT;
	  } 
	  return err;

      case PPIOCVSCAUTOADJUSTSTEP:
	  if (!data->vsc_active) return -EIO;
		  
	  err = vsc_autoadjust_step(data);
	  return err;

      case PPIOCVSCSAMPLE:
	  return vsc_sample_and_diff(data, VSC_SD_SAMPLE_ONLY);

      case PPIOCVSCSAMPLEANDDIFF:
	  if (!data->vsc_active) return -EIO;

	  err = vsc_sample_and_diff(data, VSC_SD_NORMAL);
	  /* the diffmap is copied out even if sampling failed, err is still returned */
	  if (copy_to_user((char*)arg, data->vsc_diffmap_cpu,
			   VSC_DIFFMAP_SIZE)) {
	      err = -EFAULT;
	  }
	  return err;
	    
      case PPIOCVSCFETCHTILES:
	  if (copy_from_user(&fetch_desc, (char *)arg, sizeof(vsc_fetch_descriptor_t))) {
	      return -EFAULT;
	  }
	  err = vsc_fetch_tiles(data, &fetch_desc);
	  if (copy_to_user((char *)arg, &fetch_desc, sizeof(vsc_fetch_descriptor_t))) {
	      return -EFAULT;
	  }
	  return err;

      case PPIOCSETFBFORMATINFO:
	  if (copy_from_user(&fb_f_info, (char *)arg, sizeof(fb_format_info_t))) {
	      return -EFAULT;
	  }	  
	  return vsc_set_fb_format(data, &fb_f_info);

      case PPIOCVSCGETCLOCK:
	  ret = vsc_get_clock(data);
	  if (copy_to_user((char *)arg, &ret, sizeof(int))) {
	      return -EFAULT;
	  }
	  return SUCCESS;
	  
      case PPIOCVSCGETADCRESOK:
	  /*
	   * copy_to_user() may fault and sleep, so it must not run with the
	   * spinlock held and irqs disabled. Fetch and clear the flag under
	   * the lock and copy the snapshot afterwards.
	   */
	  spin_lock_irqsave(&data->vsc_adc_res_ok_lock, flags);
	  adc_res_ok = data->vsc_adc_res_ok;
	  data->vsc_adc_res_ok = 0;
	  spin_unlock_irqrestore(&data->vsc_adc_res_ok_lock, flags);

	  if (copy_to_user((char *)arg, &adc_res_ok, sizeof(u_char))) {
	      /* not delivered: restore a set flag, it must survive a failed call */
	      if (adc_res_ok) {
		  spin_lock_irqsave(&data->vsc_adc_res_ok_lock, flags);
		  data->vsc_adc_res_ok = adc_res_ok;
		  spin_unlock_irqrestore(&data->vsc_adc_res_ok_lock, flags);
	      }
	      return -EFAULT;
	  }
	  return SUCCESS;	  
      default:
	  D(D_ERROR, "Invalid ioctl request %08x on channel %d\n", cmd, data->id);
	  return -EINVAL;
    }
}

static int
lara_vsc_open(struct inode * inode, struct file * file)
{
    priv_data_t *pdata;
    int minor = MINOR(inode->i_rdev);
    
    if (minor > PP_FEAT_CHANNEL_COUNT-1) {
	D(D_ERROR, "Could not open VSC dev %d (maxi devnr. is %d)\n",
	  minor, PP_FEAT_CHANNEL_COUNT-1);
	return -ENODEV;
    }
    
    if (file->private_data == NULL) {
	pdata = kmalloc(sizeof(priv_data_t), GFP_KERNEL);
	pdata->fd_data = &fd_data[minor];

	file->private_data = (void*) pdata;
    }

    return SUCCESS;
}

static int
lara_vsc_release(struct inode * inode, struct file * file)
{
    priv_data_t *pdata = (priv_data_t*) file->private_data;

    if (pdata) {
	kfree(pdata);
    }

    return SUCCESS;
}

/*
 * mmap entry point: map the shared memory pool into the calling process.
 *
 * The mapping has to cover exactly VSC_MEM_POOL_SIZE bytes. The pool is
 * shared, so the mapping is the same for every VSC device. The user space
 * start address is remembered in mem_pool_user.
 *
 * Returns 0 on success, -ENOMEM if there is no pool or the size doesn't
 * match, -EAGAIN if the pages could not be remapped.
 */
static int
lara_vsc_mmap(struct file *filep, struct vm_area_struct *vma)
{
    unsigned long user_start = (unsigned long) vma->vm_start;
    unsigned long length = (unsigned long) vma->vm_end - vma->vm_start;
    unsigned long pool_addr;

    if (!mem_pool_cpu) return -ENOMEM;
    if (length != VSC_MEM_POOL_SIZE) return -ENOMEM;

    pool_addr = mem_pool_bus;

    D(D_VERBOSE, "remapping pages (vm dest=%08lx, phys src=%08lx, size %ld)\n",
      user_start, pool_addr, length);

    /* the remap interface differs between 2.4 and 2.6 kernels */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    if (remap_page_range(user_start, pool_addr, length, vma->vm_page_prot) != 0) {
	return -EAGAIN;
    }
#else
    if (remap_pfn_range(vma, user_start, pool_addr >> PAGE_SHIFT, length, vma->vm_page_prot) != 0) {
	return -EAGAIN;
    }
#endif

    mem_pool_user = (u_char*)user_start;

    return 0;
}

/* ------------------------------------------------------------------------- *
 * ISR
 * ------------------------------------------------------------------------- */
    
/*
 * VSC irq handler as passed to request_irq().
 *
 * Without a shared irq, dev_id is the channel the irq was requested for.
 * With VSC_SHARED_IRQ every channel is served, and the irq counts as handled
 * as soon as one channel handled it.
 */
static irqreturn_t
vsc_isr(int irq, void * dev_id, struct pt_regs * regs)
{
#ifndef VSC_SHARED_IRQ
    return vsc_channel_isr((fd_data_t*) dev_id);
#else
    irqreturn_t result = IRQ_NONE;
    int chan;

    for (chan = 0; chan < PP_FEAT_CHANNEL_COUNT; chan++) {
	if (vsc_channel_isr(&fd_data[chan]) == IRQ_HANDLED) {
	    result = IRQ_HANDLED;
	}
    }

    return result;
#endif
}

/*
 * Serve the VSC irq of one channel.
 *
 * Reads the VSC irq status register and records every signalled event in
 * the matching fd_data_t flag. Processes waiting for the sync irq or for the
 * end of processing irq are woken up. Most event irqs are masked in the IMR
 * once they have been recorded.
 *
 * Returns IRQ_NONE if the irq doesn't belong to this channel (irq
 * multiplexer bit not set, or - with a shared irq - no enabled irq pending),
 * IRQ_HANDLED otherwise. An irq for an inactive VSC is reported as handled
 * without touching the VSC registers.
 */
static irqreturn_t
vsc_channel_isr(fd_data_t *data)
{
    u_long flags, reg;

#ifdef VSC_USE_INTMUX
    {
	u_long mux = readl(&data->vsc_intmux_reg[0]);
	
# ifdef DEBUG_ISR
	D(D_BLABLA, "INTMUX %08lx\n", mux);
# endif
	if (!test_bit(VSC_INTMUX_BIT, &mux)) {
	    // this is not our IRQ
	    return IRQ_NONE;
	}
    }
#endif

    if (!data->vsc_active) {
	D(D_ERROR, "VSC not active, but got irq!\n");
	return IRQ_HANDLED;
    }
    
    reg = data->vsc_regs[VSC_REG_ISR];
#ifdef DEBUG_ISR
    D(D_BLABLA, "VSC IRQ %08lx\n", reg);
#endif
    
#ifdef VSC_SHARED_IRQ
    if ((reg & data->vsc_regs[VSC_REG_IMR]) == 0) {
	// this is not our IRQ
	return IRQ_NONE;
    }
#endif

    if (reg & ISR_SFC) {
	spin_lock_irqsave(&data->vsc_sync_irq_lock, flags);
	data->vsc_sync_irq_state = 1;
#ifdef DEBUG_ISR
	D(D_BLABLA, "iSFC\n");
#endif
	wake_up_interruptible(&data->vsc_sync_wq);
	// masked until PPIOCVSCGETSYNCIRQ consumed the state and enables it again
	data->vsc_regs[VSC_REG_IMR] &= ~IMR_SFC;

	spin_unlock_irqrestore(&data->vsc_sync_irq_lock, flags);
    }

    if ((reg & ISR_DTR) && (data->vsc_regs[VSC_REG_IMR] & IMR_DTR)) {
#ifdef DEBUG_ISR
	D(D_BLABLA, "iDTR\n");
#endif
	data->vsc_dtr_irq = 1;
	data->vsc_regs[VSC_REG_IMR] &= ~IMR_DTR;
    }
    
    if (reg & ISR_EOP) {
	if (data->vsc_regs[VSC_REG_IMR] & IMR_EOP) {
#ifdef DEBUG_ISR
	    D(D_BLABLA, "iEOP\n");
#endif
	    data->vsc_eop_irq = 1;
	    wake_up_interruptible(&data->vsc_grab_wq);
	    data->vsc_regs[VSC_REG_IMR] &= ~IMR_EOP;
	} else {
	    D(D_ERROR, "iEOP IRQ but not set in mask register\n");
	}
    }

    if (reg & ISR_FIFO_ERR) {
#ifdef DEBUG_ISR
	D(D_BLABLA, "iFIFO_ERR\n");
#endif
	data->vsc_fifo_err = 1;
	data->vsc_regs[VSC_REG_IMR] &= ~IMR_FIFO_ERR;
    }

    if (reg & ISR_ADC_RES_OK) {
#ifdef DEBUG_ISR
	D(D_BLABLA, "iADC_RES_OK\n");
#endif
	// PPIOCVSCGETADCRESOK reads and clears the flag under this lock,
	// so it has to be taken for the update as well
	spin_lock_irqsave(&data->vsc_adc_res_ok_lock, flags);
	data->vsc_adc_res_ok = 1;
	spin_unlock_irqrestore(&data->vsc_adc_res_ok_lock, flags);
    }
    
    if (reg & ISR_HM_END) {
	spin_lock_irqsave(&data->vsc_hm_end_irq_lock, flags);
	data->vsc_hm_end_irq = 1;
	spin_unlock_irqrestore(&data->vsc_hm_end_irq_lock, flags);
    }

    // both sync slow flags share one lock, PPIOCVSCRSTSYNCSPEED enables the irqs again
    spin_lock_irqsave(&data->vsc_sync_slow_irq_lock, flags);
    if (reg & ISR_HS_SLOW) {
	data->vsc_hs_slow_irq = 1;
	data->vsc_regs[VSC_REG_IMR] &= ~IMR_HS_SLOW;
    }
    if (reg & ISR_VS_SLOW) {
	data->vsc_vs_slow_irq = 1;
	data->vsc_regs[VSC_REG_IMR] &= ~IMR_VS_SLOW;
    }
    spin_unlock_irqrestore(&data->vsc_sync_slow_irq_lock, flags);

    return IRQ_HANDLED;    
}

#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
static irqreturn_t
dma_isr(int irq, void * dev_id, struct pt_regs * regs)
{
    fd_data_t *data = (fd_data_t*) dev_id;
    int dma_status;

    dma_status = ppc4xx_get_dma_status();
    if (dma_status & (0x00800000 >> data->dma_chan_nr)) {
	D(D_VERBOSE, "DMA status: 0x%08x, Error! ", dma_status);

	mtdcr(EBC0_CFGADDR, EBC0_BESR1);
	D(D_VERBOSE, "EBC0_BESR1=%08x ", mfdcr(EBC0_CFGDATA));
	mtdcr(EBC0_CFGADDR, EBC0_BEAR);
	D(D_VERBOSE, "EBC0_BEAR =%08x\n", mfdcr(EBC0_CFGDATA));

	data->dma_error = 1;
    }
    if ((dma_status & DMA_SG0) == 0) {
	wake_up_interruptible(&data->dma_wq);
    }
    mtdcr(DCRN_DMASR, (0x88888880 >> data->dma_chan_nr));
    /* assume we handled the interrupt */
    return IRQ_HANDLED;
}
#endif /* !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__) */

/* ------------------------------------------------------------------------- *
 * internal operations
 * ------------------------------------------------------------------------- */

/*
 * Execute one register operation described by *regop on the VSC.
 *
 * regop->reg selects the register (rejected if above VSC_LAST_REG),
 * regop->op the kind of access.  Read operations store their result in
 * regop->data, write operations take their value from regop->data (and
 * regop->mask for the masked variant).
 *
 * Returns 0 on success, -1 for an out-of-range register or an unknown
 * operation, or the error reported by vsc_read_reg_secure().
 */
static int
vsc_do_regop(fd_data_t *data, vsc_regop_t *regop)
{
    int status = 0;

    if (regop->reg > VSC_LAST_REG) {
	D(D_ERROR, "VSC reg %02x out of range\n", regop->reg);
	return -1;
    }

    switch (regop->op) {
      case VSC_REG_READ:
	  /* plain read straight from the hardware register */
	  regop->data = data->vsc_regs[regop->reg];
	  break;

      case VSC_REG_READ_SHADOWED:
	  /* last value written through the shadowing write helpers */
	  regop->data = data->vsc_shadow[regop->reg];
	  break;

      case VSC_REG_READ_SECURE:
	  /* status is taken over from the secure read */
	  regop->data = vsc_read_reg_secure(data, regop->reg, &status);
	  break;

      case VSC_REG_WRITE:
	  vsc_write_reg(data, regop->reg, regop->data);
	  break;

      case VSC_REG_WRITE_MASKED:
	  vsc_write_reg_masked(data, regop->reg, regop->data, regop->mask);
	  break;

      default:
	  D(D_ERROR, "Unknown regop %d\n", regop->op);
	  status = -1;
	  break;
    }

    return status;
}

/*
 * Read a VSC register until two consecutive reads agree.
 *
 * The register is read in pairs, at most VSC_REG_MAX_RETRIES times,
 * while holding vsc_regs_lock with interrupts disabled.
 *
 * Returns the agreed value, or 0 if no pair matched.  If error is not
 * NULL it receives 0 on success and -EIO on failure.
 */
static u32
vsc_read_reg_secure(fd_data_t *data, u8 reg, int *error)
{
    u32 first = 0, second = 0;
    u32 value = 0;
    int status = -EIO;
    u8 tries_left;
    u_long irq_state;

    spin_lock_irqsave(&data->vsc_regs_lock, irq_state);

    for (tries_left = VSC_REG_MAX_RETRIES; tries_left != 0; tries_left--) {
	first  = data->vsc_regs[reg];
	second = data->vsc_regs[reg];
	if (first == second) {
	    value  = first;
	    status = 0;
	    break;
	}
    }

    if (error) *error = status;
    spin_unlock_irqrestore(&data->vsc_regs_lock, irq_state);

    return value;
}

/*
 * Write val to VSC register reg and record the same value in the
 * shadow copy.  Both stores happen under vsc_regs_lock with interrupts
 * disabled.
 */
static void
vsc_write_reg(fd_data_t *data, u8 reg, u32 val)
{
    u_long irq_state;

    spin_lock_irqsave(&data->vsc_regs_lock, irq_state);

    /* shadow first, then the hardware register */
    data->vsc_shadow[reg] = val;
    data->vsc_regs[reg] = val;

    spin_unlock_irqrestore(&data->vsc_regs_lock, irq_state);
}

/*
 * Replace the bits selected by mask in VSC register reg with the
 * corresponding bits of val; all other bits keep their current hardware
 * value.  The resulting word is stored in the shadow copy and in the
 * register, under vsc_regs_lock with interrupts disabled.
 */
static void
vsc_write_reg_masked(fd_data_t *data, u8 reg, u32 val, u32 mask)
{
    u_int32_t merged;
    u_long irq_state;

    spin_lock_irqsave(&data->vsc_regs_lock, irq_state);

    /* the read-modify-write is based on the hardware, not on the shadow */
    merged = (data->vsc_regs[reg] & ~mask) | (val & mask);

    data->vsc_shadow[reg] = merged;
    data->vsc_regs[reg] = merged;

    spin_unlock_irqrestore(&data->vsc_regs_lock, irq_state);
}

/*
 * Same masked read-modify-write as vsc_write_reg_masked(), but the
 * shadow copy is not updated.
 */
static inline void
vsc_write_reg_masked_noshadow(fd_data_t *data, u8 reg, u32 val, u32 mask)
{
    u_long irq_state;
    u32 updated;

    spin_lock_irqsave(&data->vsc_regs_lock, irq_state);

    updated  = data->vsc_regs[reg] & ~mask;	/* keep unselected bits */
    updated |= val & mask;			/* insert the new ones  */
    data->vsc_regs[reg] = updated;

    spin_unlock_irqrestore(&data->vsc_regs_lock, irq_state);
}

/*
 * Collect the picture measurement values of the VSC into *measures.
 *
 * First MVO and MVT are read into measures->ofsY / totalY.  Then the
 * lines of the picture (0 .. fb_format.g_h - 1) are visited in 'step'
 * interleaved passes (pass n handles lines n, n + step, n + 2*step, ...).
 * For every line the line number is written to LFH, the HM_END interrupt
 * is enabled and the function busy-waits until the ISR has set
 * vsc_hm_end_irq; afterwards MHO and MHL are stored in
 * measures->ofsX[i] / lenX[i].
 *
 * Returns SUCCESS with measures->x_count set to the number of stored
 * pairs, or -EIO on a register read error, a timeout
 * (VSC_MEASURE_TIMEOUT_JF per line) or when more than VSC_MAX_MEASURES
 * entries would be needed.  HM_END is disabled again on every exit.
 */
static int
vsc_measure_picture(fd_data_t *data, vsc_measures_t *measures)
{
    int ret = -EIO, i = 0, error;
    unsigned long flags;
    u32 val;
    unsigned int step = 4;
    unsigned int start_line = 0, line;
    unsigned long timeout, t_schedule;

    D(D_VERBOSE, "vsc_measure_picture\n");
    val = vsc_read_reg_secure(data, VSC_REG_MVO, &error);     
    if (error) goto bail;    
    measures->ofsY = val;
    val = vsc_read_reg_secure(data, VSC_REG_MVT, &error);
    if (error) goto bail;    
    measures->totalY = val;

    t_schedule = jiffies + VSC_MEASURE_SCHEDULE_JF;
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_HM_END);
    while (start_line < step) {
	line = start_line;
	while (line < data->fb_format.g_h) {

	    /* select the line, clear the flag and arm the interrupt
	       under the same lock the ISR uses to set the flag */
	    spin_lock_irqsave(&data->vsc_hm_end_irq_lock, flags);
	    data->vsc_regs[VSC_REG_LFH] = line;	    
	    data->vsc_hm_end_irq = 0;
	    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_HM_END, IMR_HM_END);
	    spin_unlock_irqrestore(&data->vsc_hm_end_irq_lock, flags);
	    
	    /* busy-wait (no sleep) for the flag set by the ISR */
	    timeout = jiffies + VSC_MEASURE_TIMEOUT_JF;
	    while (data->vsc_hm_end_irq != 1) {
		if (time_after(jiffies, timeout)) {
		    D(D_ERROR, "Timeout during offsetX measure step %d\n", i);
		    goto bail;
		}
	    }

	    /* checked before ofsX[i] / lenX[i] are written below */
	    if (i >= VSC_MAX_MEASURES) {
		D(D_ERROR, "No space for %d measures\n", i);
		goto bail;
	    }

	    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_HM_END);
	    val = vsc_read_reg_secure(data, VSC_REG_MHO, &error);
	    if (error) goto bail;
	    measures->ofsX[i] = (u_int16_t) val;
	    val = vsc_read_reg_secure(data, VSC_REG_MHL, &error);
	    if (error) goto bail;
	    measures->lenX[i] = (u_int16_t) val;

	    /* give other tasks a chance every VSC_MEASURE_SCHEDULE_JF jiffies */
	    if (time_after(jiffies, t_schedule)) {
		schedule();
		t_schedule = jiffies + VSC_MEASURE_SCHEDULE_JF;
	    }
	    i++; line += step;
	}
	start_line++;
    }
    measures->x_count = i;

    ret = SUCCESS;

 bail:
    if (ret != SUCCESS) {
	D(D_ERROR, "Error during vsc_measure_picture\n");
    }
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_HM_END);

    D(D_VERBOSE, "vsc_measure_picture done with %d\n", ret);
    return ret;
}

/*
 * Run one auto-adjust step on the VSC: a first sampling run, followed
 * by a combined copy / second sampling / processing run over the whole
 * picture (fb_format.g_w x fb_format.g_h).
 *
 * Each run is started by writing VSC_REG_CR; the caller then sleeps on
 * vsc_grab_wq (woken by the ISR on EOP) for at most VSC_EOP_TIMEOUT.
 * The task state is changed with set_current_state(), as done in
 * vsc_sample_and_diff(), so that the state change is ordered against
 * the wake-up condition.
 *
 * Returns SUCCESS, -EIO on timeout or -EINTR if a signal is pending.
 * The EOP interrupt is masked again on every exit path.
 */
static int
vsc_autoadjust_step(fd_data_t *data)
{
    DECLARE_WAITQUEUE(wait, current);
    int ret = SUCCESS;
    int sched = 0;
    u_int32_t bts_x = data->fb_format.g_w;
    u_int32_t bts_y = data->fb_format.g_h;
    u_int32_t bta   = 0x00000000;

    data->vsc_regs[VSC_REG_CR]  = 0;// ->idle state
    wmb();
    data->vsc_regs[VSC_REG_CCR] = 0;
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
    
    /* one single block covering the whole picture */
    data->vsc_regs[VSC_REG_BTS] = ((bts_y - 1) << 16) | (bts_x - 1);
    data->vsc_regs[VSC_REG_BTF] = (0<<8) | 0;
    data->vsc_regs[VSC_REG_BCO] = 0;
    data->vsc_regs[VSC_REG_BRO] = 0;

    D(D_BLABLA, "AA step start\n");
    add_wait_queue(&data->vsc_grab_wq, &wait);

    // start 1st sampling run
    /* go to sleep state before kicking the engine, so that the EOP
       wake-up cannot be lost */
    set_current_state(TASK_INTERRUPTIBLE);

    wmb();
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_SAMPLE) |
				  CR_SAMPLE(CR_SAMPLE_DONE));

    sched = schedule_timeout(VSC_EOP_TIMEOUT);
    if (!sched) {
	D(D_ERROR, "Timeout waiting for aa sampling\n");
	ret = -EIO;
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&data->vsc_grab_wq, &wait);
	goto bail;
    }
    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for aa sampling\n");
	remove_wait_queue(&data->vsc_grab_wq, &wait);
	ret = -EINTR;
	goto bail;
    }

    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state
    
    D(D_BLABLA, "AA step 1st sampling done, proc start\n");
    
    /* the ISR masked EOP when it fired, re-arm it for the second run */
    data->vsc_eop_irq = 0;
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
        
    // start copy, 2nd sampling and processing
    set_current_state(TASK_INTERRUPTIBLE);

    data->vsc_regs[VSC_REG_BTA] = bta;
    wmb();
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_COPY)     |
				  CR_COPY(CR_COPY_SAMPLE)   |
				  CR_SAMPLE(CR_SAMPLE_PROC) |
				  CR_PROC(CR_PROC_DONE));

    sched = schedule_timeout(VSC_EOP_TIMEOUT);
    if (!sched) {
	D(D_ERROR, "Timeout waiting for aa copy\n");
	ret = -EIO;
    }
    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for aa copy\n");
	ret = -EINTR;
    }
    D(D_BLABLA, "AA step copy done\n");
   
    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state
    data->vsc_eop_irq = 0;
    
    set_current_state(TASK_RUNNING);
    remove_wait_queue(&data->vsc_grab_wq, &wait);

 bail:
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP);
    return ret;
}

/*
 * Take over the frame buffer format description *fb_f_info as the
 * current format of the channel (data->fb_format).  Always returns 0.
 */
static int
vsc_set_fb_format(fd_data_t *data, fb_format_info_t *fb_f_info)
{
    D(D_BLABLA, "vsc_set_fb_format %d,%d pad %d,%d, tiles %d,%d bpp=%d unsupported=%d\n",
      fb_f_info->g_w, fb_f_info->g_h,
      fb_f_info->g_w_pd, fb_f_info->g_h_pd,
      fb_f_info->tiles_w, fb_f_info->tiles_h,
      fb_f_info->bpp, fb_f_info->is_unsupported);

    /* plain byte copy of the whole descriptor */
    memcpy(&data->fb_format, fb_f_info, sizeof(*fb_f_info));

    return 0;
}

/*
 * Let the VSC sample a picture and - unless VSC_SD_SAMPLE_ONLY is set
 * in flags - build the difference map and transfer it afterwards.
 *
 * The engine is started through VSC_REG_CR and the caller sleeps on
 * vsc_grab_wq (woken by the ISR on EOP) for at most VSC_EOP_TIMEOUT.
 *
 * Returns SUCCESS, -EIO on timeout or on a failed diffmap transfer, or
 * -EINTR if a signal is pending after the wait.  The EOP and FIFO_ERR
 * interrupts enabled here are masked again on the error paths as well,
 * so a failed run does not leave them armed.
 */
static int
vsc_sample_and_diff(fd_data_t *data, u_char flags)
{
    DECLARE_WAITQUEUE(wait, current);
    int ret = SUCCESS;
    int sched = 0;

    D(D_BLABLA, "vsc_sample_and_diff %02x\n", flags);
    data->vsc_regs[VSC_REG_CR]  = 0; // ->idle state
    data->vsc_regs[VSC_REG_CCR] = 0; // don't encode the diffmap
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP | IMR_FIFO_ERR, IMR_EOP | IMR_FIFO_ERR);

    // start sampling and processing
    add_wait_queue(&data->vsc_grab_wq, &wait);
    D(D_BLABLA, "in wq\n");
    /* enter the sleep state before kicking the engine, so that the EOP
       wake-up cannot be lost */
    set_current_state(TASK_INTERRUPTIBLE);

    wmb();
    if (flags & VSC_SD_SAMPLE_ONLY) {
	data->vsc_regs[VSC_REG_CR] = CR_IDLE(CR_IDLE_SAMPLE) |
				     CR_SAMPLE(CR_SAMPLE_DONE);
    } else {
	data->vsc_regs[VSC_REG_CR] = CR_IDLE(CR_IDLE_SAMPLE)   |
				     CR_SAMPLE(CR_SAMPLE_PROC) |
				     CR_PROC(CR_PROC_DONE);
    }

    sched = schedule_timeout(VSC_EOP_TIMEOUT);
    if (!sched) {
	D(D_ERROR, "Timeout waiting for sample/diff processing (SR=%08x)\n", data->vsc_regs[VSC_REG_SR]);
	ret = -EIO;
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&data->vsc_grab_wq, &wait);
	/* EOP never fired, so the ISR did not mask it - do it here */
	vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP | IMR_FIFO_ERR);
	goto bail;
    }
    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for sample/diff processing\n");
	remove_wait_queue(&data->vsc_grab_wq, &wait);
	ret = -EINTR;
	/* do not leave the interrupts armed for an aborted run */
	vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP | IMR_FIFO_ERR);
	goto bail;
    }

    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state
   
    remove_wait_queue(&data->vsc_grab_wq, &wait);
    set_current_state(TASK_RUNNING);

    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP | IMR_FIFO_ERR);
    
    /* fetch the diffmap, the transfer method depends on the platform */
#if defined(PP_FEAT_VSC_HW_ENCODING) || defined(__arm__)
    if (!(flags & VSC_SD_SAMPLE_ONLY) && vsc_transfer_diffmap(data) != 0) {
	ret = -EIO;
	goto bail;
    }     
#else
    if (!(flags & VSC_SD_SAMPLE_ONLY) && vsc_transfer_diffmap_ppcdma(data) != 0) {
	ret = -EIO;
	goto bail;
    }
#endif

bail:
    D(D_BLABLA, "vsc_sample_and_diff done\n");
    return ret;   
}


/*
 * Run one block transfer on the VSC and collect the resulting data.
 *
 * bts, btf, bco, bro, bta  values written unchanged to the block
 *                          transfer registers BTS, BTF, BCO, BRO, BTA
 * buf         CPU-side destination, must be 4 byte aligned
 * source_buf  with VSC_MASTER_MODE: value programmed into DMABASE;
 *             not used by the active code otherwise
 * count       out: number of bytes transferred
 * last_valid  out: VSC_REG_CST & CST_MASK (valid bits of the last word)
 *
 * With VSC_MASTER_MODE the VSC moves the data itself and the caller
 * sleeps on vsc_grab_wq until EOP; otherwise the CPU drains the FIFO
 * through data->vsc_fb whenever the ISR reports DTR or EOP.
 *
 * Returns 0 on success, -EIO for an unaligned buf or a timeout in FIFO
 * mode, -EAGAIN for a timeout in master mode, -EINTR on a pending
 * signal, and -1 if FFL is still non-zero when the function leaves
 * through 'bail' (this overrides any earlier error code).
 *
 * Note: the unaligned-buffer check returns directly, i.e. after the
 * block transfer registers were written and without passing 'bail'.
 */
static int
vsc_transfer(fd_data_t *data, uint32_t bts, uint32_t btf, uint32_t bco, uint32_t bro, uint32_t bta,
	     u_char* buf, u_char* source_buf, u_long *count, u_long* last_valid)
{
    int ret = 0;
    u_long size_done = 0;
    u_long size = 0;
#if defined(VSC_MASTER_MODE) || defined(__arm__)
    u_char* saved_buf = buf;
#endif
#ifdef VSC_MASTER_MODE
    DECLARE_WAITQUEUE(wait, current);
    int sched = 0;
#else
    u_short ffl = 0;
    u_char finished;
#endif

    data->vsc_regs[VSC_REG_CR] = 0; // ->idle state
    
    /* forget events left over from an earlier run */
    data->vsc_eop_irq = 0;
    data->vsc_dtr_irq = 0;

    /* program VSC for a single block transfer, regs are pixel addressed */    
    data->vsc_regs[VSC_REG_BTS] = bts;
    data->vsc_regs[VSC_REG_BTF] = btf;
    data->vsc_regs[VSC_REG_BCO] = bco;
    data->vsc_regs[VSC_REG_BRO] = bro;
    data->vsc_regs[VSC_REG_BTA] = bta;

    /* debug */
#if 0
    printk("BTS: %08x, BTF: %08x, BCO: %08x, BRO: %08x, BTA: %08x, width: %x\n",
	   bts, btf, bco, bro, bta, data->fb_format.g_w_pd);
#endif
    
#if 1
    /* the data is stored with 32 bit accesses, see the copy loop below */
    if ((u_int32_t)buf % 4) {
	D(D_ERROR, "Fetching to unaligned address %p\n", buf);
	return -EIO;
    }
#endif
    
#ifdef VSC_MASTER_MODE
	/* chkr: use wmb(); here like in transfer_diffmap? */
    D(D_BLABLA, "allow EOP\n");
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
    vsc_write_reg_masked_noshadow(data, VSC_REG_MR, MR_DO_DMA, MR_DO_DMA);

    data->vsc_regs[VSC_REG_DMABASE] = (uint32_t)source_buf;

    D(D_BLABLA, "VSC_MASTER_MODE, MR=%08x, DMABASE=%08x\n",
      data->vsc_regs[VSC_REG_MR], (u32)source_buf);
    
    /* enter the sleep state before the transfer is started */
    add_wait_queue(&data->vsc_grab_wq, &wait);
    current->state = TASK_INTERRUPTIBLE;

    D(D_BLABLA, "VSC_MASTER_MODE starting transfer\n");
    
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_TRANS) |
				  CR_TRANS(CR_TRANS_DONE));

    sched = schedule_timeout(VSC_DMA_TIMEOUT);
    if (!sched) {
	D(D_ERROR, "Timeout while waiting for VSC DMA\n");
	ret = -EAGAIN;
    }
    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for VSC DMA\n");
	ret = -EINTR;
    }

    current->state = TASK_RUNNING;
    remove_wait_queue(&data->vsc_grab_wq, &wait);

    /* finish here if an error occurred */
    if (ret != 0) {
	D(D_ERROR, "An error occured during DMA transfer. Bailing out.\n");
	goto bail;
    }

    /* the VSC reports the amount of data it has written */
    size = data->vsc_regs[VSC_REG_DMACOUNT];

    buf += size;
    size_done += size;
    *count = size_done;
    D(D_BLABLA, "DMACOUNT=%ld\n", size);
    /* drop stale cache lines covering the area written by the VSC */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    cpu_dcache_invalidate_range((unsigned long)saved_buf, (unsigned long)saved_buf + size + VSC_CACHE_LINE_SIZE);
#else
    consistent_sync(saved_buf, size + VSC_CACHE_LINE_SIZE, DMA_FROM_DEVICE);
#endif

#else /* !VSC_MASTER_MODE */
	
    /* (~MR_DO_DMA & MR_DO_DMA) == 0, i.e. the DO_DMA bit is cleared */
    vsc_write_reg_masked_noshadow(data, VSC_REG_MR, ~MR_DO_DMA, MR_DO_DMA); 

    D(D_BLABLA, "allow DTR & EOP\n");
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_DTR | IMR_EOP, IMR_DTR | IMR_EOP);
    
    D(D_BLABLA, "xfer start, dtr_irq=%d, eop_irq=%d\n",
      data->vsc_dtr_irq, data->vsc_eop_irq);

    finished = 0;    
    wmb();
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_TRANS) |
				  CR_TRANS(CR_TRANS_DONE));
    /* one iteration per DTR/EOP event, the last one is triggered by EOP */
    while (!finished) {
	unsigned long  timeout = jiffies + VSC_EOP_TIMEOUT;

	// 'busy' waiting loop
	D(D_BLABLA, "Waiting Loop\n");
	while (!data->vsc_dtr_irq && !data->vsc_eop_irq) {
	    if (time_after(jiffies, timeout)) {
		D(D_ERROR, "Timeout while waiting for transfer (SR=%08x, FFL=%08x)\n",
		  data->vsc_regs[VSC_REG_SR], data->vsc_regs[VSC_REG_FFL]);
		   
		ret = -EIO;
		goto bail;
	    }
	    
	    if (signal_pending(current)) {
		D(D_ERROR, "Signal during wait for transfer\n");
		ret = -EINTR;
		goto bail;
	    }

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	    if (current->need_resched) schedule();
#else
	    cond_resched();
#endif
	}

	data->vsc_dtr_irq = 0;
 
	finished = data->vsc_eop_irq;
	/* FFL: number of 32 bit words to fetch, size is the same in bytes */
	ffl = data->vsc_regs[VSC_REG_FFL] & FFL_MASK;
	size = ffl << 2;

	D(D_BLABLA, "Fetch (finished=%d), FFL=%d -> Size=%ld, Target=%p\n",
	  finished, ffl, size, buf);

	if (size != 0) {
	    u32* fb32 = (u32*) data->vsc_fb;
	    int i;
	    
	    /* every word is read from the same address (data->vsc_fb) */
	    u32 *buf32 = (u32*) buf;
	    for (i = 0; i < ffl; i++) {
		*buf32++ = *fb32;
	    }
	    buf += size;
	    size_done += size;
	} else {
	    D(D_BLABLA, "empty transfer, nothing to do\n");
	}
	
	/* the ISR disabled DTR when it fired, enable it for the next round */
	vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_DTR, IMR_DTR);
    }
#ifdef __arm__
    /* write back the cached data (using the virtual addresses from kernel's view */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    cpu_dcache_clean_range(saved_buf, saved_buf + size_done + VSC_CACHE_LINE_SIZE);
# else
    consistent_sync(saved_buf, size_done + VSC_CACHE_LINE_SIZE, DMA_TO_DEVICE);
# endif
#endif

    *count = size_done;
#endif

    /* CST tells us how many bits of the last word
       are valid (in 8bit increments), 0 means 'all' */
    *last_valid = data->vsc_regs[VSC_REG_CST] & CST_MASK;
    D(D_BLABLA, "CST=%ld\n", *last_valid);

    
    /* this is only a workaround for the 512/1 EOP bug
       we just reread FFL and read the last word if necessary */
# if 0
#  warning REMOVEME (FFL 512/1 words bug) when VSC is fixed
    {
    u32 ffl;

    ffl = data->vsc_regs[VSC_REG_FFL] & FFL_MASK;
    size = ffl << 2;
    if (size != 0) {	
	int i;
	u32 *buf32 = (u32*) buf;
	D(D_BLABLA, "FFL %d leftover\n", ffl);
	for (i = 0; i < ffl; i++) {
	    *buf32++ = *source_buf;
	}
	buf += size;
	size_done += size;
    }
    }
# endif

    data->vsc_dtr_irq  = data->vsc_eop_irq = 0;
    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state
    
 bail:
    /* common exit: mask DTR/EOP and verify that the FIFO is empty */
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_DTR | IMR_EOP);
    {
	u32 ffl = data->vsc_regs[VSC_REG_FFL] & FFL_MASK;
	if (ffl) {
	    D(D_ERROR, "FFL recheck not 0! (was %08x, CST=%08x)\n", ffl,
	      data->vsc_regs[VSC_REG_CST] & CST_MASK);
	    ret = -1;
	}
    }
   
    return ret;
}

/*
 * Fetch the (encoded) pixel data of one box through vsc_transfer().
 *
 * data        channel to fetch from
 * fetch_desc  fetch request; VSC_FETCH_CTRL_ADD_HDR in ctrl asks for a
 *             vsc_update_rect_hdr_t in front of the data, enc_tag is
 *             copied into that header
 * box         rectangle to fetch, its width and height are padded up
 *             to multiples of the tile size
 * buf         destination inside the mem_pool_cpu area
 * count       out: number of bytes stored at buf (header included)
 *
 * Returns 0 on success and -1 if vsc_transfer() failed; *count is not
 * written in the failure case.
 *
 * NOTE(review): a box with x2 <= x1 or y2 <= y1 is not rejected here -
 * confirm that callers only pass non-empty boxes.
 */
static int
vsc_fetch_box(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
	      BoxRec *box, u_char* buf, u_long *count)
{
    int ret = -1;
    u_int width, height;
    u_long size_done = 0, last_valid;
    vsc_update_rect_hdr_t hdr, *hdr_buf = NULL;
#ifndef __powerpc__
    u_char* saved_buf = buf;
#endif
    
    width = box->x2 - box->x1;
    height= box->y2 - box->y1;
    
    /* pad width and height to transfer tile multiples */
    if (width % PP_FB_TILE_WIDTH) width = (width/PP_FB_TILE_WIDTH + 1) * PP_FB_TILE_WIDTH;
    if (height % PP_FB_TILE_HEIGHT) height = (height/PP_FB_TILE_HEIGHT + 1) * PP_FB_TILE_HEIGHT;

    D(D_BLABLA, "vsc_fetch_box (%d,%d)-(%d,%d) WH(%d,%d) -> %p\n", box->x1, box->y1, box->x2, box->y2,
      width, height, buf);

    /* prepare the rect header if requested */
    if (fetch_desc->ctrl & VSC_FETCH_CTRL_ADD_HDR) {
	hdr_buf = (vsc_update_rect_hdr_t*)buf;
    
	hdr.r.x = box->x1;
	hdr.r.y = box->y1;
	hdr.r.w = width;
	hdr.r.h = height;
	hdr.encoding = fetch_desc->enc_tag;
	
	/* the payload follows the header, hdr.size is filled in later */
	buf += sizeof(vsc_update_rect_hdr_t);
    }

    /* one tile per block, (width x height) / tile size blocks in total */
    if (vsc_transfer(data,
		     ((PP_FB_TILE_HEIGHT - 1) << 16) | (PP_FB_TILE_WIDTH - 1), /* bts */
		     ((height / PP_FB_TILE_HEIGHT - 1) << 8) | (width / PP_FB_TILE_WIDTH - 1), /* btf */
		     (u32) (((PP_FB_TILE_HEIGHT - 1) * data->fb_format.g_w_pd - PP_FB_TILE_WIDTH) * -1), /* bco */
		     data->fb_format.g_w_pd - width + PP_FB_TILE_WIDTH, /* bro */
		     data->fb_format.g_w_pd * box->y1 + box->x1, /* bta */
		     buf, (u_char*)(mem_pool_bus + buf - mem_pool_cpu), &size_done, &last_valid))
	goto bail;


    // here the size is still 4 byte aligned
    *count = size_done;
    
    if (fetch_desc->ctrl & VSC_FETCH_CTRL_ADD_HDR) {
	// in the header we put the real size (byte granular)
	/* last_valid counts the valid bits of the last 32 bit word */
	if (last_valid != 0) {
	    size_done -= (32 - last_valid) / 8;
	    D(D_BLABLA, "Reduced size for box with w %d, h %d to %ld\n", width, height, size_done);
	}
	
	// now that we know the size, write the header
	hdr.size = size_done;
	memcpy(hdr_buf, &hdr, sizeof(vsc_update_rect_hdr_t));

	*count += sizeof(vsc_update_rect_hdr_t);
	D(D_BLABLA, "hdr: (x,y,w,h)=(%d,%d,%d,%d), encoding=%08x, size=%d\n",
	  hdr.r.x, hdr.r.y, hdr.r.w, hdr.r.h, hdr.encoding, hdr.size);
    }

#ifndef __powerpc__

    // clean (write back) the kernel space cache, better map this as non-cachable
    // because we only write the header, we'll only clean the cache in this area
    /* note: done unconditionally, also when no header was requested */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    cpu_dcache_clean_range((unsigned long)saved_buf, (unsigned long)saved_buf + sizeof(vsc_update_rect_hdr_t) + VSC_CACHE_LINE_SIZE);
# else   
    consistent_sync(saved_buf, sizeof(vsc_update_rect_hdr_t) + VSC_CACHE_LINE_SIZE, DMA_TO_DEVICE);
# endif
    // invalidate user space view of mem_pool here, after changing the header
    // saved_buf is a pointer to the buffer to which the kernel writes
    // it's within the mem_pool_cpu area
    {
	uint32_t saddr, eaddr;
	
	/* translate the kernel address into the user mapping (mem_pool_user) */
	saddr = (unsigned long)saved_buf - (unsigned long)(mem_pool_cpu) + 
		(unsigned long)(mem_pool_user);
	eaddr =	(unsigned long)saved_buf - (unsigned long)(mem_pool_cpu) + 
		(unsigned long)(mem_pool_user) + *count + VSC_CACHE_LINE_SIZE;
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	cpu_dcache_invalidate_range(saddr, eaddr);
#else
	/* eaddr is not used in this branch, the length is *count */
	consistent_sync((void*)saddr, *count, DMA_FROM_DEVICE);
#endif
    }

#endif
    
      D(D_BLABLA, "xfer end, count %ld\n", *count);
      ret = 0;
      
bail:
    return ret;
}

#if defined(PP_FEAT_VSC_HW_ENCODING) || !defined(__powerpc__)
/*
 * Transfer the difference map from the VSC into the diffmap buffer of
 * the channel (vsc_diffmap_cpu / vsc_diffmap_bus) with a single
 * vsc_transfer() call.
 *
 * Returns the result of vsc_transfer(); the byte count and the
 * 'last valid' information it reports are not used.
 */
static int
vsc_transfer_diffmap(fd_data_t *data)
{
    u_long bytes_moved = 0;
    u_long last_word_bits;

    return vsc_transfer(data,
			VSC_DIFFMAP_SIZE / 2 - 1, /* bts */
			(0<<8) | 0, /* btf */
			0, /* bco */
			0, /* bro */
			VSC_DIFFMAP_OFFSET / 2, /* bta */
			(u_char*)data->vsc_diffmap_cpu,
			(u_char*)data->vsc_diffmap_bus,
			&bytes_moved, &last_word_bits);
}
#endif

/*
 * Fetch all boxes of a region into the memory pool.
 *
 * fetch_desc->reg points to a RegionRec in user space.  If that region
 * has a data section, the BoxRecs following it are copied to
 * data->reg_buf and fetched one by one, otherwise only reg.extents is
 * fetched.  The data is stored at mem_pool_cpu + fetch_desc->mem_offset,
 * box after box; fetch_desc->size receives the total number of bytes.
 *
 * The compression register CCR is programmed from fetch_desc->enc
 * before the first box is fetched.  With VSC_FETCH_CTRL_DIRECT_FB set
 * in ctrl, vsc_fetch_box_raw() is used instead of vsc_fetch_box().
 *
 * Returns 0 on success and -EFAULT for every kind of failure (missing
 * region, failed user copy, numRects == 0, failed box fetch).
 */
static int
vsc_fetch_tiles(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc)
{
    RegDataRec regdata;
    RegionRec  reg;
    int ret = 0;
    u_long count = 0, reg_ccr;
    u_char *buf;
    int (*fetch_box)(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
		     BoxRec *box, u_char* buf, u_long *count);

    D(D_BLABLA, "vsc_fetch_tiles ctrl=%02x, offset=%08x, algo=%d\n",
      fetch_desc->ctrl, fetch_desc->mem_offset, fetch_desc->enc.algo);

    /* select the per-box worker */
    fetch_box = vsc_fetch_box;

    if (fetch_desc->ctrl & VSC_FETCH_CTRL_DIRECT_FB) {
	fetch_box = vsc_fetch_box_raw;
    }
   
    // set compression register according to request
    data->vsc_regs[VSC_REG_FTL] = VSC_FIFO_FILL_THRESHOLD;

    reg_ccr = CCR_VSC_ALGO(fetch_desc->enc.algo);
    switch (fetch_desc->enc.algo) {
      case VSC_ALGO_LRLE:
	  /* NOTE(review): CCR_LRLE_GREY_GREEN is fed with lrle_grey_disable,
	     the same field that is used for CCR_LRLE_GREY_DISABLE below -
	     possibly a copy/paste slip, confirm against the descriptor */
	  reg_ccr |= CCR_LRLE_RMARGIN_RB(fetch_desc->enc.lrle_r_margin_rb) |
	      CCR_LRLE_RMARGIN_G(fetch_desc->enc.lrle_r_margin_g) |
	      CCR_LRLE_GREY_GREEN(fetch_desc->enc.lrle_grey_disable) |
	      CCR_LRLE_GMARGIN(fetch_desc->enc.lrle_g_margin) |
	      CCR_LRLE_RUNLIMIT(fetch_desc->enc.lrle_runlimit) |
	      CCR_LRLE_LINECOPY(fetch_desc->enc.lrle_linecopy) |
	      CCR_LRLE_GREY_FORCE(fetch_desc->enc.lrle_grey_force) |
	      CCR_LRLE_GREY_DISABLE(fetch_desc->enc.lrle_grey_disable) |
	      CCR_LRLE_CMARGIN_RB(fetch_desc->enc.lrle_c_margin_rb) |
	      CCR_LRLE_CMARGIN_G(fetch_desc->enc.lrle_c_margin_g) |
	      CCR_LRLE_COLOR(fetch_desc->enc.lrle_color) |
	      CCR_LRLE_REACC_RUNLIMIT(fetch_desc->enc.lrle_runlimit_reacc);
	  break;
      case VSC_ALGO_DOWNSAMPLE:
	  reg_ccr |= CCR_DOWN_MODE(fetch_desc->enc.down_mode);
	  break;
      default:
	  /* do nothing */
	  break;
    }

    data->vsc_regs[VSC_REG_CCR] = reg_ccr;    

    D(D_BLABLA, "CCR=%08lx\n", reg_ccr);

    fetch_desc->size = 0;
	
    /* get the region header from user space */
    if (fetch_desc->reg) {
	if (copy_from_user(&reg, (char *)fetch_desc->reg, sizeof(RegionRec))) {
	    ret = -EFAULT;
	    goto bail;
	}
    } else {
	D(D_ERROR, "vsc_fetch_tiles called with empty region, ignoring\n");
	ret = -EFAULT;
	goto bail;
    }

    /* NOTE(review): mem_offset is used without a range check against
       the size of the memory pool - verify that callers validate it */
    buf = mem_pool_cpu + fetch_desc->mem_offset;
    
    if (reg.data) {
	
	int i;
	/* if there is a data section, get the size */
	if (copy_from_user(&regdata, (char *)reg.data, sizeof(RegDataRec))) {
	    ret = -EFAULT;
	    goto bail;
	}

	if (regdata.numRects == 0) {
	    D(D_ERROR, "vsc_fetch_tiles called with crappy region (data but numRects == 0), error\n");
	    ret = -EFAULT;
	    goto bail;
	}

	D(D_BLABLA, "Got region data of %ld rects\n", regdata.numRects);
	
	/* copy the data to our prepared space for boxrecs */
	/* NOTE(review): numRects comes from user space and is only checked
	   against 0; nothing visible here limits it to the capacity of
	   data->reg_buf - confirm where (if at all) that limit is enforced */
	if (copy_from_user(data->reg_buf, (char*)(reg.data + 1),
			   regdata.numRects * sizeof(BoxRec))) {
	    D(D_ERROR, "vsc_fetch_tiles: error during copy of region data\n");
	    ret = -EFAULT;
	    goto bail;
	}
	/* the boxes are stored back to back, 'count' bytes each */
	for (i = 0; i < regdata.numRects; i++) {
	    ret = fetch_box(data, fetch_desc, &data->reg_buf[i], buf, &count);
	    if (ret != 0) {
		ret = -EFAULT;
		goto bail;
	    }
	    fetch_desc->size += count;
	    buf += count;
	}

	
    } else {
	/* no data section: the region consists of its extents only */
	D(D_BLABLA, "Got single rect region\n");
	ret = fetch_box(data, fetch_desc, &reg.extents, buf, &count);
	if (ret != 0) {
	    ret = -EFAULT;
	    goto bail;
	}
	fetch_desc->size += count;
    }

    ret = 0;
 bail:
    return ret;
}

#if !defined(PP_FEAT_VSC_HW_ENCODING) && defined(__powerpc__)
/*
 * Transfer the difference map with the PPC4xx DMA controller.
 *
 * The VSC is programmed for a single block of VSC_DIFFMAP_SIZE / 2
 * pixel addresses starting at VSC_DIFFMAP_OFFSET / 2, the DMA channel
 * for VSC_DIFFMAP_SIZE bytes from the VSC frame buffer window
 * (vsc_base + VSC_FB_OFFSET) to vsc_diffmap_bus.  The caller then sleeps
 * on dma_wq (woken by dma_isr()) for at most VSC_DMA_TIMEOUT.
 *
 * Returns 0 on success, -EIO on timeout or DMA error, -EINTR if a
 * signal is pending (a DMA error takes precedence over the signal).
 */
static int
vsc_transfer_diffmap_ppcdma(fd_data_t *data)
{
    DECLARE_WAITQUEUE(wait, current);
    u_long flags;
    int sched = 0;
    int ret = 0;

    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
    
    data->vsc_regs[VSC_REG_BTS] = VSC_DIFFMAP_SIZE / 2 - 1;
    data->vsc_regs[VSC_REG_BTF] = (0<<8) | 0;
    data->vsc_regs[VSC_REG_BCO] = 0;
    data->vsc_regs[VSC_REG_BRO] = 0;
    
    /* enter the sleep state before the DMA is started */
    add_wait_queue(&data->dma_wq, &wait);
    current->state = TASK_INTERRUPTIBLE;
    
    /* DMA channel setup and start happen under the DMA lock */
    flags = claim_dma_lock();
    ppc4xx_disable_dma(data->dma_chan_nr);
    data->dma_error = 0;

    ppc4xx_set_dma_addr2(data->dma_chan_nr, data->vsc_base + VSC_FB_OFFSET,
			 data->vsc_diffmap_bus);

    ppc4xx_set_dma_count(data->dma_chan_nr, VSC_DIFFMAP_SIZE);

    /* same value dma_isr() writes to DCRN_DMASR for this channel */
    mtdcr(DCRN_DMASR, 0x88888880 >> data->dma_chan_nr);
    // counting pixel addresses (16bit), offset is in bytes
    data->vsc_regs[VSC_REG_BTA] = VSC_DIFFMAP_OFFSET / 2;

    wmb();
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_TRANS) |
				  CR_TRANS(CR_TRANS_DONE));
    ppc4xx_enable_dma(data->dma_chan_nr);
    release_dma_lock(flags);
    
    sched = schedule_timeout(VSC_DMA_TIMEOUT);
    if (!sched) {
	D(D_ERROR, "Timeout while waiting for diffmap DMA\n");
	ret = -EIO;
    }
    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for diffmap DMA\n");
	ret = -EINTR;
    }
    /* dma_error is set by dma_isr() */
    if (data->dma_error) {
	D(D_ERROR, "Error during diffmap DMA\n");
	ret = -EIO;
    }
    current->state = TASK_RUNNING;
    remove_wait_queue(&data->dma_wq, &wait);
    data->vsc_dtr_irq  = data->vsc_eop_irq = 0;
    
    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state

    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP);
    return ret;
}

/*
 * Fetch one box as raw tiles directly into the tile layout of the
 * memory pool, using a PPC4xx scatter/gather DMA list (one entry per
 * tile row of the box).
 *
 * data        channel to fetch from
 * fetch_desc  not used by this variant
 * box         rectangle to fetch, width and height are padded up to
 *             multiples of the tile size
 * buf         not used, the destination is derived from the box position
 * count       out: number of bytes queued for this box
 *
 * Returns 0 on success, -1 if the DMA list could not be built (also
 * when the box has more tile rows than the local descriptor cache can
 * hold), -EIO on timeout or DMA error and -EINTR on a pending signal.
 *
 * *count is reset before the rows are summed up, so that every call
 * reports the size of its own box only (as the non-DMA variant does).
 */
static int
vsc_fetch_box_raw(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
		  BoxRec *box, u_char *buf, u_long *count)
{
    DECLARE_WAITQUEUE(wait, current);
    u_long flags;
    int ret = 0;
    int sched = 0;
    unsigned int width, height, y, i=0;
    phys_addr_t dest_cache[VSC_MAX_X_RES / PP_FB_TILE_HEIGHT];
    unsigned long size_cache[VSC_MAX_X_RES / PP_FB_TILE_HEIGHT];    

    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
	
    width = box->x2 - box->x1;
    height= box->y2 - box->y1;

    /* pad width and height to transfer tile multiples */
    if (width % PP_FB_TILE_WIDTH) width = (width/PP_FB_TILE_WIDTH + 1) * PP_FB_TILE_WIDTH;
    if (height % PP_FB_TILE_HEIGHT) height = (height/PP_FB_TILE_HEIGHT + 1) * PP_FB_TILE_HEIGHT;

    D(D_BLABLA, "vsc_fetch_box_raw (%d,%d)-(%d,%d) WH(%d,%d)\n", box->x1, box->y1, box->x2, box->y2,
      width, height);

    data->vsc_regs[VSC_REG_CR] = 0; // ->idle state
    
    /* program VSC for a single block transfer, regs are pixel addressed */
    data->vsc_regs[VSC_REG_BTS] = ((PP_FB_TILE_HEIGHT - 1) << 16) | (PP_FB_TILE_WIDTH - 1);
    data->vsc_regs[VSC_REG_BTF] = ((height / PP_FB_TILE_HEIGHT - 1) << 8) | (width / PP_FB_TILE_WIDTH - 1);
    data->vsc_regs[VSC_REG_BCO] = (u32) (((PP_FB_TILE_HEIGHT - 1) * data->fb_format.g_w_pd - PP_FB_TILE_WIDTH) * -1);
    data->vsc_regs[VSC_REG_BRO] = data->fb_format.g_w_pd - width + PP_FB_TILE_WIDTH;
    data->vsc_regs[VSC_REG_BTA] = data->fb_format.g_w_pd * box->y1 + box->x1;

    /* start counting from zero, the caller adds *count per box */
    *count = 0;

    /* build DMA descriptors */
    for (y = box->y1; y < box->y2; y += PP_FB_TILE_HEIGHT) {
	unsigned long offset = (y/PP_FB_TILE_HEIGHT * data->fb_format.tiles_w + box->x1/PP_FB_TILE_WIDTH) *
	    PP_FB_TILE_SIZE * VSC_PIXEL_SIZE;

	/* never write behind the on-stack descriptor caches */
	if (i >= sizeof(dest_cache) / sizeof(dest_cache[0])) {
	    D(D_ERROR, "Error during add_dma_sgl\n");
	    ret = -1;
	    goto bail;
	}

	dest_cache[i] = virt_to_bus(mem_pool_cpu + offset);
	size_cache[i] = PP_FB_TILE_HEIGHT * width * VSC_PIXEL_SIZE;

	*count += size_cache[i];

	D(D_BLABLA, "tile row %d, DMA dest %08x, size %ld\n", y/PP_FB_TILE_HEIGHT,
	  (u32) dest_cache[i], size_cache[i]);

	/* we don't care about the source address in VSC framebuffer
	   area for DMA, because the actual pixels transferred are
	   determined by the block transfer registers above. Source
	   address only has to be somewhere in the framebuffer. */

	if (ppc4xx_add_dma_sgl(data->dma_sgl, data->vsc_base + VSC_FB_OFFSET, dest_cache[i],
			       size_cache[i]) != DMA_STATUS_GOOD) {
	    D(D_ERROR, "Error during add_dma_sgl\n");
	    ret = -1;
	    goto bail;
	}
	i++;
    }

    /* start transfer and wait for completion */
    add_wait_queue(&data->dma_wq, &wait);
    current->state = TASK_INTERRUPTIBLE;
    flags = claim_dma_lock(); 
    ppc4xx_disable_dma_sgl(data->dma_sgl);
    data->dma_error = 0;
    wmb();
    data->vsc_regs[VSC_REG_CR] = (CR_IDLE(CR_IDLE_TRANS) |
				  CR_TRANS(CR_TRANS_DONE));

    ppc4xx_enable_dma_sgl(data->dma_sgl);
    release_dma_lock(flags);

    sched = schedule_timeout(VSC_DMA_TIMEOUT);

    if (!sched) {
	D(D_VERBOSE, "Timeout while waiting for fetch DMA\n");
	ret = -EIO;
    }

    if (signal_pending(current)) {
	D(D_ERROR, "Signal during wait for fetch DMA\n");
	ret = -EINTR;
    }
    /* dma_error is set by dma_isr() */
    if (data->dma_error) {
	D(D_ERROR, "Error during fetch DMA (SR=0x%08x)\n", data->vsc_regs[VSC_REG_SR]);
	ret = -EIO;
    }
    data->vsc_regs[VSC_REG_CR] = 0;// ->idle state
    
    current->state = TASK_RUNNING;
    remove_wait_queue(&data->dma_wq, &wait);

    /* invalidate cache for transferred tiles */
    i=0;
    for (y = box->y1; y < box->y2; y += PP_FB_TILE_HEIGHT) {
	invalidate_dcache_range((unsigned long) bus_to_virt(dest_cache[i]),
				(unsigned long) bus_to_virt(dest_cache[i]+size_cache[i]));
	i++;
    }

    D(D_BLABLA, "xfer end, count %ld\n", *count);
   
 bail:
    /* common exit: mask EOP again and drop the queued descriptors */
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP);
    ppc4xx_clear_dma_sgl(data->dma_sgl);
    return ret;
}
#else /* defined(PP_FEAT_VSC_HW_ENCODING) || !defined(__powerpc__) */
/*
 * Fetch one box as raw tiles directly into the tile layout of the
 * memory pool, one vsc_transfer() call per tile row of the box.
 *
 * data        channel to fetch from
 * fetch_desc  not used by this variant
 * box         rectangle to fetch, width and height are padded up to
 *             multiples of the tile size
 * buf         not used, the destination is derived from the box position
 * count       out: number of bytes accounted for this box
 *
 * Returns 0 on success, otherwise the error code of the vsc_transfer()
 * call that failed.  A failed transfer is reported to the caller
 * instead of being silently turned into success.
 */
static int
vsc_fetch_box_raw(fd_data_t *data, vsc_fetch_descriptor_t *fetch_desc,
		  BoxRec *box, u_char *buf, u_long *count)
{
    int ret = 0;
    unsigned int width, height, y;
    u_long size_done = 0, last_valid;
    
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, IMR_EOP, IMR_EOP);
	
    width = box->x2 - box->x1;
    height= box->y2 - box->y1;

    /* pad width and height to transfer tile multiples */
    if (width % PP_FB_TILE_WIDTH) width = (width/PP_FB_TILE_WIDTH + 1) * PP_FB_TILE_WIDTH;
    if (height % PP_FB_TILE_HEIGHT) height = (height/PP_FB_TILE_HEIGHT + 1) * PP_FB_TILE_HEIGHT;

    D(D_BLABLA, "vsc_fetch_box_raw (%d,%d)-(%d,%d) WH(%d,%d)\n", box->x1, box->y1, box->x2, box->y2,
      width, height);

    data->vsc_regs[VSC_REG_CR] = 0; // ->idle state

    *count = 0;
    
    for (y = box->y1; y < box->y2; y += PP_FB_TILE_HEIGHT) {
	/* byte offset of the first tile of this row inside the pool */
	unsigned long offset = (y/PP_FB_TILE_HEIGHT * data->fb_format.tiles_w + box->x1/PP_FB_TILE_WIDTH) *
	    PP_FB_TILE_SIZE * VSC_PIXEL_SIZE;

	*count += PP_FB_TILE_HEIGHT * width * VSC_PIXEL_SIZE;

	/* transfer only one tile line */
	ret = vsc_transfer(data,
			   ((PP_FB_TILE_HEIGHT - 1) << 16) | (PP_FB_TILE_WIDTH - 1), /* bts */
			   (width / PP_FB_TILE_WIDTH - 1), /* btf */
			   (u32) (((PP_FB_TILE_HEIGHT - 1) * data->fb_format.g_w_pd - PP_FB_TILE_WIDTH) * -1), /* bco */
			   data->fb_format.g_w_pd - width + PP_FB_TILE_WIDTH, /* bro */
			   data->fb_format.g_w_pd * y + box->x1, /* bta */
			   mem_pool_cpu + offset, (u_char*)(mem_pool_bus + offset), &size_done, &last_valid);
	if (ret != 0) {
	    /* hand the failure to the caller, don't fetch further rows */
	    goto bail;
	}
	
	// invalidate user space view of mem_pool here, after the transfer
	// the kernel wrote the row to mem_pool_cpu + offset, the user
	// mapping of the same data starts at mem_pool_user + offset
	// only necessary for kira as the cache is behind the MMU
#if !defined(__powerpc__)
	{
	    uint32_t saddr, eaddr;
	    
	    saddr = (unsigned long)(mem_pool_user) + offset;
	    eaddr = (unsigned long)(mem_pool_user) + offset + size_done;
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	    cpu_dcache_invalidate_range(saddr, eaddr);
# else
	    consistent_sync((void*)saddr, size_done, DMA_FROM_DEVICE);
# endif
	}
#endif /* !__powerpc__ */
    }
    D(D_BLABLA, "xfer end, count %ld\n", *count);
   
 bail:
    vsc_write_reg_masked_noshadow(data, VSC_REG_IMR, 0, IMR_EOP);
    return ret;
}
#endif /* defined(PP_FEAT_VSC_HW_ENCODING) || !defined(__powerpc__) */

/*
 * Return the clock value derived from the board information block.
 *
 * PowerPC: twice the PCI bus frequency (LARA_KIMMSI, LARA_KIMSMI,
 * LARA_KIMAMD) or twice the EPB bus frequency (all other boards).
 * ARM: the value of BD_AHB_CLK() for the board data, unchanged.
 *
 * The data argument is not used.
 */
static int
vsc_get_clock(fd_data_t *data)
{
#ifdef __powerpc__
    /* __res is referenced differently on 2.4 and 2.6 kernels */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    bd_t *board = (bd_t *)__res;
# else
    bd_t *board = (bd_t *)&__res;
# endif
# if defined(LARA_KIMMSI) || defined(LARA_KIMSMI) || defined(LARA_KIMAMD)
    int bus_clk = board->bi_pci_busfreq;
# else
    int bus_clk = board->bi_epb_busfreq;
# endif

    return (2 * bus_clk);
#else /* !__powerpc__  = arm */
    bd_arm_t *board = (bd_arm_t *)__res;
    int bus_clk = BD_AHB_CLK(board);

    D(D_NOTICE, "busclk: %d\n", bus_clk);

    return bus_clk;
#endif
}
