/*
 * hwenc.c
 *
 * -hw encoding related stuff
 * -Interface and encoding setting descriptors for hardware
 *  encoding
 * -additional routines to implement the Lossy RLE encoding in 
 *  software for testing purposes (supports only the 15bit case)
 */

#include "rfb.h"
#include <lara.h>
#include "predictor.h"
#include "debug.h"
#include "hw_subencs.h"

#define PRINT_BUFFER_COLS	0
#if (PRINT_BUFFER_COLS > 0 && DEBUGLEVEL == D_BLABLA)
/*
 * Hex-dump `size` bytes of `buf` to stdout. A newline is emitted before
 * every `bytes_per_line`-th byte (including the very first one) and once
 * more after the last byte.
 */
static void
print_buffer(unsigned char* buf, int size, int bytes_per_line)
{
    int pos = 0;

    while (pos < size) {
	if (pos % bytes_per_line == 0) {
	    printf("\n");
	}
	printf("%02x ", buf[pos]);
	pos++;
    }
    printf("\n");
}
#else
/* dumping is compiled out: keep the call sites valid with an empty stub */
static void print_buffer(unsigned char* buf UNUSED, int size UNUSED, int bytes_per_line UNUSED) {}
#endif

#if defined PP_FEAT_VSC_HW_ENCODING

/* rects whose encoded payload (vsc header `size`, in bytes) is smaller
 * than this are sent uncompressed instead of going through ZLIB */
#define	HWENC_MIN_TO_COMPRESS	12

/* send an acknowledged region whose rects were encoded by the hardware */
static inline int send_region_hwenc(rfb_cl_t * clp, RegionRec *reg, u_char *buf, int full_update);
/* send an acknowledged region by running a software encoder on each rect */
static inline int send_region_swenc(rfb_cl_t * clp, RegionRec *reg, u_char *buf, int full_update,
				    rfb_send_rect_softenc_fn_t softenc_fn);
/* deflate one rect payload and send it as a sequence of length-prefixed chunks */
static inline int send_rect_compressed(rfb_cl_t * clp, u_char *buf, u_int size,
				       u_char zlib_level);
/* send a size value in the 1..3 byte variable-length format */
static int send_compressed_size(rfb_cl_t * clp, u_long size);

/*
 * Serve one framebuffer update request for a client through the grabber.
 *
 * The requested region is handed to pp_grab_request_region(); whatever part
 * the grabber acknowledges is sent out (hardware or software encoded,
 * depending on the client's preferred encoding) and the remainder is
 * requested again until nothing is left.
 *
 * clp          client the update is for
 * req_reg      region the client asked for (copied, not modified here)
 * full_update  non-zero if the diff map has to be ignored
 *
 * Returns 0 on success, and also when the request could not be served right
 * now but was re-queued on the client; returns -1 on an internal grabber
 * error or when sending an acknowledged region failed.
 */
int
rfb_send_update_hwenc(rfb_cl_t * clp, RegionRec *req_reg, int full_update)
{
    static const char* fn = ___F;
    RegionRec reg1, reg2, reg_ack;
    /* req and rem alternate between reg1 and reg2 on every loop iteration */
    RegionRec *req = &reg1, *rem = &reg2, *ack = &reg_ack;
    rfb_send_rect_softenc_fn_t softenc_fn;
    int ret = -1;
    /* only the first region sent is flagged as belonging to a full update */
    int first_full_update = full_update;
    u_char enc;
    u_char video_optimized = 0;
        
    /* decide which encoding to use for this update */
    rfb_prepare_encoding(clp);
    
    enc	= clp->enc.pref_enc;
    softenc_fn = clp->enc.send_rect_softenc_fn;
    video_optimized = clp->enc.video_optimized;
    /* (re)program the grabber's encoding descriptor if flagged as pending */
    if (clp->hwenc_setup_needed) {
	vsc_encoding_desc_t enc_desc;
	u_int32_t enc_tag;

	if (enc & rfbEncodingIsHW) {
	    enc_desc = hwenc_subencs[clp->hwenc_subenc];
	    enc_tag = enc | rfbEncodingParamSubenc(clp->hwenc_subenc);
	    D(D_NOTICE, "%s: using hw subencoding %d\n", fn, clp->hwenc_subenc);
	} else {
	    /* software encoders get the grabber's null encoding descriptor */
	    enc_desc = pp_grab_null_encoding_desc;
	    enc_tag = 0;
	    D(D_NOTICE, "%s: using legacy software encoding %d\n", fn, enc);
	}
	pp_grab_set_encoding(clp->grab_client, enc_desc, enc_tag);
	
	clp->hwenc_setup_needed = 0;
    }

    D(D_BLABLA, "region notempty: %d, full_update=%d\n", REGION_NOTEMPTY(req_reg), full_update);
   
    REGION_INIT(req, NullBox, 0);
    REGION_INIT(ack, NullBox, 0);
    REGION_INIT(rem, NullBox, 0);
    REGION_COPY(req, req_reg);
   
    while (REGION_NOTEMPTY(req)) {
	int enc_ret;
	pp_grab_req_flags_t flags = GRAB_REQ_FLAGS_ADD_HDR;

	flags |= (video_optimized) ? GRAB_REQ_FLAGS_LOCK_DIFF : 0;
	flags |= (full_update) ? GRAB_REQ_FLAGS_IGNORE_DIFFMAP : 0;

	REGION_EMPTY(rem); REGION_EMPTY(ack);
	if (pp_grab_request_region(clp->grab_client, flags, req, ack, rem) != 0) {
	    D(D_ERROR, "%s: grab req failed with %d\n", fn, clp->grab_client->req_err);

	    if (clp->grab_client->req_err == GRAB_REQ_ERR_INTERNAL_ERROR) {
		/* fatal: leave with ret == -1 */
		D(D_ERROR, "%s: internal grabber error\n", fn);
		goto bail;
	    }
    
	    /* request failed non-fatal at some point. re-add the region
	       to our request and request it again */

	    REGION_UNION(&clp->req_reg, &clp->req_reg, req);

	    /* any error other than "debugging" or "retry" clears fbu_possible */
            if ((clp->grab_client->req_err != GRAB_REQ_ERR_DEBUGGING_ERROR) && 
                (clp->grab_client->req_err != GRAB_REQ_ERR_RETRY)) {
		clp->fbu_possible = 0;
	    }

	    /* re-arm the pending update; an already set full-update flag is
	       never downgraded */
	    MUTEX_LOCK(&clp->s2c_queue_mtx);
	    if (!clp->full_fb_update) {
		clp->full_fb_update = full_update;
	    }
	    clp->fb_update_requested = 1;
	    MUTEX_UNLOCK(&clp->s2c_queue_mtx);

	    ret = 0; /* error is already resolved here */
	    goto bail;
	}

	/* nothing to do, maybe no diff, keep request flags */
	if (!REGION_NOTEMPTY(ack)) {
	    MUTEX_LOCK(&clp->s2c_queue_mtx);
	    if (!clp->full_fb_update) {
		clp->full_fb_update = full_update;
	    }
	    clp->fb_update_requested = 1;
	    MUTEX_UNLOCK(&clp->s2c_queue_mtx);

	    ret = 0;
	    goto bail;	    
	}
	
	pred_measure_start(clp, ack);

	/* now we have the ack region in our buffer, send it out */
	if (enc & rfbEncodingIsHW) {
	    enc_ret = send_region_hwenc(clp, ack, clp->grab_client->buf, first_full_update);
	} else {
	    enc_ret = send_region_swenc(clp, ack, clp->grab_client->buf, first_full_update, softenc_fn);
	}

	pred_measure_stop(clp);
	
	if (enc_ret != 0) {	    
	    D(D_NOTICE, "%s: send_region failed\n", fn);
	    goto bail;
	}

	/* with hw encoding we call the predictor after a request because
	   we need the actual transferred region, that's okay because
	   the predictor has a latency anyway */
	
	if (clp->pred_type != PRED_TYPE_NONE) {
	    pred_predict_coding(clp, ack);
	}

	first_full_update = 0;
	
	/* the remainder becomes the next request: drop the old request and
	   swap the roles of the two scratch regions */
	REGION_EMPTY(req);
	req = (req == &reg1) ? &reg2 : &reg1;
	rem = (rem == &reg1) ? &reg2 : &reg1;
    }
    
    ret = 0;
    
 bail:
    /* common exit: the grabber buffer is released on every path */
    pp_grab_release_buffer(clp->grab_client);
    
    REGION_UNINIT(rem);
    REGION_UNINIT(ack);
    REGION_UNINIT(req);
    return ret;
}

/*
 * Send one acknowledged region whose rects were already encoded by the
 * hardware.
 *
 * `buf` is walked as a sequence of [vsc_update_rect_hdr_t][payload] records,
 * one per rect of `reg`; after each payload the pointer is advanced by the
 * payload size rounded up to a multiple of 4. For every rect an RFB rect
 * header is written, followed either by the zlib-compressed payload (when
 * the client's zlib level is non-zero and the payload has at least
 * HWENC_MIN_TO_COMPRESS bytes) or by a size header plus the padded payload.
 *
 * clp          client to send to
 * reg          acknowledged region; only its rect count is used here
 * buf          grabber buffer holding the encoded rects
 * full_update  forwarded to rfb_create_fb_update_pdu()
 *
 * Returns 0 on success, -1 on any failure (including an empty region).
 * All temporary header buffers are released on every exit path.
 */
static inline int
send_region_hwenc(rfb_cl_t * clp, RegionRec *reg, u_char *buf, int full_update)
{
    static const char* fn = ___F;
    char *fbu = NULL;
    char *rfb_hdr = NULL;
    char *hwenc_hdr = NULL;
    size_t size;
    u_char zlib_level = clp->hwenc_zlib_level;
    int ret = -1, i;
    uint16_t n_rects;
    
    assert(reg && buf);
    
    n_rects = REGION_NUM_RECTS(reg);
    D(D_BLABLA, "%s: fbu n_rects=%d\n", fn, n_rects);
    if (n_rects == 0) goto bail;

    fbu = rfb_create_fb_update_pdu(n_rects,
    	clp->connection_flags & (rfbConnectionFlagSasHW | rfbConnectionFlagSasSW),
    	full_update, &size);
    if (!fbu || size == 0) {
    	goto bail;
    }
    
    if (rfb_write(clp, fbu, size) != 0) {
	D(D_NOTICE, "%s: rfb write failed\n", fn);
	goto bail;
    }

    for (i = 0; i < n_rects; i++) {
	vsc_update_rect_hdr_t *vsc_hdr = (vsc_update_rect_hdr_t*) buf;
	size_t size1, size2;
	u_int32_t size_pad;
	u_int32_t encoding = vsc_hdr->encoding;
	/* decided once so that header flag and payload format always agree */
	int compress = (zlib_level != 0 && vsc_hdr->size >= HWENC_MIN_TO_COMPRESS);
	
	if (compress) {
	    /* rect will be compressed, so mark the header */
	    encoding |= rfbEncodingParamZLIB(zlib_level);
	}

	/* get header data from vsc header */
	rfb_hdr = rfb_create_fb_update_rect_header(vsc_hdr->r.x, vsc_hdr->r.y,
						   vsc_hdr->r.w, vsc_hdr->r.h,
						   encoding, &size1);
	if (!rfb_hdr || size1 == 0) {
	    /* a non-NULL header with size 0 is freed at bail */
	    goto bail;
	}
	
	print_buffer((u_char*)vsc_hdr, sizeof(vsc_update_rect_hdr_t), PRINT_BUFFER_COLS);

	buf += sizeof(vsc_update_rect_hdr_t);
	
	D(D_BLABLA, "%s: (x,y,w,h)=(%d,%d,%d,%d), enc=%08x, size=%d\n", fn,
	  vsc_hdr->r.x, vsc_hdr->r.y, vsc_hdr->r.w, vsc_hdr->r.h,
	  vsc_hdr->encoding, vsc_hdr->size);

	/* payload size rounded up to the next multiple of 4 */
	size_pad = vsc_hdr->size;
	if (size_pad % 4) size_pad += 4 - (size_pad % 4);
	
	/* send the rect header (coordinates) */
	if (rfb_write(clp, rfb_hdr, size1) != 0) {
	    goto bail;
	}
	free(rfb_hdr);
	rfb_hdr = NULL;

	if (compress) {
	    /* send the rect content compressed, ignore padding here */
	    if (send_rect_compressed(clp, buf, vsc_hdr->size, zlib_level) != 0) {
		D(D_NOTICE, "%s: send_rect_compressed failed\n", fn);
		goto bail;
	    }
	} else {
	    hwenc_hdr = rfb_create_hwenc_hdr(vsc_hdr->size, &size2);
	    if (!hwenc_hdr || size2 == 0) {
		/* a non-NULL header with size 0 is freed at bail */
		goto bail;
	    }
	    
	    print_buffer(buf, vsc_hdr->size, PRINT_BUFFER_COLS);

	    /* send the size of the rect (not padded) */
	    if (rfb_write(clp, hwenc_hdr, size2) != 0) {
		goto bail;
	    }
	    free(hwenc_hdr);
	    hwenc_hdr = NULL;

	    /* the rect itself is sent padded */
	    if (rfb_write(clp, buf, size_pad) != 0) {
		D(D_NOTICE, "%s: rfb write failed\n", fn);
		goto bail;
	    }
	}

	/* next rect has to start 4 byte aligned again, so go forward with padding */
	buf += size_pad;
    }

    ret = 0;
    
 bail:
    /* free(NULL) is a no-op, so every path can share this cleanup */
    free(hwenc_hdr);
    free(rfb_hdr);
    free(fbu);
    return ret;
}
 
/*
 * Send one acknowledged region by running the client's software encoder on
 * every rect delivered by the grabber.
 *
 * `buf` is walked as a sequence of [vsc_update_rect_hdr_t][payload] records,
 * one per rect of `reg`. For each rect the client's framebuffer base
 * (fb_start, tile pitch, x/y offset) is pointed at the payload and
 * `softenc_fn` is called for the rect's coordinates. After the last rect the
 * client's update buffer is flushed.
 *
 * Returns 0 on success, -1 on any failure (including a rect count of 0).
 */
static inline int
send_region_swenc(rfb_cl_t * clp, RegionRec *reg, u_char *buf, int full_update,
		  rfb_send_rect_softenc_fn_t softenc_fn)
{
    static const char* fn = ___F;
    char *fbu = NULL;
    size_t size;
    int n_upd_reg_rects;
    int ret = -1, i;

    /* FIXME: merge with rfb_send_update_softenc in proto.c */
    
    assert(reg && buf && softenc_fn);

    /* Tight may split a region rect into several coded rects, so the rect
       count announced in the update header has to be summed up per rect */
    if (clp->enc.pref_enc == rfbEncodingTight) {
	n_upd_reg_rects = 0;

	for (i = 0; i < REGION_NUM_RECTS(reg); i++) {
	    int x = REGION_RECTS(reg)[i].x1;
	    int y = REGION_RECTS(reg)[i].y1;
	    int w = REGION_RECTS(reg)[i].x2 - x;
	    int h = REGION_RECTS(reg)[i].y2 - y;
	    int n = rfb_num_coded_rects_tight(clp, x, y, w, h);
	    if (n == 0) {
		/* count not available for this rect: announce 0xFFFF
		   NOTE(review): presumably the "number of rects unknown"
		   marker understood by the client - confirm */
		n_upd_reg_rects = 0xFFFF;
		break;
	    }
	    n_upd_reg_rects += n;
	}
    } else {
	n_upd_reg_rects = REGION_NUM_RECTS(reg);
    }  
    
    /* the tight cache is only switched on for updates whose total area
       reaches rfbTightCacheMinUpdRegA */
    clp->useTightCache = 0;
    if (clp->enc.tightCacheEnabled) {
	int upd_reg_a = 0;
	    
	for (i = 0; i < REGION_NUM_RECTS(reg); i++) {
	    upd_reg_a += (REGION_RECTS(reg)[i].x2 - REGION_RECTS(reg)[i].x1) *
			 (REGION_RECTS(reg)[i].y2 - REGION_RECTS(reg)[i].y1);
	}
	if (upd_reg_a >= rfbTightCacheMinUpdRegA) clp->useTightCache = 1;
    }
    
    D(D_BLABLA, "%s: fbu n_rects=%d\n", fn, n_upd_reg_rects);
    /* an empty region is reported as failure (ret is still -1) */
    if (n_upd_reg_rects == 0) goto bail;
	
    fbu = rfb_create_fb_update_pdu(n_upd_reg_rects,
    	clp->connection_flags & (rfbConnectionFlagSasHW | rfbConnectionFlagSasSW),
    	full_update, &size);
    if (!fbu || size == 0) {
    	goto bail;
    }

    if (rfb_write(clp, fbu, size) != 0) {
	D(D_NOTICE, "%s: rfb write failed\n", fn);
	goto bail;
    }

    for (i = 0; i < REGION_NUM_RECTS(reg); i++) {
	vsc_update_rect_hdr_t *vsc_hdr = (vsc_update_rect_hdr_t*) buf;
	D(D_BLABLA, "%s: (x,y,w,h)=(%d,%d,%d,%d)\n", fn,
	  vsc_hdr->r.x, vsc_hdr->r.y, vsc_hdr->r.w, vsc_hdr->r.h);
	
	buf += sizeof(vsc_update_rect_hdr_t);

	/* set base for all send functions */
	clp->fb_start		= buf;
	clp->fb_tile_pitch	= vsc_hdr->r.w / PP_FB_TILE_WIDTH;
	clp->fb_start_ofs_x	= vsc_hdr->r.x;
	clp->fb_start_ofs_y	= vsc_hdr->r.y;
	// printf("press the any key to cont.\n");
	// fgets((char*)&input, 16, stdin);
	if (softenc_fn(clp, vsc_hdr->r.x, vsc_hdr->r.y, vsc_hdr->r.w, vsc_hdr->r.h) != 0) {
	    D(D_NOTICE, "%s: softenc_fn failed\n", fn);
	    goto bail;
	}
	/* NOTE(review): unlike send_region_hwenc() no 4-byte padding is
	   skipped here - presumably the unencoded payload size is always a
	   multiple of 4; verify against the grabber */
	buf += vsc_hdr->size;
    }
    
    /* flush whatever the software encoder left in the update buffer */
    if (rfb_send_update_buf(clp) != 0) {
	D(D_NOTICE, "%s: final rfb_send_update_buf failed\n", fn);
	goto bail;
    }
    
    ret = 0;
    
 bail:
    
    free(fbu);
    return ret; 
}

/*
 * Deflate one rect payload with the client's hardware-encoding zlib stream
 * and send it.
 *
 * Wire layout produced here: the uncompressed size, then one or more
 * chunks, each consisting of its compressed size followed by that many
 * bytes. All sizes use the variable-length format of
 * send_compressed_size(). The client's update_buf is used as the deflate
 * output buffer; every chunk is at most UPDATE_BUF_SIZE bytes.
 *
 * The zlib stream is created on first use and kept across calls; a changed
 * `zlib_level` is applied with deflateParams().
 *
 * clp         client to send to
 * buf         payload to compress (must be non-NULL)
 * size        payload size in bytes (must be non-zero)
 * zlib_level  compression level to use
 *
 * Returns 0 on success, -1 on a zlib or write failure.
 */
static inline int
send_rect_compressed(rfb_cl_t * clp, u_char *buf, u_int size,
		     u_char zlib_level)
{
    z_streamp pz;
    u_char flushing;
    u_char *old;
    int err, portion_size;
    static const char* fn = ___F;

    assert(buf && size);
    
    pz = &clp->zsStruct_hw;

    /* send uncompressed size */
    if (send_compressed_size(clp, size) != 0) {
	D(D_VERBOSE, "%s: sending uncompressed size failed\n", fn);
	return -1;
    }

    /* Initialize compression stream if needed. */
    if (!clp->zsActive_hw) {
        pz->zalloc = Z_NULL;
        pz->zfree = Z_NULL;
        pz->opaque = Z_NULL;

        err = deflateInit2 (pz, zlib_level, Z_DEFLATED, MAX_WBITS,
                            MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
        if (err != Z_OK) {
	    D(D_VERBOSE, "%s: deflateInit2 failed\n", fn);
            return -1;
	}

        clp->zsActive_hw = 1;
        clp->zsLevel_hw = zlib_level;
    }

    /* prepare buffer pointers */
    pz->next_in = buf;
    pz->avail_in = size;
    pz->next_out = clp->update_buf;
    pz->avail_out = UPDATE_BUF_SIZE;
    
    /* Change compression parameters if needed. */
    if (zlib_level != clp->zsLevel_hw) {
	int z_ret;
	D(D_BLABLA, "pre-params avail_in=%d, avail_out=%d\n", pz->avail_in, pz->avail_out);
        if ((z_ret = deflateParams (pz, zlib_level, Z_DEFAULT_STRATEGY)) != Z_OK) {
	    D(D_VERBOSE, "%s: deflateParams failed (%d)\n", fn, z_ret);
            return -1;
        }
	D(D_BLABLA, "post-params avail_in=%d, avail_out=%d\n", pz->avail_in, pz->avail_out);
        clp->zsLevel_hw = zlib_level;

	/* send size and data for initial chunk created during param call */
	portion_size = UPDATE_BUF_SIZE - pz->avail_out;
	if (portion_size != 0) {
	    D(D_BLABLA, "%s: pSize=%d\n", fn, portion_size);
	    if (send_compressed_size(clp, portion_size) != 0) {
		D(D_VERBOSE, "%s: sending compressed size failed\n", fn);
		return -1;
	    }
	    
	    if (rfb_write(clp, clp->update_buf, portion_size) != 0) {
		D(D_VERBOSE, "%s: rfb_write failed\n", fn);
		return -1;
	    }

	    /* this chunk is on the wire: rewind the output buffer so the
	       bytes are not sent a second time as the prefix of the next
	       chunk */
	    pz->next_out = clp->update_buf;
	    pz->avail_out = UPDATE_BUF_SIZE;
	}
    }
    
    flushing = 0;
    while (pz->avail_in || flushing) {
	D(D_BLABLA, "%s: deflate pre: next_in=%p, avail_in=%d, flushing=%d\n", fn,
	  pz->next_in, pz->avail_in, flushing);

	old = pz->next_in;
	/* Actual compression. */
	if ( deflate (pz, Z_SYNC_FLUSH) != Z_OK ) {
	    D(D_VERBOSE, "%s: deflating failed\n", fn);
	    return -1;
	}
	D(D_BLABLA, "%s: deflate post: next_out=%p, avail_out=%d\n", fn,
	  pz->next_out, pz->avail_out);

	/* pointer difference is cast so that it matches the %d conversion */
	D(D_BLABLA, "%s: uCPortion=%d\n", fn, (int)(pz->next_in - old));

	/* no more input data but not everything could
	   be written -> flush run */
	if (pz->avail_in == 0 && pz->avail_out == 0) {
	    flushing = 1;
	} else {
	    flushing = 0;
	}

	/* send size and accumulated data */
	portion_size = UPDATE_BUF_SIZE - pz->avail_out;
	D(D_BLABLA, "%s: pSize=%d\n", fn, portion_size);
	if (send_compressed_size(clp, portion_size) != 0) {
	    D(D_VERBOSE, "%s: sending compressed size failed\n", fn);
	    return -1;
	}
		    
	if (rfb_write(clp, clp->update_buf, portion_size) != 0) {
	    D(D_VERBOSE, "%s: rfb_write failed\n", fn);
	    return -1;
	}

	/* start the next chunk at the beginning of the output buffer */
	pz->next_out = clp->update_buf;
	pz->avail_out = UPDATE_BUF_SIZE;
    }

    return 0;   
}

/*
 * Write `size` to the client in a 1 to 3 byte variable-length format:
 * 7 bits in the first byte, 7 bits in the second, 8 bits in the third.
 * Bit 7 of the first and second byte signals that another byte follows.
 *
 * Returns 0 on success, -1 if rfb_write() fails.
 */
static int
send_compressed_size(rfb_cl_t * clp, u_long size)
{
    u_char encoded[3];
    u_char len;

    if (size <= 0x7F) {
	/* fits into a single byte, no continuation bit */
	encoded[0] = size & 0x7F;
	len = 1;
    } else if (size <= 0x3FFF) {
	encoded[0] = (size & 0x7F) | 0x80;
	encoded[1] = (size >> 7) & 0x7F;
	len = 2;
    } else {
	encoded[0] = (size & 0x7F) | 0x80;
	encoded[1] = ((size >> 7) & 0x7F) | 0x80;
	encoded[2] = (size >> 14) & 0xFF;
	len = 3;
    }

    return (rfb_write(clp, encoded, len) != 0) ? -1 : 0;
}

#else /* !PP_FEAT_VSC_HW_ENCODING */

/* LRLE_DEBUG is forced off here; define it instead to get the LD() trace
 * output and the global `debug` flag */
#undef LRLE_DEBUG
#ifdef LRLE_DEBUG
# define LD(fmt, args...) printf(fmt, ##args);
#else
# define LD(fmt, args...) 
#endif

/* software LRLE stuff */
/* minimum / maximum of three values and absolute value; the arguments are
 * evaluated more than once, so they must be free of side effects */
#define LMIN3(a,b,c)    ((a)<(b)?((a)<(c)?(a):(c)):((b)<(c)?(b):(c)))
#define LMAX3(a,b,c)    ((a)>(b)?((a)>(c)?(a):(c)):((b)>(c)?(b):(c)))
#define LABS(x)		((x) > 0 ? (x) : -(x))

// index of each color component inside a 3-byte pixel array
#define RED		2
#define GREEN		1
#define BLUE		0

// output types
#define O_BMP		0
#define O_RLE		1

// RLE commands (the `cmd` argument of genRLE)
#define RLE_INIT	0
#define RLE_COPY	1
#define RLE_GREY	3
#define RLE_WORD	4

// RLE parameters
// R_LIMIT / C_LIMIT: run counter value at which a local run / line copy
// is forcibly ended in encode_cell
#define R_LIMIT		31
#define C_LIMIT		31
// BYTE_CMP: non-zero enables the one-byte grey code in encode_cell
#define BYTE_CMP	0
// USE2D: non-zero allows copying from the previous line
#define USE2D		1

/* size of the scratch buffer that holds one encoded cell */
#define MAX_LRLE_SIZE	(4 * PP_FB_TILE_WIDTH * PP_FB_TILE_HEIGHT)

/* per-subformat palette data used when reducing 15bit codes to one byte */
typedef struct {
    unsigned char palette[256][3];		/* palette entries, indexed [code][RED/GREEN/BLUE] */
    unsigned char pByteTable[32][32][32];	/* [r][g][b] (5 bit each) -> palette code */
    int numColors;				/* number of palette entries in use (1 << depth) */
    int isGrey;					/* non-zero: greyscale mapping instead of palette search */
    int depth;					/* bits per palette code */
} lrle_pr_t;

/* lazily allocated tables, one slot per LRLE subencoding (NULL = not built) */
lrle_pr_t* lrle_pr_table[rfbLRLESubencCount];

/* per-component tolerance used by encode_cell when extending a run
 * (green is compared against twice this value); 0 means exact match */
int margin = 0;

#ifdef LRLE_DEBUG
int debug = 0; // global debug flag
#endif

static int  send_cells(rfb_cl_t * clp, unsigned char subformat, int rx, int ry, int rw, int rh);
static int  encode_cell(unsigned char *src, unsigned char *dst, int w, int h);
static void genRLE(unsigned char *dst, int *rle_size, int cmd, int run, int v[]);
static int  putRun(unsigned char* dst, int i_out, int runV, int runLength);

static int  postprocess_rle(unsigned char* src, unsigned char* dst, int raw_size,
			    unsigned char format);
static void build_palette(unsigned char format);
static void build_table(unsigned char format);

/*
 * Reset the per-subencoding LRLE table slots so that every slot reads as
 * "not built yet". Always returns 0.
 */
int
rfb_hwenc_init(void)
{
    size_t slot;

    for (slot = 0; slot < sizeof(lrle_pr_table) / sizeof(lrle_pr_table[0]); slot++) {
	lrle_pr_table[slot] = NULL;
    }

    return 0;
}

/*
 * Release every LRLE table built by rfb_lrle_init_tables().
 *
 * Each slot is reset to NULL after it has been freed: rfb_lrle_init_tables()
 * uses NULL to decide whether a table has to be built, so a stale pointer
 * would make it reuse freed memory, and a second cleanup would free the
 * same block twice.
 */
void
rfb_hwenc_cleanup(void)
{
    int i;

    for (i = 0; i < rfbLRLESubencCount; i++) {
	/* free(NULL) is a no-op, no guard needed */
	free(lrle_pr_table[i]);
	lrle_pr_table[i] = NULL;
    }
}

/*
 * Software-encode one rect as LRLE and append it to the client's update
 * buffer.
 *
 * The rect header (tagged rfbEncodingLRLESoft plus the client's
 * subencoding) is queued first; the update buffer is flushed beforehand if
 * the header would not fit. The pixel data is then produced cell by cell
 * by send_cells(). Only a client pixel format of 16 bits per pixel is
 * supported.
 *
 * Returns the result of send_cells() on the supported path, -1 if the
 * header could not be created, the flush failed or the pixel format is
 * not supported (in the last case the header has already been queued).
 */
int
rfb_send_rect_lrle(rfb_cl_t * clp, int x, int y, int w, int h)
{
    char *rect;
    size_t size;
    unsigned char subformat = clp->hwenc_subenc;
    
#ifdef TIME_MEASURE
    clp->time_pixel += w*h;
#endif
    
    rect = rfb_create_fb_update_rect_header(x, y, w, h,
    	rfbEncodingLRLESoft | rfbEncodingParamSubenc(subformat), &size);
    if (!rect || size == 0) {
	/* a non-NULL header of size 0 must not leak; free(NULL) is a no-op */
	free(rect);
    	return -1;
    }

    /* make room for the header if it does not fit any more */
    if (clp->ub_len + size > UPDATE_BUF_SIZE) {
	if (rfb_send_update_buf(clp) == -1) {
	    free(rect);
	    return -1;
	}
    }

    memcpy(&clp->update_buf[clp->ub_len], rect, size);
    free(rect);
    clp->ub_len += size;
    
    if (clp->pix_fmt.bitsPerPixel_8 == 16) {
	return send_cells(clp, subformat, x, y, w, h);
    }

    pp_log("rfb_send_rect_lrle(): bpp %d?\n", clp->pix_fmt.bitsPerPixel_8);
    return -1;
}

int
rfb_lrle_init_tables(unsigned char subformat)
{
    if (subformat >= rfbLRLESubencCount) return 0;
    
    if (subformat != rfbLRLESubenc15bitDirectLossy &&
	subformat != rfbLRLESubenc15bitDirectLossless &&
	lrle_pr_table[subformat] == NULL) {

	int isGrey=0, depth=0;
	
	lrle_pr_table[subformat] = malloc(sizeof(lrle_pr_t));

	switch (subformat) {
	  case rfbLRLESubenc7bitDirectLossy:
	  case rfbLRLESubenc7bitDirectLossless:
	      isGrey = 0; depth = 7;
	      break;
	  default:
	      pp_log_err("lrle build_table: unkown LRLE format %d\n", subformat);
	      assert(0);
	      break;
	}

    	lrle_pr_table[subformat]->depth	 = depth;
	lrle_pr_table[subformat]->numColors = 1 << depth;
	lrle_pr_table[subformat]->isGrey = isGrey;
	
	build_palette(subformat);
	build_table(subformat);
    }
    
    return 0;
}

/*
 * Encode the rect (rx, ry, rw, rh) cell by cell and append the result to
 * the client's update buffer.
 *
 * The rect is walked in steps of PP_FB_TILE_WIDTH x PP_FB_TILE_HEIGHT;
 * cells at the right and bottom edge are clipped to the rect. Each cell is
 * translated to the client pixel format, run through encode_cell() and,
 * for every subformat other than the two 15bit direct ones, additionally
 * through postprocess_rle().
 *
 * Returns 0 on success, -1 if flushing the update buffer failed.
 */
static int
send_cells(rfb_cl_t * clp, unsigned char subformat, int rx, int ry, int rw, int rh)
{
    int x, y, w, h, size, raw_size;
    unsigned char *fb_ptr;
    unsigned char buf[MAX_LRLE_SIZE];	/* raw encode_cell output before postprocessing */
    u_int16_t clientPixelData[PP_FB_TILE_WIDTH * PP_FB_TILE_HEIGHT];	/* one translated cell */

    for (y = ry; y < ry+rh; y += PP_FB_TILE_HEIGHT) {
	for (x = rx; x < rx+rw; x += PP_FB_TILE_WIDTH) {
	    /* clip the cell to the rect */
	    w = PP_FB_TILE_WIDTH;
	    h = PP_FB_TILE_HEIGHT;
	    if (rx+rw - x < PP_FB_TILE_WIDTH)
		w = rx+rw - x;
	    if (ry+rh - y < PP_FB_TILE_HEIGHT)
		h = ry+rh - y;

	    /* flush unless two bytes per pixel of a full cell still fit
	       NOTE(review): this assumes a cell never encodes to more than
	       2 bytes per pixel - confirm against encode_cell */
	    if (clp->ub_len + (2 * PP_FB_TILE_WIDTH * PP_FB_TILE_HEIGHT) > UPDATE_BUF_SIZE) {
		if (rfb_send_update_buf(clp) == -1) {
		    return -1;
		}
	    }

	    // get a tile from framebuffer
	    fb_ptr = GET_PTR_INTO_FB(clp, x, y);

	    clp->lrle_translate_fn(clp->lrle_translate_lookup_table, NULL,
				   NULL, fb_ptr, (char *)clientPixelData,
				   clp->fb_width_pd * (clp->fb_bpp / 8), w, h);

#ifdef LRLE_DEBUG
	    debug = (x == 0 && y == 0) ? 1 : 0;
#endif

	    LD("encode_cell (%d,%d), w=%d, h=%d, subformat=%d\n", x, y, w, h, subformat);

	    if (subformat == rfbLRLESubenc15bitDirectLossy ||
		subformat == rfbLRLESubenc15bitDirectLossless) {
		/* 15bit direct: encode straight into the update buffer */
		raw_size = encode_cell((unsigned char*)clientPixelData,
				       &clp->update_buf[clp->ub_len], w, h);
		LD("raw_size = %d\n", raw_size);
		print_buffer(&clp->update_buf[clp->ub_len], raw_size, PRINT_BUFFER_COLS);
		
		/* sanity check only: the data has already been written */
		assert(raw_size < MAX_LRLE_SIZE);
		clp->ub_len += raw_size;		
	    } else {
		/* other subformats: encode into the scratch buffer, then
		   reduce the pixel codes into the update buffer */
		raw_size = encode_cell((unsigned char*)clientPixelData, buf, w, h);
		LD("raw_size = %d\n", raw_size);
		print_buffer(buf, raw_size, PRINT_BUFFER_COLS);

		assert(raw_size < MAX_LRLE_SIZE);
		size = postprocess_rle(buf, &clp->update_buf[clp->ub_len], raw_size,
				       subformat);

		print_buffer(&clp->update_buf[clp->ub_len], size, PRINT_BUFFER_COLS);
		clp->ub_len += size;
	    }
	}
    }

    return 0;
}

/**
 * split one 16bpp RGB565 pixel into its three color components
 *
 * @param src	points to the 16 bit pixel
 * @param dst	receives the components at the RED, GREEN and BLUE indices
 *		(5, 6 and 5 significant bits)
 */
static inline void
convert_pix_to_array(unsigned char *src, unsigned char *dst)
{
    unsigned short px = *(unsigned short*) src;

    LD("%04x -> ", px);

    dst[RED]   = (px >> 11) & 0x1F;
    dst[GREEN] = (px >> 5) & 0x3F;
    dst[BLUE]  = px & 0x1F;

    LD("RGB %02x %02x %02x\n", dst[RED], dst[GREEN], dst[BLUE]);
}

/**
 * encodes a cell (hextile) from RGB565 source to an LRLE stream
 *
 * Every pixel is either added to the current run or ends it. Two kinds of
 * run are tracked in parallel: a "local run" of pixels whose components
 * stay within `margin` of each other, and a "line copy" of pixels that
 * match the pixel directly above (prevLine) within `margin`. When both
 * have ended, the longer one is emitted through genRLE().
 *
 * The loop performs one extra pass with y == h that only flushes the last
 * run. No pixel is fetched in that pass: src holds exactly w*h pixels and
 * the value would not be looked at anyway.
 *
 * @param src	w*h pixels, 2 bytes each, stored row by row without gaps
 * @param dst	receives the LRLE stream
 * @param w	cell width in pixels (at most PP_FB_TILE_WIDTH)
 * @param h	cell height in pixels
 * @return	lrle output size for the cell
 */
static int
encode_cell(unsigned char *src, unsigned char *dst, int w, int h)
{
    unsigned char prevLine[PP_FB_TILE_WIDTH][3];    
    int	minV[3], maxV[3], runV[3];
    int	endLocalRun = -1, endLineCopy = -1;
    int rle_size = 0;
    int	x, y, c;
    int	outX = 0, outY = 0;
    unsigned char pix[3] = { 0, 0, 0 }, *ppix;
    int run = 0, beginRun, endRun, continueRun;
    int pixType;
    
    // initialization
    memset(prevLine, 255, w * 3);

    // loop through cell pixel data
    genRLE(dst, &rle_size, RLE_INIT, 0, NULL);
    
    for (y = 0; y <= h; y++) {
	for (x = 0; x < w; x++) {
	    LD("(%2d,%2d) ", x, y);

	    // the flush pass (y == h) has no pixel left to read
	    if (y < h) {
		convert_pix_to_array(src, pix);
		src += 2;
	    }
	    ppix = prevLine[x];
	    
	    // Do we need to end our local run?
	    if (endLocalRun == 0 &&
		((y == h) || (run == R_LIMIT) ||
		 pix[RED] > minV[RED]+margin ||
		 pix[RED] < maxV[RED]-margin ||
		 pix[GREEN] > minV[GREEN]+2*margin ||
		 pix[GREEN] < maxV[GREEN]-2*margin ||
		 pix[BLUE] > minV[BLUE]+margin ||
		 pix[BLUE] < maxV[BLUE]-margin)) {
		
		endLocalRun = run + 1;
	    }
	    
	    // Do we need to end our line copy?
	    if (endLineCopy == 0 &&
		((y == h) || (run == C_LIMIT) ||
		 LABS(pix[RED]   - ppix[RED])   > margin ||
		 LABS(pix[GREEN] - ppix[GREEN]) > 2*margin ||
		 LABS(pix[BLUE]  - ppix[BLUE])  > margin)) {
		
		endLineCopy = run + 1;
	    }

	    // Figure out what we're doing
	    endRun = endLocalRun && endLineCopy && (x || y);
	    beginRun = (endRun && (y < h)) || !(x || y);
	    continueRun = !endRun && !beginRun;
	    
	    // Do end-of-run processing
	    if (endRun) {
		// print out completed run
		if (endLineCopy >= endLocalRun) {	// use line copy
		    genRLE(dst, &rle_size, RLE_COPY, run, NULL);
		    outX = x;
		    outY = y;
		} else {
		    // use local run: emit the midpoint of the value range
		    for (c=0; c<3; c++) {
			runV[c] = (minV[c] + maxV[c] + 1) / 2;
		    }

		    // byte compression?
		    if (BYTE_CMP &&
			LMAX3(2*runV[RED],runV[GREEN],2*runV[BLUE]) -
			LMIN3(2*runV[RED],runV[GREEN],2*runV[BLUE]) <= 3) {

			pixType = RLE_GREY;
		    } else {
			pixType = RLE_WORD;
		    }
		    
		    // Generate the RLE code
		    genRLE(dst, &rle_size, pixType, run, runV);

		    // Copy new pixel data to prevLine: very important!!!
		    while (run-- >= 0) {
			for (c=0; c<3; c++) {
			    prevLine[outX][c] = runV[c];
			}
			if (++outX == w) {
			    outX = 0;
			    outY++;
			}
		    }
		    outX = x;
		}
	    }

	    // Initialize new run
	    if (x == 0 && y == 0) {
		genRLE(dst, &rle_size, RLE_INIT, 0, NULL);
	    }
	    
	    if (beginRun) {
		outX = x;
		outY = y;
		run = 0;
		endLocalRun = 0;
		// -1: a line copy is not possible for this run, 0: still open
		endLineCopy = (y == 0) || !USE2D ||
			      (LABS(pix[0]-ppix[0]) > margin)   ||
			      (LABS(pix[1]-ppix[1]) > 2*margin) ||
			      (LABS(pix[2]-ppix[2]) > margin)   ? -1 : 0;
		
		for (c=0; c<3; c++) {
		    minV[c] = maxV[c] = pix[c];
		}
	    }

	    // Continue run
	    if (continueRun) {
		run++;
		// update local run if necessary
		if (!endLocalRun) {
		    for (c=0; c<3; c++) {
			if (pix[c] < minV[c])
			    minV[c] = pix[c];
			else if (pix[c] > maxV[c])
			    maxV[c] = pix[c];
		    }
		}
	    }

	    // Get out of here if we're at end of cell
	    if (y == h) {
		break;
	    }
	}
    }

    // Error check
    if (outX) {
	fprintf(stderr, "ERROR: cell %dx%d; wrote through column %d\n", 
		w, h, outX);
	fprintf(stderr, "run=%d, endLocalRun=%d, endLineCopy=%d\n",
		run, endLocalRun, endLineCopy);
	assert(0);
    }
       
    return rle_size;
}

/**
 * generate RLE code for the given command
 *
 * Appends the code bytes to dst at offset *rle_size and advances *rle_size
 * by the number of bytes written. Nothing is returned.
 *
 * The last pixel value written is remembered in static storage, so a value
 * that repeats the previous one is emitted as a run byte only. RLE_INIT and
 * RLE_COPY forget the remembered value. Because of this static state the
 * function is not reentrant.
 *
 * @param dst		output buffer
 * @param rle_size	in/out: current write offset into dst
 * @param cmd		RLE_INIT, RLE_COPY, RLE_GREY or RLE_WORD
 * @param run		run length minus one (unused for RLE_INIT)
 * @param v		pixel value indexed by RED/GREEN/BLUE; only read
 *			for RLE_GREY and RLE_WORD
 */
static void
genRLE(unsigned char *dst, int *rle_size, int cmd, int run, int v[])
{
    static int oldV[3];
    static int last_noncopy; // used for suppressing trailing line copies
			     // (only written in this function, never read)
    int	c;
    
    assert(rle_size);

    switch(cmd) {
      case RLE_INIT:
	  LD("RLE_INIT\n");
	  oldV[0] = oldV[1] = oldV[2] = -1;
	  break;
      case RLE_COPY:
	  LD("RLE_COPY %d\n", run);
	  oldV[0] = oldV[1] = oldV[2] = -1;
	  // short form: 0xE0 + run; run >= 31 uses 0xFF followed by the run
	  if (run >= 31) {
	      dst[(*rle_size)++] = 0xFF;
	      dst[(*rle_size)++] = run;
	  } else {
	      dst[(*rle_size)++] = 0xE0 + run;
	  }
	  break;
      case RLE_GREY:
	  // new value: one byte 0x80 + green, which also covers one pixel
	  if (v[0]!=oldV[0] || v[1]!=oldV[1] || v[2]!=oldV[2]) {
	      LD("RLE_GREY %02x\n", v[GREEN]);
	      dst[(*rle_size)++] = 0x80 + v[GREEN];
	      for (c=0; c<3; c++) {
		  oldV[c] = v[c];
	      }
	      --run;
	  }
	  // remaining pixels of the run: 0xC0 + (count - 1)
	  if (run >= 0) {
	      LD("RUN %d\n", run);
	      dst[(*rle_size)++] = 0xC0 + run;
	  }
	  break;
      case RLE_WORD:
	  // new value: two bytes, red in bits 6..2 of the first byte, green
	  // bits 5..4 in its low two bits, green bits 3..1 in bits 7..5 of
	  // the second byte, blue in its low five bits; covers one pixel
	  if (v[0]!=oldV[0] || v[1]!=oldV[1] || v[2]!=oldV[2]) {
	      dst[(*rle_size)++] = ((v[RED]) << 2) + ((v[GREEN] & 0x30) >> 4);
	      dst[(*rle_size)++] = ((v[GREEN] & 0x0E) << 4) + v[BLUE];
	      // the top bit of the first byte must stay clear
	      assert(!((((v[RED]) << 2) + ((v[GREEN] & 0x30) >> 4)) & 0x80));
	      LD("RLE_WORD %02x %02x\n", ((v[RED]) << 2) + ((v[GREEN] & 0x30) >> 4), ((v[GREEN] & 0x0E) << 4) + v[BLUE]);
	      for (c=0; c<3; c++) {
		  oldV[c] = v[c];
	      }
	      --run;
	  }
	  // remaining pixels of the run: 0xC0 + (count - 1)
	  if (run >= 0) {
	      LD("RUN %d\n", run);
	      dst[(*rle_size)++] = 0xC0 + run;
	  }
	  break;
      default:
	  fprintf(stderr, "ERROR: unknown RLE command %d\n", cmd);
	  assert(0);
    }
    
    if (cmd != RLE_COPY) {
	last_noncopy = *rle_size;
    }
}

/**
 * Reduce a raw LRLE stream (as produced by encode_cell) to the one byte
 * per pixel representation of a palettized subformat.
 *
 * Pixel codes are mapped to one-byte values (through the subformat's
 * lookup table when it has fewer than 128 colors, arithmetically
 * otherwise), adjacent equal values are merged and written with putRun().
 * Line copy codes are passed through unchanged, including the run byte
 * that follows the extended code 0xFF, and so is the end-of-cell marker
 * 0xFF 0xDF.
 *
 * @param src		raw LRLE stream
 * @param dst		receives the reduced stream
 * @param raw_size	number of valid bytes in src
 * @param format	subformat; its table must have been built with
 *			rfb_lrle_init_tables()
 * @return		number of bytes written to dst
 */
static int
postprocess_rle(unsigned char* src, unsigned char* dst, int raw_size,
		unsigned char format)
{
    lrle_pr_t *pr = lrle_pr_table[format];
    int c, runV, runLength, i_in, i_out, run, v;
    int	r, g, b;

    runV = -1;
    runLength = 0;

    for (i_out = i_in = 0; i_in < raw_size; i_in++) {
	// look ahead only while the second byte is still inside the input
	if (src[i_in] == 0xFF && i_in + 1 < raw_size && src[i_in+1] == 0xDF) {
	    // end of cell
	    i_in++;
	    i_out = putRun(dst, i_out, runV, runLength);
	    runV = -1;
	    dst[i_out++] = 0xFF;
	    dst[i_out++] = 0xDF;
	    continue;
	}

	c = src[i_in];

	// the two top bits select the kind of code
	switch((c & 0xC0) >> 6) {
	  case 0:
	  case 1:
	      // word pixel value
	      c = (c << 8) + src[++i_in];
	      LD("%04x (%02x %02x %02x)->", c, (c & 0x7C00) >> 10, (c & 0x03E0) >> 5, (c & 0x001F));
	      r = (c & 0x7C00) >> 10;
	      g = (c & 0x03E0) >> 5;
	      b = (c & 0x001F);
	      if (pr->numColors < 128)
		  v = pr->pByteTable[r][g][b];
	      else {
		  if ((r & 0x1E) == (g & 0x1E) && (r & 0x1E) == (b & 0x1E))
		      v = 128 + ((g & 0x1E)+((g & 0x10) != 0));
		  else
		      v = ((r & 0x18)<<2) + (g & 0x1C) + ((b & 0x18)>>3);
	      }
	      LD("%02x -> %02x %02x %02x\n", v,
		 (v & 0x60) >> 5, (v & 0x1C) >> 2, (v & 0x03));
	      
	      if (v == runV) {
		  runLength++;
	      }
	      else {
		  i_out = putRun(dst, i_out, runV, runLength);
		  runV = v;
		  runLength = 1;
	      }
	      break;
	  case 2:
	      // one-byte grey pixel value
	      r = g = b = (c & 0x3f) >> 1;
	      v = (pr->numColors < 128) ? pr->pByteTable[r][g][b] : 128 + r;
	      if (v == runV) {
		  runLength++;
	      }
	      else {
		  i_out = putRun(dst, i_out, runV, runLength);
		  runV = v;
		  runLength = 1;
	      }
	      break;
	  case 3:
	      run = (c & 0x1F);
	      if (c & 0x20) {	// line copy
		  i_out = putRun(dst, i_out, runV, runLength);
		  dst[i_out++] = c;
		  // 0xFF is the two-byte form emitted by genRLE for long
		  // copies: its run byte belongs to the same code and must
		  // not be parsed as the start of a pixel value
		  if (c == 0xFF && i_in + 1 < raw_size) {
		      dst[i_out++] = src[++i_in];
		  }
		  runV = -1;
	      }
	      else			// continue run
		  runLength += run + 1;
	      break;
	  default:
	      fprintf(stderr, "ERROR: unknown RLE code %d\n", c);
	      assert(0);
	      break;
	}
    }
    
    return i_out;
}

/**
 * Write one run of identical one-byte pixel values to dst.
 *
 * The value byte itself stands for the first pixel; the remaining
 * runLength - 1 pixels are written as run bytes 0xC0 + (count - 1), each
 * covering at most 32 pixels. Nothing is written for a negative runV
 * ("no run pending").
 *
 * @param dst		output buffer
 * @param i_out		current write offset into dst
 * @param runV		pixel value of the run, or negative for none
 * @param runLength	number of pixels in the run
 * @return		write offset after the run
 */
static int
putRun(unsigned char* dst, int i_out, int runV, int runLength)
{
    int remaining, chunk;

    if (runV < 0) {
	return i_out;
    }

    /* a value with both top bits set would be read back as a run code */
    if ((runV & 0xC0) == 0xC0) {
	fprintf(stderr, "ERROR: runV = 0x%02x\n", runV);
    }

    dst[i_out++] = runV;

    for (remaining = runLength - 1; remaining > 0; remaining -= chunk) {
	chunk = (remaining > 32) ? 32 : remaining;
	dst[i_out++] = 0xC0 + chunk - 1;
    }

    return i_out;
}

/**
 * Builds a lookup table for the specified palette
 *
 * Every possible 15 bit color (5 bits per component) is expanded to 8 bits
 * per component and mapped to a one-byte code that is stored in the
 * format's pByteTable: for greyscale formats the weighted grey value
 * reduced to `depth` bits, otherwise the index of the palette entry with
 * the smallest squared distance.
 *
 * @param format	subformat whose table entry has already been
 *			allocated and whose palette has been built
 */
void
build_table(unsigned char format)
{
    lrle_pr_t *tbl = lrle_pr_table[format];
    const double weight_red = 0.30;
    const double weight_green = 0.59;
    const double weight_blue = 0.11;
    int r, g, b;

    for (r = 0; r < 32; r++) {
	for (g = 0; g < 32; g++) {
	    for (b = 0; b < 32; b++) {
		int r8, g8, b8;
		long best, dist, delta;
		int idx, nearest;

		// expand the 5 bit components to 8 bit
		if (tbl->isGrey || tbl->numColors != 16) {
		    r8 = (r * 33) / 4;
		    g8 = (g * 33) / 4;
		    b8 = (b * 33) / 4;
		} else {
		    r8 = r * 8 + 4;
		    g8 = g * 8 + 4;
		    b8 = b * 8 + 4;
		}

		if (tbl->isGrey) {
		    // weighted sum, added up as red + blue + green
		    double part_red = ((float) r8) * weight_red;
		    double part_blue = ((float) b8) * weight_blue;
		    double part_green = ((float) g8) * weight_green;
		    int grey = (int) (part_red + part_blue + part_green);

		    if (grey > 255) {
			fprintf(stderr, "RLE_PR: grey overflow (%d)\n", grey);
			grey = 255;
		    }
		    tbl->pByteTable[r][g][b] = grey >> (8 - tbl->depth);
		    continue;
		}

		// nearest palette entry by squared distance; the first of
		// several equally close entries wins
		best = 0x7FFFFFFF;
		nearest = 0xFF;
		for (idx = 0; idx < tbl->numColors; idx++) {
		    delta = r8 - tbl->palette[idx][RED];
		    dist = delta * delta;
		    delta = g8 - tbl->palette[idx][GREEN];
		    dist += delta * delta;
		    delta = b8 - tbl->palette[idx][BLUE];
		    dist += delta * delta;

		    if (dist < best) {
			best = dist;
			nearest = idx;
		    }
		}
		tbl->pByteTable[r][g][b] = nearest;
	    }
	}
    }
}

/**
 * Build a color palette
 *
 * For the two 7bit direct subformats the 128 entries are laid out as
 * 2 bits red, 3 bits green, 2 bits blue (from the most to the least
 * significant bits of the code), each level spread over 0..255.
 * Any other format is reported as an error.
 *
 * @param format	subformat whose table entry has already been allocated
 */
void
build_palette(unsigned char format)
{
    lrle_pr_t *tbl = lrle_pr_table[format];
    int code;

    if (format != rfbLRLESubenc7bitDirectLossy &&
	format != rfbLRLESubenc7bitDirectLossless) {
	pp_log_err("lrle build_palette: unkown LRLE format %d\n", format);
	assert(0);
	return;
    }

    for (code = 0; code < 4 * 8 * 4; code++) {
	int red_level = code / 32;		/* 0..3 */
	int green_level = (code / 4) % 8;	/* 0..7 */
	int blue_level = code % 4;		/* 0..3 */

	tbl->palette[code][RED] = red_level * 85;
	tbl->palette[code][GREEN] = (green_level * 73) / 2;
	tbl->palette[code][BLUE] = blue_level * 85;
    }
}

#endif /* !PP_FEAT_VSC_HW_ENCODING */
