/******************************************************************************\
* cat_condensation_simple.c                                                    *
*                                                                              *
* Implementation of a particle filter algorithm for intelligent online mouse   *
* cursor tracking (CAT)                                                        *
*                                                                              *
* SIMPLE APPROACH!                                                             *
* This filter only uses pointer moves for sampling and the diffmap information *
* for resampling                                                               *
*                                                                              *
* Copyright 2005 Peppercon AG                                                  *
* Thomas Weber tweb@peppercon.de                                               *
\******************************************************************************/

/* TODO!!!
 * - strip off interface data only used for assertions... (fbinfo);
 */

#include <math.h> // for sqrt
#include <string.h> // for ffs

#include <pp/base.h>
#include <pp/vsc.h>

#include "cat_debug.h"
#include "cat_internal.h"

/*
 * Forward declarations of the file-local helpers.
 */

/* framebuffer pixel coordinates -> diffmap tile coordinates */
static int diffmap_coord_from_abs_coord(u_char *dmx, u_char *dmy,
                                        u_int16_t fbx, u_int16_t fby,
                                        const fb_format_info_t *fb_info);
/* diffmap tile coordinates -> 32 bit word index and bit mask */
static int diffmap_offset_from_diffmap_coord(u_char *idx, u_int32_t *mask,
                                             u_char line_len,
                                             u_char dmx, u_char dmy,
                                             const fb_format_info_t *fb_info);
#if 0
static int diffmap_offset_from_abs_coord(u_char *idx, u_int32_t *mask,
                                         u_char line_len,
                                         u_int16_t x, u_int16_t y,
                                         const fb_format_info_t *fb_info);
#endif
/* 32 bit word index and bit mask -> framebuffer pixel coordinates */
static int abs_coord_from_diffmap_offset(u_int16_t *x, u_int16_t *y,
                                         u_char idx, u_int32_t mask,
                                         u_char line_len,
                                         const fb_format_info_t *fb_info);
/* OR all queued diffmaps into one freshly allocated diffmap */
static int get_merged_diffmap(cat_queue_diffmap_entry_t *cqde,
                              cat_queue_t *diffmap_queue);
/* sample (re-)initialization */
static void initialize_samples(cat_condensation_simple_data_t *ccd,
                               const fb_format_info_t *fb_info);
static void initialize_sample_random(cat_sample_t *sample, 
                                     const fb_format_info_t *fb_info);
static int initialize_sample_from_samples(cat_sample_t *sample,
                                          const cat_sample_t *samples);
static int initialize_sample_from_diffmap(cat_sample_t *sample,
                                          const cat_queue_diffmap_entry_t *cqde,
                                          const fb_format_info_t *fb_info);
/* rasterize the expected pointer movement of a sample into a tile mask */
static void get_movement_mask(cat_queue_diffmap_entry_t *cqde, 
                              const cat_sample_t *sample,
                              int16_t diff_x, int16_t diff_y,
                              const fb_format_info_t *fb_info);
static void bresenham(u_char xstart, u_char ystart, 
                      int16_t dx, int16_t dy,
                      u_char xmin, u_char xmax, u_char ymin, u_char ymax,
                      u_char expand,
                      u_int16_t *rnpix, u_char **rxpix, u_char **rypix);
#if 0
static void diffmap_overlay(cat_queue_diffmap_entry_t *overlay,
                            const cat_queue_diffmap_entry_t *d1,
                            const cat_queue_diffmap_entry_t *d2);
#endif
/* match a sample's movement mask against the merged diffmap */
static int get_nearest_match(const cat_queue_diffmap_entry_t *diffmap, 
                             const cat_queue_diffmap_entry_t *mask, 
                             const cat_sample_t *sample,
                             int16_t dx, int16_t dy,
                             const fb_format_info_t *fb_info,
                             u_char *match_x, u_char *match_y);


#if defined(CAT_DEBUG)
/* qsort() comparator for cat_sample_t, used by the debug estimate code */
static int sample_sort(const void *p1, const void *p2);
#endif /* CAT_DEBUG */

#if defined(SIMPLE_CONDENSATION_DEBUG_SAMPLES)
/* sample dump log: opened in ..._init(), closed in ..._cleanup() */
FILE *dsf;
static const char dsfn[] = "/root/ds.log";

static void debug_samples_dump(const cat_sample_t *samples);
#endif /* SIMPLE_CONDENSATION_DEBUG_SAMPLES */

/*
 * Noise helpers. `range` is used as a bit mask, so it is expected to be of
 * the form 2^k - 1. All macro arguments are parenthesized so that expression
 * arguments (e.g. `a | b`) expand with the intended precedence.
 *
 * RANDOM_NOISE(range)     random value in [0, range]
 * RANDOM_NOISE_PM(range)  random value in [-range, range + 1]
 * SET_WITHIN(i, min, max) i clamped to [min, max]; arguments may be
 *                         evaluated more than once
 */
#define RANDOM_NOISE(range)     (random() & (range))
#define RANDOM_NOISE_PM(range)  ((random() & (((range) << 1) | 1)) - (range))
#define SET_WITHIN(i, min, max) ((i) < (min) ? (min) : ((i) > (max) ? (max) : (i)))

/**
 * condensation lifecycle
 */
/**
 * Install the simple condensation tracker on a driver object.
 *
 * Allocates the (zero-initialized) tracker state and its sample array, flags
 * the samples for initialization on the next sampling pass and registers the
 * sample / resample / position-estimate callbacks in the mouse data.
 *
 * @param obj  driver object; obj->data_conv_mouse.data must point to the
 *             data_cat_t of the mouse converter
 * @return PP_SUC on success, PP_ERR if the tracker state could not be
 *         allocated. If a tracker is already installed the process aborts.
 */
int cat_condensation_simple_init(driver_t *obj) {
    cat_condensation_simple_data_t *ccd;
    data_cat_t* mouse_data;
    
    assert(obj);
    
    mouse_data = (data_cat_t*)obj->data_conv_mouse.data;
    assert(mouse_data);
    
    if(mouse_data->tr_data || mouse_data->tr_sample ||
       mouse_data->tr_resample) {
        /* a tracker is already installed: treated as a programming error */
        CDMSG(1, "Failed to initialize condensation tracking\n");
        abort();
        return PP_ERR; /* not reached, keeps the compiler quiet */
    }
    
    /*
     * Zero the whole state: 'reliable', 'estim' and 'absolute' are read by
     * the callbacks before anything has been written to them.
     */
    ccd = (cat_condensation_simple_data_t*)calloc(1, sizeof(*ccd));
    if(!ccd) {
        CDMSG(1, "Failed to allocate condensation tracking data\n");
        return PP_ERR;
    }
    ccd->samples = (cat_sample_t*)calloc(CAT_CONDENSATION_SAMPLE_COUNT,
                                         sizeof(cat_sample_t));
    if(!ccd->samples) {
        CDMSG(1, "Failed to allocate condensation tracking data\n");
        free(ccd);
        return PP_ERR;
    }
    ccd->initialize = 1;
    
    mouse_data->tr_data = (void*)ccd;
    mouse_data->tr_sample = cat_condensation_simple_sample;
    mouse_data->tr_resample = cat_condensation_simple_resample;
    mouse_data->get_pos_estim = cat_condensation_simple_get_position_estimate;
    
#if defined(SIMPLE_CONDENSATION_DEBUG_SAMPLES)
    dsf = fopen(dsfn, "w");
    if(!dsf) {
        CDMSG(CAT_CDMSG_WARNING, "Could not open sample dump file\n");
    }
#endif /* SIMPLE_CONDENSATION_DEBUG_SAMPLES */
    
    return PP_SUC;
}

/**
 * Remove the simple condensation tracker from a driver object.
 *
 * Frees the tracker state (if any) and clears the tracker data pointer and
 * all tracker callbacks in the mouse data.
 *
 * @param obj  driver object; obj->data_conv_mouse.data must point to the
 *             data_cat_t of the mouse converter
 */
void cat_condensation_simple_cleanup(driver_t *obj) {
    cat_condensation_simple_data_t *ccd;
    data_cat_t* mouse_data;
    
#if defined(SIMPLE_CONDENSATION_DEBUG_SAMPLES)
    /* the dump file may never have been opened; fclose(NULL) is undefined */
    if(dsf) {
        fclose(dsf);
        dsf = NULL;
    }
#endif /* SIMPLE_CONDENSATION_DEBUG_SAMPLES */

    assert(obj);
    
    mouse_data = (data_cat_t*)obj->data_conv_mouse.data;
    assert(mouse_data);
    
    ccd = (cat_condensation_simple_data_t*)mouse_data->tr_data;
    if(ccd) {
        free(ccd->samples);
        free(ccd);
    }
    mouse_data->tr_data = NULL;
    mouse_data->tr_sample = NULL;
    mouse_data->tr_resample = NULL;
    mouse_data->get_pos_estim = NULL; 
}

/**
 * condensation callbacks
 */
/**
 * Sampling callback of the simple condensation tracker.
 *
 * Drains the pointer position queue, remembering the most recent absolute
 * position in the tracker state. For every dequeued position, each sample
 * whose weight is 0 is re-seeded from another sample or, if that fails, at a
 * random framebuffer position. The first call after init seeds all samples
 * randomly. Returns without touching the queue while no framebuffer format
 * is available.
 *
 * @param obj  driver object carrying the tracker state
 */
void cat_condensation_simple_sample(driver_t *obj) {
    cat_condensation_simple_data_t *ccd;
    data_cat_t *mouse_data;
    cat_queue_t *ptr_pos_queue;
    cat_queue_entry_t *cqe;
    fb_format_info_t fb_info;
    int s;
    
    assert(obj);
    
    mouse_data = (data_cat_t*)obj->data_conv_mouse.data;
    assert(mouse_data);
    
    ptr_pos_queue = mouse_data->ptr_pos_queue;
    assert(ptr_pos_queue);
    
    ccd = (cat_condensation_simple_data_t*)mouse_data->tr_data;
    assert(ccd);
    
    if(pp_vsc_get_fb_format(0, &fb_info) == PP_ERR) {
        CDMSG(CAT_CDMSG_WARNING, "No framebuffer info yet\n");
        return;
    }

    if(ccd->initialize) {
        initialize_samples(ccd, &fb_info);
    }

    cat_queue_lock(ptr_pos_queue);
    while(NULL != (cqe = cat_queue_dequeue(ptr_pos_queue))) {
        assert(cqe->type == CAT_QUEUE_PTR_POS_ENTRY);
        
        ccd->absolute.x = cqe->data.ptr_pos.x;
        ccd->absolute.y = cqe->data.ptr_pos.y;
        
        /* TODO! do this for every cqe? makes no sense at the moment... */
        for(s = 0; s < CAT_CONDENSATION_SAMPLE_COUNT; ++s) {
            cat_sample_t* sample = &ccd->samples[s];
            
            if(sample->weight) {
                /* sample is still alive, leave it alone */
                continue;
            }
            /* dead sample: clone a living one, fall back to random */
            if(PP_ERR == initialize_sample_from_samples(sample, 
                                                        ccd->samples)) {
                initialize_sample_random(sample, &fb_info);
            }
        }
        cat_queue_destroy_entry(cqe);
    }
    cat_queue_unlock(ptr_pos_queue);
}

/**
 * Resampling callback of the simple condensation tracker.
 *
 * Steps:
 *  1. merge all queued diffmaps into one,
 *  2. sum up all queued relative pointer moves,
 *  3. for every sample, rasterize the expected movement into a tile mask and
 *     look for the nearest changed tile in the merged diffmap,
 *  4. raise the weight of matching samples and move them to the matched
 *     tile (plus noise); lower the weight of non-matching samples and move
 *     them by the summed pointer move; samples whose weight would drop to 0
 *     are invalidated (weight 0) and left in place.
 *
 * Only when CAT_DEBUG is defined, the samples are additionally sorted, a
 * weighted position estimate is computed from the best ones and stored in
 * the tracker state together with the 'reliable' flag.
 *
 * Returns early (without consuming pointer moves) while no framebuffer
 * format or no diffmap is available.
 *
 * @param obj  driver object carrying the tracker state
 */
void cat_condensation_simple_resample(driver_t *obj) {
    cat_condensation_simple_data_t *ccd;
    data_cat_t *mouse_data;
    cat_queue_t *diffmap_queue, *ptr_move_queue;
    cat_queue_diffmap_entry_t cqde_merged, cqde_mask;
    cat_queue_entry_t *cqe;
    int i;
    fb_format_info_t fb_info;
    int16_t diff_x = 0, diff_y = 0;
    /* summed over all samples, hence wider than int16_t */
    int match_count = 0, real_diff_x = 0, real_diff_y = 0;
    
    assert(obj);
    
    mouse_data = (data_cat_t*)obj->data_conv_mouse.data;
    assert(mouse_data);
    
    diffmap_queue = mouse_data->diffmap_queue;
    assert(diffmap_queue);
    
    ptr_move_queue = mouse_data->ptr_move_queue;
    assert(ptr_move_queue);
    
    ccd = (cat_condensation_simple_data_t*)mouse_data->tr_data;
    assert(ccd);
    
    if(pp_vsc_get_fb_format(0, &fb_info) == PP_ERR) {
        CDMSG(CAT_CDMSG_WARNING, "No framebuffer info yet\n");
        return;
    }

    /* get diffmap sensor info */
    if(get_merged_diffmap(&cqde_merged, diffmap_queue) == PP_ERR) {
        CDMSG(1, "failed to get diffmap, leaving\n");
        return;
    }
    
    /* get kme sensor info: total relative pointer movement */
    cat_queue_lock(ptr_move_queue);
    while(NULL != (cqe = cat_queue_dequeue(ptr_move_queue))) {
        assert(cqe->type == CAT_QUEUE_PTR_MOVE_ENTRY);
        diff_x += cqe->data.ptr_move.x;
        diff_y += cqe->data.ptr_move.y;
        cat_queue_destroy_entry(cqe);
    }
    cat_queue_unlock(ptr_move_queue);

    /* initialize diffmap mask by merged diffmap attributes */
    cqde_mask.size = cqde_merged.size;
    cqde_mask.line_len = cqde_merged.line_len;
    cqde_mask.width = cqde_merged.width;
    cqde_mask.height = cqde_merged.height;
    cqde_mask.data = (u_int32_t*)malloc(cqde_mask.size);
    if(!cqde_mask.data) {
        CDMSG(1, "failed to allocate diffmap mask, leaving\n");
        free(cqde_merged.data);
        return;
    }
    
    for(i = 0; i < CAT_CONDENSATION_SAMPLE_COUNT; ++i) {
        /* check for each sample, if position corresponds with diffmap */
        cat_sample_t* sample = &ccd->samples[i];
        int match;
        u_char match_x = 0, match_y = 0;
        int16_t tmpx, tmpy;
        
        get_movement_mask(&cqde_mask, sample, diff_x, diff_y, &fb_info);
        match = get_nearest_match(&cqde_merged, &cqde_mask, sample, 
                                  diff_x, diff_y, &fb_info,
                                  &match_x, &match_y) == PP_SUC;

        /* calc new weights */
        sample->weight &= 0x7f; // mask out init bit
        if(match) {
            /* there is a diff at sample position */
            if(sample->weight < CAT_CONDENSATION_SAMPLE_MAX_WEIGHT) {
                /* weight is not maximum */
                sample->weight += CAT_CONDENSATION_SAMPLE_WEIGHT_INC;
                if(sample->weight < CAT_CONDENSATION_SAMPLE_WEIGHT_INC) {
                    /* TODO! handle overflow... */
                    sample->weight = CAT_CONDENSATION_SAMPLE_MAX_WEIGHT;
                }
            } /* else weight is already max */
            
            /* jump to a random pixel inside the matched tile */
            tmpx = match_x * PP_FB_TILE_WIDTH + RANDOM_NOISE(0xf);
            tmpy = match_y * PP_FB_TILE_HEIGHT + RANDOM_NOISE(0xf);
            ++match_count;
            real_diff_x += tmpx - sample->x;
            real_diff_y += tmpy - sample->y;
        } else if(sample->weight > CAT_CONDENSATION_SAMPLE_WEIGHT_DEC) {
            /* no diff: decrease weight, follow the reported pointer move */
            sample->weight -= CAT_CONDENSATION_SAMPLE_WEIGHT_DEC;
            tmpx = sample->x + diff_x;
            tmpy = sample->y + diff_y;
        } else {
            /* we decreased to 0, sample is invalid now; it gets re-seeded
             * by the sampling callback */
            sample->weight = 0;
            continue;
        }

        /* move sample to its new position, clipped to the framebuffer */
        /* TODO! scale movement! */
        if(tmpx < 0) {
            sample->x = 0;
        } else if(tmpx >= (int16_t)fb_info.g_w) {
            sample->x = fb_info.g_w - 1;
        } else {
            sample->x = tmpx;
        }
        if(tmpy < 0) {
            sample->y = 0;
        } else if(tmpy >= (int16_t)fb_info.g_h) {
            sample->y = fb_info.g_h - 1;
        } else {
            sample->y = tmpy;
        }
    }
    
#if defined(SIMPLE_CONDENSATION_DEBUG_SAMPLES)
    debug_samples_dump(ccd->samples);
#endif /* SIMPLE_CONDENSATION_DEBUG_SAMPLES */
    
#if defined(CAT_DEBUG)
    {
        unsigned long xmean = 0, ymean = 0, mean_sz = 0;
        const int top = CAT_CONDENSATION_SAMPLE_COUNT >> 1;
        int used;
        
        /* sort samples and print top n */
        qsort(ccd->samples, CAT_CONDENSATION_SAMPLE_COUNT, sizeof(cat_sample_t),
              sample_sort);
        for(used = 0; used < top; ++used) {
            cat_sample_t* sample = &ccd->samples[used];
            if(sample->weight <= (CAT_CONDENSATION_SAMPLE_INIT_WEIGHT & 0x7f)) {
                /* do not show samples just initialized or below */
                break;
            }
            CDMSG(0, "%d: (%3d) x = %4d, y = %4d\n",
                  used, sample->weight, sample->x, sample->y);
            xmean += sample->x * sample->weight;
            ymean += sample->y * sample->weight;
            mean_sz += sample->weight;
        }
        
        ccd->reliable = 0;
        /* mean_sz > 0 also guarantees used > 0 for the divisions below */
        if(used >= (CAT_CONDENSATION_SAMPLE_COUNT >> 5) && mean_sz > 0) {
            const u_int16_t estim_x = xmean / mean_sz;
            const u_int16_t estim_y = ymean / mean_sz;
            /* offset between the position in rc_abs and our estimate */
            const int err_x = mouse_data->rc_abs.x - estim_x;
            const int err_y = mouse_data->rc_abs.y - estim_y;
            double dev = 0;
            
            for(i = 0; i < used; ++i) {
                /* accumulate squared distance to the estimate */
                const cat_sample_t* sample = &ccd->samples[i];
                const double ex = (double)sample->x - estim_x;
                const double ey = (double)sample->y - estim_y;
               
                dev += ex * ex + ey * ey;
            }
            dev = sqrt(dev / used);
            if(dev < 10) {
                ccd->estim.x = estim_x;
                ccd->estim.y = estim_y;
                ccd->reliable = 1;
                CDMSG(1, "estim x = %u (%d), y = %u (%d), dev = %.2f (%d)\n",
                      estim_x, err_x, estim_y, err_y, dev, used);
                
                /* adjust translation table; needs at least one match to
                 * have a mean real displacement */
                if(match_count > 0) {
                    real_diff_x /= match_count;
                    real_diff_y /= match_count;
                    /* NOTE(review): the first value passed is the estimate
                     * error (as before, where an inner diff_x/diff_y
                     * shadowed the summed pointer move) - confirm that this
                     * and not the pointer move is what
                     * cat_translation_adjust() expects */
                    cat_translation_adjust(obj, err_x, real_diff_x);
                    cat_translation_adjust(obj, err_y, real_diff_y);
                }
            }
        }
    }
#endif /* CAT_DEBUG */
    
    /* free merged diffmap data */
    free(cqde_merged.data);
    /* free diffmap mask data */
    free(cqde_mask.data);
}

int64_t cat_condensation_simple_get_position_estimate(const driver_t *obj,
                                                      u_int16_t *estim_x, 
                                                      u_int16_t *estim_y) {
    cat_condensation_simple_data_t *ccd;
    data_cat_t *mouse_data;
//return PP_ERR;

    assert(obj);
    
    mouse_data = ((data_cat_t*)obj->data_conv_mouse.data);
    assert(mouse_data);
    
    ccd = (cat_condensation_simple_data_t*)mouse_data->tr_data;
    assert(ccd);

    if(!ccd->reliable) {
        return PP_ERR;
    }
    
    *estim_x = ccd->estim.x;
    *estim_y = ccd->estim.y;
    
    return PP_SUC;
}


/****************************** local functions *******************************/

/**
 * Convert a framebuffer pixel position into diffmap tile coordinates.
 *
 * @param dmx      receives the tile column
 * @param dmy      receives the tile row
 * @param fbx,fby  pixel position, asserted to lie inside the framebuffer
 * @param fb_info  framebuffer format (only used by the assertions)
 * @return always PP_SUC
 */
static int diffmap_coord_from_abs_coord(u_char *dmx, u_char *dmy,
                                        u_int16_t fbx, u_int16_t fby,
                                        const fb_format_info_t *fb_info) {
    assert(dmx != NULL);
    assert(dmy != NULL);
    assert(fb_info != NULL);

    /* the pixel has to be on screen */
    assert(fbx < fb_info->g_w);
    assert(fby < fb_info->g_h);

    /* one diffmap entry covers one hardware tile */
    *dmx = fbx / PP_FB_TILE_WIDTH;
    *dmy = fby / PP_FB_TILE_HEIGHT;

    return PP_SUC;
}

/**
 * Convert diffmap tile coordinates into a position inside the diffmap
 * bit field.
 *
 * The bit number of a tile is dmy * line_len + dmx; it is split into the
 * index of the 32 bit word and a single-bit mask inside that word.
 *
 * @param idx       receives the index of the 32 bit word
 * @param mask      receives the mask selecting the tile's bit in that word
 * @param line_len  number of bits per diffmap line
 * @param dmx,dmy   tile coordinates, asserted to be inside the tile grid
 * @param fb_info   framebuffer format (only used by the assertions)
 * @return always PP_SUC
 *
 * NOTE(review): idx is an u_char, so word indices above 255 are silently
 * truncated - confirm that the diffmaps in use never get that large.
 */
static int diffmap_offset_from_diffmap_coord(u_char *idx, u_int32_t *mask,
                                             u_char line_len,
                                             u_char dmx, u_char dmy,
                                             const fb_format_info_t *fb_info) {
    u_int32_t offset;
    
    assert(idx);
    assert(mask);
    assert(fb_info);

    assert(dmx < fb_info->tiles_w);
    assert(dmy < fb_info->tiles_h);

    offset = dmy * line_len + dmx;
    
    *idx = offset / 32;
    /* shift an unsigned 1: bit 31 must not end up in a signed int */
    *mask = (u_int32_t)1 << (offset % 32);
    
    return PP_SUC;
}

#if 0
static int diffmap_offset_from_abs_coord(u_char *idx, u_int32_t *mask,
                                         u_char line_len,
                                         u_int16_t x, u_int16_t y,
                                         const fb_format_info_t *fb_info) {
    /* calculate diffmap offset from absolute cordinates */
    u_char dmx, dmy;
    
    diffmap_coord_from_abs_coord(&dmx, &dmy, x,  y, fb_info);
    diffmap_offset_from_diffmap_coord(idx, mask, line_len, dmx, dmy, fb_info);
    
    return PP_SUC;
}
#endif

/**
 * Convert a position inside the diffmap bit field into the framebuffer
 * pixel at the centre of the corresponding tile.
 *
 * @param x,y       receive the pixel position of the tile centre
 * @param idx       index of the 32 bit diffmap word
 * @param mask      word content; its least significant set bit selects the
 *                  tile, at least one bit has to be set
 * @param line_len  number of bits per diffmap line
 * @param fb_info   framebuffer format (only used by the assertions)
 * @return always PP_SUC
 */
static int abs_coord_from_diffmap_offset(u_int16_t *x, u_int16_t *y,
                                         u_char idx, u_int32_t mask,
                                         u_char line_len,
                                         const fb_format_info_t *fb_info) {
    u_char tile_row, tile_col;

    assert(x != NULL);
    assert(y != NULL);
    assert(mask != 0); /* nothing to locate in an empty word */
    assert(fb_info != NULL);

    /* a line consists of line_len / 32 words */
    tile_row = idx / (line_len / 32);
    /* first bit of the word within its line, plus the bit inside the word */
    tile_col = (idx * 32) % line_len + ffs(mask) - 1;

    /* aim at the middle of the tile */
    *x = tile_col * PP_FB_TILE_WIDTH + PP_FB_TILE_WIDTH / 2;
    *y = tile_row * PP_FB_TILE_HEIGHT + PP_FB_TILE_HEIGHT / 2;

    assert(*x < fb_info->g_w);
    assert(*y < fb_info->g_h);

    return PP_SUC;
}

/**
 * Drain the diffmap queue and OR all queued diffmaps into a single one.
 *
 * The geometry (size, line_len, width, height) is taken from the first
 * entry; all further entries are asserted to have the same geometry.
 *
 * @param cqde           receives the merged diffmap; cqde->data is allocated
 *                       with malloc() and has to be freed by the caller
 * @param diffmap_queue  queue to drain; locked for the duration of the merge
 * @return PP_SUC on success; PP_ERR if the queue was empty or the merged
 *         diffmap could not be allocated (cqde is left untouched then)
 */
static int get_merged_diffmap(cat_queue_diffmap_entry_t *cqde,
                              cat_queue_t *diffmap_queue) {
    cat_queue_entry_t *cqe;
    u_int16_t cqe_size, cqe_line_len, cqe_width, cqe_height;
    u_int16_t words_per_line, words_used;
    u_int32_t *diffmap;
    
    assert(cqde);
    assert(diffmap_queue);
    
    cat_queue_lock(diffmap_queue);
    if(NULL == (cqe = cat_queue_dequeue(diffmap_queue))) {
        /* there are no entries in diffmap queue, return */
        CDMSG(1, "nothing to dequeue!\n");
        goto error;
    }
    
    assert(cqe->type == CAT_QUEUE_DIFFMAP_ENTRY);
    assert(cqe->data.diffmap.data);
   
    cqe_size = cqe->data.diffmap.size;
    cqe_line_len = cqe->data.diffmap.line_len;
    cqe_width = cqe->data.diffmap.width;
    cqe_height = cqe->data.diffmap.height;
    
    /* init diffmap to first entry dequeued */
    diffmap = (u_int32_t*)malloc(cqe_size);
    if(!diffmap) {
        CDMSG(1, "failed to allocate merged diffmap\n");
        cat_queue_destroy_entry(cqe);
        goto error;
    }
    memcpy(diffmap, cqe->data.diffmap.data, cqe_size);
    
    cat_queue_destroy_entry(cqe);

    /*
     * Number of 32 bit words per line, and how many of them carry tile
     * bits. Round the latter up: a width that is no multiple of 32 ends in
     * a partially used word which has to be merged as well.
     */
    words_per_line = cqe_line_len / 32;
    words_used = (cqe_width + 31) / 32;
    if(words_used > words_per_line) {
        words_used = words_per_line;
    }

    while(NULL != (cqe = cat_queue_dequeue(diffmap_queue))) {
        u_int16_t x, y;
        
        /* paranoia... */
        assert(cqe->type == CAT_QUEUE_DIFFMAP_ENTRY);
        assert(cqe->data.diffmap.data);
        assert(cqe->data.diffmap.size == cqe_size);
        assert(cqe->data.diffmap.line_len == cqe_line_len);
        assert(cqe->data.diffmap.width == cqe_width);
        assert(cqe->data.diffmap.height == cqe_height);
        
        for(y = 0; y < cqe_height; ++y) {
            const size_t line_start = (size_t)y * words_per_line;

            for(x = 0; x < words_used; ++x) {
                diffmap[line_start + x] |=
                    cqe->data.diffmap.data[line_start + x];
            }
        }

        cat_queue_destroy_entry(cqe);
    }
    cat_queue_unlock(diffmap_queue);
    
    cqde->data = diffmap;
    cqde->size = cqe_size;
    cqde->line_len = cqe_line_len;
    cqde->width = cqe_width;
    cqde->height = cqe_height;
    
    return PP_SUC;
 error:
    cat_queue_unlock(diffmap_queue);
    
    return PP_ERR;
}

/**
 * Seed every sample of the tracker at a random framebuffer position and
 * clear the tracker's 'initialize' request flag.
 *
 * @param ccd      tracker state owning the sample array
 * @param fb_info  framebuffer format limiting the random positions
 */
static void initialize_samples(cat_condensation_simple_data_t *ccd,
                               const fb_format_info_t *fb_info) {
    int remaining;
    cat_sample_t *cur;

    assert(ccd != NULL);
    assert(fb_info != NULL);

    /* walk the array front to back */
    cur = ccd->samples;
    for(remaining = CAT_CONDENSATION_SAMPLE_COUNT; remaining > 0; --remaining) {
        initialize_sample_random(cur, fb_info);
        ++cur;
    }

    /* initialization request has been served */
    ccd->initialize = 0;
}

/**
 * Place a sample at a random position inside the framebuffer and give it
 * the initial weight.
 *
 * @param sample   sample to (re-)initialize
 * @param fb_info  framebuffer format; g_w / g_h bound the position
 */
static void initialize_sample_random(cat_sample_t *sample, 
                                     const fb_format_info_t *fb_info) {
    long rnd;

    assert(sample != NULL);
    assert(fb_info != NULL);

    /* TODO: something "faster" than mod? */
    rnd = random();
    sample->x = rnd % fb_info->g_w;

    rnd = random();
    sample->y = rnd % fb_info->g_h;

    sample->weight = CAT_CONDENSATION_SAMPLE_INIT_WEIGHT;
}

/**
 * Re-seed a sample at the position of a randomly chosen established sample.
 *
 * Up to 100 random picks from the sample array are tried. A pick is accepted
 * if its weight is non-zero and does not have bit 0x80 set, i.e. invalid and
 * just created samples are never cloned. The re-seeded sample gets the
 * initial weight.
 *
 * @param sample   sample to (re-)initialize
 * @param samples  sample array of CAT_CONDENSATION_SAMPLE_COUNT entries
 * @return PP_SUC if a donor was found and copied, PP_ERR if no suitable
 *         donor turned up (sample is unchanged then)
 */
static int initialize_sample_from_samples(cat_sample_t *sample,
                                          const cat_sample_t *samples) {
    enum { MAX_ATTEMPTS = 100 };
    int attempt;
    
    assert(sample);
    assert(samples);
    
    for(attempt = 0; attempt < MAX_ATTEMPTS; ++attempt) {
        /* modulo, not bitwise and: the index has to cover the whole array
         * and must stay below the sample count */
        const cat_sample_t *donor =
            &samples[random() % CAT_CONDENSATION_SAMPLE_COUNT];

        if(donor->weight && !(donor->weight & 0x80)) {
            /* do not initialize to invalid or just created samples! */
            sample->x = donor->x;
            sample->y = donor->y;
            sample->weight = CAT_CONDENSATION_SAMPLE_INIT_WEIGHT;
            return PP_SUC;
        }
    }
    
    /* sample not set, return error code */
    return PP_ERR;
}

/**
 * Re-seed a sample at the centre of a randomly chosen changed tile.
 *
 * Up to 100 random bytes of the diffmap are inspected; the first non-zero
 * one determines the tile. The re-seeded sample gets the initial weight.
 *
 * @param sample   sample to (re-)initialize
 * @param cqde     diffmap to pick the tile from
 * @param fb_info  framebuffer format, passed on for the coordinate mapping
 * @return PP_SUC if the sample was placed, PP_ERR if no usable changed tile
 *         was found (sample is unchanged then)
 */
static int initialize_sample_from_diffmap(cat_sample_t *sample,
                                          const cat_queue_diffmap_entry_t *cqde,
                                          const fb_format_info_t *fb_info) {
    enum { MAX_ATTEMPTS = 100 };
    const u_char *bytes;
    int attempt;
    
    assert(sample);
    assert(cqde);
    assert(cqde->data);
    assert(fb_info);

    if(cqde->size == 0) {
        /* empty diffmap: nothing to pick (and no modulo by zero) */
        return PP_ERR;
    }
    bytes = (const u_char*)cqde->data;

    for(attempt = 0; attempt < MAX_ATTEMPTS; ++attempt) {
        u_int32_t byte_idx, word_idx, mask;
        unsigned int shift;

        byte_idx = random() % cqde->size; // get a random _byte_!!! of diffmap
        if(!bytes[byte_idx]) {
            continue;
        }

        /* byte idx -> u_int32_t idx mapping, thats simple: */
        word_idx = byte_idx / 4;
        if(word_idx > 0xff) {
            /* abs_coord_from_diffmap_offset() takes the word index as an
             * u_char; words beyond that cannot be addressed, try again */
            continue;
        }

        /* get least significant set bit and convert to u_int32_t mask:
         * - position of the byte inside its word: byte_idx % 4
         * - note, we are on PPC, care about byte ordering:
         *   3 - (byte_idx % 4)
         * - multiply with 8 bits in byte
         * - add offset of the least significant bit set in the byte
         * - shift an unsigned 1 by that (the result may be bit 31)
         */
        shift = (3 - (byte_idx % 4)) * 8 + ffs(bytes[byte_idx]) - 1;
        mask = (u_int32_t)1 << shift;

        assert(cqde->data[word_idx] & mask); // assert mask is correct ;-)

        abs_coord_from_diffmap_offset(&sample->x, &sample->y,
                                      (u_char)word_idx, mask, cqde->line_len, 
                                      fb_info);
        sample->weight = CAT_CONDENSATION_SAMPLE_INIT_WEIGHT;
        return PP_SUC;
    }
    
    /* sample not set, return error code */
    return PP_ERR;
}

/**
 * Build the tile mask a sample is expected to touch for a pointer move.
 *
 * The mask is cleared, then a line of tiles is rasterized from the sample's
 * tile in direction (diff_x, diff_y). The line is clipped to a window that
 * reaches from half the move behind the sample to twice the move ahead of
 * it (both clamped to the framebuffer).
 *
 * @param cqde            mask to fill; geometry and data buffer must be set
 * @param sample          sample the movement starts at
 * @param diff_x, diff_y  pointer movement in pixels
 * @param fb_info         framebuffer format
 */
static void get_movement_mask(cat_queue_diffmap_entry_t *cqde, 
                              const cat_sample_t *sample,
                              int16_t diff_x, int16_t diff_y,
                              const fb_format_info_t *fb_info) {
    u_char origin_x, origin_y;          /* tile holding the sample */
    u_char lo_x, hi_x, lo_y, hi_y;      /* clipping window in tiles */
    u_char *line_x, *line_y;            /* rasterized tile coordinates */
    u_char word, keep;
    u_int16_t tile_count, k;
    u_int32_t bit;
    int16_t px, py;

    assert(cqde);
    assert(cqde->data);
    assert(sample);
    assert(fb_info);

    /* start from an empty mask */
    memset(cqde->data, 0, cqde->size);

    /* tile the sample currently sits in */
    diffmap_coord_from_abs_coord(&origin_x, &origin_y,
                                 sample->x, sample->y, fb_info);

    /* TODO! perhaps we want to restrict borders... */

    /* window corner behind the sample: half the movement backwards */
    px = SET_WITHIN(sample->x - diff_x / 2,
                    0, (int16_t)fb_info->g_w - 1);
    py = SET_WITHIN(sample->y - diff_y / 2, 0,
                    (int16_t)fb_info->g_h - 1);
    diffmap_coord_from_abs_coord(&lo_x, &lo_y, px, py, fb_info);

    /* window corner ahead of the sample: twice the movement forwards */
    px = SET_WITHIN(sample->x + 2 * diff_x,
                    0, (int16_t)fb_info->g_w - 1);
    py = SET_WITHIN(sample->y + 2 * diff_y,
                    0, (int16_t)fb_info->g_h - 1);
    diffmap_coord_from_abs_coord(&hi_x, &hi_y, px, py, fb_info);

    /* order the corners, the movement may point in any direction */
    if(lo_x > hi_x) {
        keep = lo_x;
        lo_x = hi_x;
        hi_x = keep;
    }
    if(lo_y > hi_y) {
        keep = lo_y;
        lo_y = hi_y;
        hi_y = keep;
    }

    /* calculate affected hex tile coordinates */
    bresenham(origin_x, origin_y, diff_x, diff_y,
              lo_x, hi_x, lo_y, hi_y, 1,
              &tile_count, &line_x, &line_y);

    /* flag every rasterized tile in the mask */
    for(k = 0; k < tile_count; ++k) {
        diffmap_offset_from_diffmap_coord(&word, &bit, cqde->line_len,
                                          line_x[k], line_y[k], fb_info);
        cqde->data[word] |= bit;
    }

    if(!tile_count) {
        /* nothing was rasterized, so there are no coordinate arrays */
        return;
    }
    free(line_x);
    free(line_y);
}

/**
 * Rasterize a widened ray on a cell grid (Bresenham line algorithm).
 *
 * The result starts with the cells of a disc of radius `expand` around the
 * start point. If (dx, dy) is not zero, the ray is then followed from the
 * start point until it leaves [xmin, xmax] x [ymin, ymax]; each step adds
 * the cells within `expand` perpendicular to the stepping axis. Cells outside
 * the range are dropped; cells may be reported more than once.
 *
 * @param xstart, ystart  start cell
 * @param dx, dy          direction of the ray
 * @param xmin, xmax      inclusive x range (xmin <= xmax)
 * @param ymin, ymax      inclusive y range (ymin <= ymax)
 * @param expand          half width of the line in cells
 * @param rnpix           receives the number of cells
 * @param rxpix, rypix    receive malloc()ed arrays of *rnpix x / y
 *                        coordinates which the caller has to free; both are
 *                        set to NULL when *rnpix is 0
 *
 * On allocation failure an empty result (*rnpix == 0) is returned.
 */
static void bresenham(u_char xstart, u_char ystart,     // startpoint 
                      int16_t dx, int16_t dy,           // direction
                      u_char xmin, u_char xmax,         // x-range
                      u_char ymin, u_char ymax,         // y-range
                      u_char expand,                    // line width
                      u_int16_t *rnpix, u_char **rxpix, u_char **rypix) {
    /* see http://de.wikipedia.org/wiki/Bresenham-Algorithmus */
    const int span = expand * 2 + 1;
    int x, y, t = 0, dist, xerr, yerr, incx, incy, sz, width;
    int adx, ady;
    u_char *xpix, *ypix;
     
    assert(rnpix);
    assert(rxpix);
    assert(rypix);

    assert(xmin <= xmax);
    assert(ymin <= ymax);

    /* defined result for every exit path */
    *rnpix = 0;
    *rxpix = NULL;
    *rypix = NULL;
     
    /* sign of the increment and magnitude in both directions; computed in
     * int so that negating -32768 does not overflow */
    incx = dx < 0 ? -1 : (dx ? 1 : 0);
    adx = dx < 0 ? -(int)dx : dx;
    incy = dy < 0 ? -1 : (dy ? 1 : 0);
    ady = dy < 0 ? -(int)dy : dy;
 
    /* determine which distance is larger */
    dist = (adx > ady) ? adx : ady;
 
    /*
     * Maximum number of cells possible: every recorded step has a distinct
     * in-range coordinate on its axis and adds at most `span` cells, plus at
     * most span * span cells for the start disc.
     */
    sz = (xmax + ymax - xmin - ymin + 2) * span + span * span;
    xpix = (u_char*)malloc(sz * sizeof(u_char));
    ypix = (u_char*)malloc(sz * sizeof(u_char));
    if(!xpix || !ypix) {
        goto cleanup;
    }
 
    /* initialize startpoint: disc of radius `expand`, clipped to the range */
    for(x = xstart - expand; x <= xstart + expand; ++x) {
        for(y = ystart - expand; y <= ystart + expand; ++y) {
            if((x - xstart) * (x - xstart) + (y - ystart) * (y - ystart) > expand * expand ||
               x < xmin || x > xmax || y < ymin || y > ymax) {
                continue;
            }
            assert(t < sz);
            xpix[t] = x;
            ypix[t] = y;
            ++t;
        }
    }
    
    /* if we got no movement, we finished */
    if(adx == 0 && ady == 0) {
        goto finish;
    }
    
    /* initializations before loop */
    x = xstart;
    y = ystart;
    xerr = adx;
    yerr = ady;
    
    /*
     * Compute the cells. The axis with the larger distance steps in every
     * iteration, so the loop always ends by running into a border.
     */
    while(1) {
        xerr += adx;
        yerr += ady;
     
        if(xerr > dist) {
            xerr -= dist;
            x += incx;
            if(x < xmin || x > xmax) {
                /* we reached a border */
                break;
            }
            for(width = y - expand; width <= y + expand; width++) {
                /* expand the line */
                if(width < ymin || width > ymax) {
                    continue;
                }
                assert(t < sz);
                xpix[t] = x;
                ypix[t] = width;
                ++t;
            }
        }
     
        if(yerr > dist) {
            yerr -= dist;
            y += incy;
            if(y < ymin || y > ymax) {
                /* we reached a border */
                break;
            }
            for(width = x - expand; width <= x + expand; width++) {
                /* expand the line */
                if(width < xmin || width > xmax) {
                    continue;
                }
                assert(t < sz);
                xpix[t] = width;
                ypix[t] = y;
                ++t;
            }
        }
    }
    
 finish:
    if(t) {
        /* hand out exactly sized copies of the scratch buffers */
        const size_t bytes = (size_t)t * sizeof(u_char);
        u_char *outx = (u_char*)malloc(bytes);
        u_char *outy = (u_char*)malloc(bytes);

        if(outx && outy) {
            memcpy(outx, xpix, bytes);
            memcpy(outy, ypix, bytes);
            *rxpix = outx;
            *rypix = outy;
            *rnpix = t;
        } else {
            free(outx);
            free(outy);
        }
    }
    
 cleanup:
    free(xpix);
    free(ypix);
}

#if 0
/**
 * Builds the bitwise AND of two diffmaps of identical geometry.
 *
 * The geometry fields (size, line_len, width, height) of the result are
 * copied from d2 and a new data buffer of that size is allocated with
 * malloc(). The buffer is not freed in here.
 *
 * If the allocation fails, overlay->data is NULL and overlay->size is set
 * to 0 so that both fields stay consistent.
 *
 * @param overlay  receives the result, previous contents are overwritten
 * @param d1       first input diffmap
 * @param d2       second input diffmap, same geometry as d1
 */
static void diffmap_overlay(cat_queue_diffmap_entry_t *overlay,
                            const cat_queue_diffmap_entry_t *d1,
                            const cat_queue_diffmap_entry_t *d2) {
    size_t i, nwords;
    
    assert(overlay);
    assert(d1);
    assert(d1->data);
    assert(d2);
    assert(d2->data);
    assert(d1->size == d2->size); 
    assert(d1->line_len == d2->line_len); 
    assert(d1->width == d2->width); 
    assert(d1->height == d2->height);
    
    overlay->size = d2->size; 
    overlay->line_len = d2->line_len; 
    overlay->width = d2->width; 
    overlay->height = d2->height;
    overlay->data = (u_int32_t*)malloc(overlay->size);
    if(!overlay->data) {
        /* out of memory: hand back an empty overlay instead of crashing */
        overlay->size = 0;
        return;
    }
    
    /* size is given in bytes, the data is processed in 32 bit words */
    nwords = overlay->size / sizeof(u_int32_t);
    for(i = 0; i < nwords; ++i) {
        overlay->data[i] = d1->data[i] & d2->data[i];
    }
}
#endif

/**
 * Looks for a set cell in the masked diffmap (diffmap AND mask) close to the
 * position a sample is expected at after a pointer move of (dx, dy).
 *
 * The sample position and the position shifted by (dx, dy) are converted to
 * diffmap coordinates. If the shifted position lies outside of the frame
 * buffer, the last coordinate pair delivered by bresenham() for the move is
 * used as start position instead, as it marks the border.
 *
 * Search strategy:
 * - no movement (dx == 0 && dy == 0): only the cell of the sample is tested
 * - abs(dx) > abs(dy): columns are visited beginning with the start column
 *   and alternating outwards (0, +1, -1, +2, -2, ...). Only columns between
 *   the sample column and the border the move points to are accepted.
 *   Inside a column the lines are walked from the bottom line up to the
 *   sample line (dy > 0) or from the top line down to the sample line.
 * - otherwise: lines are visited in the same alternating manner, restricted
 *   to the lines between the sample line and the border the move points to.
 *   Inside a line the first set bit of the first non-empty block is taken.
 *
 * The search ends without result as soon as two successive candidates are
 * outside of the accepted range.
 *
 * @param diffmap  diffmap to search in
 * @param mask     mask of the same geometry, ANDed with the diffmap
 * @param sample   sample providing the frame buffer position (x, y)
 * @param dx       pointer movement in x direction
 * @param dy       pointer movement in y direction
 * @param fb_info  frame buffer format, provides g_w and g_h
 * @param match_x  receives the diffmap column of the match on success
 * @param match_y  receives the diffmap line of the match on success
 *
 * @return PP_SUC if a match was found, PP_ERR otherwise
 */
static int get_nearest_match(const cat_queue_diffmap_entry_t *diffmap, 
                             const cat_queue_diffmap_entry_t *mask, 
                             const cat_sample_t *sample,
                             int16_t dx, int16_t dy,
                             const fb_format_info_t *fb_info,
                             u_char *match_x, u_char *match_y) {
    u_char dmx, dmy, startx, starty, min, max, idx, blocknr, bit, skip;
    signed char iter, line;
    int16_t fbstartx, fbstarty;
    u_int32_t block;
    unsigned int word; /* block index, wide enough for more than 256 blocks */
#if !defined(NDEBUG)
    u_char loop = 0;
#endif
    char nmd = 0;

    assert(diffmap);
    assert(diffmap->data);
    assert(mask);
    assert(mask->data);
    assert(diffmap->size == mask->size); 
    assert(diffmap->line_len == mask->line_len); 
    assert(diffmap->width == mask->width); 
    assert(diffmap->height == mask->height);
    assert(sample);
    assert(match_x);
    assert(match_y);
    
    /* diffmap cell of the sample itself */
    diffmap_coord_from_abs_coord(&dmx, &dmy, sample->x, sample->y, fb_info);
    
    /* diffmap cell the search starts at */
    fbstartx = sample->x + dx;
    fbstarty = sample->y + dy;
    if(fbstartx < 0 || fbstartx >= (int16_t)fb_info->g_w ||
       fbstarty < 0 || fbstarty >= (int16_t)fb_info->g_h) {
        /* calculate intersection with border... */
        u_int16_t rnpix;
        u_char *rxpix, *rypix;
        
        /* TODO! split bresenham? */
        bresenham(dmx, dmy, dx, dy, 0, 
                  diffmap->width - 1, 0, diffmap->height - 1, 0,
                  &rnpix, &rxpix, &rypix);
        if(rnpix) {
            /* get last coord-pair as it marks the border */
            startx = rxpix[rnpix - 1];
            starty = rypix[rnpix - 1];
            free(rxpix);
            free(rypix);
        } else {
            abort(); // something is wrong here...
            return PP_ERR;
        }
    } else {
        diffmap_coord_from_abs_coord(&startx, &starty, 
                                     fbstartx, fbstarty, fb_info);
    }
    
    if(dx == 0 && dy == 0) {
        /* no movement, test sample position */
        diffmap_offset_from_diffmap_coord(&idx, &block, diffmap->line_len,
                                          dmx, dmy, fb_info);
        if(diffmap->data[idx] & mask->data[idx] & block) {
            /* report the tested cell, the outputs must be valid on success */
            *match_x = dmx;
            *match_y = dmy;
            return PP_SUC;
        } else {
            return PP_ERR;
        }
    }

    iter = 0;
    skip = 0;
    
    if(abs(dx) > abs(dy)) {
        /* horizontal spreading */
        u_char start_line;
        signed char stop_line, line_inc;

        /* init variables */
        if(dx > 0) {
            /* search rightwards */
            min = dmx;
            max = diffmap->width - 1;
        } else {
            /* search leftwards */
            min = 0;
            max = dmx;
        }
        if(dy > 0) {
            /* search downwards */
            start_line = diffmap->height - 1;
            stop_line = dmy - 1;
            line_inc = -1;
        } else {
            /* search upwards */
            start_line = 0;
            stop_line = dmy + 1;
            line_inc = 1;
        }
        
        while(1) {
            /* columns are x coordinates, so spread around startx */
            signed char column = startx + iter;
#if !defined(NDEBUG)
            u_char line_loop = 0;
#endif
            
            if(column < min || column > max) {
                /* we are beyond borders */
                if(skip) {
                    /* we already skipped last time, return */
                    CDMSG(nmd, "return PP_ERR...\n");
                    return PP_ERR;
                }
                skip = 1;
            } else {
                skip = 0;

                blocknr = column / 32;
                bit = column % 32;
                /* search for matches in column */
                for(line = start_line; line != stop_line; line += line_inc) {
                    /* get a block */
                    word = (unsigned int)(line * diffmap->line_len / 32 +
                                          blocknr);
                    block = diffmap->data[word] & mask->data[word];
                    /* unsigned shift, 1 << 31 on a signed int is undefined */
                    if(block & ((u_int32_t)1 << bit)) {
                        /* we got a match... */
                        *match_x = blocknr * 32 + bit;
                        *match_y = line;
                        return PP_SUC;
                    }
                    
                    assert(++line_loop <= diffmap->height);
                }
            }
            
            /* 0; 1; -1; 2; -2; 3; -3; ... */
            if(iter > 0) {
                iter = -iter;
            } else {
                iter = -iter + 1;
            }
            
            assert(++loop < 2 * diffmap->width + 2);
        }
    } else {
        /* vertical spreading
         * get lines from diffmap and search blocks...
         */

        /* init variables */
        if(dy > 0) {
            /* search downwards */
            min = dmy;
            max = diffmap->height - 1;
        } else {
            /* search upwards */
            min = 0;
            max = dmy;
        }
        
        while(1) {
            /* get a line */
            line = starty + iter;
            
            if(line < min || line > max) {
                /* we are beyond borders */
                if(skip) {
                    /* we already skipped last time, return */
                    if(line != -1 && line != 48) CDMSG(nmd, "dy: %d, y: %d, dmy: %d, starty: %d, min: %d, line: %d, max: %d, iter: %d\n", dy, sample->y, dmy, starty, min, line, max, iter);
                    return PP_ERR;
                }
                skip = 1;
            } else {
                skip = 0;

                /* search for matches in line */
                for(blocknr = 0; blocknr < diffmap->line_len / 32; ++blocknr) {
                    /* get a block */
                    word = (unsigned int)(line * diffmap->line_len / 32 +
                                          blocknr);
                    block = diffmap->data[word] & mask->data[word];
                    if(block) {
                        /* block contains at least one match,
                         * ffs() delivers the 1-based index of the lowest
                         * set bit
                         */
                        *match_x = blocknr * 32 + ffs(block) - 1;
                        *match_y = line;
                        return PP_SUC;
                    }
                }
            }
            
            /* 0; 1; -1; 2; -2; 3; -3; ... */
            if(iter > 0) {
                iter = -iter;
            } else {
                iter = -iter + 1;
            }
            
            assert(++loop < 2 * diffmap->height + 2);
        }
    }
    
    abort(); // we should never get here...
    /* no match found */
    return PP_ERR;
}

#if defined(CAT_DEBUG)
/**
 * sort by weight
 */
static int sample_sort(const void *p1, const void *p2) {
    assert(p1);
    assert(p2);

    return (((cat_sample_t*)p1)->weight & 0x7f) <
           (((cat_sample_t*)p2)->weight & 0x7f) ? 1 : -1;
}
#endif /* CAT_DEBUG */

#if defined(SIMPLE_CONDENSATION_DEBUG_SAMPLES)
/**
 * prints sample arrangement to file
 *
 * The samples are counted in a character grid in which each cell covers
 * (1 << SCDSX) x (1 << SCDSY) pixels of a SCDSRX x SCDSRY screen. An empty
 * cell is printed as ' ', a cell with one sample as '1'; every further
 * sample increments the character until 126 ('~') is reached. The grid is
 * followed by a line of '=' characters and written line by line to dsf.
 *
 * Samples positioned outside of SCDSRX x SCDSRY are left out, as they do
 * not have a cell in the grid.
 *
 * @param samples  array of CAT_CONDENSATION_SAMPLE_COUNT samples
 */
static void debug_samples_dump(const cat_sample_t *samples) {
#define SCDSRX 1024 // X-resolution
#define SCDSRY 768 // Y-resolution
#define SCDSX 4 // X-shift
#define SCDSY (SCDSX + 1) // Y-shift
    /* one extra row for the '=' footer, one extra column for the 0 byte */
    u_char grid[(SCDSRY >> SCDSY) + 1][(SCDSRX >> SCDSX) + 1];
    uint i;
    
    for(i = 0; i < SCDSRY >> SCDSY; ++i) {
        memset(grid[i], ' ', SCDSRX >> SCDSX);
        grid[i][SCDSRX >> SCDSX] = 0;
    }
    memset(grid[SCDSRY >> SCDSY], '=', SCDSRX >> SCDSX);
    grid[SCDSRY >> SCDSY][SCDSRX >> SCDSX] = 0;
    
    for(i = 0; i < CAT_CONDENSATION_SAMPLE_COUNT; ++i) {
        uint row = samples[i].y >> SCDSY;
        uint col = samples[i].x >> SCDSX;
        
        if(row >= (SCDSRY >> SCDSY) || col >= (SCDSRX >> SCDSX)) {
            /* would hit the footer, the terminator or leave the grid */
            continue;
        }
        
        if(grid[row][col] == ' ') {
            grid[row][col] = '1';
        } else if(grid[row][col] < 126) {
            grid[row][col]++;
        }
    }
    
    for(i = 0; i < (SCDSRY >> SCDSY) + 1; ++i) {
        fprintf(dsf, "%s\n", (const char*)grid[i]);
    }
}
#endif /* SIMPLE_CONDENSATION_DEBUG_SAMPLES */
    

