/**
 * watchdog.c
 *
 * A simple watchdog module to monitor internal threads.
 *
 * (c) 2006 Peppercon AG, Ralf Guenther <rgue@peppercon.de>
 */

#include <pp/watchdog.h>
#include <pp/base.h>
#include <pp/log.h>
#include <sys/time.h>
#include <time.h>
#include <pthread.h>

// internal data

// List of all registered clients (wd_client_t entries linked via 'anchor').
static struct list_head wd_clients = LIST_HEAD_INIT(wd_clients);
// Mutex taken around every access to wd_clients and wd_term.
static pthread_mutex_t wd_lock = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
// Next client id handed out by pp_wd_register(). Ids start at 1 because
// pp_wd_trigger()/pp_wd_unregister() treat id 0 as "look up by calling thread".
static int wd_next_id = 1;
// Handle of the monitoring thread; created when the first client registers
// and joined when the last client unregisters.
static pthread_t wd_thread;
// Termination request for the monitoring thread: set to 1 by
// pp_wd_unregister(), reset to 0 by the thread itself just before it exits.
static int wd_term = 0;

// Book-keeping record for one monitored client; lives in wd_clients.
typedef struct {
    struct list_head anchor;  // link into the wd_clients list
    int id;                   // id assigned by pp_wd_register()
    const char *name;         // client name used in log output (pointer is stored, not copied)
    int timeout;              // max. seconds allowed between two triggers
    void (*action)(int id);   // optional callback run on timeout, may be NULL
    pthread_t thread;         // thread that called pp_wd_register()
    const char *ctx;          // checkpoint label from the last pp_wd_trigger(), or NULL
    int last_trig_time;       // wd_getsecs() value of registration / last trigger / last timeout report
    int dead_cnt;             // set to 0 at registration; not otherwise used in this file
} wd_client_t;

// internal functions

/**
 * Returns the current time in whole seconds, used as time base for all
 * watchdog timeout calculations.
 *
 * A monotonic clock is preferred so that stepping the system time (NTP,
 * manual date change) can neither fake nor hide a client timeout. The
 * wall-clock time is used only where no monotonic clock is available.
 * Only differences between two return values are meaningful.
 */
static int wd_getsecs(void)
{
#ifdef CLOCK_MONOTONIC
    struct timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
        return (int)ts.tv_sec;
    }
#endif
    struct timeval tv = { 0, 0 };
    gettimeofday(&tv, NULL);
    return (int)tv.tv_sec;
}

static void* wd_thread_func(void* arg UNUSED)
{
    MUTEX_LOCK(&wd_lock);
    while (!wd_term) {
        int now = wd_getsecs();
        struct list_head *pos;
        list_for_each(pos, &wd_clients) {
            wd_client_t *c = list_entry(pos, wd_client_t, anchor);
            if (c->last_trig_time + c->timeout < now) {
                pp_log("watchdog ERROR: client %s did not respond within %dsecs (last seen at checkpoint %s)!\n",
                       c->name, now - c->last_trig_time, c->ctx ? c->ctx : "");
                c->last_trig_time = now;
                if (c->action) c->action(c->id);
            }
        }

        MUTEX_UNLOCK(&wd_lock);
        sleep(1); // TODO: should use pthread_cond_timedwait for faster termination
        MUTEX_LOCK(&wd_lock);
    }
    wd_term = 0; // reset this flag
    MUTEX_UNLOCK(&wd_lock);
    return NULL;
}

// interface

int pp_wd_register(const char* name, int timeout /*secs*/, void (*action)(int id))
{
    int res = 0;
    wd_client_t *c = (wd_client_t*)malloc(sizeof(wd_client_t));
    c->name = name;
    c->timeout = timeout;
    c->action = action;
    c->thread = pthread_self();
    c->last_trig_time = wd_getsecs();
    c->id = wd_next_id++; // no wrap protection!
    c->ctx = NULL;
    c->dead_cnt = 0;

    MUTEX_LOCK(&wd_lock);
    if (list_empty(&wd_clients)) {
        // start wd thread when first client registers
        res = pthread_create(&wd_thread, NULL, wd_thread_func, 0);
        if (res < 0) pp_log_err("watchdog: ERROR, failed to start wd thread\n");
    }
    list_add(&c->anchor, &wd_clients);
    if (res >= 0) res = c->id;
    MUTEX_UNLOCK(&wd_lock);

    return res;
}

int pp_wd_trigger(int id, const char* ctx)
{
    int res = -1;
    struct list_head *pos;
    pthread_t thread = pthread_self();
    MUTEX_LOCK(&wd_lock);
    list_for_each(pos, &wd_clients) {
        wd_client_t *c = list_entry(pos, wd_client_t, anchor);
        if ((id && c->id == id) || c->thread == thread) {
            c->last_trig_time = wd_getsecs();
            c->ctx = ctx;
            res = 0;
            break;
        }
    }
    MUTEX_UNLOCK(&wd_lock);
    return res;
}

int pp_wd_unregister(int id)
{
    int res = -1;
    struct list_head *pos;
    pthread_t thread = pthread_self();
    MUTEX_LOCK(&wd_lock);
    list_for_each(pos, &wd_clients) { // must not be safe agains del, since we leave after del
        wd_client_t *c = list_entry(pos, wd_client_t, anchor);
        if ((id && c->id == id) || c->thread == thread) {
            list_del(pos);
            free(c);
            res = 0;
            if (list_empty(&wd_clients)) {
                // stop wd thread when last client unregisters
                wd_term = 1;
                MUTEX_UNLOCK(&wd_lock);
                res = pthread_join(wd_thread, NULL);
                if (res < 0) pp_log_err("watchdog: ERROR, failed to wait for wd thread termination\n");
                return res; // already unlocked
            }
            break;
        }
    }
    MUTEX_UNLOCK(&wd_lock);
    return res;
}
