#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "textrec.h"


/* Install the field-type callback.  The parser calls it with the
   completed field name and the user data pointer as soon as a name has
   been read; its return value becomes the type of the current record.
   Without this callback parsing stops with textrec_res_no_type_cb. */
void textrec_set_type_cb(struct textrec_parser *ps, field_type_cb cb)
{
    ps->type_cb = cb;
}

/* Install the callback that receives records of type ftype_string
   (field name, value buffer, value length, user data). */
void textrec_set_string_cb(struct textrec_parser *ps, field_string_cb cb)
{
    ps->string_cb = cb;
}

/* Install the callback that receives records of type ftype_blob; it is
   given the value already converted from hex text to raw bytes. */
void textrec_set_blob_cb(struct textrec_parser *ps, field_blob_cb cb)
{
    ps->blob_cb = cb;
}

/* Install the callback that receives records of type ftype_integer
   (field name, parsed value as long long, user data). */
void textrec_set_integer_cb(struct textrec_parser *ps, field_integer_cb cb)
{
    ps->integer_cb = cb;
}

/* Install the callback that receives records of type ftype_boolean
   (field name, parsed value as 0/1, user data). */
void textrec_set_boolean_cb(struct textrec_parser *ps, field_boolean_cb cb)
{
    ps->boolean_cb = cb;
}

/* Set the opaque pointer that is handed, unchanged, to every callback
   as its last argument. */
void textrec_set_user_data(struct textrec_parser *ps, void *ud)
{
    ps->user_data = ud;
}


/* States of the parser FSM; textrec_feedchar() dispatches every input
   character to the handle_*() routine matching the current state. */
enum {
    state_finish = textrec_state_finish,  /* terminal; input is ignored */
    state_start,              /* at line start, no record pending */
    state_strange_space,      /* line began with whitespace, no record pending */
    state_name,               /* accumulating the field name */
    state_space_after_name,   /* skipping whitespace between name and value */
    state_expect_contline,    /* at line start, a record is pending */
    state_leading_ws,         /* line began with whitespace, record pending */
    state_value,              /* accumulating value chars up to the newline */
    state_cmtskip2contline,   /* skipping a comment, then expect_contline */
    state_cmtskip2start       /* skipping a comment, then start */
};


void textrec_init(struct textrec_parser *ps)
{
    ps->state = state_start;
    ps->res_code = textrec_res_ok;
    ps->line = 1;
    ps->empty_line_code = 0;
    ps->type_cb = NULL;
    ps->string_cb = NULL;
    ps->blob_cb = NULL;
    ps->integer_cb = NULL;
    ps->boolean_cb = NULL;
    ps->user_data = NULL;

    ps->namebufsize = 64;
    ps->namebuf = malloc(ps->namebufsize);
    ps->namecurlen = 0;
    ps->databufsize = 256;
    ps->databuf = malloc(ps->databufsize);
    ps->datacurlen = 0;
}

/* Release the buffers owned by the parser.  The pointers are reset so
   that calling cleanup a second time is harmless rather than a double
   free. */
void textrec_cleanup(struct textrec_parser *ps)
{
    free(ps->namebuf);
    ps->namebuf = NULL;
    ps->namebufsize = 0;
    ps->namecurlen = 0;

    free(ps->databuf);
    ps->databuf = NULL;
    ps->databufsize = 0;
    ps->datacurlen = 0;
}


/* ===== the FSM implementation ===== */


/* Return 1 if c is a space, tab, carriage return or newline, else 0. */
static int iswhitespace(int c)
{
    switch(c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}

/* Append one character to the field-name buffer, keeping it
   NUL-terminated and growing it when needed.

   On allocation failure the parser is switched to state_finish with
   textrec_res_no_memory and the character is dropped; the existing
   buffer is left intact. */
static void put_name_char(struct textrec_parser *ps, int c)
{
    /* need room for the new character plus the terminator */
    if(ps->namecurlen + 2 > ps->namebufsize) {
        int newlen;
        char *newbuf;
        /* grow from the capacity, not the current length, so the new
           size is sufficient whatever the capacity happened to be */
        newlen = ps->namebufsize * 2;
        if(newlen < ps->namecurlen + 2)
            newlen = ps->namecurlen + 2;
        newbuf = realloc(ps->namebuf, newlen);
        if(!newbuf) {
            ps->state = state_finish;
            ps->res_code = textrec_res_no_memory;
            return;
        }
        ps->namebuf = newbuf;
        ps->namebufsize = newlen;
    }
    ps->namebuf[ps->namecurlen] = c;
    ps->namebuf[ps->namecurlen + 1] = 0;
    ps->namecurlen++;
}

/* state_start: first character of a line while no record is pending.
   Blank lines are skipped, '#' and ';' open a comment, other whitespace
   moves to state_strange_space, '+' is an error (nothing to continue),
   and anything else begins a field name. */
static void handle_start(struct textrec_parser *ps, int c)
{
    switch(c) {
    case '\n':
        return;
    case '#':
    case ';':
        ps->state = state_cmtskip2start;
        return;
    case '+':
        /* continuation marker with no record to continue */
        ps->state = state_finish;
        ps->res_code = textrec_res_no_line_to_continue;
        return;
    default:
        break;
    }

    if(iswhitespace(c)) {
        ps->state = state_strange_space;
        return;
    }

    /* set the state first: put_name_char may override it on error */
    ps->state = state_name;
    put_name_char(ps, c);
}

/* state_strange_space: a line that began with whitespace while no
   record is pending.  Such a line may only be blank or hold a comment;
   any significant character would be a continuation of nothing. */
static void handle_strange_space(struct textrec_parser *ps, int c)
{
    switch(c) {
    case '\n':
        /* whitespace-only line counts as an empty line */
        if(ps->empty_line_code != 0) {
            ps->res_code = ps->empty_line_code;
            ps->state = state_finish;
        } else {
            ps->state = state_start;
        }
        return;
    case '#':
    case ';':
        ps->state = state_cmtskip2start;
        return;
    default:
        break;
    }

    if(!iswhitespace(c)) {
        /* unexpected continuation line */
        ps->state = state_finish;
        ps->res_code = textrec_res_no_line_to_continue;
    }
}

static void handle_name(struct textrec_parser *ps, int c)
{
    if(!iswhitespace(c)) {
        put_name_char(ps, c);  /* this may change the state on error */
        return;
    }

    /* whitespace => first, handle the name which is complete now */

    if(!ps->type_cb) {
        ps->res_code = textrec_res_no_type_cb;
        ps->state = state_finish;
        return;
    }
    ps->curtype = (*ps->type_cb)(ps->namebuf, ps->user_data);
    if(ps->curtype == ftype_unknown) {
        ps->res_code = textrec_res_unknown_type;
        ps->state = state_finish;
        return;
    }

    /* now that we finished with the name, let's see what's next */

    if(c == '\n')
        ps->state = state_expect_contline;
    else
        ps->state = state_space_after_name;
}

/* Append one character to the value buffer, keeping it NUL-terminated
   and growing it when needed.

   The value buffer may hold blob (potentially sensitive) data, so when
   the buffer is replaced the old storage is zeroed before it is freed,
   in line with the wiping done in commit_record.  This is best-effort:
   a compiler is permitted to drop a memset that precedes free().

   On allocation failure the parser is switched to state_finish with
   textrec_res_no_memory and the character is dropped. */
static void put_value_char(struct textrec_parser *ps, int c)
{
    /* need room for the new character plus the terminator */
    if(ps->datacurlen + 2 > ps->databufsize) {
        int newlen;
        unsigned char *newbuf;
        /* grow from the capacity, not the current length */
        newlen = ps->databufsize * 2;
        if(newlen < ps->datacurlen + 2)
            newlen = ps->datacurlen + 2;
        newbuf = malloc(newlen);
        if(!newbuf) {
            ps->state = state_finish;
            ps->res_code = textrec_res_no_memory;
            return;
        }
        if(ps->databuf) {
            if(ps->datacurlen > 0)
                memcpy(newbuf, ps->databuf, ps->datacurlen);
            if(ps->databufsize > 0)
                memset(ps->databuf, 0, ps->databufsize);
            free(ps->databuf);
        }
        ps->databuf = newbuf;
        ps->databufsize = newlen;
    }
    ps->databuf[ps->datacurlen] = c;
    ps->databuf[ps->datacurlen + 1] = 0;
    ps->datacurlen++;
}

static void handle_space_after_name(struct textrec_parser *ps, int c)
{
    if(c == '\n') {
        ps->state = state_expect_contline;
        return;
    }
    if(iswhitespace(c))
        return;

    ps->state = state_value;
    put_value_char(ps, c);  /* this may change the state on error */
}

/* Strip trailing whitespace from the value buffer, removing no more
   than one newline, and re-terminate the buffer with NUL. */
static void strip_trailing_space(struct textrec_parser *ps)
{
    int newline_seen = 0;

    for(; ps->datacurlen > 0; ps->datacurlen--) {
        unsigned char last = ps->databuf[ps->datacurlen - 1];

        if(last != '\n') {
            if(!iswhitespace(last))
                break;
            continue;
        }
        /* a second newline (counting from the end) is kept */
        if(newline_seen)
            break;
        newline_seen = 1;
    }
    ps->databuf[ps->datacurlen] = 0;
}

/* Convert one hexadecimal digit (either case) to its value 0..15;
   return -1 for any other character. */
static int hexdig2num(char c)
{
    int value = -1;

    if(c >= 'a' && c <= 'f')
        value = 10 + (c - 'a');
    else if(c >= 'A' && c <= 'F')
        value = 10 + (c - 'A');
    else if(c >= '0' && c <= '9')
        value = c - '0';

    return value;
}


    /* maybe reimplement this via hexstr2data */
/* Convert the hex text in the value buffer to raw bytes, in place.
   Whitespace between digits is ignored.  Returns 1 on success, with
   datacurlen set to the number of bytes produced; returns 0 if a
   non-hex character is met (datacurlen is then left untouched) or if
   the number of hex digits is odd. */
static int convert_blob_to_binary(struct textrec_parser *ps)
{
    int src;
    int out = 0;
    int have_high = 0;   /* nonzero when the high nibble is pending */
    int byte = 0;

    for(src = 0; src < ps->datacurlen; src++) {
        unsigned char ch = ps->databuf[src];
        int nibble;

        if(iswhitespace(ch))
            continue;

        nibble = hexdig2num(ch);
        if(nibble == -1)
            return 0;

        if(!have_high) {
            byte = (nibble << 4) & 0xf0;
            have_high = 1;
        } else {
            /* the write position never overtakes the read position */
            ps->databuf[out] = byte | (nibble & 0x0f);
            out++;
            have_high = 0;
        }
    }
    ps->datacurlen = out;
    return !have_high;
}

/* Parse the value buffer as a decimal integer: optional surrounding
   whitespace, an optional '+' or '-' sign, then digits.

   Returns 1 and stores the result in *llval on success.  Returns 0 if a
   non-digit character is present, if more than one token is present, or
   if the number does not fit into long long (previously this overflowed
   a signed accumulator, which is undefined behaviour).

   NOTE: as before, a value with no digits at all (empty, or a lone
   sign) is accepted and yields 0. */
static int get_integer_from_data(struct textrec_parser *ps, long long *llval)
{
    int negative = 0;
    unsigned long long magnitude = 0;
    unsigned long long limit;
    const unsigned char *p = ps->databuf;

    while(*p && iswhitespace(*p))
        p++;
    if(*p == '-' || *p == '+') {
        negative = (*p == '-');
        p++;
    }

    /* largest magnitude representable for the chosen sign */
    limit = (unsigned long long)LLONG_MAX;
    if(negative)
        limit += 1;

    while(*p && !iswhitespace(*p)) {
        unsigned digit;
        if(*p < '0' || *p > '9')
            return 0;
        digit = *p - '0';
        /* would magnitude * 10 + digit exceed the limit? */
        if(magnitude > (limit - digit) / 10)
            return 0;
        magnitude = magnitude * 10 + digit;
        p++;
    }
    while(*p && iswhitespace(*p))
        p++;
    if(*p)      /* trailing garbage after the number */
        return 0;

    if(!negative)
        *llval = (long long)magnitude;
    else if(magnitude == limit)
        *llval = LLONG_MIN;    /* -(long long)magnitude would overflow */
    else
        *llval = -(long long)magnitude;
    return 1;
}

/* Interpret a NUL-terminated buffer as a boolean.  Leading whitespace
   is skipped; the word that follows is compared, ignoring the case of
   ASCII letters, against the known spellings below and must be followed
   by whitespace or the end of the buffer.

   Returns 1 and stores 0 or 1 in *bval on a match; returns 0 and leaves
   *bval untouched otherwise. */
static int get_boolean_from_dbuf(const unsigned char *dbuf, int *bval)
{
    static const struct {
        const char *word;
        int value;
    } reps[] = {
        { "yes",     1 }, { "no",       0 },
        { "true",    1 }, { "false",    0 },
        { "on",      1 }, { "off",      0 },
        { "enable",  1 }, { "disable",  0 },
        { "enabled", 1 }, { "disabled", 0 },
        { "1",       1 }, { "0",        0 }
    };
    const int nreps = (int)(sizeof(reps) / sizeof(reps[0]));
    const unsigned char *start = dbuf;
    int k;

    while(*start && iswhitespace(*start))
        start++;

    for(k = 0; k < nreps; k++) {
        const char *w = reps[k].word;
        const unsigned char *t = start;

        /* walk both strings while they agree */
        while(*w) {
            unsigned char ch = *t;
            if(ch >= 'A' && ch <= 'Z')
                ch += ('a' - 'A');
            if(ch != (unsigned char)*w)
                break;
            w++;
            t++;
        }

        /* whole word consumed and the input token ends here too */
        if(!*w && (!*t || iswhitespace(*t))) {
            *bval = reps[k].value;
            return 1;
        }
    }
    return 0;
}

/* Deliver the record accumulated so far to the callback matching its
   type, then reset the name and value buffers.

   The value is first stripped of trailing whitespace and converted as
   the type requires.  A nonzero result (a missing callback, a
   conversion failure, or a nonzero return from the callback) ends
   parsing with that code; otherwise the FSM returns to state_start. */
static void commit_record(struct textrec_parser *ps)
{
    int rc, parsed_bool, wipe_len;
    long long parsed_int;

    strip_trailing_space(ps);

    switch(ps->curtype) {
    case ftype_ignored:
        rc = 0;
        break;

    case ftype_string:
        if(ps->string_cb)
            rc = (*ps->string_cb)(ps->namebuf, ps->databuf, ps->datacurlen,
                                  ps->user_data);
        else
            rc = textrec_res_no_string_cb;
        break;

    case ftype_blob:
        /* blobs tend to contain sensitive data, so the buffer is wiped
           on every path out of this case; the length is remembered
           first because the conversion shrinks datacurlen */
        wipe_len = ps->datacurlen;
        if(!ps->blob_cb)
            rc = textrec_res_no_blob_cb;
        else if(!convert_blob_to_binary(ps))
            rc = textrec_res_incorrect_blob_data;
        else
            rc = (*ps->blob_cb)(ps->namebuf, ps->databuf, ps->datacurlen,
                                ps->user_data);
        memset(ps->databuf, 0, wipe_len);
        break;

    case ftype_integer:
        if(!ps->integer_cb)
            rc = textrec_res_no_integer_cb;
        else if(!get_integer_from_data(ps, &parsed_int))
            rc = textrec_res_incorrect_integer;
        else
            rc = (*ps->integer_cb)(ps->namebuf, parsed_int, ps->user_data);
        break;

    case ftype_boolean:
        if(!ps->boolean_cb)
            rc = textrec_res_no_boolean_cb;
        else if(!get_boolean_from_dbuf(ps->databuf, &parsed_bool))
            rc = textrec_res_incorrect_boolean;
        else
            rc = (*ps->boolean_cb)(ps->namebuf, parsed_bool, ps->user_data);
        break;

    case ftype_unknown:
    default:
        rc = textrec_res_unknown_type;
        break;
    }

    /* start the next record with empty buffers */
    ps->namecurlen = 0;
    ps->namebuf[0] = 0;
    ps->datacurlen = 0;
    ps->databuf[0] = 0;

    if(rc != 0) {
        ps->res_code = rc;
        ps->state = state_finish;
        return;
    }
    ps->state = state_start;
}

static void handle_expect_contline(struct textrec_parser *ps, int c)
{
    if(c == '\n') { /* empty line! */
        commit_record(ps);
        if(ps->state != state_start)  /* commit_record raised an error */
            return;
        if(ps->empty_line_code != 0) {
            ps->res_code = ps->empty_line_code;
            ps->state = state_finish;
        }
        return;
    }
    if(c == '+') {
        ps->state = state_value;
        return;
    }
    if(iswhitespace(c)) {
        ps->state = state_leading_ws;
        return;
    }
    if(c == ';' || c == '#') {
        ps->state = state_cmtskip2contline;
        return;
    }

    /* significant char found on the first pos. => this is the next name */

    commit_record(ps);
    if(ps->state != state_start)   /* smth. happened with the callback */
        return;
    put_name_char(ps, c);
}

static void handle_leading_ws(struct textrec_parser *ps, int c)
{
    if(c == '\n') { /* empty line! */
        commit_record(ps);
        if(ps->state != state_start)  /* commit_record raised an error */
            return;
        if(ps->empty_line_code != 0) {
            ps->res_code = ps->empty_line_code;
            ps->state = state_finish;
        }
        return;
    }
    if(iswhitespace(c))
        return;
    if(c == ';' || c == '#') {
        ps->state = state_cmtskip2contline;
        return;
    }

    /* significant char found */

    ps->state = state_value;
    put_value_char(ps, c);  /* this may change the state on error */
}

/* state_value: append every character, the newline included, to the
   value; the newline also ends the line and moves the FSM on to check
   for a continuation line. */
static void handle_value(struct textrec_parser *ps, int c)
{
    put_value_char(ps, c);

    /* only advance if put_value_char did not report an error */
    if(ps->state == state_value && c == '\n')
        ps->state = state_expect_contline;
}

/* state_cmtskip2contline: discard a comment found while a record is
   pending; at the newline, go back to expecting a continuation line. */
static void handle_cmtskip2contline(struct textrec_parser *ps, int c)
{
    if(c != '\n')
        return;
    ps->state = state_expect_contline;
}

/* state_cmtskip2start: discard a comment found while no record is
   pending; at the newline, go back to the line-start state. */
static void handle_cmtskip2start(struct textrec_parser *ps, int c)
{
    if(c != '\n')
        return;
    ps->state = state_start;
}

int textrec_feedchar(struct textrec_parser *ps, int c)
{
    if(ps->state == state_finish)
        return ps->res_code;
    if(c == '\n')
        ps->line++;
    if(c == -1) {
        if(ps->state == state_expect_contline)
            commit_record(ps);  /* this should change the state */
        if(ps->state == state_start || ps->state == state_strange_space)
            ps->res_code = textrec_res_eof;
        else
            ps->res_code = textrec_res_unfinished_line;
        ps->state = state_finish;
        return ps->res_code;
    }
    switch(ps->state) {
    case state_start:            handle_start(ps, c);            break;
    case state_strange_space:    handle_strange_space(ps, c);    break;
    case state_name:             handle_name(ps, c);             break;
    case state_space_after_name: handle_space_after_name(ps, c); break;
    case state_expect_contline:  handle_expect_contline(ps, c);  break;
    case state_leading_ws:       handle_leading_ws(ps, c);       break;
    case state_value:            handle_value(ps, c);            break;
    case state_cmtskip2contline: handle_cmtskip2contline(ps, c); break;
    case state_cmtskip2start:    handle_cmtskip2start(ps, c);    break;
    case state_finish:                                           break;
    default:
        ps->state = state_finish;
        ps->res_code = textrec_res_internal_error;
    }
    return ps->res_code;
}

/* Return a static, human-readable description of a result code.
   Codes not listed here are described as application-defined when
   positive and as unknown otherwise. */
const char *textrec_error_message(int code)
{
    const char *msg;

    switch(code) {
    case textrec_res_ok:
        msg = "everything's fine";
        break;
    case textrec_res_eof:
        msg = "end of file (normal)";
        break;
    case textrec_res_no_type_cb:
        msg = "no type callback";
        break;
    case textrec_res_unknown_type:
        msg = "unknown field type";
        break;
    case textrec_res_no_string_cb:
        msg = "no string callback";
        break;
    case textrec_res_no_blob_cb:
        msg = "no blob callback";
        break;
    case textrec_res_no_integer_cb:
        msg = "no integer callback";
        break;
    case textrec_res_no_boolean_cb:
        msg = "no boolean callback";
        break;
    case textrec_res_unfinished_line:
        msg = "unfinished line at EOF";
        break;
    case textrec_res_no_line_to_continue:
        msg = "no line to continue";
        break;
    case textrec_res_incorrect_blob_data:
        msg = "invalid blob data";
        break;
    case textrec_res_incorrect_integer:
        msg = "invalid integral number";
        break;
    case textrec_res_incorrect_boolean:
        msg = "invalid boolean";
        break;
    case textrec_res_no_memory:
        msg = "can't get enough memory";
        break;
    case textrec_res_internal_error:
        msg = "internal error";
        break;
    default:
        if(code > 0)
            msg = "application-defined error";
        else
            msg = "unknown error";
        break;
    }
    return msg;
}




/* --------------------------------------------------------------- */
/* ------ TESTS -------------------------------------------------- */
/* --------------------------------------------------------------- */

#ifdef TEXTREC_TEST

#include <stdio.h>

/* Run get_boolean_from_dbuf on s and print whether both the success
   flag and the produced value match the expectations.  The value is
   preset to -1 so that "left untouched" can be checked too. */
static void booltest(const char *s, int exp_ok, int exp_val)
{
    int got_val = -1;
    int got_ok = get_boolean_from_dbuf((const unsigned char *)s, &got_val);

    if(got_ok != exp_ok || got_val != exp_val) {
        printf("get_boolean[%s]: failed %d!=%d or %d!=%d\n",
               s, got_ok, exp_ok, got_val, exp_val);
        return;
    }
    printf("get_boolean[%s]: passed\n", s);
}

/* Run booltest over every accepted boolean spelling, over padded and
   mixed-case input, and over strings that must be rejected (for which
   the value is expected to stay at booltest's preset of -1). */
static void all_booltests(void)
{
    static const struct {
        const char *text;
        int exp_ok;
        int exp_val;
    } cases[] = {
        { "yes", 1, 1 },
        { "no", 1, 0 },
        { "true", 1, 1 },
        { "false", 1, 0 },
        { "on", 1, 1 },
        { "off", 1, 0 },
        { "enable", 1, 1 },
        { "disable", 1, 0 },
        { "enabled", 1, 1 },
        { "disabled", 1, 0 },
        { "1", 1, 1 },
        { "0", 1, 0 },
        { "           0         ", 1, 0 },
        { "         disabled           ", 1, 0 },
        { " EnAbLed", 1, 1 },
        { "disAbLed ", 1, 0 },
        { "foobar", 0, -1 },
        { "ena", 0, -1 },
        { "   disa   ", 0, -1 }
    };
    int i;

    for(i = 0; i < (int)(sizeof(cases) / sizeof(cases[0])); i++)
        booltest(cases[i].text, cases[i].exp_ok, cases[i].exp_val);
}

/* Sample input fed to the parser character by character by filetest()
   and fileign_test().  It holds six fields; the value of "secret" is
   continued on a second, whitespace-indented line. */
static const unsigned char testfile[] =
"remark   This is a test key which is almost useless\n"
"id       ff9a087bc94de73e8708\n"
"preflen  12\n"
"secret   2505a8d1fe4b993db4bedc058d65cd2e703eb8b52e06ae7326b829e7b439eb07\n"
"         e2085a28174d5ecb9ffff66263f330572ed28282dfdeff9a087bc94de73e8708\n"
"public   e2085a28174d5ecb9ffff66263f330572ed28282dfdeff9a087bc94de73e8708\n"
"yeshash  fed079fa81a087e794ba74c4f6c9e208bbfac11b6f5120526731fb978d69a622\n"
;

/* -------------------------------------------------------------------- */

/* Everything the test callbacks collect while parsing testfile.  Each
   *len member records the length reported by the parser, which may
   exceed the number of bytes actually copied into the matching array. */
struct test_results {
    int fieldscount;    /* number of callback invocations */
    char remark[80];    /* NUL-terminated copy, truncated to fit */
    int remarklen;
    uchar id[10];
    int idlen;
    int preflen;        /* value of the integer field "preflen" */
    uchar secret[64];
    int secretlen;
    uchar pub[32];      /* the field named "public" */
    int publen;
    uchar yeshash[32];
    int yeshashlen;
};

/* Type callback for filetest(): "remark" is a string, "preflen" an
   integer, and every other field is treated as a blob. */
static int test_type_cb(const char *field_name, void *user_data)
{
    if(strcmp(field_name, "remark") == 0)
        return ftype_string;
    return strcmp(field_name, "preflen") == 0 ? ftype_integer : ftype_blob;
}

/* String callback for filetest(): trace the call, count it, and store
   the "remark" field (truncated to fit, always NUL-terminated).
   Returns 0 on success and 1 for any other field name, which makes the
   parser stop. */
static int test_string_cb(const char *fnm, const uchar *str, int len, void *ud)
{
    struct test_results *tr = ud;
    int lencp;

    fprintf(stderr, "*XX* string_cb(%s, %s, %d)\n", fnm, str, len);

    tr->fieldscount++;

    if(0 != strcmp(fnm, "remark")) {
        fprintf(stderr, "unknown string field [%s]\n", fnm);
        return 1;
    }

    /* leave room for the terminator */
    lencp = len;
    if(len > sizeof(tr->remark)-1)
        lencp = sizeof(tr->remark)-1;
    memcpy(tr->remark, str, lencp);
    tr->remark[lencp] = 0;
    tr->remarklen = len;   /* the reported length, not the copied one */
    return 0;
}

static int test_blob_cb(const char *fnm, const uchar *buf, int len, void *ud)
{
    struct test_results *tr = ud;
    int lencp;

    fprintf(stderr, "*XX* blob_cb(%s, %d)\n", fnm, len);

    tr->fieldscount++;

    if(0 == strcmp(fnm, "id")) {
        lencp = len > sizeof(tr->id) ? sizeof(tr->id) : len;
        memcpy(tr->id, buf, lencp);
        tr->idlen = len;
        return 0;
    } else
    if(0 == strcmp(fnm, "secret")) {
        lencp = len > sizeof(tr->secret) ? sizeof(tr->secret) : len;
        memcpy(tr->secret, buf, lencp);
        tr->secretlen = len;
        return 0;
    } else
    if(0 == strcmp(fnm, "public")) {
        lencp = len > sizeof(tr->pub) ? sizeof(tr->pub) : len;
        memcpy(tr->pub, buf, lencp);
        tr->publen = len;
        return 0;
    } else
    if(0 == strcmp(fnm, "yeshash")) {
        lencp = len > sizeof(tr->yeshash) ? sizeof(tr->yeshash) : len;
        memcpy(tr->yeshash, buf, lencp);
        tr->yeshashlen = len;
        return 0;
    } else {
        fprintf(stderr, "unknown blob field [%s]\n", fnm);
        return 1;
    }
}

/* Integer callback for filetest(): trace the call, count it, and store
   the value of "preflen".  Returns 0 on success and 1 for any other
   field name, which makes the parser stop. */
static int test_integer_cb(const char *fnm, long long num, void *ud)
{
    struct test_results *tr = ud;

    /* the trace used to say "string_cb", a copy-paste slip */
    fprintf(stderr, "*XX* integer_cb(%s, %lld)\n", fnm, num);

    tr->fieldscount++;

    if(0 == strcmp(fnm, "preflen")) {
        tr->preflen = num;
        return 0;
    } else {
        fprintf(stderr, "unknown integer field [%s]\n", fnm);
        return 1;
    }
}

/* Print one check line: the label, the value obtained, and whether it
   equals the expected value. */
static void inttest(const char *name, int val, int exp_val)
{
    const char *verdict;

    if(val == exp_val)
        verdict = "passed";
    else
        verdict = "FAILED";
    printf("%s: %d: %s\n", name, val, verdict);
}

static void filetest()
{
    struct textrec_parser parser;
    struct test_results results;
    const unsigned char *p;
    int res;

    memset(&results, 0, sizeof(results));

    textrec_init(&parser);
    textrec_set_type_cb(&parser, test_type_cb);
    textrec_set_string_cb(&parser, test_string_cb);
    textrec_set_blob_cb(&parser, test_blob_cb);
    textrec_set_integer_cb(&parser, test_integer_cb);
    textrec_set_user_data(&parser, &results);

    for(p = testfile; *p; p++) {
        res = textrec_feedchar(&parser, *p);
        if(res != 0) {
            fprintf(stderr, "parser abort, code %d\n", res);
            break;
        }
    }
    res = textrec_feedchar(&parser, -1);

    inttest("fieldscount", results.fieldscount, 6);
    inttest("remarklen", results.remarklen, 42);
    inttest("idlen", results.idlen, 10);
    inttest("id_0", results.id[0], 0xff);
    inttest("id_3", results.id[3], 0x7b);
    inttest("id_9", results.id[9], 0x08);
    inttest("preflen", results.preflen, 12);
    inttest("secretlen", results.secretlen, 64);
    inttest("secret_0", results.secret[0], 0x25);
    inttest("secret_2", results.secret[2], 0xa8);
    inttest("secret_31", results.secret[31], 0x07);
    inttest("secret_32", results.secret[32], 0xe2);
    inttest("secret_60", results.secret[60], 0xe7);
    inttest("secret_63", results.secret[63], 0x08);
    inttest("publen", results.publen, 32);
    inttest("pub_0", results.pub[0], 0xe2);
    inttest("pub_2", results.pub[2], 0x5a);
    inttest("pub_31", results.pub[31], 0x08);
    inttest("yeshashlen", results.yeshashlen, 32);
    inttest("yeshash_0", results.yeshash[0], 0xfe);
    inttest("yeshash_2", results.yeshash[2], 0x79);
    inttest("yeshash_31", results.yeshash[31], 0x22);
}


/* Type callback for fileign_test(): only "preflen" is of interest (an
   integer); every other field is ignored. */
static int test2_type_cb(const char *field_name, void *user_data)
{
    return strcmp(field_name, "preflen") == 0 ? ftype_integer
                                              : ftype_ignored;
}

/* Integer callback for fileign_test(): store the value in the int that
   the user data pointer refers to. */
static int test2_integer_cb(const char *fnm, long long num, void *ud)
{
    int *out = ud;

    *out = num;
    return 0;
}

static void fileign_test()
{
    struct textrec_parser parser;
    int result = -1;
    const unsigned char *p;
    int res;

    textrec_init(&parser);
    textrec_set_type_cb(&parser, test2_type_cb);
    textrec_set_integer_cb(&parser, test2_integer_cb);
    textrec_set_user_data(&parser, &result);

    for(p = testfile; *p; p++) {
        res = textrec_feedchar(&parser, *p);
        if(res != 0) {
            fprintf(stderr, "parser abort, code %d\n", res);
            break;
        }
    }
    res = textrec_feedchar(&parser, -1);

    inttest("int_only", result, 12);
}

/* -------------------------------------------------------------------- */






/* Test driver (built only with TEXTREC_TEST defined): runs the boolean
   parsing checks, then the two whole-file parsing checks.  Results are
   printed; the exit status is always 0. */
int main()
{
    all_booltests();
    filetest();
    fileign_test();
    return 0;
}

#endif
