/* vi:set sw=4:

    mails.c

    Copyright (c) 1998-2001 Minero Aoki <aamine@loveruby.net>

    This program is free software.
    You can distribute/modify this program under the terms of
    the GNU Lesser General Public License version 2 or later.

*/

#include <stdio.h>
#ifdef __STDC__
# include <stdlib.h>
#endif

#include "ruby.h"


/* Version string; published to Ruby as the frozen constant Scanner_C::Version. */
#define TMAIL_VERSION "0.9.10"

/* Class object for TMail::Scanner_C; assigned in Init_cmails(). */
static VALUE MailScanner;
/* Exception class raised on malformed input; assigned in Init_cmails(). */
static VALUE ScanError;


/*
 * Scanner state wrapped inside each Scanner_C object.
 *
 * pbeg/p/pend are raw pointers into the buffer of the Ruby string handed
 * to Scanner_C.new; the struct itself keeps no reference to that string.
 */
struct mails
{
    char *pbeg;             /* first byte of the input buffer */
    char *p;                /* current scan position */
    char *pend;             /* one past the last byte of the input buffer */
    unsigned int flags;     /* bit set of MODE_MIME / MODE_RECV / MAILS_DEBUG */
    VALUE comments;         /* Array that collects comment bodies, or Qnil */
};

/* Scan mode bits stored in struct mails.flags (chosen from the header
   identifier passed to Scanner_C.new). */
#define MODE_MIME (1 << 0)      /* split input into MIME tokens instead of atoms */
#define MODE_RECV (1 << 1)      /* recognise from/by/via/with/id/for keywords */

/* Non-zero when the corresponding mode bit is set on scanner s. */
#define MIME_MODE_P(s) ((s)->flags & MODE_MIME)
#define RECV_MODE_P(s) ((s)->flags & MODE_RECV)

/* Flag bit toggled by Scanner_C#debug=; enables trace output during scan. */
#define MAILS_DEBUG (1 << 4)

/* Fetch the struct mails pointer wrapped in the Ruby object val into s. */
#define GET_SCANNER(val, s) Data_Get_Struct(val, struct mails, s)


/*
 * Put a scanner struct into its pristine state: no input buffer attached,
 * no mode/debug flags, and no comment collector.
 */
static void
real_reset(sc)
    struct mails *sc;
{
    sc->pbeg = sc->p = sc->pend = 0;
    sc->flags = 0;
    sc->comments = Qnil;
}

/*
 * Free callback registered with Data_Wrap_Struct in mails_s_new();
 * releases the scanner struct itself.
 */
static void
mails_free(sc)
    struct mails *sc;
{
    free(sc);
}

/*
 * GC mark callback for scanner objects: keeps the comment collector array
 * alive for as long as the scanner that writes into it.
 */
static void
mails_mark(sc)
    struct mails *sc;
{
    rb_gc_mark(sc->comments);
}

/*
 * Scanner_C.new(str, ident, cmt)
 *
 *   str   - String to scan (must be a T_STRING).
 *   ident - header identifier; "RecvH" selects MODE_RECV, "CTypeH",
 *           "CEncodingH" and "CDispositionH" select MODE_MIME, anything
 *           else leaves both mode bits clear.
 *   cmt   - Array that receives comment bodies during scan, or nil.
 *
 * Returns the new scanner object.  Raises TypeError (through Check_Type /
 * STR2CSTR) when an argument has the wrong type.
 *
 * All argument validation happens before the struct is allocated, so a
 * raised exception cannot leak it.
 *
 * NOTE(review): the scanner stores raw pointers into str's buffer but holds
 * no reference to str itself; the caller has to keep str alive and
 * unmodified while the scanner is in use.
 */
static VALUE
mails_s_new(klass, str, ident, cmt)
    VALUE klass, str, ident, cmt;
{
    struct mails *sc;
    char *name;
    unsigned int flags = 0;

    Check_Type(str, T_STRING);

    /* Decide the mode right away so that `name' is not used after any
       later allocation. */
    name = STR2CSTR(ident);
    if      (strcmp(name, "RecvH") == 0)         flags |= MODE_RECV;
    else if (strcmp(name, "CTypeH") == 0)        flags |= MODE_MIME;
    else if (strcmp(name, "CEncodingH") == 0)    flags |= MODE_MIME;
    else if (strcmp(name, "CDispositionH") == 0) flags |= MODE_MIME;

    if (! NIL_P(cmt)) {
        Check_Type(cmt, T_ARRAY);
    }

    sc = ALLOC_N(struct mails, 1);
    real_reset(sc);

    sc->pbeg = sc->p = RSTRING(str)->ptr;
    sc->pend = sc->p + RSTRING(str)->len;
    sc->flags = flags;
    sc->comments = cmt;     /* either a checked Array or nil */

    return Data_Wrap_Struct(MailScanner, mails_mark, mails_free, sc);
}

/*
 * Scanner_C#debug
 *
 * Returns true when the MAILS_DEBUG flag is set on this scanner,
 * false otherwise.
 */
static VALUE
mails_debug_get(self)
    VALUE self;
{
    struct mails *sc;

    GET_SCANNER(self, sc);
    return (sc->flags & MAILS_DEBUG) ? Qtrue : Qfalse;
}

/*
 * Scanner_C#debug = flag
 *
 * Sets the MAILS_DEBUG flag when flag is truthy and clears it otherwise.
 * Always returns nil.
 */
static VALUE
mails_debug_set(self, flag)
    VALUE self, flag;
{
    struct mails *sc;

    GET_SCANNER(self, sc);
    if (! RTEST(flag)) {
        sc->flags &= ~MAILS_DEBUG;
    }
    else {
        sc->flags |= MAILS_DEBUG;
    }
    return Qnil;
}


/*
--------------------------------------------------------
                      scan functions
--------------------------------------------------------
*/

/* I know this implementation is ugly, but it is usually useful. */

#define ESC '\033'

/*
 * Skip an ISO-2022-JP run: advance sc->p until just after the next
 * "\e(B" (switch back to ASCII) sequence.  If no such sequence exists,
 * sc->p ends up at sc->pend.
 *
 * Only the three bytes of the escape sequence are compared; comparing the
 * whole remaining buffer would recognise the sequence only when it is the
 * very last thing in the input.
 */
static void
fwd_jis(sc)
    struct mails *sc;
{
    for (; sc->p < sc->pend; sc->p++) {
        if ((*sc->p == ESC) &&
            (sc->pend - sc->p >= 3) &&
            (strncmp(sc->p, "\033(B", 3) == 0)) {
            sc->p += 3;
            break;
        }
    }
}

/* True for any byte with the high bit set (i.e. a non-ASCII byte). */
#define IS_JCHAR(ch) ((unsigned char)(ch) > 127)

/*
 * Skip a run of non-ASCII characters.  Every byte above 127 is taken as
 * the first byte of a two-byte character: it and the byte after it (when
 * there is one) are consumed.  Stops at the first byte <= 127 or at the
 * end of input.
 */
static void
fwd_jstr(sc)
    struct mails *sc;
{
    while (sc->p < sc->pend && IS_JCHAR(*sc->p)) {
        sc->p++;
        if (sc->p < sc->pend)
            sc->p++;
    }
}


/*
 * Consume one "word" starting at sc->p and return it as a new String.
 *
 * The word ends at the first byte that is contained in `special' (this
 * includes a NUL byte), at the first byte <= 32, or at the end of input.
 * ISO-2022-JP runs (introduced by ESC) and runs of bytes > 127 are skipped
 * as a whole and belong to the word.
 */
static VALUE
scan_word(sc, special)
    struct mails *sc;
    char *special;
{
    char *start = sc->p;

    for (;;) {
        unsigned char c;

        if (sc->p >= sc->pend)
            break;
        c = (unsigned char)(*sc->p);

        if (strchr(special, c))
            break;
        if (c == ESC) {
            fwd_jis(sc);
            continue;
        }
        if (c > 127) {
            fwd_jstr(sc);
            continue;
        }
        if (c <= 32)
            break;

        sc->p++;
    }

    return rb_str_new(start, sc->p - start);
}

/* Delimiter sets that end an atom and a MIME token, respectively. */
#define ATOM_SPECIAL  "()<>[]@,;:\"\\."
#define TOKEN_SPECIAL "()<>[]@,;:\"\\/?="

/* Scan one atom: a word delimited by the characters in ATOM_SPECIAL. */
static VALUE
scan_atom(sc)
    struct mails *sc;
{
    VALUE atom = scan_word(sc, ATOM_SPECIAL);

    return atom;
}

/* Scan one MIME token: a word delimited by the characters in TOKEN_SPECIAL. */
static VALUE
scan_token(sc)
    struct mails *sc;
{
    VALUE token = scan_word(sc, TOKEN_SPECIAL);

    return token;
}


#define BUFSIZE 256

/*
 * Scan the body of a quoted-string or domain-literal.
 *
 * sc->p must point just after the opening delimiter.  Bytes are copied up
 * to (not including) the closing `term'; a backslash is dropped and the
 * byte following it is copied literally.  On success sc->p is left just
 * after `term' and the unescaped body is returned as a new String.
 *
 * Raises ScanError when the input ends before `term' is found, including
 * the case where the input ends right after a backslash.
 */
static VALUE
scan_qstr(sc, term)
    struct mails *sc;
    char term;
{
    char *p;
    char buf[BUFSIZE];      /* staging buffer, flushed into ret when full */
    VALUE ret = rb_str_new("", 0);

    p = buf;
    while (sc->p < sc->pend) {
        if (*sc->p == term) {
            sc->p++;
            rb_str_cat(ret, buf, p - buf);
            return ret;
        }
        else if (*sc->p == '\\') {
            sc->p++;
            /* A trailing backslash has nothing to quote: stop here instead
               of reading the byte at pend and stepping past the buffer. */
            if (sc->p >= sc->pend)
                break;
        }

        *p = *sc->p; p++; sc->p++;
        if (p >= buf + BUFSIZE) {
            rb_str_cat(ret, buf, BUFSIZE);
            p = buf;
        }
    }

    rb_raise(ScanError, "unterminated quoted/domlit");
    return Qnil;
}

/*
 * Scan a quoted-string.  sc->p points at the opening '"', which is
 * stepped over before the body is read up to the closing '"'.
 */
static VALUE
scan_quoted(sc)
    struct mails *sc;
{
    VALUE body;

    ++sc->p;
    body = scan_qstr(sc, '"');
    return body;
}

/*
 * Scan a domain-literal.  sc->p points at the opening '[', which is
 * stepped over before the body is read up to the closing ']'.
 */
static VALUE
scan_domlit(sc)
    struct mails *sc;
{
    VALUE body;

    ++sc->p;
    body = scan_qstr(sc, ']');
    return body;
}


#if 0
/* Punctuation accepted inside a boundary string, besides letters and digits. */
#define BOUNDARY_CHAR "()+_,-./\': ?="

/*
 * (Currently compiled out.)  Consume the longest run of boundary
 * characters starting at sc->p and return it as a new String.
 * A NUL byte is not a boundary character.
 */
static VALUE
scan_boundary(sc)
    struct mails *sc;
{
    char *beg;

    for (beg = sc->p; sc->p < sc->pend; sc->p++) {
        unsigned char ch = (unsigned char)(*sc->p);

        if (! ((ch != '\0' && strchr(BOUNDARY_CHAR, ch)) ||
               (ch >= 'A' && ch <= 'Z') ||
               (ch >= 'a' && ch <= 'z') ||
               (ch >= '0' && ch <= '9')))
            break;
    }

    return rb_str_new(beg, sc->p - beg);
}
#endif


#if 0
/*
 * (Currently compiled out.)  Step over one line terminator at sc->p:
 * "\n", "\r" or "\r\n".  Does nothing for any other byte.
 */
static void
fwd_eol(sc)
    struct mails *sc;
{
    switch (*sc->p) {
    case '\n':
        sc->p++;
        break;
    case '\r':
        sc->p++;
        if (*sc->p == '\n')
            sc->p++;
        break;
    default:
        break;
    }
}


/*
 * (Currently compiled out.)  Append len bytes at beg to the staging
 * buffer buf, whose write cursor is *p_var.
 *
 * When the bytes do not fit, the buffered data is first flushed into the
 * Ruby string ret.  A chunk larger than BUFSIZE bypasses buf and is
 * appended to ret directly.
 */
static void
bufadd(buf, beg, len, ret, p_var)
    char *buf, *beg;
    long len;
    VALUE ret;
    char **p_var;
{
    long used = *p_var - buf;

    if (used + len > BUFSIZE) {
        /* not enough room left: flush what is buffered so far */
        rb_str_cat(ret, buf, used);
        *p_var = buf;
    }

    if (len > BUFSIZE) {
        /* too large for the buffer even when empty */
        rb_str_cat(ret, beg, len);
        *p_var = buf;
        return;
    }

    memcpy(*p_var, beg, len);
    *p_var += len;
}
#endif

/*
 * Scan a parenthesised comment.  sc->p points at the opening '('.
 *
 * Nested parentheses are kept in the result.  A backslash is dropped and
 * the byte after it is taken literally.  ISO-2022-JP runs and runs of
 * bytes > 127 are skipped as a whole so that their bytes are never
 * mistaken for delimiters.
 *
 * Returns the comment body (without the outermost parentheses) as a new
 * String and leaves sc->p just after the closing ')'.
 * Raises ScanError for an unterminated comment or a trailing backslash.
 */
static VALUE
scan_comment(sc)
    struct mails *sc;
{
    int depth = 1;
    char *seg;                      /* start of the not-yet-copied segment */
    VALUE text = rb_str_new("", 0);

    sc->p++;
    seg = sc->p;
    while (sc->p < sc->pend) {
        char c = *sc->p;

        if (c == ESC) {
            fwd_jis(sc);
            continue;
        }
        if (IS_JCHAR(c)) {
            fwd_jstr(sc);
            continue;
        }

        if (c == '(') {
            depth++;
        }
        else if (c == ')') {
            depth--;
            if (depth == 0) {
                rb_str_cat(text, seg, sc->p - seg);
                sc->p++;
                return text;
            }
        }
        else if (c == '\\') {
            /* copy everything before the backslash, then restart the
               segment at the quoted byte */
            rb_str_cat(text, seg, sc->p - seg);
            sc->p++;
            if (sc->p == sc->pend)
                rb_raise(ScanError, "incomplete char quote");
            seg = sc->p;
        }

        sc->p++;
    }

    rb_raise(ScanError, "unterminated comment");
    return Qnil;
}


/*
 * Character classification helpers.  Each macro may evaluate its argument
 * more than once, so do not pass expressions with side effects.
 */

/* Linear white space: space, tab, LF or CR. */
#define IS_LWSP(ch) ((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')
/* ASCII decimal digit. */
#define IS_DIGIT(ch) ((ch) >= '0' && (ch) <= '9')

/*
 * Byte that may start or continue an atom: ASCII letter or digit, ESC,
 * a byte > 127, or one of the listed punctuation characters.
 * NUL is excluded explicitly because strchr() also matches the string
 * terminator.
 */
#define IS_ATOMCHAR(ch) \
    (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z') || \
     ((ch) >= '0' && (ch) <= '9') || ((ch) == ESC) || IS_JCHAR(ch) || \
     ((ch) != '\0' && strchr("#!$%&'`*+-{|}~^/=?", (ch))))

/*
 * Byte that may start or continue a MIME token; same as IS_ATOMCHAR except
 * for the punctuation set.  NUL is excluded for the same reason.
 */
#define IS_TOKENCHAR(ch) \
    (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z') || \
     ((ch) >= '0' && (ch) <= '9') || ((ch) == ESC) || IS_JCHAR(ch) || \
     ((ch) != '\0' && strchr("#!$%&'`*+-{|}~^.", (ch))))


/* Advance sc->p over any run of linear white space (space, tab, CR, LF). */
static void
skip_lwsp(sc)
    struct mails *sc;
{
    while (sc->p < sc->pend && IS_LWSP(*sc->p))
        sc->p++;
}

/* ASCII-only character helpers (arguments may be evaluated more than once). */
#define IS_ALPHA(ch) (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z'))
#define IS_UPPER(ch) ((ch) >= 'A' && (ch) <= 'Z')
#define TO_LOWER(ch) (IS_UPPER(ch) ? (ch) + 32 : (ch))

/*
 * Case-insensitive (ASCII only) equality test for two NUL-terminated
 * strings.
 *
 * Returns 1 when a and b have the same length and are equal ignoring the
 * case of ASCII letters, 0 otherwise.
 */
static int
nccmp(a, b)
    char *a, *b;
{
    while (*a && *b) {
        if (*a != *b && TO_LOWER(*a) != TO_LOWER(*b))
            return 0;
        a++; b++;
    }
    /* at least one string has ended; equal only if both have */
    return (*a == *b) ? 1 : 0;
}

/*
 * Returns 1 when every byte of the Ruby string str is an ASCII digit,
 * 0 otherwise.  An empty string yields 1.
 */
static int
digit_p(str)
    VALUE str;
{
    char *p = RSTRING(str)->ptr;
    long len = RSTRING(str)->len;
    long i;     /* same width as the string length */

    for (i = 0; i < len; i++) {
        if (! IS_DIGIT(p[i]))
            return 0;
    }
    return 1;
}

/* Token type values yielded by scan; created once in Init_cmails(). */
static VALUE tok_atom, tok_digit, tok_token, tok_quoted, tok_domlit;
/* Keyword token types, used only when the scanner is in MODE_RECV. */
static VALUE tok_from, tok_by, tok_via, tok_with, tok_id, tok_for;

/*
 * Choose the token type for an already scanned atom.
 *
 * An all-digit atom is tok_digit.  In MODE_RECV the words from, by, via,
 * with, id and for (compared through nccmp) map to their own token types.
 * Everything else is tok_atom.
 */
static VALUE
atomsym(sc, str)
    struct mails *sc;
    VALUE str;
{
    static struct {
        char *word;
        VALUE *tok;
    } keywords[] = {
        { "from", &tok_from },
        { "by",   &tok_by   },
        { "via",  &tok_via  },
        { "with", &tok_with },
        { "id",   &tok_id   },
        { "for",  &tok_for  },
    };

    if (digit_p(str))
        return tok_digit;

    if (RECV_MODE_P(sc)) {
        char *text = RSTRING(str)->ptr;
        int k;

        for (k = 0; k < (int)(sizeof(keywords) / sizeof(keywords[0])); k++) {
            if (nccmp(text, keywords[k].word))
                return *keywords[k].tok;
        }
    }

    return tok_atom;
}

/*
 * Print one trace line to stdout: the number of bytes left between sc->p
 * and sc->pend, the inspected token type, and the token value.
 *
 *   sym - token type; rendered through its #inspect method.
 *   val - token value; its string buffer is printed as a C string.
 */
static void
debugout(sc, sym, val)
    struct mails *sc;
    VALUE sym, val;
{
    VALUE s;

    s = rb_funcall(sym, rb_intern("inspect"), 0);
    /* cast matches the %ld conversion */
    printf("%7ld %-10s %s\n", (long)(sc->pend - sc->p),
                              RSTRING(s)->ptr,
                              RSTRING(val)->ptr);
}

/*
 * Scanner_C#scan { |pair| ... }
 *
 * Tokenises the input and yields a two-element array [type, value] for
 * every token.  The same array object is reused for each yield.
 *
 *   - white space is skipped;
 *   - in MODE_MIME, token characters start a TOKEN; otherwise atom
 *     characters start an atom, typed by atomsym();
 *   - '"' starts a QUOTED string and '[' a DOMLIT;
 *   - '(' starts a comment, which is not yielded but pushed onto the
 *     comments array when one was supplied;
 *   - any other byte is yielded as a one-byte string for both type and
 *     value.
 *
 * After the input is exhausted [false, "$"] is yielded.  Returns nil.
 * Raises ScanError when the scanner has no input or the input is
 * malformed.  With the debug flag set, a trace is printed to stdout.
 */
static VALUE
mails_scan(self)
    VALUE self;
{
    struct mails *sc;
    VALUE arr;
    int debug;

/* Yield one [type, value] pair.  Both arguments are evaluated exactly
   once: the value expression is usually a scan_* call that consumes
   input, so it must not run a second time for the debug trace. */
#define PASS(s,v) \
    do {\
        VALUE pass_sym_ = (s);\
        VALUE pass_val_ = (v);\
        if (debug) debugout(sc, pass_sym_, pass_val_);\
        RARRAY(arr)->ptr[0] = pass_sym_;\
        RARRAY(arr)->ptr[1] = pass_val_;\
        rb_yield(arr);\
    } while (0)

#define D(m) do { if (debug) puts(m); } while (0)

    GET_SCANNER(self, sc);
    if (!sc->p) {
        rb_raise(ScanError, "Mails#scan called before reset");
    }
    arr = rb_assoc_new(Qnil, Qnil);
    debug = sc->flags & MAILS_DEBUG;

    while (sc->p < sc->pend) {
        D("new loop");
        if (IS_LWSP(*sc->p)) {
            D("lwsp");
            skip_lwsp(sc);
            if (sc->p >= sc->pend)
                break;
        }

        /* A NUL byte is never a word character: scan_word() would return
           an empty string without advancing, and this loop would spin
           forever.  It is passed through as a plain char below instead. */
        if (MIME_MODE_P(sc)) {
            if (*sc->p != '\0' && IS_TOKENCHAR(*sc->p)) {
                D("token");
                PASS(tok_token, scan_token(sc));
                continue;
            }
        }
        else {
            if (*sc->p != '\0' && IS_ATOMCHAR(*sc->p)) {
                VALUE tmp;
                D("atom");
                tmp = scan_atom(sc);
                PASS(atomsym(sc, tmp), tmp);
                continue;
            }
        }

        if (*sc->p == '"') {
            D("quoted");
            PASS(tok_quoted, scan_quoted(sc));
        }
        else if (*sc->p == '(') {
            VALUE c;
            D("comment");
            c = scan_comment(sc);
            if (! NIL_P(sc->comments))
                rb_ary_push(sc->comments, c);
        }
        else if (*sc->p == '[') {
            D("domlit");
            PASS(tok_domlit, scan_domlit(sc));
        }
        else {
            VALUE ch;
            D("char");
            ch = rb_str_new(sc->p, 1);
            sc->p++;
            PASS(ch, ch);
        }
    }

    /* end-of-input marker */
    PASS(Qfalse, rb_str_new("$", 1));
    return Qnil;
}


/*
 * Intern the C string str and return it as a Ruby value: a Symbol when
 * ID2SYM is available, otherwise the ID wrapped as a Fixnum.
 */
static VALUE
intn(str)
    char *str;
{
    ID id = rb_intern(str);
    VALUE sym;

#ifdef ID2SYM
    sym = ID2SYM(id);
#else
    sym = INT2FIX(id);
#endif
    return sym;
}

/*
 * Extension entry point.
 *
 * Defines TMail::Scanner_C (reusing the TMail module when it already
 * exists) with its Version constant and methods, looks up or defines the
 * top-level ScanError class, and creates the token type values.
 */
void
Init_cmails()
{
    VALUE mTMail;
    VALUE version;
    ID id_tmail = rb_intern("TMail");
    ID id_scerr = rb_intern("ScanError");

    /* module TMail */
    mTMail = rb_const_defined(rb_cObject, id_tmail)
           ? rb_const_get(rb_cObject, id_tmail)
           : rb_define_module("TMail");

    /* class TMail::Scanner_C */
    MailScanner = rb_define_class_under(mTMail, "Scanner_C", rb_cObject);

    version = rb_str_new2(TMAIL_VERSION);
    rb_obj_freeze(version);
    rb_define_const(MailScanner, "Version", version);

    rb_define_singleton_method(MailScanner, "new", mails_s_new, 3);
    rb_define_method(MailScanner, "scan", mails_scan, 0);
    rb_define_method(MailScanner, "debug", mails_debug_get, 0);
    rb_define_method(MailScanner, "debug=", mails_debug_set, 1);

    /* class ScanError */
    ScanError = rb_const_defined(rb_cObject, id_scerr)
              ? rb_const_get(rb_cObject, id_scerr)
              : rb_define_class("ScanError", rb_eStandardError);

    /* token types */
    tok_atom   = intn("ATOM");
    tok_digit  = intn("DIGIT");
    tok_token  = intn("TOKEN");
    tok_quoted = intn("QUOTED");
    tok_domlit = intn("DOMLIT");

    /* keyword token types (MODE_RECV) */
    tok_from = intn("FROM");
    tok_by   = intn("BY");
    tok_via  = intn("VIA");
    tok_with = intn("WITH");
    tok_id   = intn("ID");
    tok_for  = intn("FOR");
}
