/*
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at
 *  your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 *
 *  Copyright © 2006 Pierre Habouzit
 */
/*
 * Copyright notice from original mutt:
 * Copyright (C) 1999-2000 Thomas Roessler <roessler@does-not-exist.org>
 *
 * This file is part of mutt-ng, see http://www.muttng.org/.
 * It's licensed under the GNU General Public License,
 * please see the file GPL in the top level source directory.
 */

#include <lib-lib/lib-lib.h>

#ifdef HAVE_LANGINFO_H
#  include <langinfo.h>
#endif

#include "charset.h"

#ifndef EILSEQ
#  define EILSEQ EINVAL
#endif
@import "lib-lua/base.cpkg"

/* Non-zero when $charset names UTF-8; refreshed by charset_onchange(). */
int     Charset_is_utf8    = 0;
/* U+FFFD when $charset is UTF-8, '?' otherwise (see charset_onchange()). */
wchar_t CharsetReplacement = '?';

/* Lists fed by the charset_hook / iconv_hook package functions and
 * consulted by mutt_iconv_open(). */
static rx_t *charset_hooks = NULL;
static rx_t *iconv_hooks   = NULL;

/* Compute the initial value of $charset.
 *
 * Starts from "iso-8859-1"; when <langinfo.h> is available the canonicalized
 * nl_langinfo(CODESET) is preferred, unless it comes out empty.  The chosen
 * name is also handed to bind_textdomain_codeset() for PACKAGE.
 *
 * Returns an m_strdup()'ed copy of the chosen name.
 */
static char *charset_init(void)
{
    const char *cset = "iso-8859-1";
#ifdef HAVE_LANGINFO_H
    char raw[STRING];
    char canon[STRING];

    m_strcpy(raw, sizeof(raw), nl_langinfo(CODESET));
    charset_canonicalize(canon, sizeof(canon), raw);

    /* keep the built-in default when the locale gave us nothing usable */
    cset = m_strisempty(canon) ? cset : canon;
#endif
    bind_textdomain_codeset(PACKAGE, cset);
    return m_strdup(cset);
}

/* Hook run when $charset changes: refresh the cached UTF-8 flag and pick
 * the matching replacement character (U+FFFD for UTF-8, '?' otherwise). */
static void charset_onchange(const char *cset)
{
    const int utf8 = charset_is_utf8(cset);

    Charset_is_utf8 = utf8;
    if (utf8) {
        CharsetReplacement = 0xfffd;
    } else {
        CharsetReplacement = '?';
    }
}

/*
 * Package "mod_cset": the charset-related settings and hooks.
 *
 * It declares four string variables (assumed_charset, charset, file_charset,
 * send_charset) and two hook functions.  `charset' is initialised through
 * charset_init() and triggers charset_onchange() when modified.  Both hook
 * functions set `alias' as the template of the regex `local' and add it to
 * the matching file-level list (charset_hooks or iconv_hooks), which
 * mutt_iconv_open() consults later.
 */
@package mod_cset {
    /*
     ** .pp
     ** This variable is a colon-separated list of character encoding
     ** schemes for messages without character encoding indication.
     ** Header field values and message body content without character encoding
     ** indication are assumed to be written in one of the character sets in
     ** this list.
     ** By default, all the header fields and message body without any charset
     ** indication are assumed to be in \fTus-ascii\fP.
     ** .pp
     ** For example, Japanese users might prefer this:
     ** .pp
     ** \fTset assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"\fP
     ** .pp
     ** However, only the first content is valid for the message body.
     ** This variable is valid only if $$strict_mime is unset.
     */
    string_t assumed_charset = m_strdup("us-ascii");

    /*
     ** .pp
     ** Character set your terminal uses to display and enter textual data.
     */
    string_t charset = {
        .init     = charset_init();
        .onchange = charset_onchange($$);
    };

    /*
     ** .pp
     ** This variable is a colon-separated list of character encoding
     ** schemes for text file attachments.
     ** If \fIunset\fP, $$charset value will be used instead.
     ** For example, the following configuration would work for Japanese
     ** text handling:
     ** .pp
     ** \fTset file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"\fP
     ** .pp
     ** Note: ``\fTiso-2022-*\fP'' must be put at the head of the value as shown above
     ** if included.
     */
    string_t file_charset    = NULL;

    /*
     ** .pp
     ** A list of character sets for outgoing messages. Madmutt will use the
     ** first character set into which the text can be converted exactly.
     ** If your ``$$charset'' is not \fTiso-8859-1\fP and recipients may not
     ** understand \fTUTF-8\fP, it is advisable to include in the list an
     ** appropriate widely used standard character set (such as
     ** \fTiso-8859-2\fP, \fTkoi8-r\fP or \fTiso-2022-jp\fP) either
     ** instead of or after \fTiso-8859-1\fP.
     */
    string_t send_charset    = m_strdup("us-ascii:iso-8859-1:utf-8");

    void charset_hook(rx_t local, const string_t alias) {
        rx_set_template(local, alias);
        rx_list_add2(&charset_hooks, &local);
        RETURN();
    };

    void iconv_hook(rx_t local, const string_t alias) {
        rx_set_template(local, alias);
        rx_list_add2(&iconv_hooks, &local);
        RETURN();
    };
};

/****************************************************************************/
/* charset functions                                                        */
/****************************************************************************/

#include "charset.gperf"
/* Write the canonical spelling of charset `name' into `dest'.
 *
 *   dest, dlen  output buffer and its size, as passed to m_strcpy()
 *   name        charset name, may be NULL
 *
 * A NULL name yields "us-ascii".  Otherwise the part of `name' before the
 * first ':' is reduced to its alphanumerics and dots, lowercased, and looked
 * up with charset_canonicalize_aux().  On a hit the entry's preferred name
 * (cp->pref) is copied out; on a miss `name' itself is copied and lowercased.
 */
void charset_canonicalize(char *dest, ssize_t dlen, const char *name)
{
    const struct cset_pair *cp;
    char scratch[STRING];
    const char *p;
    int i = 0;

    if (!name) {
        m_strcpy(dest, dlen, "us-ascii");
        return;
    }

    // canonize name: only keep alphanumerics and dots, put into lowercase
    for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) {
        /* <ctype.h> functions take an unsigned char value (or EOF): passing
         * a plain, possibly negative, char is undefined behaviour. */
        const unsigned char c = (unsigned char)*p;

        if (isalnum(c) || c == '.') {
            scratch[i++] = tolower(c);
        }
    }
    scratch[i] = '\0';

    cp = charset_canonicalize_aux(scratch, strlen(scratch));
    if (cp) {
        m_strcpy(dest, dlen, cp->pref);
    } else {
        /* unknown charset: fall back to the caller's spelling, lowercased
         * (this copies all of `name', not only the part before ':') */
        m_strcpy(dest, dlen, name);
        m_strtolower(dest);
    }
}

/* Return the first element of the colon-separated list `charset', or
 * "us-ascii" when the list is NULL/empty.
 *
 * XXX: MC: UGLY return of local static -- the result lives in a static
 * buffer that the next call overwrites.
 */
const char *charset_getfirst(const char *charset)
{
    static char first[STRING];

    if (!m_strisempty(charset)) {
        const char *end = m_strchrnul(charset, ':');

        m_strncpy(first, sizeof(first), charset, end - charset);
        return first;
    }

    return "us-ascii";
}

/* Return 1 when `s' canonicalizes to "utf-8", 0 otherwise. */
int charset_is_utf8(const char *s)
{
    char canon[STRING];

    charset_canonicalize(canon, sizeof(canon), s);
    return m_strcmp(canon, "utf-8") == 0;
}

/* Return 1 when `s' canonicalizes to "us-ascii", 0 otherwise. */
int charset_is_us_ascii(const char *s)
{
    char canon[STRING];

    charset_canonicalize(canon, sizeof(canon), s);
    return m_strcmp(canon, "us-ascii") == 0;
}


/****************************************************************************/
/* iconv-line functions                                                     */
/****************************************************************************/

/* Like iconv_open, but canonicalises the charsets.
 *
 * When M_ICONV_HOOK_TO / M_ICONV_HOOK_FROM is set in `flags', the matching
 * name is first looked up in charset_hooks and the result used in its place.
 * If iconv_open() rejects the canonical pair, a second attempt is made with
 * the names obtained from iconv_hooks, provided BOTH names match a hook.
 *
 * Returns the conversion descriptor, or MUTT_ICONV_ERROR.
 */
iconv_t mutt_iconv_open(const char *tocode, const char *fromcode, int flags)
{
    char to_canon[STRING],   to_alt[STRING];
    char from_canon[STRING], from_alt[STRING];
    char hooked[STRING];
    const char *name;
    iconv_t cd;

    /* target charset, possibly rewritten by a charset-hook */
    name = tocode;
    if ((flags & M_ICONV_HOOK_TO)
    &&  rx_list_match2(charset_hooks, tocode, hooked, sizeof(hooked))) {
        name = hooked;
    }
    charset_canonicalize(to_canon, sizeof(to_canon), name);

    /* source charset, same treatment */
    name = fromcode;
    if ((flags & M_ICONV_HOOK_FROM)
    &&  rx_list_match2(charset_hooks, fromcode, hooked, sizeof(hooked))) {
        name = hooked;
    }
    charset_canonicalize(from_canon, sizeof(from_canon), name);

    cd = iconv_open(to_canon, from_canon);
    if (cd != MUTT_ICONV_ERROR)
        return cd;

    /* last resort: system-specific names from the iconv-hooks */
    if (!rx_list_match2(iconv_hooks, to_canon, to_alt, sizeof(to_alt)))
        return MUTT_ICONV_ERROR;
    if (!rx_list_match2(iconv_hooks, from_canon, from_alt, sizeof(from_alt)))
        return MUTT_ICONV_ERROR;

    return iconv_open(to_alt, from_alt);
}


/* Like iconv, but keeps going even when the input is invalid.
 * If you're supplying inrepls, the source charset should be stateless;
 * if you're supplying an outrepl, the target charset should be.
 *
 *   cd                      conversion descriptor
 *   inbuf, inbytesleft      input cursor and remaining length (updated)
 *   outbuf, outbytesleft    output cursor and remaining space (updated)
 *   inrepls                 optional NULL-terminated list of replacement
 *                           strings that are themselves run through `cd'
 *   outrepl                 optional replacement copied verbatim into the
 *                           output; "?" is used when it is NULL
 *
 * The function works on local copies of the cursors and stores them back
 * on return.  Each pass calls my_iconv(); when input and output space both
 * remain and errno is EILSEQ, one input byte is skipped and replaced, then
 * the conversion is resumed.  Any other outcome ends the loop.
 *
 * Returns the sum of the non-error my_iconv() results plus one for every
 * input byte that had to be replaced.
 */
ssize_t mutt_iconv(iconv_t cd,
                   const char **inbuf, ssize_t *inbytesleft,
                   char **outbuf, ssize_t *outbytesleft,
                   const char **inrepls, const char *outrepl)
{
    ssize_t ret = 0, ret1;
    const char *ib = *inbuf;
    ssize_t ibl = *inbytesleft;
    char *ob = *outbuf;
    ssize_t obl = *outbytesleft;

    for (;;) {
        ret1 = my_iconv(cd, &ib, &ibl, &ob, &obl);
        if (ret1 != -1)
            ret += ret1;

        /* NOTE(review): errno is tested without looking at ret1; this
         * presumably relies on ibl being 0 after a successful call --
         * confirm. */
        if (ibl && obl && errno == EILSEQ) {
            if (inrepls) {
                /* Try replacing the input */
                const char **t;

                for (t = inrepls; *t; t++) {
                    /* convert the candidate on scratch cursors so that a
                     * failed attempt leaves ob/obl untouched */
                    const char *ib1 = *t;
                    ssize_t ibl1 = m_strlen(*t);
                    char *ob1 = ob;
                    ssize_t obl1 = obl;

                    my_iconv(cd, &ib1, &ibl1, &ob1, &obl1);
                    if (!ibl1) {
                        /* candidate fully consumed: drop the offending
                         * input byte and commit the output */
                        ++ib, --ibl;
                        ob = ob1, obl = obl1;
                        ++ret;
                        break;
                    }
                }
                /* *t is non-NULL only if the loop was left through break,
                 * i.e. a replacement was accepted */
                if (*t)
                    continue;
            }
            /* Replace the output */
            if (!outrepl)
                outrepl = "?";
            /* NULL input: per iconv(3) this emits any pending shift
             * sequence, assuming my_iconv forwards to iconv -- confirm */
            my_iconv(cd, 0, 0, &ob, &obl);
            if (obl) {
                ssize_t n = m_strlen(outrepl);

                /* not enough room for the requested replacement: fall
                 * back to a single '?' */
                if (n > obl) {
                    outrepl = "?";
                    n = 1;
                }
                memcpy(ob, outrepl, n);
                ++ib, --ibl;
                ob += n, obl -= n;
                ++ret;
                my_iconv(cd, 0, 0, 0, 0); /* for good measure */
                continue;
            }
        }
        /* done, or stopped for a reason we do not handle: publish the
         * cursors back to the caller */
        *inbuf = ib, *inbytesleft = ibl;
        *outbuf = ob, *outbytesleft = obl;
        return ret;
    }
}

/* Convert the string *ps from charset `from' to charset `to' in place.
 *
 * `flags' is passed on to mutt_iconv_open().  On success the old string is
 * released and *ps points to a freshly allocated converted copy.  Invalid
 * input is replaced rather than rejected: U+FFFD when the target is UTF-8,
 * a converted U+FFFD or "?" when the source is UTF-8, and "?" otherwise.
 *
 * Returns 0 on success (an empty/NULL *ps is left alone), -1 when no
 * converter could be opened.
 */
int
mutt_convert_string(char **ps, const char *from, const char *to, int flags)
{
    const char *repls[] = { "\357\277\275", "?", 0 };
    const char **inrepls = NULL;
    const char *outrepl  = NULL;
    const char *src;
    char *dst, *out;
    ssize_t srclen, outlen;
    iconv_t cd;

    if (m_strisempty(*ps))
        return 0;

    cd = mutt_iconv_open(to, from, flags);
    if (cd == MUTT_ICONV_ERROR)
        return -1;

    if (charset_is_utf8(to)) {
        outrepl = "\357\277\275";
    } else if (charset_is_utf8(from)) {
        inrepls = repls;
    } else {
        outrepl = "?";
    }

    /* the terminating NUL is part of the converted input */
    src    = *ps;
    srclen = m_strlen(src) + 1;

    outlen = MB_LEN_MAX * srclen;
    dst    = out = p_new(char, outlen + 1);

    mutt_iconv(cd, &src, &srclen, &out, &outlen, inrepls, outrepl);
    iconv_close(cd);

    *out = '\0';

    p_delete(ps);
    *ps = dst;
    return 0;
}

/* Strictly convert `flen' bytes at `f' from charset `from' to charset `to'.
 *
 * Unlike mutt_iconv(), nothing is replaced: any my_iconv() failure aborts.
 *
 *   t, tlen   receive the NUL-terminated result and its length (terminator
 *             excluded); only written on success.  The caller owns *t.
 *
 * Returns the my_iconv() result on success, -1 on failure with errno
 * preserved from the failing call.
 */
static ssize_t convert_string(const char *f, ssize_t flen,
                              const char *from, const char *to,
                              char **t, ssize_t * tlen)
{
    iconv_t cd;
    char *buf, *ob;
    ssize_t obl;
    ssize_t n;
    int e;

    if ((cd = mutt_iconv_open(to, from, 0)) == MUTT_ICONV_ERROR)
        return -1;

    obl = 4 * flen + 1;
    /* The converter may fill all `obl' bytes, so keep one extra byte for
     * the terminator written below (same as mutt_convert_string()). */
    ob  = buf = p_new(char, obl + 1);
    n   = my_iconv(cd, &f, &flen, &ob, &obl);

    /* the second call (NULL input) flushes any pending shift state */
    if (n < 0 || my_iconv(cd, 0, 0, &ob, &obl) < 0) {
        /* cleanup may clobber errno: save it for the caller */
        e = errno;
        p_delete(&buf);
        iconv_close(cd);
        errno = e;
        return -1;
    }

    *ob   = '\0';
    *tlen = ob - buf;
    *t    = buf;
    iconv_close(cd);
    return n;
}

/* Convert *ps, a string without any charset indication, to $charset.
 *
 * Each entry of the colon-separated $assumed_charset list is tried in turn
 * as the source charset; the first one for which convert_string() succeeds
 * wins, and *ps is replaced by the converted copy.
 *
 * Returns 0 on success (or when *ps is empty and the list is not), -1 when
 * $assumed_charset is unset or no entry allowed a clean conversion.
 */
int mutt_convert_nonmime_string(char **ps)
{
    const char *p = mod_cset.assumed_charset;
    ssize_t ulen = m_strlen(*ps);
    char *u = *ps;

    /* an unset $assumed_charset offers nothing to try */
    if (!p)
        return -1;

    while (*p) {
        const char *q;
        char fromcode[LONG_STRING], *s = NULL;
        ssize_t slen;

        if (!ulen)
            return 0;

        while (*p == ':')
            p++;

        /* trailing (or lone) separators: do not hand an empty charset
         * name to the converter */
        if (!*p)
            break;

        q = m_strchrnul(p, ':');
        m_strncpy(fromcode, sizeof(fromcode), p, q - p);
        p = q;

        if (convert_string(u, ulen, fromcode, mod_cset.charset, &s, &slen) >= 0) {
            p_delete(ps);
            *ps = s;
            return 0;
        }
    }

    return -1;
}

/****************************************************************************/
/* fgetconv functions                                                       */
/****************************************************************************/

/* fgetconv_t stuff for converting a file while reading it
   Used in sendlib.c for converting from mutt's charset */

struct fgetconv_t {
    FILE *file;             /* stream the raw bytes are read from */
    iconv_t cd;             /* converter; MUTT_ICONV_ERROR means bytes are
                               passed through unconverted (see fgetconv()) */
    char bufi[BUFSIZ];      /* raw input read from `file' */
    char bufo[BUFSIZ];      /* converted output waiting to be handed out */
    char *p;                /* next byte of bufo to return; NULL once
                               fgetconv() has reported EOF */
    char *ob;               /* end of the converted data in bufo */
    char *ib;               /* start of the unconverted data in bufi */
    ssize_t ibl;            /* number of unconverted bytes at ib */
    const char **inrepls;   /* replacement list handed to mutt_iconv() */
};

/* Create a converting reader on top of `file'.
 *
 * A converter from `from' to `to' is opened when both names are given
 * (`flags' goes to mutt_iconv_open()).  If that is not possible the handle
 * keeps cd == MUTT_ICONV_ERROR and only `file' and `cd' are set here.
 *
 * The input replacement list starts at "?" when `to' is UTF-8 and at the
 * UTF-8 encoding of U+FFFD otherwise.
 * NOTE(review): confirm this polarity is the intended one.
 *
 * Returns the new handle; release it with fgetconv_close().
 */
fgetconv_t *
fgetconv_open(FILE *file, const char *from, const char *to, int flags)
{
    static const char *repls[] = { "\357\277\275", "?", 0 };

    struct fgetconv_t *conv = p_new(struct fgetconv_t, 1);

    conv->file = file;
    conv->cd   = (from && to) ? mutt_iconv_open(to, from, flags)
                              : MUTT_ICONV_ERROR;

    if (conv->cd == MUTT_ICONV_ERROR)
        return conv;

    /* both buffers start out empty */
    conv->ob      = conv->bufo;
    conv->p       = conv->bufo;
    conv->ib      = conv->bufi;
    conv->ibl     = 0;
    conv->inrepls = &repls[charset_is_utf8(to)];
    return conv;
}

/* Release a handle obtained from fgetconv_open().
 *
 * Closes the converter if one was opened, then frees the handle through
 * p_delete(fcp).  The underlying FILE is not closed here.  A NULL `fcp' or
 * NULL `*fcp' is ignored, in line with fgetconv() accepting a NULL handle.
 */
void fgetconv_close(fgetconv_t **fcp)
{
    struct fgetconv_t *fc;

    if (!fcp || !*fcp)
        return;

    fc = *fcp;
    if (fc->cd != MUTT_ICONV_ERROR)
        iconv_close (fc->cd);
    p_delete(fcp);
}


/* Return the next converted byte from `fc', or EOF.
 *
 * A NULL handle yields EOF; a handle without converter degrades to
 * fgetc().  Otherwise bytes are served from bufo.  When bufo runs dry the
 * function first converts whatever input is still pending, then refills
 * bufi from the file and converts again, this time through mutt_iconv() so
 * that invalid input gets replaced.  Once EOF has been returned for a
 * converting handle, fc->p is NULL and every later call returns EOF too.
 */
int fgetconv(fgetconv_t *fc)
{
    if (!fc)
        return EOF;

    if (fc->cd == MUTT_ICONV_ERROR)
        return fgetc(fc->file);

    if (!fc->p)
        return EOF;
    if (fc->p < fc->ob)
        return (unsigned char)*(fc->p)++;

    /* Try to convert some more */
    fc->p = fc->ob = fc->bufo;
    if (fc->ibl) {
        ssize_t obl = ssizeof(fc->bufo);

        my_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl);
        if (fc->p < fc->ob)
            return (unsigned char)*(fc->p)++;
    }

    /* If we trusted iconv a bit more, we would at this point
     * ask why it had stopped converting ... */

    /* Try to read some more: give up if bufi is completely full of
     * unconvertible data, or if the pending data does not reach the end
     * of bufi */
    if (fc->ibl == sizeof(fc->bufi)
    || (fc->ibl && fc->ib + fc->ibl < fc->bufi + sizeof(fc->bufi))) {
        fc->p = NULL;
        return EOF;
    }

    if (fc->ibl) {
        /* Move the unconverted tail to the front of bufi.  Source and
         * destination are in the same array and overlap as soon as the tail
         * is longer than half of it, hence memmove and not memcpy. */
        memmove(fc->bufi, fc->ib, fc->ibl);
    }
    fc->ib = fc->bufi;
    fc->ibl += fread(fc->ib + fc->ibl, 1, sizeof(fc->bufi) - fc->ibl,
                     fc->file);

    /* Try harder this time to convert some */
    if (fc->ibl) {
        ssize_t obl = ssizeof(fc->bufo);

        mutt_iconv(fc->cd, (const char **)&fc->ib, &fc->ibl, &fc->ob, &obl,
                   fc->inrepls, 0);
        if (fc->p < fc->ob) {
            return (unsigned char)*(fc->p)++;
        }
    }

    /* Either the file has finished or one of the buffers is too small */
    fc->p = NULL;
    return EOF;
}

/* fgets() counterpart for converting readers.
 *
 * Stores up to len - 1 bytes from `fc' into `buf', stopping after a
 * newline (which is kept) or at EOF, and NUL-terminates the result.
 *
 * Returns `buf', or NULL when nothing could be read.
 */
char *fgetconvs(char *buf, ssize_t len, fgetconv_t *fc)
{
    ssize_t n = 0;
    int c;

    while (n < len - 1 && (c = fgetconv(fc)) != EOF) {
        buf[n++] = c;
        if (c == '\n')
            break;
    }
    buf[n] = '\0';

    if (n == 0)
        return NULL;
    return buf;
}

/* vim:set ft=c: */