X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=blobdiff_plain;f=mbyte.c;fp=mbyte.c;h=0000000000000000000000000000000000000000;hp=6c9a76c96613a079ff63f939775baf0131930015;hb=10e80e482eff3762b0b8d41b0c7795b76704479c;hpb=90c36546d834608e1ce96f4142ef9a1f6b405a7c diff --git a/mbyte.c b/mbyte.c deleted file mode 100644 index 6c9a76c..0000000 --- a/mbyte.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright notice from original mutt: - * Copyright (C) 2000 Edmund Grimley Evans - * - * This file is part of mutt-ng, see http://www.muttng.org/. - * It's licensed under the GNU General Public License, - * please see the file GPL in the top level source directory. - */ - -/* - * Japanese support by TAKIZAWA Takashi . - */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "mutt.h" -#include "mbyte.h" -#include "charset.h" - - -#include - -#include - -#ifndef EILSEQ -#define EILSEQ EINVAL -#endif - -int Charset_is_utf8 = 0; - -#ifndef HAVE_WC_FUNCS -static int charset_is_ja = 0; -static iconv_t charset_to_utf8 = (iconv_t) (-1); -static iconv_t charset_from_utf8 = (iconv_t) (-1); -#endif - -void mutt_set_charset (char *charset) -{ - char buffer[STRING]; - - mutt_canonical_charset (buffer, sizeof (buffer), charset); - - Charset_is_utf8 = 0; -#ifndef HAVE_WC_FUNCS - charset_is_ja = 0; - if (charset_to_utf8 != (iconv_t) (-1)) { - iconv_close (charset_to_utf8); - charset_to_utf8 = (iconv_t) (-1); - } - if (charset_from_utf8 != (iconv_t) (-1)) { - iconv_close (charset_from_utf8); - charset_from_utf8 = (iconv_t) (-1); - } -#endif - - if (!strcmp (buffer, "utf-8")) - Charset_is_utf8 = 1; -#ifndef HAVE_WC_FUNCS - else if (!ascii_strcasecmp (buffer, "euc-jp") - || !ascii_strcasecmp (buffer, "shift_jis") - || !ascii_strcasecmp (buffer, "cp932") - || !ascii_strcasecmp (buffer, "eucJP-ms")) { - charset_is_ja = 1; - charset_to_utf8 = iconv_open ("UTF-8", charset); - charset_from_utf8 = iconv_open (charset, "UTF-8"); - } -#endif - -#ifdef HAVE_BIND_TEXTDOMAIN_CODESET - bind_textdomain_codeset (PACKAGE, buffer); -#endif -} - -#ifndef HAVE_WC_FUNCS - -/* - * For systems that don't have them, we provide here our own - * implementations of wcrtomb(), mbrtowc(), iswprint() and wcwidth(). - * Instead of using the locale, as these functions normally would, - * we use Mutt's Charset variable. We support 3 types of charset: - * (1) For 8-bit charsets, wchar_t uses the same encoding as char. - * (2) For UTF-8, wchar_t uses UCS. - * (3) For stateless Japanese encodings, we use UCS and convert - * via UTF-8 using iconv. - * Unfortunately, we can't handle non-stateless encodings. - */ - -static size_t wcrtomb_iconv (char *s, wchar_t wc, iconv_t cd) -{ - char buf[MB_LEN_MAX]; - const char *ib; - char *ob; - size_t ibl, obl, r; - - if (s) { - ibl = mutt_wctoutf8 (buf, wc); - if (ibl == (size_t) (-1)) - return (size_t) (-1); - ib = buf; - ob = s; - obl = MB_LEN_MAX; - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - } - else { - ib = ""; - ibl = 1; - ob = buf; - obl = sizeof (buf); - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - } - return ob - s; -} - -size_t wcrtomb (char *s, wchar_t wc, mbstate_t * ps) -{ - /* We only handle stateless encodings, so we can ignore ps. */ - - if (Charset_is_utf8) - return mutt_wctoutf8 (s, wc); - else if (charset_from_utf8 != (iconv_t) (-1)) - return wcrtomb_iconv (s, wc, charset_from_utf8); - else { - if (!s) - return 1; - if (wc < 0x100) { - *s = wc; - return 1; - } - errno = EILSEQ; - return (size_t) (-1); - } -} - -size_t mbrtowc_iconv (wchar_t * pwc, const char *s, size_t n, - mbstate_t * ps, iconv_t cd) -{ - static mbstate_t mbstate; - const char *ib, *ibmax; - char *ob, *t; - size_t ibl, obl, k, r; - char bufi[8], bufo[6]; - - if (!n) - return (size_t) (-2); - - t = memchr (ps, 0, sizeof (*ps)); - k = t ? (t - (char *) ps) : sizeof (*ps); - if (k > sizeof (bufi)) - k = 0; - if (k) { - /* use the buffer for input */ - memcpy (bufi, ps, k); - ib = bufi; - ibmax = bufi + (k + n < sizeof (bufi) ? k + n : sizeof (bufi)); - memcpy (bufi + k, s, ibmax - bufi - k); - } - else { - /* use the real input */ - ib = s; - ibmax = s + n; - } - - ob = bufo; - obl = sizeof (bufo); - ibl = 1; - - for (;;) { - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - if (ob > bufo && (!k || ib > bufi + k)) { - /* we have a character */ - p_clear(ps, 1); - utf8rtowc (pwc, bufo, ob - bufo, &mbstate); - return (pwc && *pwc) ? (ib - (k ? bufi + k : s)) : 0; - } - else if (!r || (r == (size_t) (-1) && errno == EINVAL)) { - if (ib + ibl < ibmax) - /* try using more input */ - ++ibl; - else if (k && ib > bufi + k && bufi + k + n > ibmax) { - /* switch to using real input */ - ib = s + (ib - bufi - k); - ibmax = s + n; - k = 0; - ++ibl; - } - else { - /* save the state and give up */ - p_clear(ps, 1); - if (ibl <= sizeof (mbstate_t)) /* need extra condition here! */ - memcpy (ps, ib, ibl); - return (size_t) (-2); - } - } - else { - /* bad input */ - errno = EILSEQ; - return (size_t) (-1); - } - } -} - -size_t mbrtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * ps) -{ - static mbstate_t mbstate; - - if (!ps) - ps = &mbstate; - - if (Charset_is_utf8) - return utf8rtowc (pwc, s, n, ps); - else if (charset_to_utf8 != (iconv_t) (-1)) - return mbrtowc_iconv (pwc, s, n, ps, charset_to_utf8); - else { - if (!s) { - p_clear(ps, 1); - return 0; - } - if (!n) - return (size_t) - 2; - if (pwc) - *pwc = (wchar_t) (unsigned char) *s; - return (*s != 0); - } -} - -int iswprint (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return ((0x20 <= wc && wc < 0x7f) || 0xa0 <= wc); - else - return (0 <= wc && wc < 256) ? IsPrint (wc) : 0; -} - -int iswspace (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return (9 <= wc && wc <= 13) || wc == 32; - else - return (0 <= wc && wc < 256) ? isspace (wc) : 0; -} - -static wint_t towupper_ucs (wint_t x) -{ - /* Only works for x < 0x130 */ - if ((0x60 < x && x < 0x7b) || (0xe0 <= x && x < 0xff && x != 0xf7)) - return x - 32; - else if (0x100 <= x && x < 0x130) - return x & ~1; - else if (x == 0xb5) - return 0x39c; - else if (x == 0xff) - return 0x178; - else - return x; -} - -static wint_t towlower_ucs (wint_t x) -{ - /* Only works for x < 0x130 */ - if ((0x40 < x && x < 0x5b) || (0xc0 <= x && x < 0xdf && x != 0xd7)) - return x + 32; - else if (0x100 <= x && x < 0x130) - return x | 1; - else - return x; -} - -static int iswalnum_ucs (wint_t wc) -{ - /* Only works for x < 0x220 */ - if (wc >= 0x100) - return 1; - else if (wc < 0x30) - return 0; - else if (wc < 0x3a) - return 1; - else if (wc < 0xa0) - return (0x40 < (wc & ~0x20) && (wc & ~0x20) < 0x5b); - else if (wc < 0xc0) - return (wc == 0xaa || wc == 0xb5 || wc == 0xba); - else - return !(wc == 0xd7 || wc == 0xf7); -} - -wint_t towupper (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return towupper_ucs (wc); - else - return (0 <= wc && wc < 256) ? toupper (wc) : wc; -} - -wint_t towlower (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return towlower_ucs (wc); - else - return (0 <= wc && wc < 256) ? tolower (wc) : wc; -} - -int iswalnum (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return iswalnum_ucs (wc); - else - return (0 <= wc && wc < 256) ? isalnum (wc) : 0; -} - -/* - * l10n for Japanese: - * Symbols, Greek and Cyrillic in JIS X 0208, Japanese Kanji - * Character Set, have a column width of 2. - */ -int wcwidth_ja (wchar_t ucs) -{ - if (ucs >= 0x3021) - return -1; /* continue with the normal check */ - /* a rough range for quick check */ - if ((ucs >= 0x00a1 && ucs <= 0x00fe) || /* Latin-1 Supplement */ - (ucs >= 0x0391 && ucs <= 0x0451) || /* Greek and Cyrillic */ - (ucs >= 0x2010 && ucs <= 0x266f) || /* Symbols */ - (ucs >= 0x3000 && ucs <= 0x3020)) /* CJK Symbols and Punctuation */ - return 2; - else - return -1; -} - -int wcwidth_ucs (wchar_t ucs); - -int wcwidth (wchar_t wc) -{ - if (!Charset_is_utf8) { - if (!charset_is_ja) { - /* 8-bit case */ - if (!wc) - return 0; - else if ((0 <= wc && wc < 256) && IsPrint (wc)) - return 1; - else - return -1; - } - else { - /* Japanese */ - int k = wcwidth_ja (wc); - - if (k != -1) - return k; - } - } - return wcwidth_ucs (wc); -} - -size_t utf8rtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * _ps) -{ - static wchar_t mbstate; - wchar_t *ps = (wchar_t *) _ps; - size_t k = 1; - unsigned char c; - wchar_t wc; - int count; - - if (!ps) - ps = &mbstate; - - if (!s) { - *ps = 0; - return 0; - } - if (!n) - return (size_t) - 2; - - if (!*ps) { - c = (unsigned char) *s; - if (c < 0x80) { - if (pwc) - *pwc = c; - return (c != 0); - } - else if (c < 0xc2) { - errno = EILSEQ; - return (size_t) - 1; - } - else if (c < 0xe0) - wc = ((c & 0x1f) << 6) + (count = 0); - else if (c < 0xf0) - wc = ((c & 0x0f) << 12) + (count = 1); - else if (c < 0xf8) - wc = ((c & 0x07) << 18) + (count = 2); - else if (c < 0xfc) - wc = ((c & 0x03) << 24) + (count = 3); - else if (c < 0xfe) - wc = ((c & 0x01) << 30) + (count = 4); - else { - errno = EILSEQ; - return (size_t) - 1; - } - ++s, --n, ++k; - } - else { - wc = *ps & 0x7fffffff; - count = wc & 7; /* if count > 4 it will be caught below */ - } - - for (; n; ++s, --n, ++k) { - c = (unsigned char) *s; - if (0x80 <= c && c < 0xc0) { - wc |= (c & 0x3f) << (6 * count); - if (!count) { - if (pwc) - *pwc = wc; - *ps = 0; - return wc ? k : 0; - } - --count, --wc; - if (!(wc >> (11 + count * 5))) { - errno = count < 4 ? EILSEQ : EINVAL; - return (size_t) - 1; - } - } - else { - errno = EILSEQ; - return (size_t) - 1; - } - } - *ps = wc; - return (size_t) - 2; -} - -#endif /* !HAVE_WC_FUNCS */ - -wchar_t replacement_char (void) -{ - return Charset_is_utf8 ? 0xfffd : '?'; -}