From: Pierre Habouzit Date: Mon, 13 Nov 2006 02:03:34 +0000 (+0100) Subject: not having wide chars is soooo 90, please, I *really* don't care with X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=commitdiff_plain;h=10e80e482eff3762b0b8d41b0c7795b76704479c not having wide chars is soooo 90, please, I *really* don't care with antiquated OSes, those can stick to plain mutt. Signed-off-by: Pierre Habouzit --- diff --git a/Makefile.am b/Makefile.am index 8edfb70..2ecb4e5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -21,11 +21,11 @@ madmutt_SOURCES = $(BUILT_SOURCES) \ charset.c compress.c commands.c complete.c \ compose.c copy.c editmsg.c init.c keymap.c lib.c \ flags.c filter.c from.c handler.c hcache.c hdrline.c headers.c help.c hook.c \ - main.c mbox.c mbyte.c mh.c muttlib.c mutt_idna.c mx.c \ + main.c mbox.c mh.c muttlib.c mutt_idna.c mx.c \ pager.c pattern.c postpone.c query.c \ recvattach.c recvcmd.c rfc1524.c rfc3676.c \ score.c send.c sendlib.c sidebar.c sort.c state.c status.c \ - thread.c utf8.c wcwidth.c account.c + thread.c account.c madmutt_LDADD = @MUTT_LIB_OBJECTS@ @LIBOBJS@ \ -Limap -limap -Lpop -lpop $(LIBNNTP) \ @@ -61,7 +61,7 @@ CPPFLAGS=@CPPFLAGS@ -I$(includedir) EXTRA_madmutt_SOURCES = \ account.c mutt_sasl.c dotlock.c remailer.c \ - alias.h browser.h mbyte.h remailer.h state.h \ + alias.h browser.h remailer.h state.h \ mutt_idna.h mutt_libesmtp.c sidebar.h state.h EXTRA_DIST = config.rpath COPYRIGHT GPL OPS OPS.PGP OPS.CRYPT OPS.SMIME TODO \ @@ -73,7 +73,7 @@ EXTRA_DIST = config.rpath COPYRIGHT GPL OPS OPS.PGP OPS.CRYPT OPS.SMIME TODO \ mime.h mutt.h mutt_sasl.h mbox.h mh.h mx.h pager.h protos.h rfc1524.h \ rfc3676.h sort.h mime.types autogen.sh \ OPS.MIX remailer.c remailer.h browser.h state.h \ - mbyte.h lib.h extlib.c pgpewrap.c smime_keys.pl pgplib.h madmuttrc.head madmuttrc \ + lib.h extlib.c pgpewrap.c smime_keys.pl pgplib.h madmuttrc.head madmuttrc \ makedoc.c stamp-doc-rc README.SSL smime.h\ depcomp mutt_idna.h mutt_libesmtp.h diff --git a/charset.c b/charset.c index ba6b422..08c4b0f 100644 --- a/charset.c +++ b/charset.c @@ -35,6 +35,8 @@ # define EILSEQ EINVAL #endif +int Charset_is_utf8 = 0; + /* * The following list has been created manually from the data under: * http://www.isi.edu/in-notes/iana/assignments/character-sets @@ -612,3 +614,23 @@ int mutt_convert_nonmime_string (char **ps) } return -1; } + +void mutt_set_charset (char *charset) +{ + char buffer[STRING]; + + mutt_canonical_charset (buffer, sizeof (buffer), charset); + + Charset_is_utf8 = 0; + if (!strcmp (buffer, "utf-8")) + Charset_is_utf8 = 1; + +#ifdef HAVE_BIND_TEXTDOMAIN_CODESET + bind_textdomain_codeset (PACKAGE, buffer); +#endif +} + +wchar_t replacement_char (void) +{ + return Charset_is_utf8 ? 0xfffd : '?'; +} diff --git a/charset.h b/charset.h index 1b5f025..738cd73 100644 --- a/charset.h +++ b/charset.h @@ -56,4 +56,9 @@ void mutt_set_langinfo_charset (void); #define M_ICONV_HOOK_FROM 1 #define M_ICONV_HOOK_TO 2 +extern int Charset_is_utf8; + +void mutt_set_charset(char *charset); +wchar_t replacement_char(void); + #endif /* _CHARSET_H */ diff --git a/configure.ac b/configure.ac index 2a615d1..0cb3f9f 100644 --- a/configure.ac +++ b/configure.ac @@ -701,18 +701,18 @@ changequote([, ])dnl mutt_cv_iconv_nontrans=no) LIBS="$mutt_save_LIBS") if test "$mutt_cv_iconv_nontrans" = yes; then - AC_DEFINE(ICONV_NONTRANS, 1) + AC_DEFINE(ICONV_NONTRANS, 1) else - AC_DEFINE(ICONV_NONTRANS, 0) + AC_DEFINE(ICONV_NONTRANS, 0) fi CPPFLAGS="$CPPFLAGS -I\$(top_srcdir)/intl" if test "$BUILD_INCLUDED_LIBINTL" = "yes"; then - AC_DEFINE(HAVE_BIND_TEXTDOMAIN_CODESET,1, - [ Define if your gettext has bind_textdomain_codeset. ]) + AC_DEFINE(HAVE_BIND_TEXTDOMAIN_CODESET,1, + [ Define if your gettext has bind_textdomain_codeset. ]) else - AC_CHECK_FUNCS(bind_textdomain_codeset) + AC_CHECK_FUNCS(bind_textdomain_codeset) fi fi # libiconv @@ -720,86 +720,11 @@ fi # libiconv dnl -- locales -- AC_CHECK_HEADERS(wchar.h) - -AC_CACHE_CHECK([for wchar_t], mutt_cv_wchar_t, - AC_TRY_COMPILE([ -#include -#include -#ifdef HAVE_WCHAR_H -#include -#endif - ], - [ wchar_t wc; return 0; ], - mutt_cv_wchar_t=yes, - mutt_cv_wchar_t=no)) - -if test "$mutt_cv_wchar_t" = no; then - AC_DEFINE(wchar_t,int,[ Define to 'int' if system headers don't define. ]) -fi - -AC_CACHE_CHECK([for wint_t], mutt_cv_wint_t, - AC_TRY_COMPILE([ -#include -#include -#ifdef HAVE_WCHAR_H -#include -#endif - ], - [ wint_t wc; return 0; ], - mutt_cv_wint_t=yes, - mutt_cv_wint_t=no)) - -if test "$mutt_cv_wint_t" = no; then - AC_DEFINE(wint_t,int,[ Define to 'int' if system headers don't define. ]) -fi - AC_CHECK_HEADERS(wctype.h) AC_CHECK_FUNCS(iswalnum iswalpha iswcntrl iswdigit) AC_CHECK_FUNCS(iswgraph iswlower iswprint iswpunct iswspace iswupper) AC_CHECK_FUNCS(iswxdigit towupper towlower) - -AC_CACHE_CHECK([for mbstate_t], mutt_cv_mbstate_t, - AC_TRY_COMPILE([ -#include -#include -#ifdef HAVE_WCHAR_H -#include -#endif - ], - [ mbstate_t s; return 0; ], - mutt_cv_mbstate_t=yes, - mutt_cv_mbstate_t=no)) - -if test "$mutt_cv_mbstate_t" = no; then - AC_DEFINE(mbstate_t,int,[ Define to 'int' if system headers don't define. ]) -fi - -wc_funcs=maybe -AC_ARG_WITH(wc-funcs, AC_HELP_STRING([--without-wc-funcs], [Do not use the system's wchar_t functions]), - wc_funcs=$withval) - -if test "$wc_funcs" != yes -a "$wc_funcs" != no; then - AC_CACHE_CHECK([for wchar_t functions], mutt_cv_wc_funcs, - mutt_cv_wc_funcs=no - AC_TRY_LINK([ -#define _XOPEN_SOURCE 1 -#include -#include -#ifdef HAVE_WCTYPE_H -#include -#endif -#ifdef HAVE_WCHAR_H -#include -#endif], - [mbrtowc(0, 0, 0, 0); wctomb(0, 0); wcwidth(0); - iswprint(0); iswspace(0); towlower(0); towupper(0); iswalnum(0)], - mutt_cv_wc_funcs=yes)) - wc_funcs=$mutt_cv_wc_funcs -fi - -if test $wc_funcs = yes; then - AC_DEFINE(HAVE_WC_FUNCS,1,[ Define if you are using the system's wchar_t functions. ]) -fi +AC_TYPE_MBSTATE_T AC_CACHE_CHECK([for nl_langinfo and CODESET], mutt_cv_langinfo_codeset, [AC_TRY_LINK([#include ], diff --git a/help.c b/help.c index c0b2252..627f4c2 100644 --- a/help.c +++ b/help.c @@ -13,6 +13,11 @@ # include "config.h" #endif +#include +#include +#include +#include + #include #include #include @@ -24,10 +29,6 @@ #include "keymap.h" #include "pager.h" -#include -#include -#include - static struct binding_t *help_lookupFunction (int op, int menu) { int i; diff --git a/init.c b/init.c index e215b0c..8bd2854 100644 --- a/init.c +++ b/init.c @@ -31,7 +31,6 @@ #include "mutt.h" #include "keymap.h" -#include "mbyte.h" #include "charset.h" #include "thread.h" #include diff --git a/lib-ui/curs_lib.c b/lib-ui/curs_lib.c index ee53578..3a59d64 100644 --- a/lib-ui/curs_lib.c +++ b/lib-ui/curs_lib.c @@ -16,6 +16,7 @@ #endif #include +#include #include #include #include @@ -44,7 +45,7 @@ #include "mutt.h" #include "pager.h" -#include "mbyte.h" +#include "charset.h" /* not possible to unget more than one char under some curses libs, and it * is impossible to unget function keys in SLang, so roll our own input diff --git a/lib-ui/enter.c b/lib-ui/enter.c index 3eb1f13..0d5b12e 100644 --- a/lib-ui/enter.c +++ b/lib-ui/enter.c @@ -12,6 +12,8 @@ # include "config.h" #endif +#include + #include #include "curses.h" @@ -23,9 +25,6 @@ #include "history.h" #include "buffy.h" - -#include - /* redraw flags for mutt_enter_string() */ enum { M_REDRAW_INIT = 1, /* go to end of line and redraw */ diff --git a/lib-ui/menu.c b/lib-ui/menu.c index a8f9925..f91fecd 100644 --- a/lib-ui/menu.c +++ b/lib-ui/menu.c @@ -20,7 +20,7 @@ #include "menu.h" #include "mutt.h" -#include "mbyte.h" +#include "charset.h" #include "sidebar.h" #include diff --git a/mbyte.c b/mbyte.c deleted file mode 100644 index 6c9a76c..0000000 --- a/mbyte.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright notice from original mutt: - * Copyright (C) 2000 Edmund Grimley Evans - * - * This file is part of mutt-ng, see http://www.muttng.org/. - * It's licensed under the GNU General Public License, - * please see the file GPL in the top level source directory. - */ - -/* - * Japanese support by TAKIZAWA Takashi . - */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#include - -#include "mutt.h" -#include "mbyte.h" -#include "charset.h" - - -#include - -#include - -#ifndef EILSEQ -#define EILSEQ EINVAL -#endif - -int Charset_is_utf8 = 0; - -#ifndef HAVE_WC_FUNCS -static int charset_is_ja = 0; -static iconv_t charset_to_utf8 = (iconv_t) (-1); -static iconv_t charset_from_utf8 = (iconv_t) (-1); -#endif - -void mutt_set_charset (char *charset) -{ - char buffer[STRING]; - - mutt_canonical_charset (buffer, sizeof (buffer), charset); - - Charset_is_utf8 = 0; -#ifndef HAVE_WC_FUNCS - charset_is_ja = 0; - if (charset_to_utf8 != (iconv_t) (-1)) { - iconv_close (charset_to_utf8); - charset_to_utf8 = (iconv_t) (-1); - } - if (charset_from_utf8 != (iconv_t) (-1)) { - iconv_close (charset_from_utf8); - charset_from_utf8 = (iconv_t) (-1); - } -#endif - - if (!strcmp (buffer, "utf-8")) - Charset_is_utf8 = 1; -#ifndef HAVE_WC_FUNCS - else if (!ascii_strcasecmp (buffer, "euc-jp") - || !ascii_strcasecmp (buffer, "shift_jis") - || !ascii_strcasecmp (buffer, "cp932") - || !ascii_strcasecmp (buffer, "eucJP-ms")) { - charset_is_ja = 1; - charset_to_utf8 = iconv_open ("UTF-8", charset); - charset_from_utf8 = iconv_open (charset, "UTF-8"); - } -#endif - -#ifdef HAVE_BIND_TEXTDOMAIN_CODESET - bind_textdomain_codeset (PACKAGE, buffer); -#endif -} - -#ifndef HAVE_WC_FUNCS - -/* - * For systems that don't have them, we provide here our own - * implementations of wcrtomb(), mbrtowc(), iswprint() and wcwidth(). - * Instead of using the locale, as these functions normally would, - * we use Mutt's Charset variable. We support 3 types of charset: - * (1) For 8-bit charsets, wchar_t uses the same encoding as char. - * (2) For UTF-8, wchar_t uses UCS. - * (3) For stateless Japanese encodings, we use UCS and convert - * via UTF-8 using iconv. - * Unfortunately, we can't handle non-stateless encodings. - */ - -static size_t wcrtomb_iconv (char *s, wchar_t wc, iconv_t cd) -{ - char buf[MB_LEN_MAX]; - const char *ib; - char *ob; - size_t ibl, obl, r; - - if (s) { - ibl = mutt_wctoutf8 (buf, wc); - if (ibl == (size_t) (-1)) - return (size_t) (-1); - ib = buf; - ob = s; - obl = MB_LEN_MAX; - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - } - else { - ib = ""; - ibl = 1; - ob = buf; - obl = sizeof (buf); - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - } - return ob - s; -} - -size_t wcrtomb (char *s, wchar_t wc, mbstate_t * ps) -{ - /* We only handle stateless encodings, so we can ignore ps. */ - - if (Charset_is_utf8) - return mutt_wctoutf8 (s, wc); - else if (charset_from_utf8 != (iconv_t) (-1)) - return wcrtomb_iconv (s, wc, charset_from_utf8); - else { - if (!s) - return 1; - if (wc < 0x100) { - *s = wc; - return 1; - } - errno = EILSEQ; - return (size_t) (-1); - } -} - -size_t mbrtowc_iconv (wchar_t * pwc, const char *s, size_t n, - mbstate_t * ps, iconv_t cd) -{ - static mbstate_t mbstate; - const char *ib, *ibmax; - char *ob, *t; - size_t ibl, obl, k, r; - char bufi[8], bufo[6]; - - if (!n) - return (size_t) (-2); - - t = memchr (ps, 0, sizeof (*ps)); - k = t ? (t - (char *) ps) : sizeof (*ps); - if (k > sizeof (bufi)) - k = 0; - if (k) { - /* use the buffer for input */ - memcpy (bufi, ps, k); - ib = bufi; - ibmax = bufi + (k + n < sizeof (bufi) ? k + n : sizeof (bufi)); - memcpy (bufi + k, s, ibmax - bufi - k); - } - else { - /* use the real input */ - ib = s; - ibmax = s + n; - } - - ob = bufo; - obl = sizeof (bufo); - ibl = 1; - - for (;;) { - r = my_iconv(cd, &ib, &ibl, &ob, &obl); - if (ob > bufo && (!k || ib > bufi + k)) { - /* we have a character */ - p_clear(ps, 1); - utf8rtowc (pwc, bufo, ob - bufo, &mbstate); - return (pwc && *pwc) ? (ib - (k ? bufi + k : s)) : 0; - } - else if (!r || (r == (size_t) (-1) && errno == EINVAL)) { - if (ib + ibl < ibmax) - /* try using more input */ - ++ibl; - else if (k && ib > bufi + k && bufi + k + n > ibmax) { - /* switch to using real input */ - ib = s + (ib - bufi - k); - ibmax = s + n; - k = 0; - ++ibl; - } - else { - /* save the state and give up */ - p_clear(ps, 1); - if (ibl <= sizeof (mbstate_t)) /* need extra condition here! */ - memcpy (ps, ib, ibl); - return (size_t) (-2); - } - } - else { - /* bad input */ - errno = EILSEQ; - return (size_t) (-1); - } - } -} - -size_t mbrtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * ps) -{ - static mbstate_t mbstate; - - if (!ps) - ps = &mbstate; - - if (Charset_is_utf8) - return utf8rtowc (pwc, s, n, ps); - else if (charset_to_utf8 != (iconv_t) (-1)) - return mbrtowc_iconv (pwc, s, n, ps, charset_to_utf8); - else { - if (!s) { - p_clear(ps, 1); - return 0; - } - if (!n) - return (size_t) - 2; - if (pwc) - *pwc = (wchar_t) (unsigned char) *s; - return (*s != 0); - } -} - -int iswprint (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return ((0x20 <= wc && wc < 0x7f) || 0xa0 <= wc); - else - return (0 <= wc && wc < 256) ? IsPrint (wc) : 0; -} - -int iswspace (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return (9 <= wc && wc <= 13) || wc == 32; - else - return (0 <= wc && wc < 256) ? isspace (wc) : 0; -} - -static wint_t towupper_ucs (wint_t x) -{ - /* Only works for x < 0x130 */ - if ((0x60 < x && x < 0x7b) || (0xe0 <= x && x < 0xff && x != 0xf7)) - return x - 32; - else if (0x100 <= x && x < 0x130) - return x & ~1; - else if (x == 0xb5) - return 0x39c; - else if (x == 0xff) - return 0x178; - else - return x; -} - -static wint_t towlower_ucs (wint_t x) -{ - /* Only works for x < 0x130 */ - if ((0x40 < x && x < 0x5b) || (0xc0 <= x && x < 0xdf && x != 0xd7)) - return x + 32; - else if (0x100 <= x && x < 0x130) - return x | 1; - else - return x; -} - -static int iswalnum_ucs (wint_t wc) -{ - /* Only works for x < 0x220 */ - if (wc >= 0x100) - return 1; - else if (wc < 0x30) - return 0; - else if (wc < 0x3a) - return 1; - else if (wc < 0xa0) - return (0x40 < (wc & ~0x20) && (wc & ~0x20) < 0x5b); - else if (wc < 0xc0) - return (wc == 0xaa || wc == 0xb5 || wc == 0xba); - else - return !(wc == 0xd7 || wc == 0xf7); -} - -wint_t towupper (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return towupper_ucs (wc); - else - return (0 <= wc && wc < 256) ? toupper (wc) : wc; -} - -wint_t towlower (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return towlower_ucs (wc); - else - return (0 <= wc && wc < 256) ? tolower (wc) : wc; -} - -int iswalnum (wint_t wc) -{ - if (Charset_is_utf8 || charset_is_ja) - return iswalnum_ucs (wc); - else - return (0 <= wc && wc < 256) ? isalnum (wc) : 0; -} - -/* - * l10n for Japanese: - * Symbols, Greek and Cyrillic in JIS X 0208, Japanese Kanji - * Character Set, have a column width of 2. - */ -int wcwidth_ja (wchar_t ucs) -{ - if (ucs >= 0x3021) - return -1; /* continue with the normal check */ - /* a rough range for quick check */ - if ((ucs >= 0x00a1 && ucs <= 0x00fe) || /* Latin-1 Supplement */ - (ucs >= 0x0391 && ucs <= 0x0451) || /* Greek and Cyrillic */ - (ucs >= 0x2010 && ucs <= 0x266f) || /* Symbols */ - (ucs >= 0x3000 && ucs <= 0x3020)) /* CJK Symbols and Punctuation */ - return 2; - else - return -1; -} - -int wcwidth_ucs (wchar_t ucs); - -int wcwidth (wchar_t wc) -{ - if (!Charset_is_utf8) { - if (!charset_is_ja) { - /* 8-bit case */ - if (!wc) - return 0; - else if ((0 <= wc && wc < 256) && IsPrint (wc)) - return 1; - else - return -1; - } - else { - /* Japanese */ - int k = wcwidth_ja (wc); - - if (k != -1) - return k; - } - } - return wcwidth_ucs (wc); -} - -size_t utf8rtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * _ps) -{ - static wchar_t mbstate; - wchar_t *ps = (wchar_t *) _ps; - size_t k = 1; - unsigned char c; - wchar_t wc; - int count; - - if (!ps) - ps = &mbstate; - - if (!s) { - *ps = 0; - return 0; - } - if (!n) - return (size_t) - 2; - - if (!*ps) { - c = (unsigned char) *s; - if (c < 0x80) { - if (pwc) - *pwc = c; - return (c != 0); - } - else if (c < 0xc2) { - errno = EILSEQ; - return (size_t) - 1; - } - else if (c < 0xe0) - wc = ((c & 0x1f) << 6) + (count = 0); - else if (c < 0xf0) - wc = ((c & 0x0f) << 12) + (count = 1); - else if (c < 0xf8) - wc = ((c & 0x07) << 18) + (count = 2); - else if (c < 0xfc) - wc = ((c & 0x03) << 24) + (count = 3); - else if (c < 0xfe) - wc = ((c & 0x01) << 30) + (count = 4); - else { - errno = EILSEQ; - return (size_t) - 1; - } - ++s, --n, ++k; - } - else { - wc = *ps & 0x7fffffff; - count = wc & 7; /* if count > 4 it will be caught below */ - } - - for (; n; ++s, --n, ++k) { - c = (unsigned char) *s; - if (0x80 <= c && c < 0xc0) { - wc |= (c & 0x3f) << (6 * count); - if (!count) { - if (pwc) - *pwc = wc; - *ps = 0; - return wc ? k : 0; - } - --count, --wc; - if (!(wc >> (11 + count * 5))) { - errno = count < 4 ? EILSEQ : EINVAL; - return (size_t) - 1; - } - } - else { - errno = EILSEQ; - return (size_t) - 1; - } - } - *ps = wc; - return (size_t) - 2; -} - -#endif /* !HAVE_WC_FUNCS */ - -wchar_t replacement_char (void) -{ - return Charset_is_utf8 ? 0xfffd : '?'; -} diff --git a/mbyte.h b/mbyte.h deleted file mode 100644 index b6f1274..0000000 --- a/mbyte.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright notice from original mutt: - * [none] - * - * This file is part of mutt-ng, see http://www.muttng.org/. - * It's licensed under the GNU General Public License, - * please see the file GPL in the top level source directory. - */ - -#ifndef _MBYTE_H -# define _MBYTE_H - -#include "config.h" - -# ifdef HAVE_WC_FUNCS -# ifdef HAVE_WCHAR_H -# include -# endif -# ifdef HAVE_WCTYPE_H -# include -# endif -# endif - -#ifndef HAVE_WC_FUNCS -size_t wcrtomb (char *s, wchar_t wc, mbstate_t * ps); -size_t mbrtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * ps); -int iswprint (wint_t wc); -int iswspace (wint_t wc); -int iswalnum (wint_t wc); -wint_t towupper (wint_t wc); -wint_t towlower (wint_t wc); -int wcwidth (wchar_t wc); -#endif /* !HAVE_WC_FUNCS */ - - -void mutt_set_charset (char *charset); -extern int Charset_is_utf8; -size_t utf8rtowc (wchar_t * pwc, const char *s, size_t n, mbstate_t * _ps); -wchar_t replacement_char (void); - -#endif /* _MBYTE_H */ diff --git a/pager.c b/pager.c index 2a8d836..ea07331 100644 --- a/pager.c +++ b/pager.c @@ -16,6 +16,7 @@ #endif #include +#include #include #include #include @@ -40,7 +41,7 @@ #include "pager.h" #include "attach.h" #include "recvattach.h" -#include "mbyte.h" +#include "charset.h" #include "sidebar.h" #include "buffy.h" diff --git a/protos.h b/protos.h index 4f9c2ef..aee7bbe 100644 --- a/protos.h +++ b/protos.h @@ -17,8 +17,6 @@ #include #endif -#include "mbyte.h" - #include #include diff --git a/utf8.c b/utf8.c deleted file mode 100644 index 35faa6c..0000000 --- a/utf8.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright notice from original mutt: - * [none] - * - * This file is part of mutt-ng, see http://www.muttng.org/. - * It's licensed under the GNU General Public License, - * please see the file GPL in the top level source directory. - */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#ifndef HAVE_WC_FUNCS - -#include - -#ifndef EILSEQ -#define EILSEQ EINVAL -#endif - -int mutt_wctoutf8(char *p, unsigned int c) -{ - static unsigned char const mark[7] = { - 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC - }; - - int bytes; - - if (c >= 0x200000) { - errno = EILSEQ; - return -1; - } - - bytes = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000); - p += bytes; - - switch (bytes) { - case 4: *--p = (c | 0x80) & 0xbf; c >>= 6; - case 3: *--p = (c | 0x80) & 0xbf; c >>= 6; - case 2: *--p = (c | 0x80) & 0xbf; c >>= 6; - case 1: *--p = c | mark[bytes]; - } - - return bytes; -} - -#endif /* !HAVE_WC_FUNCS */ diff --git a/wcwidth.c b/wcwidth.c deleted file mode 100644 index 89656f8..0000000 --- a/wcwidth.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * This is an implementation of wcwidth() and wcswidth() (defined in - * IEEE Std 1002.1-2001) for Unicode. - * - * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html - * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html - * - * In fixed-width output devices, Latin characters all occupy a single - * "cell" position of equal width, whereas ideographic CJK characters - * occupy two such cells. Interoperability between terminal-line - * applications and (teletype-style) character terminals using the - * UTF-8 encoding requires agreement on which character should advance - * the cursor by how many cell positions. No established formal - * standards exist at present on which Unicode character shall occupy - * how many cell positions on character terminals. These routines are - * a first attempt of defining such behavior based on simple rules - * applied to data provided by the Unicode Consortium. - * - * For some graphical characters, the Unicode standard explicitly - * defines a character-cell width via the definition of the East Asian - * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. - * In all these cases, there is no ambiguity about which width a - * terminal shall use. For characters in the East Asian Ambiguous (A) - * class, the width choice depends purely on a preference of backward - * compatibility with either historic CJK or Western practice. - * Choosing single-width for these characters is easy to justify as - * the appropriate long-term solution, as the CJK practice of - * displaying these characters as double-width comes from historic - * implementation simplicity (8-bit encoded characters were displayed - * single-width and 16-bit ones double-width, even for Greek, - * Cyrillic, etc.) and not any typographic considerations. - * - * Much less clear is the choice of width for the Not East Asian - * (Neutral) class. Existing practice does not dictate a width for any - * of these characters. It would nevertheless make sense - * typographically to allocate two character cells to characters such - * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be - * represented adequately with a single-width glyph. The following - * routines at present merely assign a single-cell width to all - * neutral characters, in the interest of simplicity. This is not - * entirely satisfactory and should be reconsidered before - * establishing a formal standard in this area. At the moment, the - * decision which Not East Asian (Neutral) characters should be - * represented by double-width glyphs cannot yet be answered by - * applying a simple rule from the Unicode database content. Setting - * up a proper standard for the behavior of UTF-8 character terminals - * will require a careful analysis not only of each Unicode character, - * but also of each presentation form, something the author of these - * routines has avoided to do so far. - * - * http://www.unicode.org/unicode/reports/tr11/ - * - * Markus Kuhn -- 2003-05-20 (Unicode 4.0) - * - * Permission to use, copy, modify, and distribute this software - * for any purpose and without fee is hereby granted. The author - * disclaims all warranties with regard to this software. - * - * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - */ - -#if HAVE_CONFIG_H -# include "config.h" -#endif - -#ifndef HAVE_WC_FUNCS - -#include "mutt.h" -#include "mbyte.h" - -#include - -struct interval { - int first; - int last; -}; - -/* auxiliary function for binary search in interval table */ -static int bisearch (wchar_t ucs, const struct interval *table, int max) -{ - int min = 0; - int mid; - - if (ucs < table[0].first || ucs > table[max].last) - return 0; - while (max >= min) { - mid = (min + max) / 2; - if (ucs > table[mid].last) - min = mid + 1; - else if (ucs < table[mid].first) - max = mid - 1; - else - return 1; - } - - return 0; -} - - -/* The following two functions define the column width of an ISO 10646 - * character as follows: - * - * - The null character (U+0000) has a column width of 0. - * - * - Other C0/C1 control characters and DEL will lead to a return - * value of -1. - * - * - Non-spacing and enclosing combining characters (general - * category code Mn or Me in the Unicode database) have a - * column width of 0. - * - * - SOFT HYPHEN (U+00AD) has a column width of 1. - * - * - Other format characters (general category code Cf in the Unicode - * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. - * - * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) - * have a column width of 0. - * - * - Spacing characters in the East Asian Wide (W) or East Asian - * Full-width (F) category as defined in Unicode Technical - * Report #11 have a column width of 2. - * - * - All remaining characters (including all printable - * ISO 8859-1 and WGL4 characters, Unicode control characters, - * etc.) have a column width of 1. - * - * This implementation assumes that wchar_t characters are encoded - * in ISO 10646. - */ - -int wcwidth_ucs (wchar_t ucs) -{ - /* sorted list of non-overlapping intervals of non-spacing characters */ - /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ - static const struct interval combining[] = { - {0x0300, 0x0357}, {0x035D, 0x036F}, {0x0483, 0x0486}, - {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9}, - {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, - {0x05C4, 0x05C4}, {0x0600, 0x0603}, {0x0610, 0x0615}, - {0x064B, 0x0658}, {0x0670, 0x0670}, {0x06D6, 0x06E4}, - {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x070F, 0x070F}, - {0x0711, 0x0711}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, - {0x0901, 0x0902}, {0x093C, 0x093C}, {0x0941, 0x0948}, - {0x094D, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963}, - {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, - {0x09CD, 0x09CD}, {0x09E2, 0x09E3}, {0x0A01, 0x0A02}, - {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42}, {0x0A47, 0x0A48}, - {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A82}, - {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8}, - {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0B01, 0x0B01}, - {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43}, - {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82}, - {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40}, - {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, - {0x0CBC, 0x0CBC}, {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, - {0x0CCC, 0x0CCD}, {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, - {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, - {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, - {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, - {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, - {0x0F37, 0x0F37}, {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, - {0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, - {0x0F99, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, - {0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039}, - {0x1058, 0x1059}, {0x1160, 0x11FF}, {0x1712, 0x1714}, - {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773}, - {0x17B4, 0x17B5}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6}, - {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, - {0x18A9, 0x18A9}, {0x1920, 0x1922}, {0x1927, 0x1928}, - {0x1932, 0x1932}, {0x1939, 0x193B}, {0x200B, 0x200F}, - {0x202A, 0x202E}, {0x2060, 0x2063}, {0x206A, 0x206F}, - {0x20D0, 0x20EA}, {0x302A, 0x302F}, {0x3099, 0x309A}, - {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE23}, - {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x1D167, 0x1D169}, - {0x1D173, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, - {0xE0001, 0xE0001}, {0xE0020, 0xE007F}, {0xE0100, 0xE01EF} - }; - - /* test for 8-bit control characters */ - if (ucs == 0) - return 0; - if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) - return -1; - - /* binary search in table of non-spacing characters */ - if (bisearch (ucs, combining, - sizeof (combining) / sizeof (struct interval) - 1)) - return 0; - - /* if we arrive here, ucs is not a combining or C0/C1 control character */ - - return 1 + (ucs >= 0x1100 && (ucs <= 0x115f || /* Hangul Jamo init. consonants */ - ucs == 0x2329 || ucs == 0x232a || (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) || /* CJK ... Yi */ - (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ - (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ - (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ - (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ - (ucs >= 0xffe0 && ucs <= 0xffe6) || - (ucs >= 0x20000 && ucs <= 0x2fffd) || - (ucs >= 0x30000 && ucs <= 0x3fffd))); -} - -#endif /* !HAVE_WC_FUNCS */