X-Git-Url: http://git.madism.org/?p=apps%2Fmadmutt.git;a=blobdiff_plain;f=charset.c;h=bbaf8ca7a6ccb1bdc7b138774d2908ef610939cd;hp=0652ffc807c6c27b15e684486438050e954b4377;hb=2c56b665394c80195b976537e608b690947fcb14;hpb=ac813896ca32d850febc2d95065ac4fa040f11f9 diff --git a/charset.c b/charset.c index 0652ffc..bbaf8ca 100644 --- a/charset.c +++ b/charset.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef HAVE_LANGINFO_CODESET +# include +#endif #include #include @@ -30,11 +33,11 @@ #include "mutt.h" #include "charset.h" - #ifndef EILSEQ -# define EILSEQ EINVAL +# define EILSEQ EINVAL #endif +char *Charset; int Charset_is_utf8 = 0; /* @@ -47,212 +50,181 @@ int Charset_is_utf8 = 0; */ static struct { - const char *key; - const char *pref; + const char *key; + const char *pref; } PreferredMIMENames[] = { - { - "ansi_x3.4-1968", "us-ascii"}, { - "iso-ir-6", "us-ascii"}, { - "iso_646.irv:1991", "us-ascii"}, { - "ascii", "us-ascii"}, { - "iso646-us", "us-ascii"}, { - "us", "us-ascii"}, { - "ibm367", "us-ascii"}, { - "cp367", "us-ascii"}, { - "csASCII", "us-ascii"}, { - "csISO2022KR", "iso-2022-kr"}, { - "csEUCKR", "euc-kr"}, { - "csISO2022JP", "iso-2022-jp"}, { - "csISO2022JP2", "iso-2022-jp-2"}, { - "ISO_8859-1:1987", "iso-8859-1"}, { - "iso-ir-100", "iso-8859-1"}, { - "iso_8859-1", "iso-8859-1"}, { - "latin1", "iso-8859-1"}, { - "l1", "iso-8859-1"}, { - "IBM819", "iso-8859-1"}, { - "CP819", "iso-8859-1"}, { - "csISOLatin1", "iso-8859-1"}, { - "ISO_8859-2:1987", "iso-8859-2"}, { - "iso-ir-101", "iso-8859-2"}, { - "iso_8859-2", "iso-8859-2"}, { - "latin2", "iso-8859-2"}, { - "l2", "iso-8859-2"}, { - "csISOLatin2", "iso-8859-2"}, { - "ISO_8859-3:1988", "iso-8859-3"}, { - "iso-ir-109", "iso-8859-3"}, { - "ISO_8859-3", "iso-8859-3"}, { - "latin3", "iso-8859-3"}, { - "l3", "iso-8859-3"}, { - "csISOLatin3", "iso-8859-3"}, { - "ISO_8859-4:1988", "iso-8859-4"}, { - "iso-ir-110", "iso-8859-4"}, { - "ISO_8859-4", "iso-8859-4"}, { - "latin4", "iso-8859-4"}, { - "l4", "iso-8859-4"}, { - "csISOLatin4", "iso-8859-4"}, { - "ISO_8859-6:1987", "iso-8859-6"}, { - "iso-ir-127", "iso-8859-6"}, { - "iso_8859-6", "iso-8859-6"}, { - "ECMA-114", "iso-8859-6"}, { - "ASMO-708", "iso-8859-6"}, { - "arabic", "iso-8859-6"}, { - "csISOLatinArabic", "iso-8859-6"}, { - "ISO_8859-7:1987", "iso-8859-7"}, { - "iso-ir-126", "iso-8859-7"}, { - "ISO_8859-7", "iso-8859-7"}, { - "ELOT_928", "iso-8859-7"}, { - "ECMA-118", "iso-8859-7"}, { - "greek", "iso-8859-7"}, { - "greek8", "iso-8859-7"}, { - "csISOLatinGreek", "iso-8859-7"}, { - "ISO_8859-8:1988", "iso-8859-8"}, { - "iso-ir-138", "iso-8859-8"}, { - "ISO_8859-8", "iso-8859-8"}, { - "hebrew", "iso-8859-8"}, { - "csISOLatinHebrew", "iso-8859-8"}, { - "ISO_8859-5:1988", "iso-8859-5"}, { - "iso-ir-144", "iso-8859-5"}, { - "ISO_8859-5", "iso-8859-5"}, { - "cyrillic", "iso-8859-5"}, { - "csISOLatinCyrillic", "iso8859-5"}, { - "ISO_8859-9:1989", "iso-8859-9"}, { - "iso-ir-148", "iso-8859-9"}, { - "ISO_8859-9", "iso-8859-9"}, { - "latin5", "iso-8859-9"}, /* this is not a bug */ - { - "l5", "iso-8859-9"}, { - "csISOLatin5", "iso-8859-9"}, { - "ISO_8859-10:1992", "iso-8859-10"}, { - "iso-ir-157", "iso-8859-10"}, { - "latin6", "iso-8859-10"}, /* this is not a bug */ - { - "l6", "iso-8859-10"}, { - "csISOLatin6", "iso-8859-10"}, { - "csKOI8r", "koi8-r"}, { - "MS_Kanji", "Shift_JIS"}, /* Note the underscore! */ - { - "csShiftJis", "Shift_JIS"}, { - "Extended_UNIX_Code_Packed_Format_for_Japanese", "EUC-JP"}, { - "csEUCPkdFmtJapanese", "EUC-JP"}, { - "csGB2312", "gb2312"}, { - "csbig5", "big5"}, - /* - * End of official brain damage. What follows has been taken - * from glibc's localedata files. - */ - { - "iso_8859-13", "iso-8859-13"}, { - "iso-ir-179", "iso-8859-13"}, { - "latin7", "iso-8859-13"}, /* this is not a bug */ - { - "l7", "iso-8859-13"}, { - "iso_8859-14", "iso-8859-14"}, { - "latin8", "iso-8859-14"}, /* this is not a bug */ - { - "l8", "iso-8859-14"}, { - "iso_8859-15", "iso-8859-15"}, { - "latin9", "iso-8859-15"}, /* this is not a bug */ - /* Suggested by Ionel Mugurel Ciobica */ - { - "latin0", "iso-8859-15"}, /* this is not a bug */ - { - "iso_8859-16", "iso-8859-16"}, { - "latin10", "iso-8859-16"}, /* this is not a bug */ - /* - * David Champion has observed this with - * nl_langinfo under SunOS 5.8. - */ - { - "646", "us-ascii"}, - /* - * http://www.sun.com/software/white-papers/wp-unicode/ - */ - { - "eucJP", "euc-jp"}, { - "PCK", "Shift_JIS"}, { - "ko_KR-euc", "euc-kr"}, { - "zh_TW-big5", "big5"}, - /* seems to be common on some systems */ - { - "sjis", "Shift_JIS"}, { - "euc-jp-ms", "eucJP-ms"}, - /* - * If you happen to encounter system-specific brain-damage with - * respect to character set naming, please add it above this - * comment, and submit a patch to . - */ - /* End of aliases. Please keep this line last. */ - { - NULL, NULL} + {"ansi_x3.4-1968", "us-ascii"}, + {"iso-ir-6", "us-ascii"}, + {"iso_646.irv:1991", "us-ascii"}, + {"ascii", "us-ascii"}, + {"iso646-us", "us-ascii"}, + {"us", "us-ascii"}, + {"ibm367", "us-ascii"}, + {"cp367", "us-ascii"}, + {"csascii", "us-ascii"}, + {"csiso2022kr", "iso-2022-kr"}, + {"cseuckr", "euc-kr"}, + {"csiso2022jp", "iso-2022-jp"}, + {"csiso2022jp2", "iso-2022-jp-2"}, + {"iso_8859-1:1987", "iso-8859-1"}, + {"iso-ir-100", "iso-8859-1"}, + {"iso_8859-1", "iso-8859-1"}, + {"latin1", "iso-8859-1"}, + {"l1", "iso-8859-1"}, + {"ibm819", "iso-8859-1"}, + {"cp819", "iso-8859-1"}, + {"csisolatin1", "iso-8859-1"}, + {"iso_8859-2:1987", "iso-8859-2"}, + {"iso-ir-101", "iso-8859-2"}, + {"iso_8859-2", "iso-8859-2"}, + {"latin2", "iso-8859-2"}, + {"l2", "iso-8859-2"}, + {"csisolatin2", "iso-8859-2"}, + {"iso_8859-3:1988", "iso-8859-3"}, + {"iso-ir-109", "iso-8859-3"}, + {"iso_8859-3", "iso-8859-3"}, + {"latin3", "iso-8859-3"}, + {"l3", "iso-8859-3"}, + {"csisolatin3", "iso-8859-3"}, + {"iso_8859-4:1988", "iso-8859-4"}, + {"iso-ir-110", "iso-8859-4"}, + {"iso_8859-4", "iso-8859-4"}, + {"latin4", "iso-8859-4"}, + {"l4", "iso-8859-4"}, + {"csisolatin4", "iso-8859-4"}, + {"iso_8859-6:1987", "iso-8859-6"}, + {"iso-ir-127", "iso-8859-6"}, + {"iso_8859-6", "iso-8859-6"}, + {"ecma-114", "iso-8859-6"}, + {"asmo-708", "iso-8859-6"}, + {"arabic", "iso-8859-6"}, + {"csisolatinarabic", "iso-8859-6"}, + {"iso_8859-7:1987", "iso-8859-7"}, + {"iso-ir-126", "iso-8859-7"}, + {"iso_8859-7", "iso-8859-7"}, + {"elot_928", "iso-8859-7"}, + {"ecma-118", "iso-8859-7"}, + {"greek", "iso-8859-7"}, + {"greek8", "iso-8859-7"}, + {"csisolatingreek", "iso-8859-7"}, + {"iso_8859-8:1988", "iso-8859-8"}, + {"iso-ir-138", "iso-8859-8"}, + {"iso_8859-8", "iso-8859-8"}, + {"hebrew", "iso-8859-8"}, + {"csisolatinhebrew", "iso-8859-8"}, + {"iso_8859-5:1988", "iso-8859-5"}, + {"iso-ir-144", "iso-8859-5"}, + {"iso_8859-5", "iso-8859-5"}, + {"cyrillic", "iso-8859-5"}, + {"csisolatincyrillic", "iso8859-5"}, + {"iso_8859-9:1989", "iso-8859-9"}, + {"iso-ir-148", "iso-8859-9"}, + {"iso_8859-9", "iso-8859-9"}, + {"latin5", "iso-8859-9"}, + {"l5", "iso-8859-9"}, + {"csisolatin5", "iso-8859-9"}, + {"iso_8859-10:1992", "iso-8859-10"}, + {"iso-ir-157", "iso-8859-10"}, + {"latin6", "iso-8859-10"}, + {"l6", "iso-8859-10"}, + {"csisolatin6", "iso-8859-10"}, + {"cskoi8r", "koi8-r"}, + {"ms_kanji", "shift_jis"}, + {"csshiftjis", "shift_jis"}, + {"extended_unix_code_packed_format_for_japanese", "euc-jp"}, + {"cseucpkdfmtjapanese", "euc-jp"}, + {"csgb2312", "gb2312"}, + {"csbig5", "big5"}, + /* end of official brain damage. + what follows has been taken * from glibc's localedata files. */ + {"iso_8859-13", "iso-8859-13"}, + {"iso-ir-179", "iso-8859-13"}, + {"latin7", "iso-8859-13"}, + {"l7", "iso-8859-13"}, + {"iso_8859-14", "iso-8859-14"}, + {"latin8", "iso-8859-14"}, + {"l8", "iso-8859-14"}, + {"iso_8859-15", "iso-8859-15"}, + {"latin9", "iso-8859-15"}, + {"latin0", "iso-8859-15"}, + {"iso_8859-16", "iso-8859-16"}, + {"latin10", "iso-8859-16"}, + {"646", "us-ascii"}, + {"eucjp", "euc-jp"}, + {"pck", "shift_jis"}, + {"ko_kr-euc", "euc-kr"}, + {"zh_tw-big5", "big5"}, + {"sjis", "shift_jis"}, + {"euc-jp-ms", "eucjp-ms"}, + {NULL, NULL} }; -#ifdef HAVE_LANGINFO_CODESET -# include - - void mutt_set_langinfo_charset (void) { - char buff[LONG_STRING]; - char buff2[LONG_STRING]; +#ifdef HAVE_LANGINFO_CODESET + char buff[LONG_STRING]; + char buff2[LONG_STRING]; - m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET)); - mutt_canonical_charset (buff2, sizeof (buff2), buff); + m_strcpy(buff, sizeof(buff), nl_langinfo(CODESET)); + mutt_canonical_charset(buff2, sizeof(buff2), buff); - /* finally, set $charset */ - if (!(Charset = m_strdup(buff2))) - Charset = m_strdup("iso-8859-1"); + /* finally, set $charset */ + if (!(Charset = m_strdup(buff2))) +#endif + Charset = m_strdup("iso-8859-1"); } -#else -void mutt_set_langinfo_charset (void) +void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name) { - Charset = m_strdup("iso-8859-1"); -} + ssize_t i; + char *p; + char scratch[LONG_STRING]; + + m_strcpy(scratch, sizeof(scratch), name); + m_strtolower(scratch); + + /* catch some common iso-8859-something misspellings */ + if (!strncmp(scratch, "8859", 4)) { + snprintf(scratch, sizeof(scratch), "iso-8859-%s", + name + 4 + (name[4] == '-')); + m_strtolower(scratch); + } else + if (!strncmp(scratch, "iso8859", 7)) { + snprintf(scratch, sizeof(scratch), "iso-8859-%s", + name + 7 + (name[7] == '-')); + m_strtolower(scratch); + } -#endif + for (i = 0; PreferredMIMENames[i].key; i++) { + if (!strcmp(scratch, PreferredMIMENames[i].key)) { + m_strcpy(dest, dlen, PreferredMIMENames[i].pref); + return; + } + } -void mutt_canonical_charset (char *dest, ssize_t dlen, const char *name) -{ - ssize_t i; - char *p; - char scratch[LONG_STRING]; - - /* catch some common iso-8859-something misspellings */ - if (!ascii_strncasecmp (name, "8859", 4) && name[4] != '-') - snprintf (scratch, sizeof (scratch), "iso-8859-%s", name + 4); - else if (!ascii_strncasecmp (name, "8859-", 5)) - snprintf (scratch, sizeof (scratch), "iso-8859-%s", name + 5); - else if (!ascii_strncasecmp (name, "iso8859", 7) && name[7] != '-') - snprintf (scratch, sizeof (scratch), "iso_8859-%s", name + 7); - else if (!ascii_strncasecmp (name, "iso8859-", 8)) - snprintf (scratch, sizeof (scratch), "iso_8859-%s", name + 8); - else - m_strcpy(scratch, sizeof(scratch), NONULL(name)); + m_strcpy(dest, dlen, scratch); +} - for (i = 0; PreferredMIMENames[i].key; i++) - if (!ascii_strcasecmp (scratch, PreferredMIMENames[i].key) || - !m_strcasecmp(scratch, PreferredMIMENames[i].key)) { - m_strcpy(dest, dlen, PreferredMIMENames[i].pref); - return; - } +static int mutt_chscmp(const char *s, const char *chs) +{ + char buffer[STRING]; - m_strcpy(dest, dlen, scratch); + if (!s) + return 0; - /* for cosmetics' sake, transform to lowercase. */ - for (p = dest; *p; p++) - *p = ascii_tolower (*p); + mutt_canonical_charset(buffer, sizeof(buffer), s); + return !strcmp(buffer, chs); } -int mutt_chscmp (const char *s, const char *chs) +int mutt_is_utf8(const char *s) { - char buffer[STRING]; - - if (!s) - return 0; + return mutt_chscmp(s, "utf-8"); +} - mutt_canonical_charset (buffer, sizeof (buffer), s); - return !ascii_strcasecmp (buffer, chs); +int mutt_is_us_ascii(const char *s) +{ + return mutt_chscmp(s, "us-ascii"); } @@ -620,17 +592,14 @@ void mutt_set_charset (char *charset) char buffer[STRING]; mutt_canonical_charset (buffer, sizeof (buffer), charset); - - Charset_is_utf8 = 0; - if (!strcmp (buffer, "utf-8")) - Charset_is_utf8 = 1; + Charset_is_utf8 = !strcmp(buffer, "utf-8"); #ifdef HAVE_BIND_TEXTDOMAIN_CODESET bind_textdomain_codeset (PACKAGE, buffer); #endif } -wchar_t replacement_char (void) +wchar_t replacement_char(void) { return Charset_is_utf8 ? 0xfffd : '?'; }