From d6e1782b1e788bd1c4767443712bf1713cc013ef Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Wed, 15 Nov 2006 00:06:59 +0100 Subject: [PATCH 1/1] more charset improvements. Signed-off-by: Pierre Habouzit --- charset.c | 59 +++++++++++++++++++++++----------------------- charset.def | 1 + charset.h | 13 +++++----- init.c | 4 +--- lib-crypt/pgp.c | 2 +- lib-mime/rfc2047.c | 2 +- sendlib.c | 4 ++-- 7 files changed, 41 insertions(+), 44 deletions(-) diff --git a/charset.c b/charset.c index a1541f0..8cea280 100644 --- a/charset.c +++ b/charset.c @@ -37,10 +37,10 @@ # define EILSEQ EINVAL #endif -char *Charset; +char *Charset = NULL; int Charset_is_utf8 = 0; -void mutt_set_langinfo_charset(void) +void charset_initialize(void) { #ifdef HAVE_LANGINFO_CODESET char buff[LONG_STRING]; @@ -50,32 +50,43 @@ void mutt_set_langinfo_charset(void) mutt_canonical_charset(buff2, sizeof(buff2), buff); /* finally, set $charset */ - if (!(Charset = m_strdup(buff2))) + if (!m_strisempty(buff2)) { + m_strreplace(&Charset, buff2); + } else +#endif + m_strreplace(&Charset, "iso-8859-1"); + + Charset_is_utf8 = !strcmp(Charset, "utf-8"); +#ifdef HAVE_BIND_TEXTDOMAIN_CODESET + bind_textdomain_codeset(PACKAGE, Charset); #endif - Charset = m_strdup("iso-8859-1"); } + #include "charset.gperf" void mutt_canonical_charset(char *dest, ssize_t dlen, const char *name) { const struct cset_pair *cp; char scratch[LONG_STRING]; - int i; + const char *p; + int i = 0; // canonize name: only keep a-z0-9 and dots, put into lowercase - for (i = 0; i < ssizeof(scratch); i++) { - if (isalnum(*name) || *name == '.') { - scratch[i] = tolower((unsigned char)*name); - } else - if (!*name || *name == ':' || i + 1 == ssizeof(scratch)) { - scratch[i] = '\0'; - break; + for (p = name; *p && *p != ':' && i < ssizeof(scratch) - 1; p++) { + if (isalnum(*p) || *p== '.') { + scratch[i++] = tolower((unsigned char)*p); } } + scratch[i] = '\0'; cp = mutt_canonical_charset_aux(scratch, strlen(scratch)); - m_strcpy(dest, dlen, cp ? cp->pref : name); + if (cp) { + m_strcpy(dest, dlen, cp->pref); + } else { + m_strcpy(dest, dlen, name); + m_strtolower(dest); + } } static int mutt_chscmp(const char *s, const char *chs) @@ -89,12 +100,12 @@ static int mutt_chscmp(const char *s, const char *chs) return !strcmp(buffer, chs); } -int mutt_is_utf8(const char *s) +int charset_is_utf8(const char *s) { return mutt_chscmp(s, "utf-8"); } -int mutt_is_us_ascii(const char *s) +int charset_is_us_ascii(const char *s) { return mutt_chscmp(s, "us-ascii"); } @@ -226,9 +237,9 @@ int mutt_convert_string (char **ps, const char *from, const char *to, const char **inrepls = NULL; const char *outrepl = NULL; - if (mutt_is_utf8 (to)) + if (charset_is_utf8 (to)) outrepl = "\357\277\275"; - else if (mutt_is_utf8 (from)) + else if (charset_is_utf8 (from)) inrepls = repls; else outrepl = "?"; @@ -289,7 +300,7 @@ FGETCONV *fgetconv_open (FILE * file, const char *from, const char *to, fc->p = fc->ob = fc->bufo; fc->ib = fc->bufi; fc->ibl = 0; - fc->inrepls = mutt_is_utf8 (to) ? repls : repls + 1; + fc->inrepls = charset_is_utf8 (to) ? repls : repls + 1; } else fc = p_new(struct fgetconv_s, 1); @@ -459,18 +470,6 @@ int mutt_convert_nonmime_string (char **ps) return -1; } -void mutt_set_charset (char *charset) -{ - char buffer[STRING]; - - mutt_canonical_charset (buffer, sizeof (buffer), charset); - Charset_is_utf8 = !strcmp(buffer, "utf-8"); - -#ifdef HAVE_BIND_TEXTDOMAIN_CODESET - bind_textdomain_codeset (PACKAGE, buffer); -#endif -} - wchar_t replacement_char(void) { return Charset_is_utf8 ? 0xfffd : '?'; diff --git a/charset.def b/charset.def index fc6abba..ea534d5 100644 --- a/charset.def +++ b/charset.def @@ -124,6 +124,7 @@ mskanji, "shift_jis" pck, "shift_jis" sjis, "shift_jis" us, "us-ascii" +utf8, "utf-8" zhtwbig5, "big5" 646, "us-ascii" %% diff --git a/charset.h b/charset.h index 702f8f1..b27a5c6 100644 --- a/charset.h +++ b/charset.h @@ -21,6 +21,12 @@ extern char *Charset; extern int Charset_is_utf8; +void charset_initialize(void); +void mutt_canonical_charset(char *, ssize_t, const char *); + +int charset_is_utf8(const char *s); +int charset_is_us_ascii(const char *s); + #ifdef HAVE_ICONV_H # include @@ -36,12 +42,6 @@ my_iconv(iconv_t ict, const char **in, ssize_t *il, char **out, ssize_t *ol) { # define iconv_close(a) 0 #endif -void mutt_set_langinfo_charset (void); -void mutt_canonical_charset(char *, ssize_t, const char *); - -int mutt_is_utf8(const char *s); -int mutt_is_us_ascii(const char *s); - int mutt_convert_string (char **, const char *, const char *, int); const char *mutt_get_first_charset (const char *); int mutt_convert_nonmime_string (char **); @@ -57,7 +57,6 @@ int fgetconv (FGETCONV *); char *fgetconvs (char *, ssize_t, FGETCONV *); void fgetconv_close (FGETCONV **); -void mutt_set_charset(char *charset); wchar_t replacement_char(void); #endif /* _CHARSET_H */ diff --git a/init.c b/init.c index d452111..583b02d 100644 --- a/init.c +++ b/init.c @@ -2661,9 +2661,7 @@ void mutt_init (int skip_sys_rc, string_list_t * commands) if ((p = getenv ("EMAIL")) != NULL) From = rfc822_parse_adrlist (NULL, p); - mutt_set_langinfo_charset (); - mutt_set_charset (Charset); - + charset_initialize(); /* Set standard defaults */ hash_map (ConfigOptions, mutt_set_default, 0); diff --git a/lib-crypt/pgp.c b/lib-crypt/pgp.c index 5c0d497..dcc905c 100644 --- a/lib-crypt/pgp.c +++ b/lib-crypt/pgp.c @@ -1348,7 +1348,7 @@ BODY *pgp_traditional_encryptsign (BODY * a, int flags, char *keylist) else from_charset = Charset; - if (!mutt_is_us_ascii (body_charset)) { + if (!charset_is_us_ascii (body_charset)) { int c; FGETCONV *fc; diff --git a/lib-mime/rfc2047.c b/lib-mime/rfc2047.c index 264caff..4900d7c 100644 --- a/lib-mime/rfc2047.c +++ b/lib-mime/rfc2047.c @@ -452,7 +452,7 @@ static int rfc2047_encode(const char *d, ssize_t dlen, int col, } /* Hack to avoid labelling 8-bit data as us-ascii. */ - if (!icode && mutt_is_us_ascii(tocode)) + if (!icode && charset_is_us_ascii(tocode)) tocode = "unknown-8bit"; /* Adjust t0 for maximum length of line. */ diff --git a/sendlib.c b/sendlib.c index 5f713b6..c185101 100644 --- a/sendlib.c +++ b/sendlib.c @@ -846,7 +846,7 @@ CONTENT *mutt_get_content_info (const char *fname, BODY * b) if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset)) mutt_set_parameter ("charset", (!info->hibin ? "us-ascii" : Charset - && !mutt_is_us_ascii (Charset) ? Charset : + && !charset_is_us_ascii (Charset) ? Charset : "unknown-8bit"), &b->parameter); return info; @@ -1151,7 +1151,7 @@ void mutt_update_encoding (BODY * a) char chsbuff[STRING]; /* override noconv when it's us-ascii */ - if (mutt_is_us_ascii (mutt_get_body_charset (chsbuff, sizeof (chsbuff), a))) + if (charset_is_us_ascii (mutt_get_body_charset (chsbuff, sizeof (chsbuff), a))) a->noconv = 0; if (!a->force_charset && !a->noconv) -- 2.20.1