Automatically handle native character sets on input to the mail utility.
If the 'mime' variable is set, then mail will provide the missing 'charset' parameter for each Content-Type header whose value begins with 'text/'. Its value will be determined by examining the 'charset' mail variable. If it is set to 'auto' (the default), the character set will be extracted from the value of the LC_ALL environment variable. If LC_ALL is unset, the character set will be deduced from the LANG environment variable. Thus, provided that LC_ALL is set correctly, the following setting in .mailrc is recommended to ensure that mails in native character sets will be processed correctly: 'set charset=auto mime'. In most cases, it can be simplified to just 'set mime'. * NEWS: Update. * doc/texinfo/programs.texi: Update the description of the charset variable. * mail/mail.h (util_get_charset): New proto. * mail/send.c (attach_set_content_type): New function. (attlist_add, add_body): Use attach_set_content_type to set the content_type field. * mail/util.c (util_get_charset): New function. (util_rfc2047_decode): Use util_get_charset.
Showing
5 changed files
with
82 additions
and
22 deletions
... | @@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to | ... | @@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to |
104 | '-E set mime', except that it takes effect after all options are | 104 | '-E set mime', except that it takes effect after all options are |
105 | processed. | 105 | processed. |
106 | 106 | ||
107 | ** Character sets | ||
108 | |||
109 | The 'charset' variable controls both input and output operations. On | ||
110 | input it is used to set the value of the missing 'charset' parameter | ||
111 | in the 'Content-Type' MIME header, if its value begins with 'text/'. | ||
112 | This means, in particular, that if this variable is set to its default | ||
113 | value (charset=auto), the LC_ALL environment variable is correctly | ||
114 | set, and the 'mime' variable is set, then mail can safely be used to | ||
115 | send messages in native character sets. | ||
116 | |||
107 | ** New option --alternative | 117 | ** New option --alternative |
108 | 118 | ||
109 | When used with --attach or --attach-fd options, this option sets the | 119 | When used with --attach or --attach-fd options, this option sets the | ... | ... |
... | @@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope. | ... | @@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope. |
4797 | @*Default: @samp{auto} | 4797 | @*Default: @samp{auto} |
4798 | @vrindex charset, mail variable | 4798 | @vrindex charset, mail variable |
4799 | 4799 | ||
4800 | The value of this variable controls the output character set for the | 4800 | The value of this variable is the character set used for input and |
4801 | header fields encoding using RFC 2047. If the variable is unset, no | 4801 | output operations. If the value is @samp{auto}, @command{mail} will |
4802 | decoding is performed and the fields are printed as they are. If the | 4802 | try to deduce the name of the character set from the value of |
4803 | variable is set to @samp{auto}, @command{mail} tries to deduce the | 4803 | the @samp{LC_ALL} environment variable. If the variable contains the |
4804 | name of the character set from the value of @code{LC_ALL} environment | 4804 | character set part (e.g. @samp{nb_NO.utf-8}), it will be used. |
4805 | variable. Otherwise, its value is taken as the name of the charset. | 4805 | Otherwise, @command{mail} will look up in its built-in database the |
4806 | value of the character set for this language/territory combination. If | ||
4807 | @samp{LC_ALL} is not set, the @samp{LANG} environment variable is | ||
4808 | inspected. | ||
4809 | |||
4810 | The value of @samp{charset} controls both input and output | ||
4811 | operations. On input, it is used to set the value of the | ||
4812 | @samp{charset} parameter in the @samp{Content-Type} MIME header, if | ||
4813 | its value begins with @samp{text/} and @samp{charset} is not present. | ||
4814 | |||
4815 | On output, it is used to display values of the header fields encoded | ||
4816 | using RFC 2047. If the variable is unset, no decoding is performed | ||
4817 | and the fields are printed as they are. Otherwise, they are recoded | ||
4818 | to that character set. | ||
4806 | 4819 | ||
4807 | @kwindex cmd | 4820 | @kwindex cmd |
4808 | @item cmd | 4821 | @item cmd | ... | ... |
... | @@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2, | ... | @@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2, |
422 | void util_run_cached_commands (mu_list_t *list); | 422 | void util_run_cached_commands (mu_list_t *list); |
423 | const char *util_reply_prefix (void); | 423 | const char *util_reply_prefix (void); |
424 | void util_rfc2047_decode (char **value); | 424 | void util_rfc2047_decode (char **value); |
425 | char *util_get_charset (void); | ||
425 | 426 | ||
426 | void util_mark_read (mu_message_t msg); | 427 | void util_mark_read (mu_message_t msg); |
427 | 428 | ... | ... |
... | @@ -173,6 +173,25 @@ attlist_new (void) | ... | @@ -173,6 +173,25 @@ attlist_new (void) |
173 | } | 173 | } |
174 | 174 | ||
175 | static void | 175 | static void |
176 | attach_set_content_type (struct atchinfo *aptr, char const *content_type) | ||
177 | { | ||
178 | char *charset; | ||
179 | |||
180 | if (!content_type) | ||
181 | content_type = "text/plain"; | ||
182 | if (strncmp (content_type, "text/", 5) == 0 | ||
183 | && !strstr (content_type, "charset=") | ||
184 | && (charset = util_get_charset ())) | ||
185 | { | ||
186 | mu_asprintf (&aptr->content_type, "%s; charset=%s", | ||
187 | content_type, charset); | ||
188 | free (charset); | ||
189 | } | ||
190 | else | ||
191 | aptr->content_type = mu_strdup (content_type); | ||
192 | } | ||
193 | |||
194 | static void | ||
176 | attlist_add (mu_list_t attlist, char *id, char const *encoding, | 195 | attlist_add (mu_list_t attlist, char *id, char const *encoding, |
177 | char const *content_type, char const *content_name, | 196 | char const *content_type, char const *content_name, |
178 | char const *content_filename, | 197 | char const *content_filename, |
... | @@ -185,8 +204,9 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding, | ... | @@ -185,8 +204,9 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding, |
185 | 204 | ||
186 | aptr->id = id ? mu_strdup (id) : id; | 205 | aptr->id = id ? mu_strdup (id) : id; |
187 | aptr->encoding = mu_strdup (encoding); | 206 | aptr->encoding = mu_strdup (encoding); |
188 | aptr->content_type = mu_strdup (content_type ? | 207 | attach_set_content_type (aptr, |
189 | content_type : "application/octet-stream"); | 208 | content_type |
209 | ? content_type : "application/octet-stream"); | ||
190 | aptr->name = content_name ? mu_strdup (content_name) : NULL; | 210 | aptr->name = content_name ? mu_strdup (content_name) : NULL; |
191 | aptr->filename = content_filename ? mu_strdup (content_filename) : NULL; | 211 | aptr->filename = content_filename ? mu_strdup (content_filename) : NULL; |
192 | aptr->source = stream; | 212 | aptr->source = stream; |
... | @@ -512,8 +532,7 @@ add_body (mu_message_t inmsg, compose_env_t *env) | ... | @@ -512,8 +532,7 @@ add_body (mu_message_t inmsg, compose_env_t *env) |
512 | aptr = mu_alloc (sizeof (*aptr)); | 532 | aptr = mu_alloc (sizeof (*aptr)); |
513 | aptr->id = NULL; | 533 | aptr->id = NULL; |
514 | aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL; | 534 | aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL; |
515 | aptr->content_type = mu_strdup (default_content_type ? | 535 | attach_set_content_type (aptr, default_content_type); |
516 | default_content_type : "text/plain"); | ||
517 | aptr->name = NULL; | 536 | aptr->name = NULL; |
518 | aptr->filename = NULL; | 537 | aptr->filename = NULL; |
519 | aptr->source = str; | 538 | aptr->source = str; | ... | ... |
... | @@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list) | ... | @@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list) |
1044 | mu_list_destroy (list); | 1044 | mu_list_destroy (list); |
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | void | 1047 | char * |
1048 | util_rfc2047_decode (char **value) | 1048 | util_get_charset (void) |
1049 | { | 1049 | { |
1050 | char *charset = NULL; | 1050 | char *charset; |
1051 | char *tmp; | ||
1052 | int rc; | ||
1053 | struct mu_lc_all lc_all = { .flags = 0 }; | ||
1054 | 1051 | ||
1055 | if (!*value || mailvar_get (&charset, "charset", mailvar_type_string, 0)) | 1052 | if (mailvar_get (&charset, "charset", mailvar_type_string, 0)) |
1056 | return; | 1053 | return NULL; |
1057 | 1054 | ||
1058 | if (mu_c_strcasecmp (charset, "auto") == 0) | 1055 | if (mu_c_strcasecmp (charset, "auto") == 0) |
1059 | { | 1056 | { |
1060 | tmp = getenv ("LC_ALL"); | 1057 | struct mu_lc_all lc_all = { .flags = 0 }; |
1058 | char *tmp = getenv ("LC_ALL"); | ||
1061 | if (!tmp) | 1059 | if (!tmp) |
1062 | tmp = getenv ("LANG"); | 1060 | tmp = getenv ("LANG"); |
1063 | 1061 | ||
1064 | if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0) | 1062 | if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0) |
1065 | charset = lc_all.charset; | 1063 | { |
1064 | charset = mu_strdup (lc_all.charset); | ||
1065 | mu_lc_all_free (&lc_all); | ||
1066 | } | ||
1067 | else | ||
1068 | charset = NULL; | ||
1066 | } | 1069 | } |
1070 | else | ||
1071 | charset = mu_strdup (charset); | ||
1072 | |||
1073 | return charset; | ||
1074 | } | ||
1075 | |||
1076 | void | ||
1077 | util_rfc2047_decode (char **value) | ||
1078 | { | ||
1079 | char *charset, *tmp; | ||
1080 | int rc; | ||
1067 | 1081 | ||
1082 | if (!*value) | ||
1083 | return; | ||
1084 | charset = util_get_charset (); | ||
1068 | if (!charset) | 1085 | if (!charset) |
1069 | return; | 1086 | return; |
1070 | 1087 | ||
1071 | rc = mu_rfc2047_decode (charset, *value, &tmp); | 1088 | rc = mu_rfc2047_decode (charset, *value, &tmp); |
1089 | free (charset); | ||
1090 | |||
1072 | if (rc) | 1091 | if (rc) |
1073 | { | 1092 | { |
1074 | if (mailvar_is_true ("verbose")) | 1093 | if (mailvar_is_true ("verbose")) |
... | @@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value) | ... | @@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value) |
1079 | free (*value); | 1098 | free (*value); |
1080 | *value = tmp; | 1099 | *value = tmp; |
1081 | } | 1100 | } |
1082 | if (lc_all.flags) | ||
1083 | mu_lc_all_free (&lc_all); | ||
1084 | } | 1101 | } |
1085 | 1102 | ||
1086 | const char * | 1103 | const char * | ... | ... |
-
Please register or sign in to post a comment