Commit 94d116ca 94d116ca5e9735ba47a755e7cbfb2b429ec5e88e by Sergey Poznyakoff

Automatically handle native character sets on input to the mail utility.

If the 'mime' variable is set, then mail will provide the missing 'charset'
parameter for each Content-Type header that begins with 'text/'.
Its value will be determined by examining the 'charset' mail variable.
If it is set to 'auto' (the default), the character set will be extracted
from the value of the LC_ALL environment variable.  If LC_ALL is unset,
the character set will be deduced from the LANG environment variable.

Thus, provided that LC_ALL is set correctly, the following setting in
.mailrc is recommended to ensure that mails in native character
sets will be processed correctly:

  set charset=auto mime

In most cases, it can be simplified to just 'set mime'.

* NEWS: Update.
* doc/texinfo/programs.texi: Update the description of the charset
variable.
* mail/mail.h (util_get_charset): New proto.
* mail/send.c (attach_set_content_type): New function.
(attlist_add, add_body): Use attach_set_content_type to
set the content_type field.
* mail/util.c (util_get_charset): New function.
(util_rfc2047_decode): Use util_get_charset.
1 parent 83d23534
...@@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to ...@@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to
104 '-E set mime', except that it takes effect after all options are 104 '-E set mime', except that it takes effect after all options are
105 processed. 105 processed.
106 106
107 ** Character sets
108
109 The 'charset' variable controls both input and output operations. On
110 input it is used to set the value of the missing 'charset' parameter
111 in the 'Content-Type' MIME header, if its value begins with 'text/'.
112 This means, in particular, that if this variable is set to its default
113 value (charset=auto), the LC_ALL environment variable is correctly
114 set, and the 'mime' variable is set, then mail can safely be used to
115 send messages in native character sets.
116
107 ** New option --alternative 117 ** New option --alternative
108 118
109 When used with --attach or --attach-fd options, this option sets the 119 When used with --attach or --attach-fd options, this option sets the
......
...@@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope. ...@@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope.
4797 @*Default: @samp{auto} 4797 @*Default: @samp{auto}
4798 @vrindex charset, mail variable 4798 @vrindex charset, mail variable
4799 4799
4800 The value of this variable controls the output character set for the 4800 The value of this variable is the character set used for input and
4801 header fields encoding using RFC 2047. If the variable is unset, no 4801 output operations. If the value is @samp{auto}, @command{mail} will
4802 decoding is performed and the fields are printed as they are. If the 4802 try to deduce the name of the character set from the value of
4803 variable is set to @samp{auto}, @command{mail} tries to deduce the 4803 the @samp{LC_ALL} environment variable. If the variable contains the
4804 name of the character set from the value of @code{LC_ALL} environment 4804 character set part (e.g. @samp{nb_NO.utf-8}), it will be used.
4805 variable. Otherwise, its value is taken as the name of the charset. 4805 Otherwise, @command{mail} will look up in its built-in database the
4806 value of the character set for this language/territory combination. If
4807 @samp{LC_ALL} is not set, the @samp{LANG} environment variable is
4808 inspected.
4809
4810 The value of @samp{charset} controls both input and output
4811 operations. On input, it is used to set the value of the
4812 @samp{charset} parameter in the @samp{Content-Type} MIME header, if
4813 its value begins with @samp{text/} and @samp{charset} is not present.
4814
4815 On output, it is used to display values of the header fields encoded
4816 using RFC 2047. If the variable is unset, no decoding is performed
4817 and the fields are printed as they are. Otherwise, they are recoded
4818 to that character set.
4806 4819
4807 @kwindex cmd 4820 @kwindex cmd
4808 @item cmd 4821 @item cmd
......
...@@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2, ...@@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2,
422 void util_run_cached_commands (mu_list_t *list); 422 void util_run_cached_commands (mu_list_t *list);
423 const char *util_reply_prefix (void); 423 const char *util_reply_prefix (void);
424 void util_rfc2047_decode (char **value); 424 void util_rfc2047_decode (char **value);
425 char *util_get_charset (void);
425 426
426 void util_mark_read (mu_message_t msg); 427 void util_mark_read (mu_message_t msg);
427 428
......
...@@ -173,6 +173,25 @@ attlist_new (void) ...@@ -173,6 +173,25 @@ attlist_new (void)
173 } 173 }
174 174
175 static void 175 static void
176 attach_set_content_type (struct atchinfo *aptr, char const *content_type)
177 {
178 char *charset;
179
180 if (!content_type)
181 content_type = "text/plain";
182 if (strncmp (content_type, "text/", 5) == 0
183 && !strstr (content_type, "charset=")
184 && (charset = util_get_charset ()))
185 {
186 mu_asprintf (&aptr->content_type, "%s; charset=%s",
187 content_type, charset);
188 free (charset);
189 }
190 else
191 aptr->content_type = mu_strdup (content_type);
192 }
193
194 static void
176 attlist_add (mu_list_t attlist, char *id, char const *encoding, 195 attlist_add (mu_list_t attlist, char *id, char const *encoding,
177 char const *content_type, char const *content_name, 196 char const *content_type, char const *content_name,
178 char const *content_filename, 197 char const *content_filename,
...@@ -184,9 +203,10 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding, ...@@ -184,9 +203,10 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding,
184 aptr = mu_alloc (sizeof (*aptr)); 203 aptr = mu_alloc (sizeof (*aptr));
185 204
186 aptr->id = id ? mu_strdup (id) : id; 205 aptr->id = id ? mu_strdup (id) : id;
187 aptr->encoding = mu_strdup (encoding); 206 aptr->encoding = mu_strdup (encoding);
188 aptr->content_type = mu_strdup (content_type ? 207 attach_set_content_type (aptr,
189 content_type : "application/octet-stream"); 208 content_type
209 ? content_type : "application/octet-stream");
190 aptr->name = content_name ? mu_strdup (content_name) : NULL; 210 aptr->name = content_name ? mu_strdup (content_name) : NULL;
191 aptr->filename = content_filename ? mu_strdup (content_filename) : NULL; 211 aptr->filename = content_filename ? mu_strdup (content_filename) : NULL;
192 aptr->source = stream; 212 aptr->source = stream;
...@@ -505,15 +525,14 @@ add_body (mu_message_t inmsg, compose_env_t *env) ...@@ -505,15 +525,14 @@ add_body (mu_message_t inmsg, compose_env_t *env)
505 mu_body_t body; 525 mu_body_t body;
506 mu_stream_t str; 526 mu_stream_t str;
507 struct atchinfo *aptr; 527 struct atchinfo *aptr;
508 528
509 mu_message_get_body (inmsg, &body); 529 mu_message_get_body (inmsg, &body);
510 mu_body_get_streamref (body, &str); 530 mu_body_get_streamref (body, &str);
511 531
512 aptr = mu_alloc (sizeof (*aptr)); 532 aptr = mu_alloc (sizeof (*aptr));
513 aptr->id = NULL; 533 aptr->id = NULL;
514 aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL; 534 aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL;
515 aptr->content_type = mu_strdup (default_content_type ? 535 attach_set_content_type (aptr, default_content_type);
516 default_content_type : "text/plain");
517 aptr->name = NULL; 536 aptr->name = NULL;
518 aptr->filename = NULL; 537 aptr->filename = NULL;
519 aptr->source = str; 538 aptr->source = str;
......
...@@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list) ...@@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list)
1044 mu_list_destroy (list); 1044 mu_list_destroy (list);
1045 } 1045 }
1046 1046
1047 void 1047 char *
1048 util_rfc2047_decode (char **value) 1048 util_get_charset (void)
1049 { 1049 {
1050 char *charset = NULL; 1050 char *charset;
1051 char *tmp;
1052 int rc;
1053 struct mu_lc_all lc_all = { .flags = 0 };
1054 1051
1055 if (!*value || mailvar_get (&charset, "charset", mailvar_type_string, 0)) 1052 if (mailvar_get (&charset, "charset", mailvar_type_string, 0))
1056 return; 1053 return NULL;
1057 1054
1058 if (mu_c_strcasecmp (charset, "auto") == 0) 1055 if (mu_c_strcasecmp (charset, "auto") == 0)
1059 { 1056 {
1060 tmp = getenv ("LC_ALL"); 1057 struct mu_lc_all lc_all = { .flags = 0 };
1058 char *tmp = getenv ("LC_ALL");
1061 if (!tmp) 1059 if (!tmp)
1062 tmp = getenv ("LANG"); 1060 tmp = getenv ("LANG");
1063 1061
1064 if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0) 1062 if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0)
1065 charset = lc_all.charset; 1063 {
1064 charset = mu_strdup (lc_all.charset);
1065 mu_lc_all_free (&lc_all);
1066 }
1067 else
1068 charset = NULL;
1066 } 1069 }
1070 else
1071 charset = mu_strdup (charset);
1072
1073 return charset;
1074 }
1075
1076 void
1077 util_rfc2047_decode (char **value)
1078 {
1079 char *charset, *tmp;
1080 int rc;
1067 1081
1082 if (!*value)
1083 return;
1084 charset = util_get_charset ();
1068 if (!charset) 1085 if (!charset)
1069 return; 1086 return;
1070 1087
1071 rc = mu_rfc2047_decode (charset, *value, &tmp); 1088 rc = mu_rfc2047_decode (charset, *value, &tmp);
1089 free (charset);
1090
1072 if (rc) 1091 if (rc)
1073 { 1092 {
1074 if (mailvar_is_true ("verbose")) 1093 if (mailvar_is_true ("verbose"))
...@@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value) ...@@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value)
1079 free (*value); 1098 free (*value);
1080 *value = tmp; 1099 *value = tmp;
1081 } 1100 }
1082 if (lc_all.flags)
1083 mu_lc_all_free (&lc_all);
1084 } 1101 }
1085 1102
1086 const char * 1103 const char *
......