Commit 94d116ca 94d116ca5e9735ba47a755e7cbfb2b429ec5e88e by Sergey Poznyakoff

Automatically handle native character sets on input to the mail utility.

If the 'mime' variable is set, then mail will provide the missing 'charset'
parameter for each Content-Type header whose value begins with 'text/'.
Its value will be determined by examining the 'charset' mail variable.
If it is set to 'auto' (the default), the character set will be extracted
from the value of the LC_ALL environment variable.  If LC_ALL is unset, the
character set will be deduced from the LANG environment variable instead.

Thus, provided that LC_ALL is set correctly, the following setting in
.mailrc is recommended to ensure that mails in native character
sets will be processed correctly:

  set charset=auto mime

In most cases, it can be simplified to just 'set mime'.

* NEWS: Update.
* doc/texinfo/programs.texi: Update the description of the charset
variable.
* mail/mail.h (util_get_charset): New proto.
* mail/send.c (attach_set_content_type): New function.
(attlist_add, add_body): Use attach_set_content_type to
set the content_type field.
* mail/util.c (util_get_charset): New function.
(util_rfc2047_decode): Use util_get_charset.
1 parent 83d23534
......@@ -104,6 +104,16 @@ in MIME format. In fact, the '--mime' option is equivalent to
'-E set mime', except that it takes effect after all options are
processed.
** Character sets
The 'charset' variable controls both input and output operations. On
input it is used to set the value of the missing 'charset' parameter
in the 'Content-Type' MIME header, if its value begins with 'text/'.
This means, in particular, that if this variable is set to its default
value (charset=auto), the LC_ALL environment variable is correctly
set, and the 'mime' variable is set, then mail can safely be used to
send messages in native character sets.
** New option --alternative
When used with --attach or --attach-fd options, this option sets the
......
......@@ -4797,12 +4797,25 @@ will fall back to using @acronym{SMTP} envelope.
@*Default: @samp{auto}
@vrindex charset, mail variable
The value of this variable controls the output character set for the
header fields encoding using RFC 2047. If the variable is unset, no
decoding is performed and the fields are printed as they are. If the
variable is set to @samp{auto}, @command{mail} tries to deduce the
name of the character set from the value of @code{LC_ALL} environment
variable. Otherwise, its value is taken as the name of the charset.
The value of this variable is the character set used for input and
output operations. If the value is @samp{auto}, @command{mail} will
try to deduce the name of the character set from the value of the
@samp{LC_ALL} environment variable. If that value contains a
character set part (e.g. @samp{nb_NO.utf-8}), it will be used.
Otherwise, @command{mail} will look up in its built-in database the
character set for this language/territory combination. If
@samp{LC_ALL} is not set, the @samp{LANG} environment variable is
inspected instead.
The value of @samp{charset} controls both input and output
operations. On input, it is used to set the value of the
@samp{charset} parameter in the @samp{Content-Type} MIME header, if
its value begins with @samp{text/} and @samp{charset} is not present.
On output, it is used to display values of the header fields encoded
using RFC 2047. If the variable is unset, no decoding is performed
and the fields are printed as they are. Otherwise, they are recoded
to that character set.
@kwindex cmd
@item cmd
......
......@@ -422,6 +422,7 @@ void util_cache_command (mu_list_t *list, const char *fmt, ...) MU_PRINTFLIKE(2,
void util_run_cached_commands (mu_list_t *list);
const char *util_reply_prefix (void);
void util_rfc2047_decode (char **value);
/* Return the character set name selected by the "charset" mail variable,
   or NULL if it cannot be determined.  A non-NULL result is a copy that
   the caller must release with free().  */
char *util_get_charset (void);
void util_mark_read (mu_message_t msg);
......
......@@ -173,6 +173,25 @@ attlist_new (void)
}
/* Store a copy of CONTENT_TYPE in APTR->content_type.

   A NULL CONTENT_TYPE is treated as "text/plain".  When the type starts
   with "text/" and does not already contain the string "charset=", the
   character set reported by util_get_charset (if any) is appended as a
   "; charset=NAME" parameter.  In every other case the type is copied
   verbatim.  */
static void
attach_set_content_type (struct atchinfo *aptr, char const *content_type)
{
  char const *type = content_type ? content_type : "text/plain";
  char *cs = NULL;

  /* Query the character set only for textual types that lack one, so
     util_get_charset is not called needlessly.  */
  if (strncmp (type, "text/", 5) == 0 && strstr (type, "charset=") == NULL)
    cs = util_get_charset ();

  if (cs == NULL)
    {
      /* Nothing to add: keep the type as given.  */
      aptr->content_type = mu_strdup (type);
      return;
    }

  mu_asprintf (&aptr->content_type, "%s; charset=%s", type, cs);
  /* The character set name was allocated for us; release it.  */
  free (cs);
}
static void
attlist_add (mu_list_t attlist, char *id, char const *encoding,
char const *content_type, char const *content_name,
char const *content_filename,
......@@ -184,9 +203,10 @@ attlist_add (mu_list_t attlist, char *id, char const *encoding,
aptr = mu_alloc (sizeof (*aptr));
aptr->id = id ? mu_strdup (id) : id;
aptr->encoding = mu_strdup (encoding);
aptr->content_type = mu_strdup (content_type ?
content_type : "application/octet-stream");
aptr->encoding = mu_strdup (encoding);
attach_set_content_type (aptr,
content_type
? content_type : "application/octet-stream");
aptr->name = content_name ? mu_strdup (content_name) : NULL;
aptr->filename = content_filename ? mu_strdup (content_filename) : NULL;
aptr->source = stream;
......@@ -505,15 +525,14 @@ add_body (mu_message_t inmsg, compose_env_t *env)
mu_body_t body;
mu_stream_t str;
struct atchinfo *aptr;
mu_message_get_body (inmsg, &body);
mu_body_get_streamref (body, &str);
aptr = mu_alloc (sizeof (*aptr));
aptr->id = NULL;
aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL;
aptr->content_type = mu_strdup (default_content_type ?
default_content_type : "text/plain");
aptr->encoding = default_encoding ? mu_strdup (default_encoding) : NULL;
attach_set_content_type (aptr, default_content_type);
aptr->name = NULL;
aptr->filename = NULL;
aptr->source = str;
......
......@@ -1044,31 +1044,50 @@ util_run_cached_commands (mu_list_t *list)
mu_list_destroy (list);
}
void
util_rfc2047_decode (char **value)
/* Return the name of the character set to use, as selected by the
   "charset" mail variable, or NULL if none can be determined.
   A non-NULL result is a copy made with mu_strdup; the callers visible
   in this change release it with free.  */
char *
util_get_charset (void)
{
char *charset = NULL;
char *tmp;
int rc;
struct mu_lc_all lc_all = { .flags = 0 };
char *charset;
if (!*value || mailvar_get (&charset, "charset", mailvar_type_string, 0))
return;
/* A non-zero result from mailvar_get yields no character set.  */
if (mailvar_get (&charset, "charset", mailvar_type_string, 0))
return NULL;
/* The special value "auto": derive the name from the locale environment
   instead of using the variable's value directly.  */
if (mu_c_strcasecmp (charset, "auto") == 0)
{
tmp = getenv ("LC_ALL");
struct mu_lc_all lc_all = { .flags = 0 };
char *tmp = getenv ("LC_ALL");
/* LANG is consulted only when LC_ALL is absent from the environment.  */
if (!tmp)
tmp = getenv ("LANG");
if (tmp && mu_parse_lc_all (tmp, &lc_all, MU_LC_CSET) == 0)
charset = lc_all.charset;
{
/* Copy the name before lc_all is released below.
   NOTE(review): if mu_parse_lc_all can succeed while leaving
   lc_all.charset NULL, mu_strdup would be handed NULL here -- confirm.  */
charset = mu_strdup (lc_all.charset);
mu_lc_all_free (&lc_all);
}
else
charset = NULL;
}
else
/* Explicit value: hand back a copy of the variable's value.  */
charset = mu_strdup (charset);
return charset;
}
void
util_rfc2047_decode (char **value)
{
char *charset, *tmp;
int rc;
if (!*value)
return;
charset = util_get_charset ();
if (!charset)
return;
rc = mu_rfc2047_decode (charset, *value, &tmp);
free (charset);
if (rc)
{
if (mailvar_is_true ("verbose"))
......@@ -1079,8 +1098,6 @@ util_rfc2047_decode (char **value)
free (*value);
*value = tmp;
}
if (lc_all.flags)
mu_lc_all_free (&lc_all);
}
const char *
......