Commit 7892e5ba 7892e5bacca59d994918be4999ad0af3a8bbf33b by Sergey Poznyakoff

Improve line tracker.

The new version is able to hold input history from several source lines,
which makes it easier to implement #line directives in lexers.

* include/mailutils/locus.h (mu_linetrack_stat): Remove start_line, add n_files.
(mu_linetrack_origin): New proto.
* libmailutils/locus/linetrack.c: Improve indexing.  Keep track of several
input files.
* libmailutils/tests/linetrack.at: Add new tests.
* libmailutils/tests/linetrack.c: Improve command set.

* mimeview/grammar.y (make_node): Initialize locus
1 parent 4981ab84
......@@ -43,10 +43,9 @@ typedef struct mu_linetrack *mu_linetrack_t;
struct mu_linetrack_stat
{
unsigned start_line; /* Start line number (1-based) */
size_t n_lines; /* Number of lines, including the recent (incomplete)
one */
size_t n_chars; /* Total number of characters */
size_t n_files; /* Number of source files */
size_t n_lines; /* Number of lines, including the recent (incomplete) one */
size_t n_chars; /* Total number of characters */
};
int mu_ident_ref (char const *name, char const **refname);
......@@ -81,6 +80,7 @@ mu_locus_point_same_line (struct mu_locus_point const *a,
int mu_linetrack_create (mu_linetrack_t *ret,
char const *file_name, size_t max_lines);
int mu_linetrack_origin (mu_linetrack_t trk, struct mu_locus_point const *pt);
int mu_linetrack_rebase (mu_linetrack_t trk, struct mu_locus_point const *pt);
void mu_linetrack_free (mu_linetrack_t trk);
void mu_linetrack_destroy (mu_linetrack_t *trk);
......
......@@ -21,46 +21,176 @@
#include <mailutils/locus.h>
#include <mailutils/error.h>
/* The line-tracker structure keeps track of the last N lines read from a
text input file. For each line read it keeps the number of characters
/* The line-tracker structure keeps track of the last N lines read from one
or more input files. For each line read it keeps the number of characters
in that line including the newline. This information is stored in a
syclic stack of N elements. Top of stack always represents the current
line. For the purpose of line tracker, current line is the line that is
being visited, such that its final newline character has not yet been
seen. Once the newline is seen, the line is pushed on stack, and a new
current line is assumed.
cyclic stack of N elements (N >= 2). Top of stack always represents the
current line. For the purpose of line tracker, current line is the line
that is being visited, such that its final newline character has not yet
been seen. Once the newline is seen, the line is pushed on stack, and a
new current line is assumed.
The value of N must not be less than 2.
Each input file is represented by a directory entry keeping its name,
number of the first line that is stored in the tracker and the index of
that line in the cols stack. Entries form a doubly-linked list, with
head pointing to the most recent (current) source. When a new line is
added to a stack that is already full, the eldest entry is discarded and
its slot is reused for the new line; the directory entry of the eldest
source is updated accordingly. If the discarded entry was the only line
of that source, the source itself is discarded.
*/
struct mu_linetrack
/* Directory entry describing one input source whose lines are currently
   held in the tracker.  Entries are linked into a doubly-linked list;
   a newly registered source is inserted at the list head, so NEXT leads
   towards older sources and PREV towards more recent ones. */
struct source
{
  char const *file_name; /* Name of the source file */
  size_t idx; /* Index of the first element on stack */
  unsigned line; /* Number of line corresponding to cols[idx] */
  struct source *next, *prev;
};
struct mu_linetrack
{
struct source *s_head, *s_tail;
/* Directory of source files. Most recent one is
s_head */
size_t max_lines; /* Max. number of lines history kept by tracker (N) */
size_t head; /* Index of the eldest element on stack */
size_t tos; /* Index of the most recent element on stack
(< max_lines) */
unsigned hline; /* Number of line corresponding to cols[head] */
unsigned *cols; /* Cyclic stack of character counts.
Number of characters in line (hline + n) is
Number of characters in line (line + n) is
cols[head + n] (0 <= n <= tos). */
};
/* Return the index that follows A in the cyclic stack of TRK, wrapping
   around to 0 after the last slot. */
static inline size_t
trk_incr (struct mu_linetrack *trk, size_t a)
{
  size_t successor = a + 1;

  return successor % trk->max_lines;
}
/* Return the index that precedes A in the cyclic stack of TRK.  The
   stack size is added before subtracting one, so that the unsigned
   arithmetic does not go below zero when A is 0. */
static inline size_t
trk_decr (struct mu_linetrack *trk, size_t a)
{
  size_t size = trk->max_lines;

  return (a + size - 1) % size;
}
/* Return the number of stack entries between index FROM and the top of
   stack, both inclusive, taking the cyclic wrap-around into account. */
static inline unsigned
count_lines (mu_linetrack_t trk, size_t from)
{
  size_t size = trk->max_lines;
  size_t distance = (trk->tos + size - from) % size;

  return distance + 1;
}
#ifndef SIZE_MAX
# define SIZE_MAX (~((size_t)0))
#endif
/* Sum the character counts of all stack entries from index I up to and
   including the top of stack.  On success, store the sum in *RET and
   return 0.  If the sum would not fit in size_t, return ERANGE and
   leave *RET untouched. */
static int
count_chars (struct mu_linetrack *trk, size_t i, size_t *ret)
{
  size_t total = 0;
  size_t pos;

  for (pos = i; ; pos = trk_incr (trk, pos))
    {
      unsigned width = trk->cols[pos];

      /* Refuse to wrap around: check the headroom before adding. */
      if (width > SIZE_MAX - total)
        return ERANGE;
      total += width;

      /* The top of stack is the last entry to account for. */
      if (pos == trk->tos)
        break;
    }

  *ret = total;
  return 0;
}
/* Return the number of entries in the source directory of TRK, obtained
   by walking the list from its head. */
static size_t
count_files (struct mu_linetrack *trk)
{
  size_t total = 0;
  struct source *cur = trk->s_head;

  while (cur)
    {
      total++;
      cur = cur->next;
    }
  return total;
}
/* Unlink the directory entry SP from the source list of TRK, drop the
   reference to its file name and free the entry.  The list head and
   tail pointers of TRK are adjusted if SP was the first or the last
   entry. */
static void
del_source (mu_linetrack_t trk, struct source *sp)
{
  struct source *before = sp->prev;
  struct source *after = sp->next;

  /* Fix the backward link of the successor, or the tail if there is
     no successor. */
  if (after)
    after->prev = before;
  else
    trk->s_tail = before;

  /* Fix the forward link of the predecessor, or the head if there is
     no predecessor. */
  if (before)
    before->next = after;
  else
    trk->s_head = after;

  mu_ident_deref (sp->file_name);
  free (sp);
}
static inline unsigned *
cols_ptr (mu_linetrack_t trk, size_t n)
push (mu_linetrack_t trk)
{
return &trk->cols[(trk->head + n) % trk->max_lines];
trk->tos = trk_incr (trk, trk->tos);
if (trk->tos == trk->head)
{
trk->head = trk_incr (trk, trk->head);
trk->s_tail->idx = trk->head;
trk->s_tail->line++;
}
if (trk->s_tail->prev && trk->s_tail->idx == trk->s_tail->prev->idx)
del_source (trk, trk->s_tail);
trk->cols[trk->tos] = 0;
return &trk->cols[trk->tos];
}
static inline unsigned *
cols_tos_ptr (mu_linetrack_t trk)
pop (mu_linetrack_t trk)
{
return cols_ptr (trk, trk->tos);
if (trk->tos == trk->head)
return NULL;
if (trk->tos == trk->s_head->idx)
del_source (trk, trk->s_head);
trk->tos = trk_decr (trk, trk->tos);
return &trk->cols[trk->tos];
}
static inline unsigned
cols_peek (mu_linetrack_t trk, size_t n)
int
mu_linetrack_origin (mu_linetrack_t trk, struct mu_locus_point const *pt)
{
return *cols_ptr (trk, n);
int rc;
struct source *sp;
if (!trk || !pt || !pt->mu_file || pt->mu_line == 0)
return EINVAL;
sp = malloc (sizeof *sp);
if (!sp)
return errno;
rc = mu_ident_ref (pt->mu_file, &sp->file_name);
if (rc)
{
free (sp);
return rc;
}
if (trk->cols[trk->tos])
push (trk);
sp->idx = trk->tos;
sp->line = pt->mu_line;
trk->cols[sp->idx] = pt->mu_col;
sp->prev = NULL;
sp->next = trk->s_head;
if (trk->s_head)
trk->s_head->prev = sp;
else
trk->s_tail = sp;
trk->s_head = sp;
return 0;
}
int
......@@ -69,10 +199,12 @@ mu_linetrack_create (mu_linetrack_t *ret,
{
int rc;
struct mu_linetrack *trk;
struct mu_locus_point pt;
trk = malloc (sizeof *trk);
if (!trk)
return errno;
trk->cols = calloc (max_lines, sizeof (trk->cols[0]));
if (!trk->cols)
{
......@@ -80,21 +212,25 @@ mu_linetrack_create (mu_linetrack_t *ret,
free (trk);
return rc;
}
rc = mu_ident_ref (file_name, &trk->file_name);
if (rc)
{
free (trk->cols);
free (trk);
return rc;
}
trk->s_head = trk->s_tail = NULL;
if (max_lines < 2)
max_lines = 2;
trk->max_lines = max_lines;
trk->head = 0;
trk->tos = 0;
trk->hline = 1;
trk->cols[0] = 0;
pt.mu_file = file_name;
pt.mu_line = 1;
pt.mu_col = 0;
rc = mu_linetrack_origin (trk, &pt);
if (rc)
{
free (trk->cols);
free (trk);
return rc;
}
*ret = trk;
return 0;
......@@ -107,10 +243,10 @@ mu_linetrack_rebase (mu_linetrack_t trk, struct mu_locus_point const *pt)
int rc = mu_ident_ref (pt->mu_file, &file_name);
if (rc)
return rc;
mu_ident_deref (trk->file_name);
trk->file_name = file_name;
trk->hline = pt->mu_line;
*cols_ptr (trk, 0) = pt->mu_col;
mu_ident_deref (trk->s_head->file_name);
trk->s_head->file_name = file_name;
trk->s_head->line = pt->mu_line;
trk->cols[trk->s_head->idx] = pt->mu_col;
return 0;
}
......@@ -119,7 +255,8 @@ mu_linetrack_free (mu_linetrack_t trk)
{
if (trk)
{
mu_ident_deref (trk->file_name);
while (trk->s_head)
del_source (trk, trk->s_head);
free (trk->cols);
free (trk);
}
......@@ -135,58 +272,20 @@ mu_linetrack_destroy (mu_linetrack_t *trk)
}
}
static inline unsigned *
push (mu_linetrack_t trk)
{
unsigned *ptr;
if (trk->tos == trk->max_lines - 1)
{
trk->head++;
trk->hline++;
}
else
trk->tos++;
*(ptr = cols_tos_ptr (trk)) = 0;
return ptr;
}
static inline unsigned *
pop (mu_linetrack_t trk)
{
if (trk->tos == 0)
return NULL;
trk->tos--;
return cols_tos_ptr (trk);
}
#ifndef SIZE_MAX
# define SIZE_MAX (~((size_t)0))
#endif
int
mu_linetrack_stat (struct mu_linetrack *trk, struct mu_linetrack_stat *st)
{
size_t i, nch = 0;
for (i = 0; i <= trk->tos; i++)
{
unsigned n = cols_peek (trk, i);
if (SIZE_MAX - nch < n)
return ERANGE;
nch += n;
}
st->start_line = trk->hline;
st->n_lines = trk->tos + 1;
st->n_chars = nch;
if (count_chars (trk, trk->head, &st->n_chars))
return ERANGE;
st->n_files = count_files (trk);
st->n_lines = count_lines (trk, trk->head);
return 0;
}
int
mu_linetrack_at_bol (struct mu_linetrack *trk)
{
return *cols_tos_ptr (trk) == 0;
return trk->cols[trk->tos] == 0;
}
void
......@@ -199,10 +298,11 @@ mu_linetrack_advance (struct mu_linetrack *trk,
if (text == NULL || leng == 0)
return;
mu_locus_point_set_file (&loc->beg, trk->file_name);
mu_locus_point_set_file (&loc->end, trk->file_name);
loc->beg.mu_line = trk->hline + trk->tos;
ptr = cols_tos_ptr (trk);
mu_locus_point_set_file (&loc->beg, trk->s_head->file_name);
mu_locus_point_set_file (&loc->end, trk->s_head->file_name);
loc->beg.mu_line =
trk->s_head->line + count_lines (trk, trk->s_head->idx) - 1;
ptr = &trk->cols[trk->tos];
loc->beg.mu_col = *ptr + 1;
while (leng--)
{
......@@ -211,16 +311,18 @@ mu_linetrack_advance (struct mu_linetrack *trk,
ptr = push (trk);
text++;
}
loc->end.mu_line =
trk->s_head->line + count_lines (trk, trk->s_head->idx) - 1;
if (*ptr)
{
loc->end.mu_line = trk->hline + trk->tos;
loc->end.mu_col = *ptr;
}
else
{
/* Text ends with a newline. Keep the previous line number. */
loc->end.mu_line = trk->hline + trk->tos - 1;
loc->end.mu_col = cols_peek (trk, trk->tos - 1) - 1;
loc->end.mu_line--;
loc->end.mu_col = trk->cols[trk_decr (trk, trk->tos)] - 1;
if (loc->end.mu_col + 1 == loc->beg.mu_col)
{
/* This happens if the previous line contained only newline. */
......@@ -232,21 +334,28 @@ mu_linetrack_advance (struct mu_linetrack *trk,
int
mu_linetrack_locus (struct mu_linetrack *trk, struct mu_locus_point *lp)
{
lp->mu_line = trk->hline + trk->tos;
return mu_locus_point_set_file (lp, trk->file_name);
int rc = mu_locus_point_set_file (lp, trk->s_head->file_name);
if (rc == 0)
{
lp->mu_line =
trk->s_head->line + count_lines (trk, trk->s_head->idx) - 1;
lp->mu_col = trk->cols[trk->tos];
}
return rc;
}
int
mu_linetrack_retreat (struct mu_linetrack *trk, size_t n)
{
struct mu_linetrack_stat st;
size_t nch;
mu_linetrack_stat (trk, &st);
if (n > st.n_chars)
if (count_chars (trk, trk->head, &nch))
return ERANGE;
if (n > nch)
return ERANGE;
else
{
unsigned *ptr = cols_tos_ptr (trk);
unsigned *ptr = &trk->cols[trk->tos];
while (n--)
{
if (*ptr == 0)
......
......@@ -54,11 +54,11 @@ liber:4.15-18: Naso
TRACKTEST([retreat],[],[3],
[
agnosco
\-4
#retreat 4
veteris
vestigia\n
flamme
\-8
#retreat 8
Publius
],
[liber:1.1-7: agnosco
......@@ -73,7 +73,7 @@ TRACKTEST([retreat over several lines],[],[4],
one\n
two\n
three
\-11
#retreat 11
four
],
[liber:1.1-3: one\n
......@@ -85,7 +85,7 @@ liber:1.3-6: four
TRACKTEST([retreat to the beginning],[],[4],
[one\n
two\n
\-8
#retreat 8
three
],
[liber:1.1-3: one\n
......@@ -96,7 +96,7 @@ liber:1.1-5: three
TRACKTEST([too big retreat],[],[2],
[one\n
two\n
\-10
#retreat 10
three
],
[liber:1.1-3: one\n
......@@ -106,4 +106,174 @@ liber:3.1-5: three
[linetrack: retreat count too big
])
TRACKTEST([origin 1],[],[10],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#stat
],
[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
n_files=3
n_lines=9
n_chars=40
])
TRACKTEST([origin 2],[],[8],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#stat
],
[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
n_files=3
n_lines=8
n_chars=36
])
TRACKTEST([origin 3],[],[7],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#stat
],
[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
n_files=3
n_lines=7
n_chars=32
])
TRACKTEST([origin 4],[],[6],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#stat
],
[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
n_files=2
n_lines=6
n_chars=26
])
TRACKTEST([retreat over origin],[],[9],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#retreat 17
nine
#stat
],[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
B:6.5-8: nine
n_files=2
n_lines=5
n_chars=27
])
TRACKTEST([retreat over two origins],[],[9],
[one\n
two\n
three\n
#origin B 5 0
four\n
five\n
#origin C 2 0
six\n
seven\n
eight\n
#retreat 32
nine
#stat
],[liber:1.1-3: one\n
liber:2.1-3: two\n
liber:3.1-5: three\n
B:5.1-4: four\n
B:6.1-4: five\n
C:2.1-3: six\n
C:3.1-5: seven\n
C:4.1-5: eight\n
liber:3.1-4: nine
n_files=1
n_lines=3
n_chars=12
])
TRACKTEST([rebase],[],[9],
[one
#rebase archivum 5 3
two
],
[liber:1.1-3: one
archivum:5.4-6: two
])
m4_popdef([TRACKTEST])
......
......@@ -2,6 +2,122 @@
#include <mailutils/locus.h>
/* Convert ARG, an unsigned decimal number, to unsigned and store the
   result in *RET.  Return 0 on success.  On failure, report the
   offending argument via mu_error, leave *RET unchanged and return -1.

   ARG is rejected if it does not begin with a digit (this covers the
   empty string and signed input, which strtoul would otherwise accept
   and silently negate), if it has trailing garbage, or if its value
   does not fit in unsigned. */
int
getnum (char const *arg, unsigned *ret)
{
  char *end;
  unsigned long x = 0;
  int ok = (*arg >= '0' && *arg <= '9');

  if (ok)
    {
      errno = 0;
      x = strtoul (arg, &end, 10);
      /* The last test catches values that fit in unsigned long but would
         be truncated when stored in unsigned. */
      ok = *end == 0 && errno != ERANGE && x == (unsigned) x;
    }

  if (!ok)
    {
      mu_error ("bad number: %s", arg);
      return -1;
    }
  *ret = (unsigned) x;
  return 0;
}
/* Command handler: retreat N.
   Parse argv[1] as a character count and retreat the tracker by that
   many characters.  A count that cannot be parsed is reported by getnum
   and otherwise ignored.  ERANGE from the tracker gets a dedicated
   message; any other failure is reported as a failed function call. */
static void
com_retreat (mu_linetrack_t trk, size_t argc, char **argv)
{
  unsigned count;
  int status;

  if (getnum (argv[1], &count) != 0)
    return;

  status = mu_linetrack_retreat (trk, count);
  if (status == 0)
    return;

  if (status == ERANGE)
    mu_error ("retreat count too big");
  else
    mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_retreat", argv[1], status);
}
/* Command handler: origin FILE LINE COL.
   Build a locus point from argv[1] (file name), argv[2] (line) and
   argv[3] (column) and pass it to mu_linetrack_origin.  If either
   number cannot be parsed, getnum reports it and the command is
   dropped.  A failure of mu_linetrack_origin is reported as a failed
   function call. */
static void
com_origin (mu_linetrack_t trk, size_t argc, char **argv)
{
  struct mu_locus_point pt;
  int status;

  pt.mu_file = argv[1];
  /* Short-circuit keeps the original order: the column is parsed only
     if the line number was valid. */
  if (getnum (argv[2], &pt.mu_line) || getnum (argv[3], &pt.mu_col))
    return;

  status = mu_linetrack_origin (trk, &pt);
  if (status != 0)
    mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_origin", NULL, status);
}
/* Command handler: rebase FILE LINE COL.
   Build a locus point from argv[1] (file name), argv[2] (line) and
   argv[3] (column) and pass it to mu_linetrack_rebase.  If either
   number cannot be parsed, getnum reports it and the command is
   dropped.  A failure of mu_linetrack_rebase is reported as a failed
   function call. */
static void
com_rebase (mu_linetrack_t trk, size_t argc, char **argv)
{
  struct mu_locus_point pt;
  int status;

  pt.mu_file = argv[1];
  /* Short-circuit keeps the original order: the column is parsed only
     if the line number was valid. */
  if (getnum (argv[2], &pt.mu_line) || getnum (argv[3], &pt.mu_col))
    return;

  status = mu_linetrack_rebase (trk, &pt);
  if (status != 0)
    mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_rebase", NULL, status);
}
/* Command handler: point.
   Query the tracker for its current locus point, store it as the
   beginning of a locus range and print the command name (argv[0]) to
   the standard output stream together with that range.  If the query
   fails, report it as a failed function call instead. */
static void
com_point (mu_linetrack_t trk, size_t argc, char **argv)
{
  struct mu_locus_range lr = MU_LOCUS_RANGE_INITIALIZER;
  int status = mu_linetrack_locus (trk, &lr.beg);

  if (status != 0)
    {
      mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_locus", NULL, status);
      return;
    }

  mu_stream_lprintf (mu_strout, &lr, "%s\n", argv[0]);
  mu_locus_range_deinit (&lr);
}
/* Command handler: bol.
   Print the result of mu_linetrack_at_bol for TRK as a decimal number
   followed by a newline. */
static void
com_bol_p (mu_linetrack_t trk, size_t argc, char **argv)
{
  int at_bol = mu_linetrack_at_bol (trk);

  mu_printf ("%d\n", at_bol);
}
/* Command handler: stat.
   Obtain the tracker statistics and print the number of files, lines
   and characters, one "name=value" pair per line.  If the statistics
   cannot be obtained, report it as a failed function call instead. */
static void
com_stat (mu_linetrack_t trk, size_t argc, char **argv)
{
  struct mu_linetrack_stat st;
  int status = mu_linetrack_stat (trk, &st);

  if (status != 0)
    {
      mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_stat", NULL, status);
      return;
    }

  mu_printf ("n_files=%zu\n", st.n_files);
  mu_printf ("n_lines=%zu\n", st.n_lines);
  mu_printf ("n_chars=%zu\n", st.n_chars);
}
/* Description of a command understood by the test driver. */
struct command
{
  char const *name;  /* Command name; only ever set from string literals,
                        hence const */
  size_t argc;       /* Exact number of words required, including the
                        command name itself */
  void (*fun) (mu_linetrack_t trk, size_t argc, char **argv);
                     /* Handler; receives the split command line */
};

/* Table of known commands, terminated by an entry with NULL name. */
static struct command comtab[] = {
  { "retreat", 2, com_retreat },
  { "origin",  4, com_origin },
  { "point",   1, com_point },
  { "rebase",  4, com_rebase },
  { "bol",     1, com_bol_p },
  { "stat",    1, com_stat },
  { NULL }
};
int
main (int argc, char **argv)
{
unsigned long max_lines;
......@@ -10,6 +126,9 @@ main (int argc, char **argv)
int rc;
char *buf = NULL;
size_t size, n;
struct mu_wordsplit ws;
int wsf = MU_WRDSF_NOVAR | MU_WRDSF_NOCMD
| MU_WRDSF_SHOWERR | MU_WRDSF_ENOMEMABRT;
mu_set_program_name (argv[0]);
mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);
......@@ -29,35 +148,37 @@ main (int argc, char **argv)
MU_ASSERT (mu_linetrack_create (&trk, argv[1], max_lines));
while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0)
{
struct mu_locus_range lr = MU_LOCUS_RANGE_INITIALIZER;
char *tok;
n = mu_rtrim_class (buf, MU_CTYPE_SPACE);
if (n == 0)
continue;
if (buf[0] == '\\' && buf[1] == '-')
if (buf[0] == '#')
{
long x = strtol (buf+2, &end, 10);
if (*end || x == 0)
{
mu_error ("bad number");
continue;
}
rc = mu_linetrack_retreat (trk, x);
if (rc == ERANGE)
mu_error ("retreat count too big");
else if (rc)
mu_diag_funcall (MU_DIAG_ERROR, "mu_linetrack_retreat", buf+2,
rc);
struct command *com;
mu_wordsplit (buf+1, &ws, wsf);
wsf |= MU_WRDSF_REUSE;
for (com = comtab; com->name; com++)
if (strcmp (com->name, ws.ws_wordv[0]) == 0
&& com->argc == ws.ws_wordc)
break;
if (com->name)
com->fun (trk, ws.ws_wordc, ws.ws_wordv);
else
mu_error ("unrecognized command");
}
else
{
struct mu_locus_range lr = MU_LOCUS_RANGE_INITIALIZER;
mu_c_str_unescape (buf, "\\\n", "\\n", &tok);
mu_linetrack_advance (trk, &lr, tok, strlen (tok));
free (tok);
mu_stream_lprintf (mu_strout, &lr, "%s\n", buf);
mu_locus_range_deinit (&lr);
}
mu_locus_range_deinit (&lr);
}
mu_linetrack_destroy (&trk);
return 0;
......
......@@ -315,6 +315,7 @@ make_node (enum node_type type, struct mu_locus_range const *loc)
{
struct node *p = mimetypes_malloc (sizeof *p);
p->type = type;
mu_locus_range_init (&p->loc);
mu_locus_range_copy (&p->loc, loc);
return p;
}
......