Skip to content
Snippets Groups Projects
Commit ab1cecb4 authored by Herbert Xu
Browse files

parser: Add syntax stack for recursive parsing


Without a stack of syntaxes we cannot correctly handle these two cases
together:

        "${a#'$$'}"
        "${a#"${b-'$$'}"}"

A recursive parser also helps in some other corner cases such
as nested arithmetic expansion with parentheses.

This patch adds a syntax stack allocated from the stack using
alloca.  As a side-effect this allows us to remove the naked
backslashes for patterns within double-quotes, which means that
EXP_QPAT also has to go.

This patch also removes any backslashes that precede right
braces when they are present within a parameter expansion context,
and backslashes that precede double quotes within inner double
quotes inside a parameter expansion in a here-document context.

The idea of a recursive parser is based on a patch by Harald van
Dijk.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 6bbc71d8
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -85,7 +85,7 @@
#define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */
 
/* Add CTLESC when necessary. */
#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT)
#define QUOTES_ESC (EXP_FULL | EXP_CASE)
/* Do not skip NUL characters. */
#define QUOTES_KEEPNUL EXP_TILDE
 
Loading
Loading
@@ -335,16 +335,6 @@ addquote:
case CTLESC:
startloc++;
length++;
/*
* Quoted parameter expansion pattern: remove quote
* unless inside inner quotes or we have a literal
* backslash.
*/
if (((flag | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
EXP_QPAT && *p != '\\')
break;
goto addquote;
case CTLVAR:
p = evalvar(p, flag | inquotes);
Loading
Loading
@@ -653,8 +643,7 @@ subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varfla
char *(*scan)(char *, char *, char *, char *, int , int);
 
argstr(p, EXP_TILDE | (subtype != VSASSIGN && subtype != VSQUESTION ?
(flag & (EXP_QUOTED | EXP_QPAT) ?
EXP_QPAT : EXP_CASE) : 0));
EXP_CASE : 0));
STPUTC('\0', expdest);
argbackq = saveargbackq;
startp = stackblock() + startloc;
Loading
Loading
@@ -1646,7 +1635,6 @@ char *
_rmescapes(char *str, int flag)
{
char *p, *q, *r;
unsigned inquotes;
int notescaped;
int globbing;
 
Loading
Loading
@@ -1676,24 +1664,23 @@ _rmescapes(char *str, int flag)
q = mempcpy(q, str, len);
}
}
inquotes = 0;
globbing = flag & RMESCAPE_GLOB;
notescaped = globbing;
while (*p) {
if (*p == (char)CTLQUOTEMARK) {
inquotes = ~inquotes;
p++;
notescaped = globbing;
continue;
}
if (*p == '\\') {
/* naked back slash */
notescaped = 0;
goto copy;
}
if (*p == (char)CTLESC) {
p++;
if (notescaped)
*q++ = '\\';
} else if (*p == '\\' && !inquotes) {
/* naked back slash */
notescaped = 0;
goto copy;
}
notescaped = globbing;
copy:
Loading
Loading
Loading
Loading
@@ -55,7 +55,6 @@ struct arglist {
#define EXP_VARTILDE 0x4 /* expand tildes in an assignment */
#define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */
#define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */
#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */
#define EXP_VARTILDE2 0x40 /* expand tildes after colons only */
#define EXP_WORD 0x80 /* expand word in parameter expansion */
#define EXP_QUOTED 0x100 /* expand word in double quotes */
Loading
Loading
Loading
Loading
@@ -80,6 +80,18 @@ struct heredoc {
int striptabs; /* if set, strip leading tabs */
};
 
/*
 * One level of the parser's syntax stack.  readtoken1() keeps a pointer to
 * the current (innermost) level and pushes a new level when it enters an
 * arithmetic expansion or a parameter expansion whose word must be parsed
 * under a different syntax table than the enclosing text.
 */
struct synstack {
/* syntax table used to classify input characters at this level */
const char *syntax;
/*
 * Entry most recently pushed on top of this one (set by synstack_push);
 * callers hand it back to synstack_push as storage instead of allocating
 * a fresh entry when it is non-NULL.
 */
struct synstack *prev;
/* enclosing level, i.e. the entry restored by synstack_pop */
struct synstack *next;
/*
 * toggled on each double quote seen while varnest is non-zero; a '}' only
 * closes the expansion while this is zero
 */
int innerdq;
/*
 * non-zero when this level was pushed for a parameter expansion, so the
 * level is popped once varnest drops back to zero
 */
int varpushed;
/* non-zero while this level is being parsed as double-quoted text */
int dblquote;
int varnest; /* levels of variables expansion */
int parenlevel; /* levels of parens in arithmetic */
int dqvarnest; /* levels of variables expansion within double quotes */
};
 
 
struct heredoc *heredoclist; /* list of here documents to read */
Loading
Loading
@@ -841,6 +853,21 @@ static int pgetc_eatbnl(void)
return c;
}
 
static void synstack_push(struct synstack **stack, struct synstack *next,
const char *syntax)
{
memset(next, 0, sizeof(*next));
next->syntax = syntax;
next->next = *stack;
(*stack)->prev = next;
*stack = next;
}
/*
 * Drop the innermost level of the syntax stack: *stack is replaced by the
 * level that enclosed it (its `next' link).  The popped entry itself is
 * left untouched.
 */
static void synstack_pop(struct synstack **stack)
{
	struct synstack *top = *stack;

	*stack = top->next;
}
 
 
/*
Loading
Loading
@@ -870,24 +897,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
size_t len;
struct nodelist *bqlist;
int quotef;
int dblquote;
int varnest; /* levels of variables expansion */
int arinest; /* levels of arithmetic expansion */
int parenlevel; /* levels of parens in arithmetic */
int dqvarnest; /* levels of variables expansion within double quotes */
int oldstyle;
/* syntax before arithmetic */
char const *uninitialized_var(prevsyntax);
/* syntax stack */
struct synstack synbase = { .syntax = syntax };
struct synstack *synstack = &synbase;
 
dblquote = 0;
if (syntax == DQSYNTAX)
dblquote = 1;
synstack->dblquote = 1;
quotef = 0;
bqlist = NULL;
varnest = 0;
arinest = 0;
parenlevel = 0;
dqvarnest = 0;
 
STARTSTACKSTR(out);
loop: { /* for each line, until end of word */
Loading
Loading
@@ -895,7 +913,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
if (c == '\034' && doprompt
&& attyset() && ! equal(termval(), "emacs")) {
attyline();
if (syntax == BASESYNTAX)
if (synstack->syntax == BASESYNTAX)
return readtoken();
c = syntax == SQSYNTAX ? pgetc() : pgetc_eatbnl();
goto loop;
Loading
Loading
@@ -904,9 +922,9 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
switch(syntax[c]) {
switch(synstack->syntax[c]) {
case CNL: /* '\n' */
if (syntax == BASESYNTAX)
if (synstack->syntax == BASESYNTAX)
goto endword; /* exit outer loop */
USTPUTC(c, out);
nlprompt();
Loading
Loading
@@ -916,7 +934,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
USTPUTC(c, out);
break;
case CCTL:
if (eofmark == NULL || dblquote)
if (eofmark == NULL || synstack->dblquote)
USTPUTC(CTLESC, out);
USTPUTC(c, out);
break;
Loading
Loading
@@ -929,13 +947,18 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
pungetc();
} else {
if (
dblquote &&
synstack->dblquote &&
c != '\\' && c != '`' &&
c != '$' && (
c != '"' ||
eofmark != NULL
(eofmark != NULL &&
!synstack->varnest)
) && (
c != '}' ||
!synstack->varnest
)
) {
USTPUTC(CTLESC, out);
USTPUTC('\\', out);
}
USTPUTC(CTLESC, out);
Loading
Loading
@@ -944,55 +967,64 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
}
break;
case CSQUOTE:
syntax = SQSYNTAX;
synstack->syntax = SQSYNTAX;
quotemark:
if (eofmark == NULL) {
USTPUTC(CTLQUOTEMARK, out);
}
break;
case CDQUOTE:
syntax = DQSYNTAX;
dblquote = 1;
synstack->syntax = DQSYNTAX;
synstack->dblquote = 1;
toggledq:
if (synstack->varnest)
synstack->innerdq ^= 1;
goto quotemark;
case CENDQUOTE:
if (eofmark && !varnest)
if (eofmark && !synstack->varnest) {
USTPUTC(c, out);
else {
if (dqvarnest == 0) {
syntax = BASESYNTAX;
dblquote = 0;
}
quotef++;
goto quotemark;
break;
}
break;
if (synstack->dqvarnest == 0) {
synstack->syntax = BASESYNTAX;
synstack->dblquote = 0;
}
quotef++;
if (c == '"')
goto toggledq;
goto quotemark;
case CVAR: /* '$' */
PARSESUB(); /* parse substitution */
break;
case CENDVAR: /* '}' */
if (varnest > 0) {
varnest--;
if (dqvarnest > 0) {
dqvarnest--;
}
if (!synstack->innerdq &&
synstack->varnest > 0) {
if (!--synstack->varnest &&
synstack->varpushed)
synstack_pop(&synstack);
else if (synstack->dqvarnest > 0)
synstack->dqvarnest--;
USTPUTC(CTLENDVAR, out);
} else {
USTPUTC(c, out);
}
break;
case CLP: /* '(' in arithmetic */
parenlevel++;
synstack->parenlevel++;
USTPUTC(c, out);
break;
case CRP: /* ')' in arithmetic */
if (parenlevel > 0) {
if (synstack->parenlevel > 0) {
USTPUTC(c, out);
--parenlevel;
--synstack->parenlevel;
} else {
if (pgetc_eatbnl() == ')') {
USTPUTC(CTLENDARI, out);
if (!--arinest)
syntax = prevsyntax;
synstack_pop(&synstack);
} else {
/*
* unbalanced parens
Loading
Loading
@@ -1011,7 +1043,7 @@ quotemark:
case CIGN:
break;
default:
if (varnest == 0)
if (synstack->varnest == 0)
goto endword; /* exit outer loop */
if (c != PEOA) {
USTPUTC(c, out);
Loading
Loading
@@ -1021,11 +1053,11 @@ quotemark:
}
}
endword:
if (syntax == ARISYNTAX)
if (synstack->syntax == ARISYNTAX)
synerror("Missing '))'");
if (syntax != BASESYNTAX && eofmark == NULL)
if (synstack->syntax != BASESYNTAX && eofmark == NULL)
synerror("Unterminated quoted string");
if (varnest != 0) {
if (synstack->varnest != 0) {
/* { */
synerror("Missing '}'");
}
Loading
Loading
@@ -1202,6 +1234,8 @@ parsesub: {
PARSEBACKQNEW();
}
} else {
const char *newsyn = synstack->syntax;
USTPUTC(CTLVAR, out);
typeloc = out - (char *)stackblock();
STADJUST(1, out);
Loading
Loading
@@ -1252,6 +1286,8 @@ varname:
}
 
if (subtype == 0) {
int cc = c;
switch (c) {
case ':':
subtype = VSNUL;
Loading
Loading
@@ -1265,27 +1301,40 @@ varname:
break;
case '%':
case '#':
{
int cc = c;
subtype = c == '#' ? VSTRIMLEFT :
VSTRIMRIGHT;
c = pgetc_eatbnl();
if (c == cc)
subtype++;
else
pungetc();
break;
}
subtype = c == '#' ? VSTRIMLEFT :
VSTRIMRIGHT;
c = pgetc_eatbnl();
if (c == cc)
subtype++;
else
pungetc();
newsyn = BASESYNTAX;
break;
}
} else {
badsub:
pungetc();
}
if (newsyn == ARISYNTAX && subtype > VSNORMAL)
newsyn = DQSYNTAX;
if (newsyn != synstack->syntax) {
synstack_push(&synstack,
synstack->prev ?:
alloca(sizeof(*synstack)),
newsyn);
synstack->varpushed++;
synstack->dblquote = newsyn != BASESYNTAX;
}
*((char *)stackblock() + typeloc) = subtype;
if (subtype != VSNORMAL) {
varnest++;
if (dblquote)
dqvarnest++;
synstack->varnest++;
if (synstack->dblquote)
synstack->dqvarnest++;
}
STPUTC('=', out);
}
Loading
Loading
@@ -1335,7 +1384,7 @@ parsebackq: {
case '\\':
pc = pgetc_eatbnl();
if (pc != '\\' && pc != '`' && pc != '$'
&& (!dblquote || pc != '"'))
&& (!synstack->dblquote || pc != '"'))
STPUTC('\\', pout);
if (pc > PEOA) {
break;
Loading
Loading
@@ -1411,10 +1460,10 @@ done:
*/
parsearith: {
 
if (++arinest == 1) {
prevsyntax = syntax;
syntax = ARISYNTAX;
}
synstack_push(&synstack,
synstack->prev ?: alloca(sizeof(*synstack)),
ARISYNTAX);
synstack->dblquote = 1;
USTPUTC(CTLARI, out);
goto parsearith_return;
}
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment