Hi,
Yes, I tried to use recursive descent parsing techniques but ended up
with too many complications that are not justified for the simple
syntax rules that I have.
In short I am trying to parse Boolean expressions like the following
to verify their syntax.
a) (a+!b).(!!c+d)
b) ((w+!!!c).(!e+((!!!q))))
To avoid having overly complicated code for a seemingly simple task, I
switched to use an old algorithm I devised some 20 years ago.
In my algorithm, tokens are checked for acceptable preceding and
succeeding tokens. In case the presence or absence of a nearby token
changes the rules, deeper checking is employed. The resultant code is
simple and seems to work as intended.
I have tried to send this mail to this mailing thread several times
unsuccessfully. For some reason gmail is sending it to the wrong
thread.
The code follows from here:
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#define INVALID 0
#define VALID 1
typedef char* pchar;
pchar* tokens; /* global variable to hold token list */
int token_count = 0; /* global variable to keep track of token count */
int error_pos = 0; /* global variable to hold error position */
#define token_delta 50
typedef struct {
int start_bracket;
int stop_bracket;
int consecutive_brackets;
} t_bracket_values;
void tokenise(char* expression) {
char ch; /* character at iterator's position */
char* atoken; /* token to be added to token list */
char* start = NULL; /* holds a char* to point to start of
multicharacter token */
char* string_token; /* holds multicharacter token */
int char_count = 0; /* keeps track of number of characters in
multicharacter token */
int while_counter = 0;
/* expr_len_and_null holds the total length of input string
including null terminator */
int expr_len_and_null = strlen(expression) + 1;
while (while_counter++ <= expr_len_and_null) {
ch = *expression++;
switch (ch) {
case '(':
case ')':
case '.':
case '+':
case '!':
case '\0':
if (start && char_count > 0) {
string_token = malloc((char_count + 1)*sizeof(char));
strncpy(string_token, start, char_count);
tokens[token_count++] = string_token;
start = NULL;
char_count = 0;
}
if (ch == '\0') return;
atoken = malloc(2*sizeof(char));
atoken[0] = ch;
atoken[1] = '\0';
tokens[token_count++] = atoken;
break;
case '\n':
break;
default:
if (start == NULL)
start = expression - 1;
char_count++;
}
}
}
void free_memory() {
int k = token_count;
while (--k >= 0) free(tokens[k]);
free(tokens);
}
/****************PARSER*****************/
static char* getToken(pchar* tokens, int i) {
if (i < token_count)
return tokens[i];
else return NULL;
}
#define alpha "abcdefghijklmnopqrstuvwxyz"
/* succeeds ". + )" end */
#define alpha_cl_br ")"alpha
/* succeeds "! alpha " */
#define oper_op_br "!+.("
/* at the beginning of expression */
#define alpha_excl_op_br "!("alpha
static int isOperand(pchar* tokens, int i) {
char* atoken = getToken(tokens, i);
if (atoken == NULL)
return 0;
else {
if (strlen(atoken) > 1)
return 1;
else return (strchr(alpha, atoken[0]) != NULL);
}
}
int getErrorPosn(pchar* tokens) {
int i = -1;
char prev_ch = '\0', ch;
int error_pos1 = -1, error_pos2 = -1;
char* atoken;
if (token_count == 1) {
if (!isOperand(tokens, 0))
i = 0;
error_pos = i;
return error_pos;
}
while (++i < token_count) {
atoken = getToken(tokens, i);
ch = atoken[0];
if (i > 0 && i < token_count - 1) {
if ( (ch == '+' || ch == '.' || ch == ')') &&
(prev_ch == ')' || isOperand(tokens, i - 1)) )
{
prev_ch = ch;
continue;
}
if ( (ch == '!' || ch == '(' || isOperand(tokens, i)) &&
strchr(oper_op_br, prev_ch) )
{
prev_ch = ch;
continue;
}
}
prev_ch = ch;
/* at the end of token list */
if ( (i == token_count - 1) && (ch == ')' || isOperand(tokens, i)) )
continue;
/* at the beginning of token list */
if ( (i == 0) && (ch == '!' || ch == '(' || isOperand(tokens, i)) )
continue;
error_pos1 = i;
break;
}
int brackets = 0;
for (i = 0; i < token_count; i++) {
atoken = getToken(tokens, i);
ch = atoken[0];
if (ch == '(') brackets++;
else if (ch == ')')
brackets--;
if (brackets < 0) {
error_pos2 = i;
break;
}
}
if (brackets > 0)
error_pos2 = i;
/* choose the earlier error for output */
if (error_pos1 > error_pos2)
error_pos = error_pos1;
else error_pos = error_pos2;
return error_pos;
}
/***************************************/
int main() {
extern int error_pos;
char input[101];
tokens = malloc(token_delta*sizeof(char*));
fgets(input, 100, stdin);
tokenise(input);
int k = -1;
/* print the tokens one per line */
printf("token count is %d\n", token_count);
while(++k < token_count)
printf("%s\n", tokens[k]);
getErrorPosn(tokens);
if (error_pos > -1)
printf("Error found at position %d\n", error_pos + 1);
else printf("No error found.\n");
free_memory();
return 0;
}
Edward