Hi,
Syntax:
a) (a+!b).(c+(!q))
b) (!!!!q+r).!w
For the simple syntax rules I have, code was getting too complicated
using recursive descent parsing. So, I switched algorithm to use an
old algorithm I used some 20 years ago to implement algebraic
expression parsing. In this algorithm tokens are checked for the
presence of acceptable tokens that precede and succeed them. If the
presence or absence of some nearby tokens implies a different context,
a deeper checking is done.
This time the code is very simple compared to what I was getting using
recursive descent parsing. Yet, the code seems to do its job.
I am attaching the source code for the parser for comments in case
readers want to make some comments.
Edward
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#define INVALID 0
#define VALID 1
typedef char* pchar;
pchar* tokens; /* global variable to hold token list */
int token_count = 0; /* global variable to keep track of token count */
int error_pos = 0; /* global variable to hold error position */
#define token_delta 50
typedef struct {
int start_bracket;
int stop_bracket;
int consecutive_brackets;
} t_bracket_values;
void tokenise(char* expression) {
char ch; /* character at iterator's position */
char* atoken; /* token to be added to token list */
char* start = NULL; /* holds a char* to point to start of multicharacter token */
char* string_token; /* holds multicharacter token */
int char_count = 0; /* keeps track of number of characters in multicharacter token */
int while_counter = 0;
/* expr_len_and_null holds the total length of input string including null terminator */
int expr_len_and_null = strlen(expression) + 1;
while (while_counter++ <= expr_len_and_null) {
ch = *expression++;
switch (ch) {
case '(':
case ')':
case '.':
case '+':
case '!':
case '\0':
if (start && char_count > 0) {
string_token = malloc((char_count + 1)*sizeof(char));
strncpy(string_token, start, char_count);
tokens[token_count++] = string_token;
start = NULL;
char_count = 0;
}
if (ch == '\0') return;
atoken = malloc(2*sizeof(char));
atoken[0] = ch;
atoken[1] = '\0';
tokens[token_count++] = atoken;
break;
case '\n':
break;
default:
if (start == NULL)
start = expression - 1;
char_count++;
}
}
}
void free_memory() {
int k = token_count;
while (--k >= 0) free(tokens[k]);
free(tokens);
}
/****************PARSER*****************/
static char* getToken(pchar* tokens, int i) {
if (i < token_count)
return tokens[i];
else return NULL;
}
#define alpha "abcdefghijklmnopqrstuvwxyz"
/* succeeds ". + )" end */
#define alpha_cl_br ")"alpha
/* succeeds "! alpha " */
#define oper_op_br "!+.("
/* at the beginning of expression */
#define alpha_excl_op_br "!("alpha
static int isOperand(pchar* tokens, int i) {
char* atoken = getToken(tokens, i);
if (atoken == NULL)
return 0;
else {
if (strlen(atoken) > 1)
return 1;
else return (strchr(alpha, atoken[0]) != NULL);
}
}
int getErrorPosn(pchar* tokens) {
int i = -1;
char prev_ch = '\0', ch;
int error_pos1 = -1, error_pos2 = -1;
char* atoken;
if (token_count == 1) {
if (!isOperand(tokens, 0))
i = 0;
error_pos = i;
return error_pos;
}
while (++i < token_count) {
atoken = getToken(tokens, i);
ch = atoken[0];
if (i > 0 && i < token_count - 1) {
if ( (ch == '+' || ch == '.' || ch == ')') &&
(prev_ch == ')' || isOperand(tokens, i - 1)) )
{
prev_ch = ch;
continue;
}
if ( (ch == '!' || ch == '(' || isOperand(tokens, i)) &&
strchr(oper_op_br, prev_ch) )
{
prev_ch = ch;
continue;
}
}
prev_ch = ch;
/* at the end of token list */
if ( (i == token_count - 1) && (ch == ')' || isOperand(tokens, i)) )
continue;
/* at the beginning of token list */
if ( (i == 0) && (ch == '!' || ch == '(' || isOperand(tokens, i)) )
continue;
error_pos1 = i;
break;
}
int brackets = 0;
for (i = 0; i < token_count; i++) {
atoken = getToken(tokens, i);
ch = atoken[0];
if (ch == '(') brackets++;
else if (ch == ')')
brackets--;
if (brackets < 0) {
error_pos2 = i;
break;
}
}
if (brackets > 0)
error_pos2 = i;
/* choose the earlier error for output */
if (error_pos1 > error_pos2)
error_pos = error_pos1;
else error_pos = error_pos2;
return error_pos;
}
/***************************************/
int main() {
extern int error_pos;
char input[101];
tokens = malloc(token_delta*sizeof(char*));
fgets(input, 100, stdin);
tokenise(input);
int k = -1;
/* print the tokens one per line */
printf("token count is %d\n", token_count);
while(++k < token_count)
printf("%s\n", tokens[k]);
getErrorPosn(tokens);
if (error_pos > -1)
printf("Error found at position %d\n", error_pos + 1);
else printf("No error found.\n");
free_memory();
return 0;
}