:: Re: [DNG] Studying C as told. (For …
Top Page
Delete this message
Reply to this message
Author: Edward Bartolo
Date:  
To: KatolaZ
CC: dng
Subject: Re: [DNG] Studying C as told. (For help)
Hi,

Yes, I tried to use recursive descent parsing techniques but ended up
with too many complications that are not justified for the simple
syntax rules that I have.

In short I am trying to parse Boolean expressions like the following
to verify their syntax.
a) (a+!b).(!!c+d)
b) ((w+!!!c).(!e+((!!!q))))

To avoid having overly complicated code for a seemingly simple task, I
switched to use an old algorithm I devised some 20 years ago.

In my algorithm, tokens are checked for acceptable preceding and
succeeding tokens. In case the presence or absence of a nearby token
changes the rules, deeper checking is employed. The resultant code is
simple and seems to work as intended.

I have tried to send this mail to this mailing thread several times
unsuccessfully. For some reason gmail is sending it to the wrong
thread.

The code follows from here:

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#define INVALID 0
#define VALID 1

typedef char* pchar;
pchar* tokens;             /* global variable to hold token list */
int token_count = 0;       /* global variable to keep track of token count */
int error_pos = 0;         /* global variable to hold error position */


#define token_delta 50

typedef struct {
int start_bracket;
int stop_bracket;
int consecutive_brackets;
} t_bracket_values;

void tokenise(char* expression) {
  char ch;              /* character at iterator's position */
  char* atoken;         /* token to be added to token list  */
  char* start = NULL;   /* holds a char* to point to start of
multicharacter token */
  char* string_token;   /* holds multicharacter token */
  int char_count = 0;   /* keeps track of number of characters in
multicharacter token */


  int while_counter = 0;
  /* expr_len_and_null holds the total length of input string
including null terminator */
  int expr_len_and_null = strlen(expression) + 1;
  while (while_counter++ <= expr_len_and_null) {
    ch = *expression++;
    switch (ch) {
      case '(':
      case ')':
      case '.':
      case '+':
      case '!':
      case '\0':
        if (start && char_count > 0) {
          string_token = malloc((char_count + 1)*sizeof(char));
          strncpy(string_token, start, char_count);
          tokens[token_count++] = string_token;


          start = NULL;
          char_count = 0;
        }


        if (ch == '\0') return;
        atoken = malloc(2*sizeof(char));
        atoken[0] = ch;
        atoken[1] = '\0';
        tokens[token_count++] = atoken;
        break;


      case '\n':
        break;


      default:
        if (start == NULL)
          start = expression - 1;
        char_count++;
    }
  }
}


void free_memory() {
int k = token_count;

while (--k >= 0) free(tokens[k]);
free(tokens);
}

/****************PARSER*****************/

static char* getToken(pchar* tokens, int i) {
  if (i < token_count)
    return tokens[i];
    else return NULL;
}
#define alpha              "abcdefghijklmnopqrstuvwxyz"


/* succeeds ". + )" end */
#define alpha_cl_br        ")"alpha


/* succeeds "! alpha " */
#define oper_op_br         "!+.("


/* at the beginning of expression */
#define alpha_excl_op_br "!("alpha

static int isOperand(pchar* tokens, int i) {
char* atoken = getToken(tokens, i);

  if (atoken == NULL)
    return 0;
  else {
    if (strlen(atoken) > 1)
      return 1;
      else return (strchr(alpha, atoken[0]) != NULL);
  }
}


int getErrorPosn(pchar* tokens) {
int i = -1;
char prev_ch = '\0', ch;
int error_pos1 = -1, error_pos2 = -1;
char* atoken;

  if (token_count == 1) {
    if (!isOperand(tokens, 0))
      i = 0;


    error_pos = i;
    return error_pos;
  }


  while (++i < token_count) {
    atoken = getToken(tokens, i);
    ch = atoken[0];


    if (i > 0 && i < token_count - 1) {
      if ( (ch == '+' || ch == '.' || ch == ')') &&
           (prev_ch == ')' ||  isOperand(tokens, i - 1))  )
      {
        prev_ch = ch;
        continue;
      }


      if ( (ch == '!' || ch == '(' || isOperand(tokens, i)) &&
           strchr(oper_op_br, prev_ch) )
      {
        prev_ch = ch;
        continue;
      }
    }
    prev_ch = ch;


    /* at the end of token list */
    if ( (i == token_count - 1) && (ch == ')' || isOperand(tokens, i)) )
      continue;
    /* at the beginning of token list */
    if ( (i == 0) && (ch == '!' || ch == '(' || isOperand(tokens, i)) )
      continue;


    error_pos1 = i;
    break;
  }


  int brackets = 0;
  for (i = 0; i < token_count; i++) {
    atoken = getToken(tokens, i);
    ch = atoken[0];


    if (ch == '(') brackets++;
      else if (ch == ')')
        brackets--;


    if (brackets < 0) {
      error_pos2 = i;
      break;
    }
  }


  if (brackets > 0)
    error_pos2 = i;


  /* choose the earlier error for output */
  if (error_pos1 > error_pos2)
    error_pos = error_pos1;
    else error_pos = error_pos2;


return error_pos;
}


/***************************************/

int main() {
extern int error_pos;

char input[101];

tokens = malloc(token_delta*sizeof(char*));
fgets(input, 100, stdin);
tokenise(input);

  int k = -1;
  /* print the tokens one per line */
  printf("token count is %d\n", token_count);
  while(++k < token_count)
    printf("%s\n", tokens[k]);


  getErrorPosn(tokens);
  if (error_pos > -1)
    printf("Error found at position %d\n", error_pos + 1);
    else printf("No error found.\n");


free_memory();

return 0;
}

Edward