/*---------------------------------------------------------------------- File : in2post.c Contents: convert an infix expression in integer numbers to a postfix expression in non-negative integer numbers Author : Christian Borgelt History : 21.05.1998 file created 22.05.1998 comments added ----------------------------------------------------------------------*/ #include #include #include #include "stack.h" #ifndef VERSION #define VERSION 2 /* default: compile second version */ #endif /*---------------------------------------------------------------------- Preprocessor Definitions ----------------------------------------------------------------------*/ /* --- token types --- */ #define T_END 256 /* end of input */ #define T_NUM 257 /* a number */ #define SIGN 0x0200 /* sign marker */ /* --- error codes --- */ #define OK 0 /* no error */ #define E_NONE 0 /* no error */ #define E_NOMEM (-1) /* not enough memory */ #define E_ILLCHAR (-2) /* illegal character */ #define E_SYNTAX (-3) /* syntax error */ #define E_UNKNOWN (-4) /* unknown error */ /*---------------------------------------------------------------------- Constants ----------------------------------------------------------------------*/ const char *errmsgs[] = { /* error messages */ /* E_NONE */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_ILLCHAR -2 */ "illegal character in input\n", /* E_SYNTAX -3 */ "syntax error\n", /* E_UNKNOWN -4 */ "unknown error\n", }; /*---------------------------------------------------------------------- Global Variables ----------------------------------------------------------------------*/ char *prgname; /* program name for error messages */ STACK *stack; /* operator stack for conversion */ /*---------------------------------------------------------------------- Functions ----------------------------------------------------------------------*/ int get_token (int *value) { /* --- get a token */ int c; /* current character */ do { /* skip leading blanks, i.e. */ c = getchar(); /* spaces and tabulators */ } while ((c == ' ') || (c == '\t')); switch (c) { /* test the character read */ case EOF: case '\n': /* if at the end of a line, */ return T_END; /* return `end of input' */ case '+': case '-': case '*': case '/': case '(': case ')': /* if the character is an operator */ return c; /* or if it is a parenthesis, */ } /* return it */ if ((c < '0') || (c > '9')) /* if the character is not a digit, */ return E_ILLCHAR; /* it is illegal, so abort */ ungetc(c, stdin); /* put back the digit read */ scanf("%i", value); /* and read the number */ return T_NUM; /* return `number' (token type) */ } /* get_token() */ /*---------------------------------------------------------------------- The above function converts a character stream (read from stdin) into a token stream and thus simplifies the task of the function `in2post' considerably. In addition, the function removes any blank characters that may be present in between the tokens. A `token' is a sequence of characters that have to be treated as one input object. For example the sequence of digits 123 has to be treated as one object, since it represents the number onehundred and twenty-three. In a C program a variable name, which can consist of several letters and/or digits, has to be treated as a single object. Of course, a token need not consist of several characters. Some tokens consist only of one character, for example the operator '+'. A token stream is much easier to process than a character stream, since in it the characters that have to be treated together are already grouped and hence the function processing it need not bother about the grouping task. The above function proceeds as follows. First it repeatedly reads a character until the character is not a space or a tabulator, thus skipping any blanks that may be present in front of the next token. In case the character read is a newline or the end of the file has been reached (indicated by the special character EOF), it returns the special token T_END to indicate that the end of the input has been reached. In case the character is an operator or a parenthesis it simply returns the character code of the corresponding character. Then it checks whether the character is a digit, since digits are the only other allowed characters apart from operators and parenthesis. If it is no digit, it is clearly an illegal character, so an error code is returned. Otherwise the character is returned to the input stream in order to be able to read the number with the `scanf' function. To indicate that a number has been read it returns the special token T_NUM. Note that the special tokens T_END and T_NUM are defined as 256 and 257, respectively, in order to avoid a conflict with any one character token (characters are represented by the numbers 0 to 255). ----------------------------------------------------------------------*/ #if (VERSION == 1) /* if to compile the first version */ int in2post (void) { /* --- convert infix to postfix */ int token; /* current token, e.g. T_NUM */ int value; /* value of the current token */ int op = 0; /* whether an operator must follow */ int t; /* temporary buffer for an operator */ while (1) { /* conversion loop */ token = get_token(&value); /* get the next token and its value */ switch (token) { /* evaluate the token read */ case T_NUM: /* if the next token is a number, */ if (op) return E_SYNTAX;/* check the operator flag, */ op = 1; /* then set it */ printf("%i ", value); /* print the number and */ continue; /* continue with the next token */ case '(': /* if an opening parenthesis follows, */ if (op) return E_SYNTAX;/* check the operator flag */ break; /* push it onto the stack */ case ')': /* if a closing parenthesis follows */ if (!op) return E_SYNTAX; /* check the operator flag */ while (1) { /* print operators down to a '(' */ if (stk_empty(stack)) /* if the stack gets empty, */ return E_SYNTAX; /* a '(' is missing */ t = (int)stk_pop(stack, 0); if (t == '(') break; /* stop at the '(' and print all */ printf("%c ", t & ~SIGN); /* operators preceding it */ } continue; /* continue with the next token */ case '*': case '/': /* if multiplication or division */ if (!op) return E_SYNTAX; /* check the operator flag */ while (!stk_empty(stack)) { /* reduce the stack */ t = (int)stk_pop(stack, 1); if ((t != '*') /* if the operator on the stack */ && (t != '/') /* is not of the same type */ && !(t & SIGN)) /* as the token and no sign, */ break; /* abort the reduction loop */ printf("%c ", (int)stk_pop(stack, 0) & ~SIGN); } /* otherwise print the operator */ op = 0; break; /* clear the operator flag */ case '+': case '-': /* if addition or subtraction */ if (!op) { /* if the operator is unary, */ printf("0 "); /* insert a zero to make it binary */ token |= SIGN; break; /* and mark the token in order to */ } /* make it recognizable as a sign */ while (!stk_empty(stack) /* reduce the stack */ && ((int)stk_pop(stack, 1) != '(')) printf("%c ", (int)stk_pop(stack, 0) & ~SIGN); op = 0; /* print operators from the stack */ break; /* and clear the operator flag */ case T_END: /* if at the end of the input */ if (!op) return E_SYNTAX; /* check the operator flag */ while (!stk_empty(stack)) { /* empty the operator stack */ t = (int)stk_pop(stack, 0); if (t == '(') /* if there is a '(' on the */ return E_SYNTAX; /* stack, a ')' is missing */ printf("%c ", t & ~SIGN); } return 0; /* otherwise print the operator */ default : /* if the token is unknown */ return token; /* or an error code, return it */ } /* switch (token) */ if (stk_push(stack, (void*)token) != 0) return E_NOMEM; /* push the operator onto the stack */ } /* while (1) */ } /* in2post() */ #endif /* #if (VERSION == 1) */ /*---------------------------------------------------------------------- The above function converts an arithmetic expression in infix notation, which it reads using the `get_token' function, into an arithmetic expression in postfix notation. It does so using an operator stack and the following rules: 1. If the next token is a number, print it. 2. If the next token is an opening parenthesis, push it onto the stack. 3. If the next token is a closing parenthesis, pop and print all operators on the stack down to the corresponding opening parenthesis. 4. If the next token is a multiplicative operator ('*' or '/'), inspect the top element on the stack. If it is also a multiplicative operator print the top stack element and push the new operator onto the stack. 5. If the next token is an additive operator ('+' or '-'), pop and print all operators on the stack down to an opening parenthesis or until the stack is empty. Then push the new operator onto the stack. 6. At the end of the input pop and print all operators on the stack. To ensure that the expression read is well-formed, a flag `op' is used to check that numbers and operators alternate with each other. Note that a unary + or - is treated by adding a zero to the output to make it binary. I.e. 'x*+y' and 'x*-y' are replaced by 'x 0 y + *' and 'x 0 y - *', respectively. Unfortunately, in order to deal with this case correctly, matters have to be complicated by adding a SIGN marker to a unary + or -. This is necessary, because otherwise the unary + or minus would not be emitted directly, if a '*' or '/' followed the expression with the sign. ----------------------------------------------------------------------*/ #if (VERSION == 2) /* if to compile the second version */ int in2post (void) { /* --- convert infix to postfix */ int token; /* current token, e.g. T_NUM */ int value; /* value of the current token */ int op = 0; /* whether an operator must follow */ int neg = 0; /* whether the next term is negated */ int mulop = 0, addop = 0; /* multiplicat. and additive operator */ while (1) { /* conversion loop */ token = get_token(&value); /* get the next token and its value */ switch (token) { /* evaluate the token read */ case T_NUM: /* if the next token is a number, */ if (op) return E_SYNTAX;/* check the operator flag */ if (neg) printf("0 "); /* if the number is negated, add a 0 */ printf("%i ", value); /* print the number and */ op = 1; break; /* set the operator flag */ case '(': /* if an opening parenthesis follows, */ if (op) return E_SYNTAX;/* check the operator flag */ if (neg) printf("0 "); /* if the expr. is negated, add a 0 */ neg = (neg << 7) | (mulop << 8) | addop; if (stk_push(stack, (void*)neg) != 0) return E_NOMEM; /* note the operators of this level */ neg = mulop = addop = 0;/* and reinitialize them to none */ break; /* for the new parenthesis level */ case T_END: /* if at the end of the input */ case ')': /* or a closing parenthesis follows */ if (!op) return E_SYNTAX; /* check the operator flag */ if (neg) printf("- "); /* emit all pending operators */ if (mulop) printf("%c ", mulop); if (addop) printf("%c ", addop); if (token == T_END) /* if at the end of the input, abort */ return (stk_empty(stack)) ? 0 : E_SYNTAX; if (stk_empty(stack)) /* if the stack is empty, */ return E_SYNTAX; /* an opening parenthesis is missing */ neg = (int)stk_pop(stack, 0); /* get from the stack */ addop = neg & 0x7f; /* the operators */ mulop = (neg >> 8) & 0x7f; /* of the preceding */ neg = (neg >> 7) & 0x01; /* parenthesis level */ break; case '*': case '/': /* if multiplication or division */ if (!op) return E_SYNTAX; /* check the operator flag */ if (neg) { printf("- "); neg = 0; } if (mulop) printf("%c ", mulop); mulop = token; /* emit pending higher and same level */ op = 0; break; /* operators and note the new token */ case '+': case '-': /* if addition or subtraction */ if (!op) { /* if the operator is unary */ if (token == '-') neg ^= 1; break; /* note the negation status and */ } /* combine multiple signs into one */ if (neg) { printf("- "); neg = 0; } if (mulop) { printf("%c ", mulop); mulop = 0; } if (addop) printf("%c ", addop); addop = token; /* emit pending higher and same level */ op = 0; break; /* operators and note the new token */ default : /* if the token is unknown */ return token; /* or an error code, return it */ } /* switch (token) */ } /* while (1) */ } /* in2post() */ #endif /* #if (VERSION == 2) */ /*---------------------------------------------------------------------- The above function converts an arithmetic expression in infix notation, which it reads using the `get_token' function, into an arithmetic expression in postfix notation. The idea underlying this function is to use a stack to keep track of the parenthesis levels. Within one level of parentheses there are only three levels of precedence of operators: additive operators ('+' and '-', lowest precedence level), multiplicative operators ('*' and '/', medium precedence level), and signs (unary additive operators, '-' and '+', highest precedence level). If any of these operators are present, they are noted in the variables `addop', `mulop', and `neg', where the last only indicates whether the next expression is negated or not (multiple signs are combined into one). When a new operator is read all operators on a higher or the same precedence level are emitted and the new operator is noted. If an opening parenthesis is encountered, all operators are pushed onto the stack and the corresponding variables are cleared, in order to set up things as for the beginning of an expression. If a closing parenthesis is encountered all pending operators are emitted (in the order of their precedence). The same is done, if the end of the input is reached. In the former case the old set of operators is popped from the stack to reestablish the situation as it was before the opening parenthesis was found, whereas in the latter case the function simply returns. The only really difficult thing in the function above is how the pending operators are stored on the stack, since they are combined into one number to save space. This is possible, since they are only characters. Of course, they could also have been pushed onto the stack as three separate objects. ----------------------------------------------------------------------*/ void error (int code, ...) { /* --- print an error message */ const char *msg; /* error message */ va_list args; /* list of variable arguments */ if ((code > 0) || (code < E_UNKNOWN)) code = E_UNKNOWN; /* check error code */ msg = errmsgs[-code]; /* get error message */ fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments, */ vfprintf(stderr, msg, args); /* print error message and */ va_end(args); /* end argument evaluation */ exit(code); /* abort the programm */ } /* error() */ /*--------------------------------------------------------------------*/ int main (int argc, char *argv[]) { /* --- main function */ prgname = argv[0]; /* get program name for error msgs. */ stack = stk_create(); /* create an operator stack */ if (!stack) error(E_NOMEM); /* for the conversion */ argc = in2post(); /* convert infix to postfix */ if (argc != 0) error(argc); /* and check for an error */ stk_delete(stack); /* delete the operator stack */ printf("\n"); /* terminate the output line */ return 0; /* return 'ok' */ } /* main() */ /*---------------------------------------------------------------------- This program converts an infix expression in integer numbers it reads from stdin into a postfix expression in non-negative integer numbers its writes to stdout. To evaluate the resulting postfix expression combine it with the program `postfix' using a pipe: in2post | postfix This program uses the module stack.c. Compile it with gcc -ansi -Wall -pedantic stack.c in2post.c -o in2post ----------------------------------------------------------------------*/