/*---------------------------------------------------------------------- File : convert.c Contents: convert a number from one number system to another Author : Christian Borgelt History : 12.11.1997 file created 18.11.1997 conditional compilation added 19.11.1997 function getnum programmed (in separate program) 20.11.1997 programs for functions putnum and getnum merged 21.11.1997 unnecessary condition removed in function getnum 16.02.1998 third version simplified ----------------------------------------------------------------------*/ #include #include #ifndef VERSION #define VERSION 1 /* default: compile first version */ #endif /*---------------------------------------------------------------------- An explanation of this way of using conditional compilation is given in the program `perfect.c'. It enables choosing the version with a compiler option (-D for the Gnu C-compiler). ----------------------------------------------------------------------*/ /*---------------------------------------------------------------------- Functions ----------------------------------------------------------------------*/ #if (VERSION == 1) /* if to compile first version */ void rec (int n, int base) { /* --- recursive part of `putnum' */ if (n <= 0) return; /* do not print leading zeros */ rec(n / base, base); /* print higher valued digits */ n %= base; /* compute next digit and print it */ putchar(n +((n <= 9) ? '0' : ('a' -10))); } /* rec() */ /*---------------------------------------------------------------------- Note that this function does not print anything, if n is 0, and that it cannot deal with negative numbers. Hence numbers <= 0 have to be treated in the first version of the function `putnum' (which calls this function). Of course, for negative numbers, this treatment simply consists of printing a minus sign and calling rec with a negated n. Note also, that this function assumes 'base' to be in the range 2-36, which is made sure in the main function. (The reason for this range is that we need some characters for digits with a value greater than 9. An obvious choice are the letters. But if all letters are used, which is the case for base 36, we are not sure how to continue. Hence we restrict the range.) Finally, there is this really strange `putchar' call. At first glance it is totally unreadable. But it is simpler than it appears. The `putchar' call is equivalent to if (n <= 9) putchar(n +'0'); else putchar(n -10 +'a'); This statement you should be able to understand immediately. If the value of the digit to print is less than 10, it can be represented by one of the usual characters '0' to '9'. These characters are placed at codes that succeed each other. That is, the code of the character '3' can be computed from the code of the character '0' by simply adding 3 to that code. Hence if the value of the digit is less than 10, the code of the corresponding character can be computed simply as n +'0'. A similar argument applies for n -10 +'a', since in the ASCII all letters follow each other in the order everybody knows. (There is another code, EBCDIC, formerly used on IBM computers, in which this is not the case. You need not care about it.) The only thing we have to take care of is that we have to subtract 10 from n, since 'a' is intended to stand for a digit with value 10. If we take a closer look at the if-statement we recognize that the only thing that differs between the 'then' and the 'else' branch is the offset to be added to the value of n. This offset can be computed with conditional evaluation. You already know conditional evaluation from the applicative algorithms. In C it works exactly like that, it is just written differently. First there is the condition. Here it is '(n <= 9)'. Then it follows a question mark, which can be interpreted as 'Test the condition'. Then there are two expressions, separated by a colon. Here they are '0' and 'a'-10. If the condition preceding the question mark is true, the first of them is evaluated, otherwise the second is evaluated. Thus conditional evaluation is just like an 'if'. The only difference is that a conditional evaluation statement can have a value, whereas an 'if' statement can not. ----------------------------------------------------------------------*/ void putnum (int n, int base) { /* --- recursive version */ if (n == 0) { /* if the number n is zero, */ printf("0\n"); return; } /* print it directly and abort */ if (n < 0) { /* if n is negative, print a */ putchar('-'); n = -n; } /* minus sign and remove the sign */ rec(n, base); /* call the recursive function */ putchar('\n'); /* terminate the output line */ } /* putnum() */ #endif /* #if (VERSION == 1) */ /*---------------------------------------------------------------------- For a comment on this function see the comments on the function `rec'. ----------------------------------------------------------------------*/ #if (VERSION == 2) /* if to compile second version */ void putnum (int n, int base) { /* --- iterative version 1 */ int pval = 1; /* place value (init. to base^0 = 1) */ int digit; /* next digit to print */ if (n < 0) { /* if n is negative, print a */ putchar('-'); n = -n; } /* minus sign and remove the sign */ while (n / base >= pval) /* while not highest place reached, */ pval *= base; /* compute next place value */ while (pval > 0) { /* while not all digits printed */ digit = n / pval; /* compute next digit and print it */ putchar(digit +((digit <= 9) ? '0' : ('a' -10))); n -= digit *pval; /* remove printed digit from n */ pval /= base; /* and compute the place value */ } /* of the next lower place */ printf("\n"); /* terminate the output line */ } /* putnum() */ #endif /* #if (VERSION == 2) */ /*---------------------------------------------------------------------- In this function the conversion is carried out by determining first the value of the highest place of the number and then printing the digits in the order of decreasing place values. Note that this function assumes `base' to be in the range 2-36 (just as the function 'rec' above and the third version of function `putnum' below). Note that it is better to avoid writing the stopping criterion of the first while statement as 'n >= base *pval', since for large n the value of base *pval may exceed the largest number representable as an `int'. The above condition is safe and with it also the computation of pval *base in the loop: Since n can be represented and pval *base is less than or at most equal to n (because of the condition), it must also be representable. Remark: Instead of `n -= digit *pval;' in second while-loop, you can also write `n %= pval;' which has the same effect. ----------------------------------------------------------------------*/ #if (VERSION == 3) /* if to compile third version */ void putnum (int n, int base) { /* --- iterative version 2 */ char buf[32]; /* buffer for digits */ int i = 32; /* field index */ int digit; /* next digit to store */ if (n < 0) { /* if n is negative, print a */ putchar('-'); n = -n; } /* minus sign and remove sign */ buf[--i] = '\0'; /* store end of string marker */ do { /* while there is another digit */ digit = n % base; /* compute next digit and store it */ buf[--i] = digit +((digit <= 9) ? '0' : ('a' -10)); n /= base; /* remove stored digit from n */ } while (n > 0); /* while there is another digit */ printf("%s\n", buf +i); /* print buffered digits */ } /* putnum() */ #endif /* #if (VERSION == 3) */ /*---------------------------------------------------------------------- One of the main problems of the conversion consists in the fact that the digits of the number as represented in the new number system can be easily computed, if the we start with the digit that has the lowest place value: It can be determined by computing the remainder of a division of the number n by the base. But this is not the order in which to print these digits. In the first version of the function `putnum', we used a recursive function to `buffer' the digits. By first calling the function recursively and then printing the digit, we produce the correct order. In the second version of the function `putnum' this problem was handled by determining first the highest occuring place and then working downward. The third version of the function `putnum' finally works upward, i.e. computes the digits for the lower places first and then those for the higher places. The trick is that it does not print these digits immediately, but buffers them until the digit of the highest place is determined. Afterwards the whole string of digits is printed --- without any further computations --- in one go. How do we achieve this? First we need a buffer for the digits. This is the variable `buf', which is defined to be a vector of 32 characters (see file `dow.c' for some explanations on vectors). The reason for the 32 is as follows: An integer number can have at most 31 binary digits (the 32nd digit is interpreted as a sign). Since we need the most digits in the binary number system, we will never need to print more than 31 digits. We add one for a special character that is used to terminate a string. Call it the `end of string marker'. It can be written as '\0' and has code 0. We need this additional character to make the contents of the buffer a valid C-string, which can be printed with the 'printf' function. Of course, we could also do without this additional character, if we had a loop at the end of the function which printed the characters separately with, say, the `putchar' function. But using `printf' is much simpler. The variable i always contains the field index of the last character stored. At the beginning of the program it is initialized to 32, that is, it points to an undefined field, since we did note store any characters yet. After we dealt with the special cases of 0 and negative numbers, we store the end of string marker in the last field of the buffer. Then we work from right to left. We compute the next digit and store it in the buffer, each time decreasing the index i by one. Thus, when the loop terminates, the variable i contains the field index of the highest valued place. Since the number can have less then 31 digits, i need not be 0 after the loop. It may have any value from 0 to 30. If it is, say, 15, then the first 15 fields of the buffer have not been assigned a value. Hence we must not print them. To skip these empty fields, we do not pass `buf', but `buf +i' to the `printf' function. Why this is possible is explained in more detail in the comment on the function below. ----------------------------------------------------------------------*/ int getnum (char *s, int base) { /* --- read a base `base' number */ int n = 0; /* converted number (result) */ int neg = 0; /* flag for negative number */ int d; /* next digit of the number in s */ while ((*s == ' ') || (*s == '\t')) s++; /* skip leading spaces and tabs */ if (*s == '+') /* if a plus sign is present, */ s++; /* skip it */ else if (*s == '-') { /* if a minus sign is present, */ s++; neg = 1; } /* skip it and set the negative flag */ while (1) { /* traverse rem. characters in s */ d = *s++; /* get next character in s */ if ((d >= '0') && (d <= '9')) d -= '0'; else if ((d >= 'a') && (d <= 'z')) d -= 'a' -10; else if ((d >= 'A') && (d <= 'Z')) d -= 'A' -10; else break; /* determine next digit's value and */ if (d >= base) break; /* if it is invalid, abort the loop */ n = n *base +d; /* compute the new value of n */ } /* (incorporate the digit d into n) */ return (neg) ? -n : n; /* return the conversion result */ } /* getnum() */ /*---------------------------------------------------------------------- The algorithm used in this function to convert a given base `base' number to the decimal system is very simple: Traverse the string representation of the number in the order of decreasing place value. For each place multiply the result computed up to that place by the base, determine the digit and add it to the result. This is the same as evaluating the polynomial that has the place values as coefficients for x = base with Horner's algorithm. Note that this function interprets only those characters that have a meaning in the chosen number system. If a character is encountered that does not fit into the given number system (i.e. if a '#' is found or a a '9' with base <= 9 or a 'c' with base <= 12), the function is aborted with the result computed up to that point. All other characters in s are ignored in this case. Since the end of string marker '\0' cannot be interpreted independent of the number system, the loop must terminate. Two new programming techniques are introduced in this function. The first is the termination of a loop by the 'break' statement and the second is the use of a pointer to traverse a string of characters. The first technique is easily explained. If a break-statement is encountered, the loop enclosing it is terminated, i.e. the execution continues after the loop. The only exception from this rule is a `break' within a switch-statement. A `break' in a switch-statement only terminates the switch and not the surrounding loop, if there is any. In the above function the break-statements are very important, because without them the loop would be an infinite loop (since the condition after the `while' is simply 1 = true). To understand the second technique we need to know something about pointers. Pointers are best explained by referring to the concept of a register machine introduced in the lecture. In such a machine there are storage cells, each of which has a unique number. You can think of a variable in a C-program as it it were a storage cell in a register machine. The only difference is that a C-variable has a name, whereas a storage cell has only a number. (This is one reason why C-programs are much easier to read than register machine programs. To get an executable program the variable names have to be replaced by cell numbers, but you need not worry about that --- the compiler does this for you.) A pointer is a special type of variable. It has a name, so there is a cell with a certain number holding its value. The value of a pointer variable is a number. But it is not just a number as the value of an integer variable is a number. It is the number of another cell. Look at the following diagram to understand this. Let s be a pointer variable in a C-program. Suppose that the compiler maps the name `s' to the cell number 123. Let the contents of cell 123 be 234. We said that the contents of a pointer variable is a cell number. Hence the contents of the cell 123 directs us to the cell with number 234, i.e. it `points' or `refers' to cell 234. This other cell may or may not have a name in a C-program and its contents is completely independent of the contents of the cell 123. In the diagram below it contains the number 72, which, in the ASCII, corresponds to an 'H'. variable name s cell number 123 .... 234 235 236 237 cell contents 234 72 = 'H' 105 = 'i' 33 = '!' 0 = '\0' | ^ +--------+ Strings consist of several characters, hence one cell is not enough to store a string. We need a set of cells to store it. The convention is that the cells holding the characters of a string have adjacent numbers. That is, if the first character is stored in cell number 234, then the second character is stored in cell 235. This cell contains the number 105, which corresponds to an 'i'. Cell 236 contains the number 33, which is an exclamation mark and, finally, cell 237 contains a zero, which is the end of string marker in C. And now we know how strings are stored and referenced in C. The variable s does not contain the string. It contains the number of the cell that contains the first character of the string. The end of the string is marked by an end of string marker. The end of string marker is always the last character of a C-string. It is a special character with the value 0. Note that this is not the character '0', which has code 48 in the ASCII, but the character with code 0. This character is not printable, i.e. you cannot print it on the terminal. If you try, you will see nothing. That a variable does not contain a normal value but points to another cell is indicated in C by an asterix (a '*') in front of the variable name. That is, in the above program the variable s points to a cell, whose contents is a character. To get the value of this character, we have to `dereference' the pointer variable s. This is done by writing an asterix (a '*') in front of the variable name. The compiler interprets this as: Get the value of cell 123 (which corresponds to the name s). Interpret this value as a cell number. Get the value of the cell with that number. A very nice thing about pointers is that they can be modified with arithmetic operations. That is, if you have a string variable, i.e. a pointer to the first character of a string, you can derive from it a pointer to the second character by simply adding 1. I.e. with '*(s+1)' you can access the second character of a string, with `*(s+2)' the third, and so on. Or you can just modify the pointer variable as it is done several times in the above function. After the first character is processed, you can just go to the second by increasing s by 1: `s++;'. As you may have conjectured already, pointers and vectors are very closely related. Indeed, in C they are nearly identical. That is, to get the first character of the string s points to, you can also write s[0], to get the second, you can write s[1] and so on, just as if s were a vector. If s is a pointer, then s[i] and *(s+i) are identical. This is also the reason, why we could pass `buf +i' to the `printf' function in the third version of the function `putnum'. Now for a detailed explanation of the above function. The first loop skips leading spaces and tabs that may be present. Then an optional sign is evaluated. If it is a '+', we just skip it, since by default we interpret numbers as positive. If it is a '-', we skip the character and note the minus sign by setting a flag, which, if set, is later used to negate the result. Then we start the conversion loop. We interpret a character as a place in a base `base' number and determine the value of the corresponding digit. If the character cannot be interpreted as a digit or if it is a digit that exceeds the largest digit in the number system defined by `base', we abort the loop and return the result. Thus any character that cannot be evaluated terminates the conversion. Since there is an end of string marker at the end of the string, which cannot be interpreted whatever the base may be, we can be sure that the loop eventually terminates. (Note that if no character can be converted, the result is 0.) If the character can be converted to a valid digit, it is incorporated into the result by multiplying the current result by the base and adding the digit. After the loop the result is returned to the calling function. If necessary, it is negated first. ----------------------------------------------------------------------*/ int main (int argc, char *argv[]) { /* --- main function */ int b_in = 10; /* base of the number system of n */ int b_out = 10; /* base of the system to convert to */ if ((argc < 2) || (argc > 4)) { /* if wrong number of args. given */ printf("usage: %s n [b_in] [b_out]\n", argv[0]); printf("convert a number from one number system to another\n"); printf("n number to convert\n"); printf("b_in base of the number system of n " "(default: %i)\n", b_in); printf("b_out base of the system to convert to " "(default: %i)\n", b_out); return 0; /* print a usage message */ } /* and abort the program */ if (argc > 2) { /* if a second argument is given, */ b_in = atoi(argv[2]); /* get from it the input base */ if ((b_in < 2) || (b_in > 36)) { printf("%s: illegal base\n", argv[0]); return 1; } } /* check the given base */ if (argc > 3) { /* if a third argument is given, */ b_out = atoi(argv[3]); /* get from it the output base */ if ((b_out < 2) || (b_out > 36)) { printf("%s: illegal base\n", argv[0]); return 1; } } /* check the given base */ putnum(getnum(argv[1], b_in), b_out); return 0; /* convert the number and return 'ok' */ } /* main() */ /*---------------------------------------------------------------------- Due to the special way in which numbers are stored in the computer and the range restrictions on the `int' data type, all of the functions presented above cannot deal properly with certain numbers. For example, no number greater than 2147483647 (decimal) is converted properly. This is just because 2147483647 = 2^31 -1 is the largest number that can be represented as an `int'. For the same reason no number smaller than -2147483648 is converted correctly. A very strange case is the number -2147483648. It can be represented as an `int', but it is not converted correctly, since it cannot be negated! Its negation, strange enough, is the same number. The reason is a little tricky, so we omit an explanation here. You will be given that explanation in the lecture on computer architecture. ----------------------------------------------------------------------*/