/*---------------------------------------------------------------------- File : match.c Contents: test whether a string matches a *|? pattern Author : Christian Borgelt History : 18.12.1995 file created ----------------------------------------------------------------------*/ #define _POSIX_SOURCE #include /*---------------------------------------------------------------------- Functions ----------------------------------------------------------------------*/ int match (char s[], char p[]) { /* --- test whether s matches p */ switch (p[0]) { /* evaluate next pattern character */ case '*': if (match(s, p+1)) return 1; /* '*' matches no char */ if (s[0] == '\0') return 0; /* unmatched char in p */ return match(s+1, p); /* '*' matches char in s */ case '?': if (s[0] == '\0') return 0; /* no char to match '?' */ return match(s+1, p+1); /* '?' matches char in s */ default : if (s[0] != p[0]) return 0; /* unmatched char in s/p */ if (s[0] == '\0') return 1; /* end of string reached */ return match(s+1, p+1); /* chars in s and p match */ } /* recursively match s and p */ } /* match() */ /*---------------------------------------------------------------------- The above function tests whether the string s matches the pattern p. By `pattern' we mean a string in which the characters '*' and '?' have a special meaning. A '*' matches any number of arbitrary characters, a '?' matches exactly one arbitrary character. Any other character matches itself. To understand this look at the following examples: `abc' (string) matches `abc' (pattern), since `a' matches `a', `b' matches `b', and `c' matches `c'. `abcd' does not match `abc', since there is nothing in the pattern to match the `d'. `abc', `a2c', 'a%c', `a?b', `a*c', etc. all match `a?c', since `a' matches `a', `?' matches any character, and `c' matches `c' `abbc' does not match `a?c', since a `?' matches only one character, hence there is nothing in the pattern to match the second `b'. `abbc' matches 'a??c', since each each `?' matches one `b' `ab', `acb', `a111c', `ajksgewb', etc. all match `a*b', since `a' matches `a', a `*' matches any number of characters (zero characters for string = `ab'), and `b' matches `b'. `ab' does not match `*?ab', since there is nothing in the string that matches the `?'. On the command line (UNIX as well as MSDOS), such pattern matching is used to let you select a group of files whose names match a given pattern. E.g. if you want to list all C source files, you can type `ls *.c' and you will be given a list of all files that end in `.c'. The above function uses a recursive approach to determine whether a given string matches a given pattern. It does so by applying the following rules, depending on the next character in the pattern: next character is a '*': s matches p, if either s matches p without the '*' (in this case the '*' matches zero characters), or if s without the first character matches p (in this case the `*' matches one character --- or more, due to the recursion). It is obvious, that we can do the second test only, if there is another character in s. Hence, before we do the test, we check for another character in s. If there is no such character (if the next character is the end of string marker `\0') and s did not match p without the `*', then there must be a character in p that is not matched by s. Therefore s does not match p. next character is a '?': s matches p, if there is another character in s and p without the '?' matches s without its first character. Hence we first test whether the next character in s is the end of string marker. If it is, there is nothing in s to match the '?', hence s does not match p. Otherwise we test wether p without the '?' matches 's' without its first character by a recursive call. any other character: s matches p, if the next character in s and the next character in p are identical and the remainder of s matches the remainder of p. In case the next character is the end of string marker, the remainder of s as well as p is empty, hence s matches p. Otherwise we have to test whether s without its first character matches p without its first character by a recursive call. ----------------------------------------------------------------------*/ int main (int argc, char *argv[]) { /* --- main function */ if (argc != 3) { /* if wrong number of arguments given */ printf("usage: %s string pattern\n", argv[0]); printf("test whether a string matches a *|? pattern\n"); return 0; /* print a usage message */ } /* and abort the function */ printf("%i\n", match(argv[1], argv[2])); return 0; /* compute result and return 'ok' */ } /* main() */