diff options
Diffstat (limited to 'src/misspell.c')
-rw-r--r-- | src/misspell.c | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/src/misspell.c b/src/misspell.c new file mode 100644 index 00000000..23290819 --- /dev/null +++ b/src/misspell.c @@ -0,0 +1,91 @@ +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <utils.h> +#include <misspell.h> + +enum string_distance_function { + DELETION = 0, /* m1 */ + INSERTION, /* m2 */ + TRANSFORMATION, /* m3 */ +}; +#define DISTANCE_MAX (TRANSFORMATION + 1) + +static unsigned int min_distance(unsigned int *cost) +{ + unsigned int min = UINT_MAX; + int k; + + for (k = 0; k < DISTANCE_MAX; k++) { + if (cost[k] < min) + min = cost[k]; + } + + return min; +} + +/* A simple implementation of "The string-to-string correction problem (1974)" + * by Robert A. Wagner. + */ +static unsigned int string_distance(const char *a, const char *b) +{ + unsigned int len_a = strlen(a); + unsigned int len_b = strlen(b); + unsigned int *distance; + unsigned int i, j, ret; + + distance = xzalloc((len_a + 1) * (len_b + 1) * sizeof(unsigned int)); + +#define DISTANCE(__i, __j) distance[(__i) * len_b + (__j)] + + for (i = 0; i <= len_a; i++) + DISTANCE(i, 0) = i; + for (j = 0; j <= len_b; j++) + DISTANCE(0, j) = j; + + for (i = 1; i <= len_a; i++) { + for (j = 1; j <= len_b; j++) { + unsigned int subcost = (a[i] == b[j]) ? 0 : 1; + unsigned int cost[3]; + + cost[DELETION] = DISTANCE(i - 1, j) + 1; + cost[INSERTION] = DISTANCE(i, j - 1) + 1; + cost[TRANSFORMATION] = DISTANCE(i - 1, j - 1) + subcost; + DISTANCE(i, j) = min_distance(cost); + + if (i > 1 && j > 1 && + a[i] == b[j - 1] && + a[i - 1] == b[j]) + DISTANCE(i, j) = + min(DISTANCE(i, j), + DISTANCE(i - 2, j - 2) + subcost); + } + } + + ret = DISTANCE(len_a, len_b); + + xfree(distance); + + return ret; +} + +void string_misspell_init(struct string_misspell_state *st) +{ + st->obj = NULL; + st->min_distance = UINT_MAX; +} + +int string_misspell_update(const char *a, const char *b, + void *obj, struct string_misspell_state *st) +{ + unsigned int distance; + + distance = string_distance(a, b); + + if (distance < st->min_distance) { + st->min_distance = distance; + st->obj = obj; + return 1; + } + return 0; +} |