summaryrefslogtreecommitdiffstats
path: root/src/misspell.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/misspell.c')
-rw-r--r--src/misspell.c91
1 files changed, 91 insertions, 0 deletions
diff --git a/src/misspell.c b/src/misspell.c
new file mode 100644
index 00000000..23290819
--- /dev/null
+++ b/src/misspell.c
@@ -0,0 +1,91 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <utils.h>
+#include <misspell.h>
+
+enum string_distance_function {
+ DELETION = 0, /* m1 */
+ INSERTION, /* m2 */
+ TRANSFORMATION, /* m3 */
+};
+#define DISTANCE_MAX (TRANSFORMATION + 1)
+
+static unsigned int min_distance(unsigned int *cost)
+{
+ unsigned int min = UINT_MAX;
+ int k;
+
+ for (k = 0; k < DISTANCE_MAX; k++) {
+ if (cost[k] < min)
+ min = cost[k];
+ }
+
+ return min;
+}
+
+/* A simple implementation of "The string-to-string correction problem (1974)"
+ * by Robert A. Wagner.
+ */
+static unsigned int string_distance(const char *a, const char *b)
+{
+ unsigned int len_a = strlen(a);
+ unsigned int len_b = strlen(b);
+ unsigned int *distance;
+ unsigned int i, j, ret;
+
+ distance = xzalloc((len_a + 1) * (len_b + 1) * sizeof(unsigned int));
+
+#define DISTANCE(__i, __j) distance[(__i) * len_b + (__j)]
+
+ for (i = 0; i <= len_a; i++)
+ DISTANCE(i, 0) = i;
+ for (j = 0; j <= len_b; j++)
+ DISTANCE(0, j) = j;
+
+ for (i = 1; i <= len_a; i++) {
+ for (j = 1; j <= len_b; j++) {
+ unsigned int subcost = (a[i] == b[j]) ? 0 : 1;
+ unsigned int cost[3];
+
+ cost[DELETION] = DISTANCE(i - 1, j) + 1;
+ cost[INSERTION] = DISTANCE(i, j - 1) + 1;
+ cost[TRANSFORMATION] = DISTANCE(i - 1, j - 1) + subcost;
+ DISTANCE(i, j) = min_distance(cost);
+
+ if (i > 1 && j > 1 &&
+ a[i] == b[j - 1] &&
+ a[i - 1] == b[j])
+ DISTANCE(i, j) =
+ min(DISTANCE(i, j),
+ DISTANCE(i - 2, j - 2) + subcost);
+ }
+ }
+
+ ret = DISTANCE(len_a, len_b);
+
+ xfree(distance);
+
+ return ret;
+}
+
+void string_misspell_init(struct string_misspell_state *st)
+{
+ st->obj = NULL;
+ st->min_distance = UINT_MAX;
+}
+
+int string_misspell_update(const char *a, const char *b,
+ void *obj, struct string_misspell_state *st)
+{
+ unsigned int distance;
+
+ distance = string_distance(a, b);
+
+ if (distance < st->min_distance) {
+ st->min_distance = distance;
+ st->obj = obj;
+ return 1;
+ }
+ return 0;
+}