Simple implementation of levenshtein distance for MySQL
commit
6c15eb5b94
|
@ -0,0 +1,9 @@
|
|||
levenshtein.so: levenshtein.c
|
||||
gcc -Wall -fpic $(CFLAGS) -I/usr/include/mysql -shared -o $@ $<
|
||||
|
||||
install: levenshtein.so
|
||||
cp levenshtein.so /usr/lib/mysql/plugin/
|
||||
echo 'CREATE FUNCTION levenshtein RETURNS INTEGER soname "levenshtein.so"' | mysql -u root -p
|
||||
|
||||
clean:
|
||||
rm -f levenshtein.so
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
#############################################################################
|
||||
# License: GNU GPL 2.0 or later
|
||||
#############################################################################
|
||||
# Header from the template:
|
||||
#
|
||||
# http://dev.mysql.com/sources/doxygen/mysql-5.1/udf__example_8c-source.html
|
||||
#############################################################################
|
||||
*/
|
||||
|
||||
#include <my_global.h>
|
||||
#include <my_sys.h>
|
||||
#include <m_string.h>
|
||||
#include <mysql.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef HAVE_DLOPEN
|
||||
my_bool levenshtein_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
|
||||
longlong levenshtein(UDF_INIT *initid, UDF_ARGS *args, char *is_null,
|
||||
char *error);
|
||||
#endif
|
||||
longlong levenshtein(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args,
|
||||
char *is_null __attribute__((unused)),
|
||||
char *error __attribute__((unused)))
|
||||
{
|
||||
if (args->arg_count != 2)
|
||||
return -3;
|
||||
if (args->arg_type[0] != STRING_RESULT)
|
||||
return -1;
|
||||
if (args->arg_type[1] != STRING_RESULT)
|
||||
return -2;
|
||||
const char *wA = args->args[0];
|
||||
const char *wB = args->args[1];
|
||||
int m = args->lengths[0];
|
||||
int n = args->lengths[1];
|
||||
int cost =0;
|
||||
int i;
|
||||
int j;
|
||||
longlong D[(m+1)][(n+1)];
|
||||
for (i = 0; i <= m; i++){
|
||||
D[i][0] = i;
|
||||
}
|
||||
for (i = 1; i <= n; i++){
|
||||
D[0][i] = i;
|
||||
}
|
||||
for (i = 1; i <= m; i++){
|
||||
for (j = 1; j <= n; j++){
|
||||
if (wA[i-1] == wB[j-1])
|
||||
cost = 0;
|
||||
else
|
||||
cost = 1;
|
||||
if ((D[i-1][j] + 1 <= D[i][j-1] + 1) && (D[i-1][j] + 1 <= D[i-1][j-1] + cost)){ // min(D[i-1][j]+1)
|
||||
D[i][j] = D[i-1][j] + 1;
|
||||
}
|
||||
else{
|
||||
if ((D[i][j-1] + 1 <= D[i-1][j] + 1) && (D[i][j-1] + 1 <= D[i-1][j-1] + cost)) { // min(D[i][j-1]+1
|
||||
D[i][j] = D[i][j-1] + 1;
|
||||
}
|
||||
else{
|
||||
D[i][j] = D[i-1][j-1] + cost; // min(D[i-1][j-1]+1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return D[m][n];
|
||||
}
|
||||
my_bool levenshtein_init(UDF_INIT *initid __attribute__((unused)),
|
||||
UDF_ARGS *args __attribute__((unused)),
|
||||
char *message __attribute__((unused)))
|
||||
{
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue