Code: Alles auswählen
/* Levenshtein Distance for Python, coded in C for speed. */
#include <Python.h>
#define MAX(a,b) ((a)<(b)?(b):(a))
#define MIN3(a,b,c) ((a)>(b)?((c)>(b)?(b):(c)):((a)>(c)?(c):(a)))
static PyObject *levenshtein(PyObject *self, PyObject *args)
{
char *str1, *str2;
int slen1, slen2, i, j, *array = NULL;
PyObject *rv;
if( !PyArg_ParseTuple(args,"s#s#",&str1,&slen1,&str2,&slen2) )
return NULL;
if( !slen1 )
return PyInt_FromLong(slen2);
if( !slen2 )
return PyInt_FromLong(slen1);
if( !( array = (int*)PyMem_Malloc(sizeof(int)*(slen1+1)*(slen2+1)) ) ) {
PyErr_SetString(PyExc_MemoryError,"Cannot allocate array of distances");
return NULL;
}
for( i = 0; i <= slen1; i++ )
array[i*(slen2+1)] = i;
for( j = 1; j <= slen2; j++ )
array[j] = j;
for( i = 0; i < slen1; i++ )
for( j = 0; j < slen2; j++ )
array[(i+1)*(slen2+1)+j+1] = MIN3(array[i*(slen2+1)+j+1]+1,
array[(i+1)*(slen2+1)+j]+1,
array[i*(slen2+1)+j] +
(str1[i]==str2[j]?0:1));
rv = PyInt_FromLong(array[(slen1+1)*(slen2+1)-1]);
PyMem_Free(array);
return rv;
}
static PyMethodDef LevenshteinMethods[] = {
{"levenshtein",levenshtein,METH_VARARGS,
"Calculate the levenshtein distance between two strings."},
{NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC initlevenshtein()
{
Py_InitModule("levenshtein",LevenshteinMethods);
}
Code: Alles auswählen
# -*- coding: iso-8859-15 -*-
from distutils.core import setup, Extension
levc = Extension('levenshtein',sources=['levenshtein.c'])
setup(name='levenshtein',version='0.1',
description='Calculates the levenshtein distance between two strings',
ext_modules=[levc])
Ahso, wem's wirklich um Geschwindigkeit geht, der wird um:
Code: Alles auswählen
CFLAGS="-O3" python setup.py install
Use at your own discretion...
