1import numpy as np
2def levenshtein_ratio_and_distance(s, t, ratio_calc = False):
3 """ levenshtein_ratio_and_distance:
4 Calculates levenshtein distance between two strings.
5 If ratio_calc = True, the function computes the
6 levenshtein distance ratio of similarity between two strings
7 For all i and j, distance[i,j] will contain the Levenshtein
8 distance between the first i characters of s and the
9 first j characters of t
10 """
11 # Initialize matrix of zeros
12 rows = len(s)+1
13 cols = len(t)+1
14 distance = np.zeros((rows,cols),dtype = int)
15
16 # Populate matrix of zeros with the indeces of each character of both strings
17 for i in range(1, rows):
18 for k in range(1,cols):
19 distance[i][0] = i
20 distance[0][k] = k
21
22 # Iterate over the matrix to compute the cost of deletions,insertions and/or substitutions
23 for col in range(1, cols):
24 for row in range(1, rows):
25 if s[row-1] == t[col-1]:
26 cost = 0 # If the characters are the same in the two strings in a given position [i,j] then the cost is 0
27 else:
28 # In order to align the results with those of the Python Levenshtein package, if we choose to calculate the ratio
29 # the cost of a substitution is 2. If we calculate just distance, then the cost of a substitution is 1.
30 if ratio_calc == True:
31 cost = 2
32 else:
33 cost = 1
34 distance[row][col] = min(distance[row-1][col] + 1, # Cost of deletions
35 distance[row][col-1] + 1, # Cost of insertions
36 distance[row-1][col-1] + cost) # Cost of substitutions
37 if ratio_calc == True:
38 # Computation of the Levenshtein Distance Ratio
39 Ratio = ((len(s)+len(t)) - distance[row][col]) / (len(s)+len(t))
40 return Ratio
41 else:
42 # print(distance) # Uncomment if you want to see the matrix showing how the algorithm computes the cost of deletions,
43 # insertions and/or substitutions
44 # This is the minimum number of edits needed to convert string a to string b
45 return "The strings are {} edits away".format(distance[row][col])
46