Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Here's a Numba implementation of the Levenshtein distance with character swaps costing 2, optimized for NumPy integer arrays:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two NumPy integer arrays Character swaps cost 2, insertions/deletions cost 1 Parameters: ----------- arr1 : numpy.ndarray First input array (integer type) arr2 : numpy.ndarray Second input array (integer type) Returns: -------- int Levenshtein distance between arr1 and arr2 """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Minimum of insert, delete, or substitute insert = dp[i, j-1] + 1 delete = dp[i-1, j] + 1 substitute = dp[i-1, j-1] + 1 # Check for swap (only if not first elements and characters match swapped) swap = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap = dp[i-2, j-2] + 2 # Take minimum of all operations dp[i, j] = min(insert, delete, substitute, swap) return dp[m, n] |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | import numpy as np import time # Example arrays a1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) a2 = np.array([1, 2, 4, 3, 5], dtype=np.int32) # First run to compile _ = levenshtein(a1, a2) # Timing the function start = time.time() distance = levenshtein(a1, a2) end = time.time() print(f"Levenshtein Distance: {distance}") print(f"Computation Time: {(end-start)*1000:.4f} ms") |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two NumPy integer arrays Character swaps cost 2, insertions/deletions cost 1 Parameters: ----------- arr1 : numpy.ndarray First input array (integer type) arr2 : numpy.ndarray Second input array (integer type) Returns: -------- int Levenshtein distance between arr1 and arr2 """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Minimum of insert, delete, or substitute insert = dp[i, j-1] + 1 delete = dp[i-1, j] + 1 substitute = dp[i-1, j-1] + 1 # Check for swap (only if not first elements and characters match swapped) swap = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap = dp[i-2, j-2] + 2 # Take minimum of all operations dp[i, j] = min(insert, delete, substitute, swap) return dp[m, n] |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two NumPy integer arrays Character swaps cost 2, insertions/deletions cost 1 Parameters: ----------- arr1 : numpy.ndarray First input array (integer type) arr2 : numpy.ndarray Second input array (integer type) Returns: -------- int Levenshtein distance between arr1 and arr2 """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Minimum of insert, delete, or substitute insert = dp[i, j-1] + 1 delete = dp[i-1, j] + 1 substitute = dp[i-1, j-1] + 1 # Check for swap (only if not first elements and characters match swapped) swap = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap = dp[i-2, j-2] + 2 # Take minimum of all operations dp[i, j] = min(insert, delete, substitute, swap) return dp[m, n] answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Traceback (most recent call last):
File "/usr/src/app/main.py", line 63, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: False
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Here's a Numba implementation of the Levenshtein distance with character swaps costing 2:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Args: arr1 (np.ndarray): First input array arr2 (np.ndarray): Second input array Returns: int: Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Adding 1 to each dimension to include empty string case dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Check if swap is possible if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): # Swap operation with cost 2 dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1, # substitution dp[i-2, j-2] + 2 # swap ) else: # Regular Levenshtein operations dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1 # substitution ) return dp[m, n] # Example usage if __name__ == "__main__": # Test the function a1 = np.array([1, 2, 3, 4], dtype=np.int32) a2 = np.array([1, 3, 2, 4], dtype=np.int32) print(levenshtein(a1, a2)) # Should handle swap efficiently |
1 2 3 4 5 6 7 8 9 | # Basic usage a1 = np.array([1, 2, 3, 4], dtype=np.int32) a2 = np.array([1, 3, 2, 4], dtype=np.int32) print(levenshtein(a1, a2)) # Should return 2 (swap operation) # Different length arrays a3 = np.array([1, 2, 3], dtype=np.int32) a4 = np.array([1, 2, 3, 4, 5], dtype=np.int32) print(levenshtein(a3, a4)) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Args: arr1 (np.ndarray): First input array arr2 (np.ndarray): Second input array Returns: int: Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Adding 1 to each dimension to include empty string case dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Check if swap is possible if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): # Swap operation with cost 2 dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1, # substitution dp[i-2, j-2] + 2 # swap ) else: # Regular Levenshtein operations dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1 # substitution ) return dp[m, n] |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Args: arr1 (np.ndarray): First input array arr2 (np.ndarray): Second input array Returns: int: Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Adding 1 to each dimension to include empty string case dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # If characters are the same, no cost if arr1[i-1] == arr2[j-1]: dp[i, j] = dp[i-1, j-1] else: # Check if swap is possible if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): # Swap operation with cost 2 dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1, # substitution dp[i-2, j-2] + 2 # swap ) else: # Regular Levenshtein operations dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + 1 # substitution ) return dp[m, n] answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Traceback (most recent call last):
File "/usr/src/app/main.py", line 65, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: False
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Here's a Numba implementation of the Levenshtein distance with character swaps costing 2, optimized for running in nopython mode:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays with swap cost of 2. Parameters: ----------- arr1 : numpy.ndarray First input array of integers arr2 : numpy.ndarray Second input array of integers Returns: -------- int Levenshtein distance between the two arrays """ m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution/replacement if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Check for possible swap swap_cost = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap_cost = dp[i-2, j-2] + 2 # Compute minimum cost dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + cost, # substitution swap_cost # swap ) return dp[m, n] |
1 2 3 4 5 6 7 8 9 | # Example usage import numpy as np # Test the function a1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) a2 = np.array([1, 3, 2, 4, 5], dtype=np.int32) dist = levenshtein(a1, a2) print(f"Levenshtein distance: {dist}") |
1 2 | # For [1,2,3,4,5] and [1,3,2,4,5], the distance would be 2 (one swap) # For [1,2,3,4,5] and [1,2,3,4,6], the distance would be 1 (one substitution) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays with swap cost of 2. Parameters: ----------- arr1 : numpy.ndarray First input array of integers arr2 : numpy.ndarray Second input array of integers Returns: -------- int Levenshtein distance between the two arrays """ m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution/replacement if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Check for possible swap swap_cost = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap_cost = dp[i-2, j-2] + 2 # Compute minimum cost dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + cost, # substitution swap_cost # swap ) return dp[m, n] |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays with swap cost of 2. Parameters: ----------- arr1 : numpy.ndarray First input array of integers arr2 : numpy.ndarray Second input array of integers Returns: -------- int Levenshtein distance between the two arrays """ m, n = len(arr1), len(arr2) # Create distance matrix dp = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): dp[i, 0] = i for j in range(n+1): dp[0, j] = j # Fill the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution/replacement if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Check for possible swap swap_cost = float('inf') if i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]: swap_cost = dp[i-2, j-2] + 2 # Compute minimum cost dp[i, j] = min( dp[i-1, j] + 1, # deletion dp[i, j-1] + 1, # insertion dp[i-1, j-1] + cost, # substitution swap_cost # swap ) return dp[m, n] answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Traceback (most recent call last):
File "/usr/src/app/main.py", line 63, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: False
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Here's a Numba implementation of the Levenshtein distance with character swaps costing 2, optimized for NumPy int arrays:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Parameters: ----------- arr1 : numpy.ndarray First input array arr2 : numpy.ndarray Second input array Returns: -------- int Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to handle empty string case d = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): d[i, 0] = i for j in range(n+1): d[0, j] = j # Fill in the rest of the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution (or 0 if characters match) if arr1[i-1] == arr2[j-1]: cost_subst = 0 else: cost_subst = 1 # Check if swap is possible and cheaper swap_cost = 2 if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): swap_cost = 2 else: swap_cost = float('inf') # Compute minimum of different operations d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost_subst, # substitution swap_cost + d[i-2, j-2] if swap_cost != float('inf') else float('inf') # swap ) return d[m, n] # Example usage if __name__ == "__main__": # Test the function arr1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) arr2 = np.array([1, 2, 5, 4, 3], dtype=np.int32) print(levenshtein(arr1, arr2)) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Parameters: ----------- arr1 : numpy.ndarray First input array arr2 : numpy.ndarray Second input array Returns: -------- int Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to handle empty string case d = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): d[i, 0] = i for j in range(n+1): d[0, j] = j # Fill in the rest of the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution (or 0 if characters match) if arr1[i-1] == arr2[j-1]: cost_subst = 0 else: cost_subst = 1 # Check if swap is possible and cheaper swap_cost = 2 if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): swap_cost = 2 else: swap_cost = float('inf') # Compute minimum of different operations d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost_subst, # substitution swap_cost + d[i-2, j-2] if swap_cost != float('inf') else float('inf') # swap ) return d[m, n] # Example usage if __name__ == "__main__": # Test the function arr1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) arr2 = np.array([1, 2, 5, 4, 3], dtype=np.int32) print(levenshtein(arr1, arr2)) |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | import numpy as np from numba import njit @njit(cache=True) def levenshtein(arr1, arr2): """ Compute Levenshtein distance between two numpy int arrays. Character swaps cost 2, insertions/deletions cost 1. Parameters: ----------- arr1 : numpy.ndarray First input array arr2 : numpy.ndarray Second input array Returns: -------- int Levenshtein distance """ # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to handle empty string case d = np.zeros((m+1, n+1), dtype=np.int32) # Initialize first row and column for i in range(m+1): d[i, 0] = i for j in range(n+1): d[0, j] = j # Fill in the rest of the matrix for i in range(1, m+1): for j in range(1, n+1): # Cost of substitution (or 0 if characters match) if arr1[i-1] == arr2[j-1]: cost_subst = 0 else: cost_subst = 1 # Check if swap is possible and cheaper swap_cost = 2 if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): swap_cost = 2 else: swap_cost = float('inf') # Compute minimum of different operations d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost_subst, # substitution swap_cost + d[i-2, j-2] if swap_cost != float('inf') else float('inf') # swap ) return d[m, n] # Example usage if __name__ == "__main__": # Test the function arr1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) arr2 = np.array([1, 2, 5, 4, 3], dtype=np.int32) print(levenshtein(arr1, arr2)) answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
2
Traceback (most recent call last):
File "/usr/src/app/main.py", line 78, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: False
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Here's a Numba implementation of the Levenshtein distance with character swaps costing 2, designed to work with NumPy integer arrays and compiled in nopython mode:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to dimensions to include initial base cases d = np.zeros((m + 1, n + 1), dtype=np.int32) # Initialize first row and column for i in range(m + 1): d[i, 0] = i for j in range(n + 1): d[0, j] = j # Compute Levenshtein distance with swap operation for i in range(1, m + 1): for j in range(1, n + 1): # Cost of substitution if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Standard operations: deletion, insertion, substitution d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost # substitution ) # Swap operation (transposition) if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[m, n] # Example usage if __name__ == "__main__": # Test the function a1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) a2 = np.array([1, 2, 4, 3, 5], dtype=np.int32) print(levenshtein(a1, a2)) # Should show lower cost due to swap |
1 2 3 4 5 | # Different types of comparisons a1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) a2 = np.array([1, 2, 4, 3, 5], dtype=np.int32) # Swap of 3 and 4 a3 = np.array([1, 2, 3, 5, 4], dtype=np.int32) # Another swap a4 = np.array([1, 2, 3, 4, 6], dtype=np.int32) # Substitution |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to dimensions to include initial base cases d = np.zeros((m + 1, n + 1), dtype=np.int32) # Initialize first row and column for i in range(m + 1): d[i, 0] = i for j in range(n + 1): d[0, j] = j # Compute Levenshtein distance with swap operation for i in range(1, m + 1): for j in range(1, n + 1): # Cost of substitution if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Standard operations: deletion, insertion, substitution d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost # substitution ) # Swap operation (transposition) if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[m, n] |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | import numpy as np from numba import njit @njit def levenshtein(arr1, arr2): # Get lengths of input arrays m, n = len(arr1), len(arr2) # Create distance matrix # Add 1 to dimensions to include initial base cases d = np.zeros((m + 1, n + 1), dtype=np.int32) # Initialize first row and column for i in range(m + 1): d[i, 0] = i for j in range(n + 1): d[0, j] = j # Compute Levenshtein distance with swap operation for i in range(1, m + 1): for j in range(1, n + 1): # Cost of substitution if arr1[i-1] == arr2[j-1]: cost = 0 else: cost = 1 # Standard operations: deletion, insertion, substitution d[i, j] = min( d[i-1, j] + 1, # deletion d[i, j-1] + 1, # insertion d[i-1, j-1] + cost # substitution ) # Swap operation (transposition) if (i > 1 and j > 1 and arr1[i-1] == arr2[j-2] and arr1[i-2] == arr2[j-1]): d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[m, n] answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Traceback (most recent call last):
File "/usr/src/app/main.py", line 50, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: False
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True