Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(seq1, seq2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: seq1: The first numpy integer array. seq2: The second numpy integer array. Returns: The Levenshtein distance between the two sequences. """ size_x = len(seq1) + 1 size_y = len(seq2) + 1 matrix = np.zeros((size_x, size_y), dtype=np.int32) for x in range(size_x): matrix[x, 0] = x for y in range(size_y): matrix[0, y] = y for x in range(1, size_x): for y in range(1, size_y): if seq1[x-1] == seq2[y-1]: matrix[x,y] = min( matrix[x-1, y] + 1, matrix[x-1, y-1], matrix[x, y-1] + 1 ) else: matrix[x,y] = min( matrix[x-1,y] + 1, matrix[x-1,y-1] + 2, # Cost of 2 for a substitution/swap matrix[x,y-1] + 1 ) return matrix[size_x - 1, size_y - 1] # Example usage: seq1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) seq2 = np.array([1, 2, 4, 3, 6], dtype=np.int32) distance = levenshtein(seq1, seq2) print(f"Levenshtein distance: {distance}") # Output should be 3 (1 insertion + 1 deletion + 1 swap/substitution(cost 2)) seq3 = np.array([1, 2, 3], dtype=np.int32) seq4 = np.array([1, 2, 3, 4], dtype=np.int32) distance2 = levenshtein(seq3, seq4) print(f"Levenshtein distance: {distance2}") # Output should be 1 seq5 = np.array([1, 2, 3], dtype=np.int32) seq6 = np.array([4, 5, 6], dtype=np.int32) distance3 = levenshtein(seq5, seq6) print(f"Levenshtein distance: {distance3}") # Output should be 6 (all substitutions with cost 2 each) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(seq1, seq2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: seq1: The first numpy integer array. seq2: The second numpy integer array. Returns: The Levenshtein distance between the two sequences. """ size_x = len(seq1) + 1 size_y = len(seq2) + 1 matrix = np.zeros((size_x, size_y), dtype=np.int32) for x in range(size_x): matrix[x, 0] = x for y in range(size_y): matrix[0, y] = y for x in range(1, size_x): for y in range(1, size_y): if seq1[x-1] == seq2[y-1]: matrix[x,y] = min( matrix[x-1, y] + 1, matrix[x-1, y-1], matrix[x, y-1] + 1 ) else: matrix[x,y] = min( matrix[x-1,y] + 1, matrix[x-1,y-1] + 2, # Cost of 2 for a substitution/swap matrix[x,y-1] + 1 ) return matrix[size_x - 1, size_y - 1] # Example usage: seq1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) seq2 = np.array([1, 2, 4, 3, 6], dtype=np.int32) distance = levenshtein(seq1, seq2) print(f"Levenshtein distance: {distance}") # Output should be 3 (1 insertion + 1 deletion + 1 swap/substitution(cost 2)) seq3 = np.array([1, 2, 3], dtype=np.int32) seq4 = np.array([1, 2, 3, 4], dtype=np.int32) distance2 = levenshtein(seq3, seq4) print(f"Levenshtein distance: {distance2}") # Output should be 1 seq5 = np.array([1, 2, 3], dtype=np.int32) seq6 = np.array([4, 5, 6], dtype=np.int32) distance3 = levenshtein(seq5, seq6) print(f"Levenshtein distance: {distance3}") # Output should be 6 (all substitutions with cost 2 each) |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(seq1, seq2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: seq1: The first numpy integer array. seq2: The second numpy integer array. Returns: The Levenshtein distance between the two sequences. """ size_x = len(seq1) + 1 size_y = len(seq2) + 1 matrix = np.zeros((size_x, size_y), dtype=np.int32) for x in range(size_x): matrix[x, 0] = x for y in range(size_y): matrix[0, y] = y for x in range(1, size_x): for y in range(1, size_y): if seq1[x-1] == seq2[y-1]: matrix[x,y] = min( matrix[x-1, y] + 1, matrix[x-1, y-1], matrix[x, y-1] + 1 ) else: matrix[x,y] = min( matrix[x-1,y] + 1, matrix[x-1,y-1] + 2, # Cost of 2 for a substitution/swap matrix[x,y-1] + 1 ) return matrix[size_x - 1, size_y - 1] # Example usage: seq1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) seq2 = np.array([1, 2, 4, 3, 6], dtype=np.int32) distance = levenshtein(seq1, seq2) print(f"Levenshtein distance: {distance}") # Output should be 3 (1 insertion + 1 deletion + 1 swap/substitution(cost 2)) seq3 = np.array([1, 2, 3], dtype=np.int32) seq4 = np.array([1, 2, 3, 4], dtype=np.int32) distance2 = levenshtein(seq3, seq4) print(f"Levenshtein distance: {distance2}") # Output should be 1 seq5 = np.array([1, 2, 3], dtype=np.int32) seq6 = np.array([4, 5, 6], dtype=np.int32) distance3 = levenshtein(seq5, seq6) print(f"Levenshtein distance: {distance3}") # Output should be 6 (all substitutions with cost 2 each) answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Levenshtein distance: 4
Levenshtein distance: 1
Levenshtein distance: 6
All tests passed
/usr/local/lib/python3.12/dist-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored
warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)
Testing if the previous output contains the string All tests passed
: True
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: True
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int64) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Explicitly handle the swap cost swap_cost = 2 if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1] else float('inf') d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost, # Substitution d[i-2, j-2] + swap_cost if i > 1 and j > 1 else float('inf')) # Transposition/Swap return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int64) s2 = np.array([1, 3, 2, 4], dtype=np.int64) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s3 = np.array([1, 2, 3, 4], dtype=np.int64) s4 = np.array([1, 3, 4], dtype=np.int64) distance = levenshtein(s3, s4) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 1 s5 = np.array([1, 2, 3, 4], dtype=np.int64) s6 = np.array([1, 4, 3, 2], dtype=np.int64) distance = levenshtein(s5, s6) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 (because of the swap now, not 4 insertions/deletions) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int64) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Explicitly handle the swap cost swap_cost = 2 if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1] else float('inf') d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost, # Substitution d[i-2, j-2] + swap_cost if i > 1 and j > 1 else float('inf')) # Transposition/Swap return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int64) s2 = np.array([1, 3, 2, 4], dtype=np.int64) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s3 = np.array([1, 2, 3, 4], dtype=np.int64) s4 = np.array([1, 3, 4], dtype=np.int64) distance = levenshtein(s3, s4) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 1 s5 = np.array([1, 2, 3, 4], dtype=np.int64) s6 = np.array([1, 4, 3, 2], dtype=np.int64) distance = levenshtein(s5, s6) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 (because of the swap now, not 4 insertions/deletions) |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int64) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Explicitly handle the swap cost swap_cost = 2 if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1] else float('inf') d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost, # Substitution d[i-2, j-2] + swap_cost if i > 1 and j > 1 else float('inf')) # Transposition/Swap return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int64) s2 = np.array([1, 3, 2, 4], dtype=np.int64) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s3 = np.array([1, 2, 3, 4], dtype=np.int64) s4 = np.array([1, 3, 4], dtype=np.int64) distance = levenshtein(s3, s4) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 1 s5 = np.array([1, 2, 3, 4], dtype=np.int64) s6 = np.array([1, 4, 3, 2], dtype=np.int64) distance = levenshtein(s5, s6) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 (because of the swap now, not 4 insertions/deletions) answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Levenshtein distance: 2
Levenshtein distance: 1
Levenshtein distance: 2
/usr/local/lib/python3.12/dist-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored
warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)
Traceback (most recent call last):
File "/usr/src/app/main.py", line 71, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: True
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Create a matrix to store distances d = np.zeros((n + 1, m + 1), dtype=np.int32) # Initialize first row and column for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost of substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition / Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 3, 2, 4], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 2, 4, 3], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3], dtype=np.int32) s2 = np.array([3, 2, 1], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: 2 |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Create a matrix to store distances d = np.zeros((n + 1, m + 1), dtype=np.int32) # Initialize first row and column for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost of substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition / Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 3, 2, 4], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 2, 4, 3], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3], dtype=np.int32) s2 = np.array([3, 2, 1], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: 2 |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Create a matrix to store distances d = np.zeros((n + 1, m + 1), dtype=np.int32) # Initialize first row and column for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost of substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition / Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) return d[n, m] # Example usage: s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 3, 2, 4], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3, 4], dtype=np.int32) s2 = np.array([1, 2, 4, 3], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: Levenshtein distance: 2 s1 = np.array([1, 2, 3], dtype=np.int32) s2 = np.array([3, 2, 1], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") # Output: 2 answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Levenshtein distance: 2
Levenshtein distance: 2
Levenshtein distance: 2
/usr/local/lib/python3.12/dist-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored
warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)
Traceback (most recent call last):
File "/usr/src/app/main.py", line 74, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: True
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost for substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition/Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) # Swap cost is 2 return d[n, m] # Example usage s1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) # Example arrays s2 = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") s3 = np.array([1, 2, 3, 4], dtype=np.int32) s4 = np.array([1, 3, 2, 4], dtype=np.int32) distance2 = levenshtein(s3, s4) print(f"Levenshtein distance with swap: {distance2}") # Should be 2 s5 = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32) # Longer example s6 = np.array([7, 2, 1, 4, 8, 9], dtype=np.int32) distance3 = levenshtein(s5, s6) print(f"Levenshtein distance longer example: {distance3}") |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost for substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition/Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) # Swap cost is 2 return d[n, m] # Example usage s1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) # Example arrays s2 = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") s3 = np.array([1, 2, 3, 4], dtype=np.int32) s4 = np.array([1, 3, 2, 4], dtype=np.int32) distance2 = levenshtein(s3, s4) print(f"Levenshtein distance with swap: {distance2}") # Should be 2 s5 = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32) # Longer example s6 = np.array([7, 2, 1, 4, 8, 9], dtype=np.int32) distance3 = levenshtein(s5, s6) print(f"Levenshtein distance longer example: {distance3}") |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy int arrays. Args: s1: The first numpy int array. s2: The second numpy int array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 # Cost for substitution d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: # Transposition/Swap d[i, j] = min(d[i, j], d[i-2, j-2] + 2) # Swap cost is 2 return d[n, m] # Example usage s1 = np.array([1, 2, 3, 4, 5], dtype=np.int32) # Example arrays s2 = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(s1, s2) print(f"Levenshtein distance: {distance}") s3 = np.array([1, 2, 3, 4], dtype=np.int32) s4 = np.array([1, 3, 2, 4], dtype=np.int32) distance2 = levenshtein(s3, s4) print(f"Levenshtein distance with swap: {distance2}") # Should be 2 s5 = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32) # Longer example s6 = np.array([7, 2, 1, 4, 8, 9], dtype=np.int32) distance3 = levenshtein(s5, s6) print(f"Levenshtein distance longer example: {distance3}") answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Levenshtein distance: 3
Levenshtein distance with swap: 2
Levenshtein distance longer example: 4
/usr/local/lib/python3.12/dist-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored
warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)
Traceback (most recent call last):
File "/usr/src/app/main.py", line 73, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: True
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
Write me a numba implementation of levensthin distance, taking two numpy int arrays. Call it levenshtein. Run it in nopython mode. Character swaps are cost 2.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: s1: The first numpy integer array. s2: The second numpy integer array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: d[i,j] = min(d[i,j], d[i-2, j-2] + 2) # Transposition (swap) return d[n, m] # Example usage: a = np.array([1, 2, 3, 4, 5], dtype=np.int32) b = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(a, b) print(f"Levenshtein distance: {distance}") # Output: 3 (1 substitution + 1 swap) c = np.array([1, 2, 3, 4], dtype=np.int32) d = np.array([1, 2, 4, 3], dtype=np.int32) distance2 = levenshtein(c,d) print(f"Levenshtein distance: {distance2}") # Output: 2 (swap) e = np.array([1, 2, 3], dtype=np.int32) f = np.array([1, 4, 3], dtype=np.int32) distance3 = levenshtein(e,f) print(f"Levenshtein distance: {distance3}") # Output: 1 (substitution) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: s1: The first numpy integer array. s2: The second numpy integer array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: d[i,j] = min(d[i,j], d[i-2, j-2] + 2) # Transposition (swap) return d[n, m] # Example usage: a = np.array([1, 2, 3, 4, 5], dtype=np.int32) b = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(a, b) print(f"Levenshtein distance: {distance}") # Output: 3 (1 substitution + 1 swap) c = np.array([1, 2, 3, 4], dtype=np.int32) d = np.array([1, 2, 4, 3], dtype=np.int32) distance2 = levenshtein(c,d) print(f"Levenshtein distance: {distance2}") # Output: 2 (swap) e = np.array([1, 2, 3], dtype=np.int32) f = np.array([1, 4, 3], dtype=np.int32) distance3 = levenshtein(e,f) print(f"Levenshtein distance: {distance3}") # Output: 1 (substitution) |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | import numpy as np from numba import njit @njit(nopython=True) def levenshtein(s1, s2): """ Calculates the Levenshtein distance between two numpy integer arrays. Args: s1: The first numpy integer array. s2: The second numpy integer array. Returns: The Levenshtein distance between s1 and s2. """ n = len(s1) m = len(s2) # Initialize the distance matrix d = np.zeros((n + 1, m + 1), dtype=np.int32) for i in range(n + 1): d[i, 0] = i for j in range(m + 1): d[0, j] = j # Calculate the distances for j in range(1, m + 1): for i in range(1, n + 1): if s1[i - 1] == s2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 d[i, j] = min(d[i - 1, j] + 1, # Deletion d[i, j - 1] + 1, # Insertion d[i - 1, j - 1] + substitution_cost) # Substitution if i > 1 and j > 1 and s1[i-1] == s2[j-2] and s1[i-2] == s2[j-1]: d[i,j] = min(d[i,j], d[i-2, j-2] + 2) # Transposition (swap) return d[n, m] # Example usage: a = np.array([1, 2, 3, 4, 5], dtype=np.int32) b = np.array([1, 3, 2, 4, 6], dtype=np.int32) distance = levenshtein(a, b) print(f"Levenshtein distance: {distance}") # Output: 3 (1 substitution + 1 swap) c = np.array([1, 2, 3, 4], dtype=np.int32) d = np.array([1, 2, 4, 3], dtype=np.int32) distance2 = levenshtein(c,d) print(f"Levenshtein distance: {distance2}") # Output: 2 (swap) e = np.array([1, 2, 3], dtype=np.int32) f = np.array([1, 4, 3], dtype=np.int32) distance3 = levenshtein(e,f) print(f"Levenshtein distance: {distance3}") # Output: 1 (substitution) answer = levenshtein(np.array([3, 5, 2, 4, 8, 7]), np.array([3, 4, 2, 4, 8, 7, 9])) expected = 3 assert answer == expected, f'Wrong answer; got {answer} instead of {expected}' print('All tests passed') |
Levenshtein distance: 3
Levenshtein distance: 2
Levenshtein distance: 1
/usr/local/lib/python3.12/dist-packages/numba/core/decorators.py:246: RuntimeWarning: nopython is set for njit and is ignored
warnings.warn('nopython is set for njit and is ignored', RuntimeWarning)
Traceback (most recent call last):
File "/usr/src/app/main.py", line 70, in <module>
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
^^^^^^^^^^^^^^^^^^
AssertionError: Wrong answer; got 2 instead of 3
Testing if the previous output contains the string All tests passed
: False
Testing if the previous output contains the string njit
: True
Testing if the previous output contains the string nopython
: True
Testing if the previous output contains the string import numba
: False
Testing if the previous output contains the string from numba
: True