Write a function strides() that, given an array of tokens T as a NumPy array, uses the stride trick to return [T[0:50], T[10:60], T[20:70], ...]
Write a function strides() that, given an array of tokens T as a NumPy array, uses the stride trick to return [T[0:50], T[10:60], T[20:70], ...]
Okay, here's the Python function `strides` using `numpy.lib.stride_tricks.as_strided` to achieve the desired result.
import numpy as np
from numpy.lib.stride_tricks import as_strided
from numpy.typing import NDArray  # Optional: for type hinting


def strides(T: NDArray, window_size: int = 50, step: int = 10) -> NDArray:
    """
    Return overlapping fixed-length windows of a 1D array as a strided view.

    Row ``i`` of the result is ``T[i*step : i*step + window_size]``, so the
    result is equivalent to stacking
    [T[0:window_size], T[step:step+window_size],
    T[2*step:2*step+window_size], ...] without copying any data.
    Only complete windows are produced; trailing elements that do not fill
    a whole window are dropped.

    Args:
        T: The input 1D NumPy array (e.g., array of tokens). It may itself be
            a non-contiguous or reversed view such as ``a[::2]`` or ``a[::-1]``.
        window_size: The length of each window (default: 50). Python ints and
            NumPy integer scalars are both accepted.
        step: The offset between the starts of consecutive windows
            (default: 10). Python ints and NumPy integer scalars are accepted.

    Returns:
        A 2D read-only view of shape ``(num_windows, window_size)`` that
        shares memory with T. If T is shorter than ``window_size``, a newly
        allocated empty array of shape ``(0, window_size)`` with T's dtype.

    Raises:
        ValueError: If T is not a 1D NumPy array, or if window_size or step
            is not a positive integer.
    """
    # Input validation
    if not isinstance(T, np.ndarray) or T.ndim != 1:
        raise ValueError("Input T must be a 1D NumPy array.")
    # np.integer is accepted as well: sizes computed from array arithmetic
    # are NumPy scalars, which are not instances of the builtin int.
    if not isinstance(window_size, (int, np.integer)) or window_size <= 0:
        raise ValueError("window_size must be a positive integer.")
    if not isinstance(step, (int, np.integer)) or step <= 0:
        raise ValueError("step must be a positive integer.")

    # Normalise to Python ints so the byte-stride arithmetic below cannot
    # overflow a fixed-width NumPy scalar.
    window_size = int(window_size)
    step = int(step)

    n = T.shape[0]  # Total number of elements in T

    # No complete window fits: return an empty (0, window_size) array.
    if n < window_size:
        return np.empty((0, window_size), dtype=T.dtype)

    # The last window starts at (num_windows - 1) * step and must satisfy
    # (num_windows - 1) * step + window_size <= n.
    num_windows = (n - window_size) // step + 1

    # Use T's own element stride rather than T.itemsize. They are equal only
    # for contiguous input; for a view like a[::2] or a[::-1] the itemsize
    # would address the wrong elements (or memory outside the buffer).
    elem_stride = T.strides[0]

    # writeable=False because the windows overlap in memory: writing through
    # the view would change several windows at once.
    return as_strided(
        T,
        shape=(num_windows, window_size),
        strides=(step * elem_stride, elem_stride),
        writeable=False,
    )


# --- Example Usage ---

# Create a sample array of tokens (e.g., integers)
tokens = np.arange(100)  # Example: tokens 0 through 99

# Get the strided view
strided_tokens = strides(tokens, window_size=50, step=10)

print("Original array shape:", tokens.shape)
print("Strided array shape:", strided_tokens.shape)  # Expected: (6, 50) for 100 tokens

# Verify the content of the first few windows
print("\nFirst window (T[0:50]):")
print(strided_tokens[0])
print("Matches np.arange(0, 50):", np.array_equal(strided_tokens[0], np.arange(0, 50)))

print("\nSecond window (T[10:60]):")
print(strided_tokens[1])
print("Matches np.arange(10, 60):", np.array_equal(strided_tokens[1], np.arange(10, 60)))

print("\nThird window (T[20:70]):")
print(strided_tokens[2])
print("Matches np.arange(20, 70):", np.array_equal(strided_tokens[2], np.arange(20, 70)))

print("\nLast window (T[50:100]):")
print(strided_tokens[-1])
print("Matches np.arange(50, 100):", np.array_equal(strided_tokens[-1], np.arange(50, 100)))

# Example with a shorter array
short_tokens = np.arange(65)
strided_short = strides(short_tokens, window_size=50, step=10)
print("\nShort array (len 65) strided shape:", strided_short.shape)  # Expected: (2, 50)
print("Last window (T[10:60]):", strided_short[-1])
print("Matches np.arange(10, 60):", np.array_equal(strided_short[-1], np.arange(10, 60)))

# Example with an array too short for one window
too_short_tokens = np.arange(40)
strided_too_short = strides(too_short_tokens, window_size=50, step=10)
print("\nToo short array (len 40) strided shape:", strided_too_short.shape)  # Expected: (0, 50)
I extracted the following code from that output:
import numpy as np
from numpy.lib.stride_tricks import as_strided
from numpy.typing import NDArray  # Optional: for type hinting


def strides(T: NDArray, window_size: int = 50, step: int = 10) -> NDArray:
    """
    Build a zero-copy 2D view of sliding windows over a 1D array.

    The i-th row of the result equals ``T[i*step : i*step + window_size]``,
    i.e. the rows are [T[0:window_size], T[step:step+window_size],
    T[2*step:2*step+window_size], ...]. Windows that would run past the end
    of T are not included.

    Args:
        T: The input 1D NumPy array (e.g., array of tokens). Contiguous
            arrays and strided views (``a[::2]``, ``a[::-1]``) are both
            handled.
        window_size: Number of elements per window (default: 50). Accepts a
            Python int or a NumPy integer scalar.
        step: Distance, in elements, between consecutive window starts
            (default: 10). Accepts a Python int or a NumPy integer scalar.

    Returns:
        A read-only 2D view with shape ``(num_windows, window_size)`` backed
        by T's memory. When T holds fewer than ``window_size`` elements, a
        new empty array of shape ``(0, window_size)`` and T's dtype is
        returned instead.

    Raises:
        ValueError: If T is not a 1D NumPy array, or if window_size or step
            is not a positive integer.
    """
    # Input validation
    if not isinstance(T, np.ndarray) or T.ndim != 1:
        raise ValueError("Input T must be a 1D NumPy array.")
    # NumPy integer scalars (e.g. np.int64) are not builtin ints, so they are
    # allowed explicitly instead of being rejected as "not an integer".
    if not isinstance(window_size, (int, np.integer)) or window_size <= 0:
        raise ValueError("window_size must be a positive integer.")
    if not isinstance(step, (int, np.integer)) or step <= 0:
        raise ValueError("step must be a positive integer.")

    # Work with arbitrary-precision Python ints from here on.
    window_size = int(window_size)
    step = int(step)

    n = T.shape[0]  # Total number of elements in T

    # Too short for even one full window: empty result, correct column count.
    if n < window_size:
        return np.empty((0, window_size), dtype=T.dtype)

    # Largest k with (k - 1) * step + window_size <= n.
    num_windows = (n - window_size) // step + 1

    # Byte distance between neighbouring elements of T. This equals
    # T.itemsize only when T is contiguous; taking it from T.strides keeps
    # the result correct for sliced and reversed inputs.
    elem_stride = T.strides[0]

    # Rows advance by `step` elements, columns by one element. The view is
    # read-only because overlapping windows alias the same memory.
    return as_strided(
        T,
        shape=(num_windows, window_size),
        strides=(step * elem_stride, elem_stride),
        writeable=False,
    )


# --- Example Usage ---

# Create a sample array of tokens (e.g., integers)
tokens = np.arange(100)  # Example: tokens 0 through 99

# Get the strided view
strided_tokens = strides(tokens, window_size=50, step=10)

print("Original array shape:", tokens.shape)
print("Strided array shape:", strided_tokens.shape)  # Expected: (6, 50) for 100 tokens

# Verify the content of the first few windows
print("\nFirst window (T[0:50]):")
print(strided_tokens[0])
print("Matches np.arange(0, 50):", np.array_equal(strided_tokens[0], np.arange(0, 50)))

print("\nSecond window (T[10:60]):")
print(strided_tokens[1])
print("Matches np.arange(10, 60):", np.array_equal(strided_tokens[1], np.arange(10, 60)))

print("\nThird window (T[20:70]):")
print(strided_tokens[2])
print("Matches np.arange(20, 70):", np.array_equal(strided_tokens[2], np.arange(20, 70)))

print("\nLast window (T[50:100]):")
print(strided_tokens[-1])
print("Matches np.arange(50, 100):", np.array_equal(strided_tokens[-1], np.arange(50, 100)))

# Example with a shorter array
short_tokens = np.arange(65)
strided_short = strides(short_tokens, window_size=50, step=10)
print("\nShort array (len 65) strided shape:", strided_short.shape)  # Expected: (2, 50)
print("Last window (T[10:60]):", strided_short[-1])
print("Matches np.arange(10, 60):", np.array_equal(strided_short[-1], np.arange(10, 60)))

# Example with an array too short for one window
too_short_tokens = np.arange(40)
strided_too_short = strides(too_short_tokens, window_size=50, step=10)
print("\nToo short array (len 40) strided shape:", strided_too_short.shape)  # Expected: (0, 50)
Testing if the previous output contains the string as_strided
: True
Running the following program:
import numpy as np
from numpy.lib.stride_tricks import as_strided
from numpy.typing import NDArray  # Optional: for type hinting


def strides(T: NDArray, window_size: int = 50, step: int = 10) -> NDArray:
    """
    Create a strided (zero-copy) view of fixed-length windows over 1D array T.

    Each row of the result is a slice of T of length `window_size`, and the
    start of each slice advances by `step`:
    [T[0:window_size], T[step:step+window_size],
    T[2*step:2*step+window_size], ...]. Only windows that fit entirely
    inside T are returned.

    Args:
        T: The input 1D NumPy array (e.g., array of tokens). Non-contiguous
            1D views such as ``a[::2]`` or ``a[::-1]`` are supported.
        window_size: The length of each window/slice (default: 50); a Python
            int or NumPy integer scalar.
        step: The step between the starts of consecutive windows
            (default: 10); a Python int or NumPy integer scalar.

    Returns:
        A 2D, non-writeable view of shape ``(num_windows, window_size)`` into
        T's data. If T is too short to contain one full window, a new empty
        array of shape ``(0, window_size)`` with the same dtype as T.

    Raises:
        ValueError: If T is not a 1D NumPy array, or if window_size or step
            is not a positive integer.
    """
    # Input validation
    if not isinstance(T, np.ndarray) or T.ndim != 1:
        raise ValueError("Input T must be a 1D NumPy array.")
    # Accept np.integer too; values derived from NumPy computations are not
    # instances of the builtin int.
    if not isinstance(window_size, (int, np.integer)) or window_size <= 0:
        raise ValueError("window_size must be a positive integer.")
    if not isinstance(step, (int, np.integer)) or step <= 0:
        raise ValueError("step must be a positive integer.")

    # Convert once so later arithmetic uses unbounded Python ints.
    window_size = int(window_size)
    step = int(step)

    n = T.shape[0]  # Total number of elements in T

    # Handle the case where no full window fits.
    if n < window_size:
        return np.empty((0, window_size), dtype=T.dtype)

    # The last window starts at (num_windows - 1) * step and must end at or
    # before index n - 1.
    num_windows = (n - window_size) // step + 1

    # Derive byte strides from T.strides, not T.itemsize: for a sliced or
    # reversed T the two differ, and itemsize would select wrong elements
    # or read outside T's buffer.
    elem_stride = T.strides[0]

    # writeable=False for safety, since overlapping windows share memory.
    return as_strided(
        T,
        shape=(num_windows, window_size),
        strides=(step * elem_stride, elem_stride),
        writeable=False,
    )


# --- Example Usage ---

# Create a sample array of tokens (e.g., integers)
tokens = np.arange(100)  # Example: tokens 0 through 99

# Get the strided view
strided_tokens = strides(tokens, window_size=50, step=10)

print("Original array shape:", tokens.shape)
print("Strided array shape:", strided_tokens.shape)  # Expected: (6, 50) for 100 tokens

# Verify the content of the first few windows
print("\nFirst window (T[0:50]):")
print(strided_tokens[0])
print("Matches np.arange(0, 50):", np.array_equal(strided_tokens[0], np.arange(0, 50)))

print("\nSecond window (T[10:60]):")
print(strided_tokens[1])
print("Matches np.arange(10, 60):", np.array_equal(strided_tokens[1], np.arange(10, 60)))

print("\nThird window (T[20:70]):")
print(strided_tokens[2])
print("Matches np.arange(20, 70):", np.array_equal(strided_tokens[2], np.arange(20, 70)))

print("\nLast window (T[50:100]):")
print(strided_tokens[-1])
print("Matches np.arange(50, 100):", np.array_equal(strided_tokens[-1], np.arange(50, 100)))

# Example with a shorter array
short_tokens = np.arange(65)
strided_short = strides(short_tokens, window_size=50, step=10)
print("\nShort array (len 65) strided shape:", strided_short.shape)  # Expected: (2, 50)
print("Last window (T[10:60]):", strided_short[-1])
print("Matches np.arange(10, 60):", np.array_equal(strided_short[-1], np.arange(10, 60)))

# Example with an array too short for one window
too_short_tokens = np.arange(40)
strided_too_short = strides(too_short_tokens, window_size=50, step=10)
print("\nToo short array (len 40) strided shape:", strided_too_short.shape)  # Expected: (0, 50)


answer = strides(np.arange(100)).shape
expected = (6, 50)
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'

answer = strides(np.arange(100)).sum()
expected = 14850
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
print('All tests passed')
Original array shape: (100,)
Strided array shape: (6, 50)
First window (T[0:50]):
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
48 49]
Matches np.arange(0, 50): True
Second window (T[10:60]):
[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
58 59]
Matches np.arange(10, 60): True
Third window (T[20:70]):
[20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
68 69]
Matches np.arange(20, 70): True
Last window (T[50:100]):
[50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
98 99]
Matches np.arange(50, 100): True
Short array (len 65) strided shape: (2, 50)
Last window (T[10:60]): [10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
58 59]
Matches np.arange(10, 60): True
Too short array (len 40) strided shape: (0, 50)
All tests passed
Testing if the previous output contains the string All tests passed
: True