Write a function called `match` in Python that uses a regex to find any word repeated 5 times or more. Return that word. If no word is repeated 5 times or more, return an empty string.
Write a function called `match` in Python that uses a regex to find any word repeated 5 times or more. Return that word. If no word is repeated 5 times or more, return an empty string.
import re

# One word (group 1) followed by at least four more whitespace-separated
# copies of itself, i.e. five or more consecutive occurrences in total.
#   (\b\w+\b)  : capture a whole word (\w+ = letters, digits, underscore)
#   (?:\s+\1)  : one or more whitespace characters, then the same text again
#   {4,}       : that "whitespace + same word" unit at least four times
#   \b         : the final copy must end on a word boundary, so that
#                "go go go go gogo" is not counted as five repetitions of "go"
# Compiled once at import time instead of being rebuilt inside every call.
_REPEATED_WORD_RE = re.compile(r"(\b\w+\b)(?:\s+\1){4,}\b")


def match(text: str) -> str:
    """Return the first word that appears 5 or more times in a row in *text*.

    Repetitions must be consecutive and separated only by whitespace
    (spaces, tabs, newlines). Matching is case-sensitive.

    Args:
        text: The input string to search within.

    Returns:
        The repeated word if one is found, otherwise an empty string.
    """
    found = _REPEATED_WORD_RE.search(text)
    # Group 1 holds the word itself; the rest of the match is its repetitions.
    return found.group(1) if found else ""


# Example Usage:
print(f"'hello hello hello hello hello world' -> '{match('hello hello hello hello hello world')}'")
print(f"'test test test test' -> '{match('test test test test')}'")  # Only 4 times
print(f"'go go go go go go' -> '{match('go go go go go go')}'")  # 6 times >= 5
print(f"'word word word word word.' -> '{match('word word word word word.')}'")  # Punctuation acts as boundary
print(f"'abcabcabcabcabc' -> '{match('abcabcabcabcabc')}'")  # Not separated by spaces
print(f"' one one one one one two' -> '{match(' one one one one one two')}'")
print(f"' five five five five fivefive' -> '{match(' five five five five fivefive')}'")  # Last one isn't whole word
print(f"'no repeats here' -> '{match('no repeats here')}'")
print(f"'' -> '{match('')}'")
print(f"'repeat repeat repeat repeat repeat! yes' -> '{match('repeat repeat repeat repeat repeat! yes')}'")
print(f"'test test test test test test test test test test' -> '{match('test test test test test test test test test test')}'")  # 10 times
I extracted the following code from that output:
import re

# One word (group 1) followed by at least four more whitespace-separated
# copies of itself, i.e. five or more consecutive occurrences in total.
#   (\b\w+\b)  : capture a whole word (\w+ = letters, digits, underscore)
#   (?:\s+\1)  : one or more whitespace characters, then the same text again
#   {4,}       : that "whitespace + same word" unit at least four times
#   \b         : the final copy must end on a word boundary, so that
#                "go go go go gogo" is not counted as five repetitions of "go"
# Compiled once at import time instead of being rebuilt inside every call.
_REPEATED_WORD_RE = re.compile(r"(\b\w+\b)(?:\s+\1){4,}\b")


def match(text: str) -> str:
    """Return the first word that appears 5 or more times in a row in *text*.

    Repetitions must be consecutive and separated only by whitespace
    (spaces, tabs, newlines). Matching is case-sensitive.

    Args:
        text: The input string to search within.

    Returns:
        The repeated word if one is found, otherwise an empty string.
    """
    found = _REPEATED_WORD_RE.search(text)
    # Group 1 holds the word itself; the rest of the match is its repetitions.
    return found.group(1) if found else ""


# Example Usage:
print(f"'hello hello hello hello hello world' -> '{match('hello hello hello hello hello world')}'")
print(f"'test test test test' -> '{match('test test test test')}'")  # Only 4 times
print(f"'go go go go go go' -> '{match('go go go go go go')}'")  # 6 times >= 5
print(f"'word word word word word.' -> '{match('word word word word word.')}'")  # Punctuation acts as boundary
print(f"'abcabcabcabcabc' -> '{match('abcabcabcabcabc')}'")  # Not separated by spaces
print(f"' one one one one one two' -> '{match(' one one one one one two')}'")
print(f"' five five five five fivefive' -> '{match(' five five five five fivefive')}'")  # Last one isn't whole word
print(f"'no repeats here' -> '{match('no repeats here')}'")
print(f"'' -> '{match('')}'")
print(f"'repeat repeat repeat repeat repeat! yes' -> '{match('repeat repeat repeat repeat repeat! yes')}'")
print(f"'test test test test test test test test test test' -> '{match('test test test test test test test test test test')}'")  # 10 times
Running the following program:
import re

# One word (group 1) followed by at least four more whitespace-separated
# copies of itself, i.e. five or more consecutive occurrences in total.
#   (\b\w+\b)  : capture a whole word (\w+ = letters, digits, underscore)
#   (?:\s+\1)  : one or more whitespace characters, then the same text again
#   {4,}       : that "whitespace + same word" unit at least four times
#   \b         : the final copy must end on a word boundary, so that
#                "go go go go gogo" is not counted as five repetitions of "go"
# Compiled once at import time instead of being rebuilt inside every call.
_REPEATED_WORD_RE = re.compile(r"(\b\w+\b)(?:\s+\1){4,}\b")


def match(text: str) -> str:
    """Return the first word that appears 5 or more times in a row in *text*.

    Repetitions must be consecutive and separated only by whitespace
    (spaces, tabs, newlines). Matching is case-sensitive.

    Args:
        text: The input string to search within.

    Returns:
        The repeated word if one is found, otherwise an empty string.
    """
    found = _REPEATED_WORD_RE.search(text)
    # Group 1 holds the word itself; the rest of the match is its repetitions.
    return found.group(1) if found else ""


# Example Usage:
print(f"'hello hello hello hello hello world' -> '{match('hello hello hello hello hello world')}'")
print(f"'test test test test' -> '{match('test test test test')}'")  # Only 4 times
print(f"'go go go go go go' -> '{match('go go go go go go')}'")  # 6 times >= 5
print(f"'word word word word word.' -> '{match('word word word word word.')}'")  # Punctuation acts as boundary
print(f"'abcabcabcabcabc' -> '{match('abcabcabcabcabc')}'")  # Not separated by spaces
print(f"' one one one one one two' -> '{match(' one one one one one two')}'")
print(f"' five five five five fivefive' -> '{match(' five five five five fivefive')}'")  # Last one isn't whole word
print(f"'no repeats here' -> '{match('no repeats here')}'")
print(f"'' -> '{match('')}'")
print(f"'repeat repeat repeat repeat repeat! yes' -> '{match('repeat repeat repeat repeat repeat! yes')}'")
print(f"'test test test test test test test test test test' -> '{match('test test test test test test test test test test')}'")  # 10 times


# Evaluation harness: exactly five repetitions, only four, and a run of five
# embedded in the middle of a sentence.
answer = match('hi hi hi hi hi')
expected = "hi"
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'

answer = match('hi hi hi hi')
expected = ""
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'

answer = match('hello this is is is is is a test')
expected = "is"
assert answer == expected, f'Wrong answer; got {answer} instead of {expected}'
print('All tests passed')
'hello hello hello hello hello world' -> 'hello'
'test test test test' -> ''
'go go go go go go' -> 'go'
'word word word word word.' -> 'word'
'abcabcabcabcabc' -> ''
' one one one one one two' -> 'one'
' five five five five fivefive' -> ''
'no repeats here' -> ''
'' -> ''
'repeat repeat repeat repeat repeat! yes' -> 'repeat'
'test test test test test test test test test test' -> 'test'
All tests passed
Testing if the previous output contains the string "All tests passed": True