from evaluator import *
DESCRIPTION = "Test if the model can write a Python script that merges a list of file paths into 16 files of approximately equal size."
TAGS = ['code', 'python']
question_easy = """
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
"""
question = """
I have a list of several thousand file paths in /tmp/files. Write a python script that merges the content int 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. "/tmp/files" is a list of file paths, one per line.
"""
def setup_fn():
import random
import os
os.mkdir("/tmp/a")
with open("/tmp/files", "w") as f:
for i in range(1000):
f.write("/tmp/a/file_%d\n" % i)
with open("/tmp/a/file_%d" % i, "w") as f2:
if i < 5:
f2.write("x" * random.randint(10000, 30000))
else:
f2.write("x" * random.randint(1, 1000))
def test():
import os
import numpy as np
sz = []
for i in range(16):
sz.append(len(open("/tmp/merged_%d" % i).read()))
size_before = sum(os.path.getsize("/tmp/a/file_%d"%i) for i in range(1000))
size_after = sum(sz)
# I ddn't say exactly how to merge, adding newlines is okay
if abs(size_before-size_after) > 2000:
return False
# easy to hit with proper merging, hard otherwise
# 10 trials suggest the range is usualy [10, 40]
return max(sz) - min(sz) < 200
TestMake16FilesEasy = Setup(setup_fn) >> question_easy >> LLMRun() >> ExtractCode(keep_main=True) >> Echo() >> PythonRun() >> PyEvaluator(test)
TestMake16Files = Setup(setup_fn) >> question >> LLMRun() >> ExtractCode(keep_main=True) >> Echo() >> PythonRun() >> PyEvaluator(test)
if __name__ == "__main__":
print(run_test(TestMake16FilesEasy))