from evaluator import *
DESCRIPTION = "Test if the model can guide a user through a series of git commands to identify and cherrypick a specific commit from a branch onto the main branch."
TAGS = ['bash', "git", "agent"]
question = """
You are in a git repository. I have two branches, main and some other one I forgot the name of. Take just the first commit that differs between main and the other branch, and commit it onto main.
On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:
INPUT: `pwd`
OUTPUT: `/usr/src/app`
INPUT: `ls`
OUTPUT: `main.py a b`
Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""
def test_ok():
import os
try:
return open("e").read().strip() == "e" and not os.path.exists("d") and open("c").read().strip() == "c"
except:
return False
return success
def setup():
import os
print(os.popen("""
git config --global user.name "Your Name"
git config --global user.email "you@example.com"
git config --global init.defaultBranch main
echo "a" > a
echo "b" > b
git init
git add a b
git commit -a -m "Initial commit"
git branch other
git checkout other
echo "c" > c
git add c
git commit -a -m "Add c"
echo "d" > d
git add d
git commit -a -m "Add d"
git log
git checkout main
echo "e" > e
git add e
git commit -a -m "Add e"
git log
""").read())
def do_extract(x):
if 'INPUT:' in x:
x = x.split("INPUT:")[1]
try:
to_send = x.split("`")[1]
except:
to_send = x.split("\n")[0]
return to_send
def do_prepare(x):
print("Preparing to pass back", x)
return f"I get an OUTPUT: `{x}`.\n\nWhat is the exact command I should run next? Start your response with INPUT:"
TestGitCherrypick = Setup(setup) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok), (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok)
if __name__ == "__main__":
print(run_test(TestGitCherrypick))