1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122 | from evaluator import *
# One-line summary of what this benchmark case measures.
DESCRIPTION = (
    "Test if the model can guide a user through a series of git commands "
    "to merge a specific branch into the main branch."
)

# Labels attached to this benchmark case.
TAGS = ["bash", "git", "agent"]

# Opening prompt of the conversation. It asks for exactly one shell command
# per turn, prefixed with "INPUT:", which is the marker do_extract() splits on.
# The wording is runtime text sent to the model and is kept verbatim.
question = """
You are in a git repository. I have two branches, main and some other one I forgot the name of. Merge the other branch into main.
On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:
INPUT: `pwd`
OUTPUT: `/usr/src/app`
INPUT: `ls`
OUTPUT: `main.py a b`
Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""
def test_ok_merge():
    """Report whether the working directory holds the fully merged file set.

    Reads math.py, README.md and mul.py relative to the current working
    directory and compares each one, with surrounding whitespace stripped,
    against the content that exists only once both branches created by
    setup_merge() have been combined.

    Returns:
        True when all three files exist and match; False when any file is
        missing, unreadable, or has different content.
    """
    # Expected post-merge content, keyed by path.
    expected = {
        "math.py": "x + y\nx - y",
        "README.md": "A repo about something\nYou run it like this",
        "mul.py": "x * y",
    }
    try:
        for path, wanted in expected.items():
            # The context manager closes each handle promptly instead of
            # leaving it to the garbage collector.
            with open(path) as handle:
                if handle.read().strip() != wanted:
                    return False
    except (OSError, UnicodeDecodeError):
        # A missing or unreadable file simply means "not merged yet".
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    return True
def test_merge_conflict_resolved():
    """Report whether the conflicting merge ended with the expected files.

    Reads sum.py and b relative to the current working directory. The check
    passes when sum.py (stripped) is exactly "x + y" and b (stripped) is
    exactly "b".

    Returns:
        True when both files exist and match; False when either file is
        missing, unreadable, or has different content.
    """
    # Expected content after the conflict has been resolved, keyed by path.
    expected = {
        "sum.py": "x + y",
        "b": "b",
    }
    try:
        for path, wanted in expected.items():
            # The context manager closes each handle promptly instead of
            # leaving it to the garbage collector.
            with open(path) as handle:
                if handle.read().strip() != wanted:
                    return False
    except (OSError, UnicodeDecodeError):
        # A missing or unreadable file simply means "not resolved yet".
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    return True
def setup_merge():
    """Build a repository whose two branches can be merged without conflict.

    The shell script below sets a global git identity and default branch
    name, commits math.py and README.md on main, then creates the branch
    "other" with two further commits (one extending math.py, one extending
    README.md). Back on main it commits a new file mul.py, so the branches
    diverge without touching the same file.

    Whatever the script writes to standard output is printed.
    """
    # Imported locally so the function body is self-contained.
    import os

    script = """
git config --global user.name "Your Name"
git config --global user.email "you@example.com"
git config --global init.defaultBranch main
echo "x + y" > math.py
echo "A repo about something" > README.md
git init
git add math.py README.md
git commit -a -m "Initial commit"
git branch other
git checkout other
echo "x + y\nx - y" > math.py
git add math.py
git commit -a -m "Subtraction"
echo "A repo about something\nYou run it like this" > README.md
git add README.md
git commit -a -m "README"
git log
git checkout main
echo "x * y" > mul.py
git add mul.py
git commit -a -m "Multiplication"
git log
"""
    transcript = os.popen(script).read()
    print(transcript)
def setup_merge_conflict():
    """Build a repository whose two branches disagree about sum.py.

    The shell script below sets a global git identity and default branch
    name and commits sum.py and b on main. The branch "other" then rewrites
    sum.py as "x + y", while main rewrites the same file as "x - y". Both
    branches therefore change the same line differently, so merging them is
    expected to produce a conflict in sum.py.

    Whatever the script writes to standard output is printed.
    """
    # Imported locally so the function body is self-contained.
    import os

    script = """
git config --global user.name "Your Name"
git config --global user.email "you@example.com"
git config --global init.defaultBranch main
echo "a" > sum.py
echo "b" > b
git init
git add sum.py b
git commit -a -m "Initial commit"
git branch other
git checkout other
echo "x + y" > sum.py
git add sum.py
git commit -a -m "addition"
git log
git checkout main
echo "x - y" > sum.py
git add sum.py
git commit -a -m "subtraction"
git log
"""
    transcript = os.popen(script).read()
    print(transcript)
def do_extract(x):
    """Pull the shell command out of a model reply.

    Args:
        x: The reply text. If it contains "INPUT:", only the text after the
            first occurrence of that marker is considered.

    Returns:
        The text between the first pair of backticks when the reply has any
        backtick; otherwise the first non-blank line with surrounding
        whitespace removed. An empty string if there is nothing to extract.
    """
    if 'INPUT:' in x:
        x = x.split("INPUT:")[1]
    try:
        to_send = x.split("`")[1]
    except IndexError:
        # No backtick at all. Strip first so a command placed on the line
        # after "INPUT:" is still found instead of yielding an empty string.
        to_send = x.strip().split("\n")[0].strip()
    return to_send
def do_prepare(x):
    """Wrap command output in the follow-up prompt for the next turn.

    Args:
        x: The output to report back; it is interpolated between backticks.

    Returns:
        A message quoting ``x`` as the OUTPUT and asking for the next
        command, again prefixed with "INPUT:".
    """
    # Log what is about to be handed back.
    print("Preparing to pass back", x)
    follow_up = (
        f"I get an OUTPUT: `{x}`.\n\n"
        "What is the exact command I should run next? "
        "Start your response with INPUT:"
    )
    return follow_up
# Conflict-free scenario: prepare the repository with setup_merge, send the
# opening question, then loop the model/terminal exchange with a cap of
# 6 iterations, and finally score the result with test_ok_merge.
# NOTE(review): presumably UntilDone stops early once its first argument
# passes -- confirm against the evaluator module.
TestGitMerge = (
    Setup(setup_merge)
    >> Echo()
    >> question
    >> UntilDone(
        PyEvaluator(test_ok_merge),
        # One round: ask the model, extract its command, run it, and turn
        # the output into the next prompt.
        (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)),
        max_iters=6,
    )
    >> PyEvaluator(test_ok_merge)
)
# Conflicting scenario: prepare the repository with setup_merge_conflict,
# send the opening question, then loop the model/terminal exchange with a
# cap of 10 iterations, and finally score the result with
# test_merge_conflict_resolved.
# NOTE(review): presumably UntilDone stops early once its first argument
# passes -- confirm against the evaluator module.
TestGitMergeConflict = (
    Setup(setup_merge_conflict)
    >> Echo()
    >> question
    >> UntilDone(
        PyEvaluator(test_merge_conflict_resolved),
        # One round: ask the model, extract its command, run it, and turn
        # the output into the next prompt.
        (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)),
        max_iters=10,
    )
    >> PyEvaluator(test_merge_conflict_resolved)
)
if __name__ == "__main__":
    # When executed as a script, run both scenarios in order and print
    # whatever run_test returns for each.
    for scenario in (TestGitMerge, TestGitMergeConflict):
        print(run_test(scenario))
|