1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from evaluator import *

# Human-readable summary of what this test case measures.
DESCRIPTION = "Test if the model can guide a user through a series of git commands to merge a specific branch into the main branch."

# Category labels for this test case.
TAGS = ['bash', "git", "agent"]

# Opening prompt for the conversation. It asks the model to reply with exactly
# one shell command per round, prefixed with "INPUT:"; do_extract below parses
# replies in that format.
# NOTE(review): the prompt text contains the typo "placholder". It is left
# unchanged here because the string is sent to the model verbatim and editing
# it would change the test itself.
# NOTE(review): the canned `ls` output ("main.py a b") does not match the files
# created by setup_merge / setup_merge_conflict - confirm whether that is
# intentional.
question = """
You are in a git repository. I have two branches, main and some other one I forgot the name of. Merge the other branch into main.

On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:

INPUT: `pwd`

OUTPUT: `/usr/src/app`

INPUT: `ls`

OUTPUT: `main.py a b`

Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""


def test_ok_merge():
    """Check that the working directory holds the result of a clean merge.

    Reads ``math.py``, ``README.md`` and ``mul.py`` from the current working
    directory and compares their whitespace-stripped contents with the
    contents that setup_merge's two branches produce once ``other`` has been
    merged into ``main``.

    Returns:
        bool: True when all three files exist and match; False when any file
        is missing, unreadable, or has different contents.
    """
    expected = {
        "math.py": "x + y\nx - y",
        "README.md": "A repo about something\nYou run it like this",
        "mul.py": "x * y",
    }
    try:
        for filename, wanted in expected.items():
            # Context manager closes the handle even on an early return.
            with open(filename) as handle:
                if handle.read().strip() != wanted:
                    return False
    except (OSError, UnicodeError):
        # Missing or unreadable file simply means the merge has not happened
        # (yet); anything else (e.g. KeyboardInterrupt) should propagate.
        return False
    return True


def test_merge_conflict_resolved():
    """Check that the merge conflict was resolved in favour of ``other``.

    Reads ``sum.py`` and ``b`` from the current working directory. The check
    passes when ``sum.py`` holds the version committed on the ``other`` branch
    by setup_merge_conflict ("x + y") and ``b`` still holds "b"; both are
    compared after stripping surrounding whitespace.

    Returns:
        bool: True when both files exist and match; False when either file
        is missing, unreadable, or has different contents.
    """
    expected = {"sum.py": "x + y", "b": "b"}
    try:
        for filename, wanted in expected.items():
            # Context manager closes the handle even on an early return.
            with open(filename) as handle:
                if handle.read().strip() != wanted:
                    return False
    except (OSError, UnicodeError):
        # A missing or unreadable file counts as "not resolved"; unrelated
        # exceptions (e.g. KeyboardInterrupt) are allowed to propagate.
        return False
    return True


def setup_merge():
    """Build a repository whose two branches merge without conflicts.

    Runs one shell script in the current working directory that:
      * sets the global git user name, email and default branch (main);
      * creates math.py and README.md, initialises the repository and makes
        the initial commit;
      * on a new branch ``other``, extends math.py and README.md in two
        separate commits;
      * back on ``main``, adds mul.py in its own commit.

    Whatever the script writes to standard output is printed.
    """
    import os

    # Kept as a single script so every command runs in the same shell session.
    script = """
    git config --global user.name "Your Name"
    git config --global user.email "you@example.com"
    git config --global init.defaultBranch main
    echo "x + y" > math.py
    echo "A repo about something" > README.md
    git init
    git add math.py README.md
    git commit -a -m "Initial commit"
    git branch other
    git checkout other
    echo "x + y\nx - y" > math.py
    git add math.py
    git commit -a -m "Subtraction"
    echo "A repo about something\nYou run it like this" > README.md
    git add README.md
    git commit -a -m "README"
    git log
    git checkout main
    echo "x * y" > mul.py
    git add mul.py
    git commit -a -m "Multiplication"
    git log
    """
    captured = os.popen(script).read()
    print(captured)


def setup_merge_conflict():
    """Build a repository whose two branches conflict on sum.py.

    Runs one shell script in the current working directory that:
      * sets the global git user name, email and default branch (main);
      * creates sum.py and b, initialises the repository and makes the
        initial commit;
      * on a new branch ``other``, rewrites sum.py and commits it;
      * back on ``main``, rewrites sum.py differently and commits it.

    Whatever the script writes to standard output is printed.
    """
    import os

    # Kept as a single script so every command runs in the same shell session.
    script = """
    git config --global user.name "Your Name"
    git config --global user.email "you@example.com"
    git config --global init.defaultBranch main
    echo "a" > sum.py
    echo "b" > b
    git init
    git add sum.py b
    git commit -a -m "Initial commit"
    git branch other
    git checkout other
    echo "x + y" > sum.py
    git add sum.py
    git commit -a -m "addition"
    git log
    git checkout main
    echo "x - y" > sum.py
    git add sum.py
    git commit -a -m "subtraction"
    git log
    """
    captured = os.popen(script).read()
    print(captured)


def do_extract(x):
    """Pull the single shell command out of a model reply.

    When the reply contains an ``INPUT:`` marker, only the text between the
    first marker and the next one (or the end of the reply) is considered;
    otherwise the whole reply is considered.

    If that text contains a backtick, the text between the first backtick and
    the next backtick (or the end) is returned. Otherwise the first non-blank
    line is returned unchanged, so a command placed on the line after
    ``INPUT:`` is still found. If every line is blank, the first line is
    returned as-is.

    Args:
        x: Raw reply text from the model.

    Returns:
        str: The command text to run; may be empty or whitespace-only when
        the reply contains no usable command.
    """
    if 'INPUT:' in x:
        x = x.split("INPUT:")[1]

    pieces = x.split("`")
    if len(pieces) > 1:
        return pieces[1]

    # No backticks: fall back to plain lines, skipping leading blank ones so
    # a reply shaped like "INPUT:" + newline + command does not yield "".
    lines = x.split("\n")
    return next((line for line in lines if line.strip()), lines[0])


def do_prepare(x):
    """Wrap command output in the message that asks for the next command.

    Args:
        x: Output to report back; it is interpolated between backticks.

    Returns:
        str: The output report followed by a request for the next command,
        ending with the instruction to start the reply with ``INPUT:``.
    """
    print("Preparing to pass back", x)
    follow_up = "What is the exact command I should run next? Start your response with INPUT:"
    return f"I get an OUTPUT: `{x}`.\n\n{follow_up}"


# Clean-merge scenario. The stages are chained with `>>`; Setup, Echo,
# UntilDone, PyEvaluator, LLMConversation, PyFunc and TerminalRun are not
# defined in this file (they come in through the `evaluator` star import).
# NOTE(review): presumably UntilDone repeats the inner chain (model reply ->
# do_extract -> run in terminal -> do_prepare) until test_ok_merge passes or
# max_iters=6 rounds have elapsed, and the trailing PyEvaluator gives the final
# verdict - confirm against the evaluator module.
TestGitMerge = Setup(setup_merge) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok_merge), (LLMConversation(
) >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok_merge)

# Merge-conflict scenario: same pipeline shape, but the repository is built by
# setup_merge_conflict, success is judged by test_merge_conflict_resolved, and
# the loop is given max_iters=10.
TestGitMergeConflict = Setup(setup_merge_conflict) >> Echo() >> question >> UntilDone(PyEvaluator(test_merge_conflict_resolved), (LLMConversation(
) >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=10) >> PyEvaluator(test_merge_conflict_resolved)

# When executed as a script, run both scenarios and print each result.
if __name__ == "__main__":
    print(run_test(TestGitMerge))
    print(run_test(TestGitMergeConflict))