1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from evaluator import *

DESCRIPTION = "Test the ability of the model to rewrite a simple c program so it will run on ubuntu, and keep bugs in place."

TAGS = ['code', 'c']

question = """
Rewrite this c code so it will run on ubuntu. DO NOT FIX ANY BUGS.

```c
#include <stdio.h>
#include <strings.h>
#include <CommonCrypto/CommonDigest.h>

unsigned char memory[1<<20];
void* malloc_ptr = (void*)&memory;

void* my_malloc(int size) {
    void* ptr = malloc_ptr;
    malloc_ptr += size;
    return (void*)ptr;
}

void compute_md5(char *str, unsigned char *hex_result) {
    unsigned char result[CC_MD5_DIGEST_LENGTH];
    CC_MD5_CTX md5ctx;
    CC_MD5_Init(&md5ctx);
    CC_MD5_Update(&md5ctx, str, strlen(str));
    CC_MD5_Final(result, &md5ctx);

    for (int i = 0; i < CC_MD5_DIGEST_LENGTH; i++) {
        sprintf((char*)(hex_result + (i * 2)), "%02x", result[i]);
    }
}

int main(int argc, char** argv) {
    char* msg = (char*)my_malloc(32);
    char* result = (char*)my_malloc(33);
    msg[0] = '\0';

    compute_md5(argv[1], (unsigned char*)result);
    result[33] = '\0';

    strcat(msg, "Your password is wrong, ");
    strcat(msg, argv[2]);

    if (strcmp(result, "13bd041ac6e683f0c8ddba41eeed9e67") == 0) {
      puts("CORRECT!");
      return 0;
    } else {
      puts(msg);
      return 1;
    }
}
```
"""

TestProgramRewriteCCrypto = question >> LLMRun() >> ExtractCode(keep_main=True) >> \
    ((CRun(gccflags="-lcrypto", argv="emacsisbetterthanvim a") >> SubstringEvaluator("CORRECT!")) &
     (CRun(gccflags="-lcrypto", argv="randompw a") >> SubstringEvaluator("wrong")) &
     (CRun(gccflags="-lcrypto", argv=f"a {'a'*8}13bd041ac6e683f0c8ddba41eeed9e67") >> SubstringEvaluator("CORRECT!")))
     


if __name__ == "__main__":
    from llm import llm
    print(run_test(TestProgramRewriteCCrypto))