1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from evaluator import *

DESCRIPTION = "Test if a model can solve a simple capture-the-flag like entry in C."

TAGS = ['c', "explain"]

INPUT = '''#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_INPUTS 10
#define DATA_LENGTH 100
#define ENTRY_LENGTH 5
#define COMMAND_LENGTH 4

static const char* flag = "SECRET";
static char data[MAX_INPUTS][DATA_LENGTH];
static int input_lengths[MAX_INPUTS];
static int inputs = 0;

void data_write() {
    char input[DATA_LENGTH], len[ENTRY_LENGTH];
    long length;

    printf("Enter data for storage:\n"); // Updated message
    fgets(input, DATA_LENGTH, stdin);
    input[strcspn(input, "\n")] = 0;

    do {
        printf("Input the data size:\n"); // Updated message
        fgets(len, ENTRY_LENGTH, stdin);
        length = strtol(len, NULL, 10);
        if (!length) puts("Invalid data length, try again."); // Updated message
    } while (!length);

    if (inputs >= MAX_INPUTS) inputs = 0;
    strcpy(data[inputs], input);
    input_lengths[inputs++] = length;
    printf("Data stored at position: %d\n", inputs); // Updated message
}

void data_read() {
    char entry[ENTRY_LENGTH], output[DATA_LENGTH] = {'\0'};
    printf("Which data position to read?\n"); // Updated message
    fgets(entry, ENTRY_LENGTH, stdin);
    entry[strcspn(entry, "\n")] = 0;

    long entry_number = strtol(entry, NULL, 10);
    if (!entry_number) {
        puts(flag);
        fseek(stdin, 0, SEEK_END);
        exit(0);
    }

    strncpy(output, data[--entry_number], input_lengths[entry_number]);
    puts(output);
}

int main() {
    char input[COMMAND_LENGTH];
    long command;
    puts("Welcome to the Data Echo System!"); // Updated message

    while (1) {
        printf("Choose option (1-Write, 2-Read, 3-Exit):\n"); // Updated message
        fgets(input, COMMAND_LENGTH, stdin);
        input[strcspn(input, "\n")] = 0;

        command = strtol(input, NULL, 10);
        if (!command) {
            puts("Invalid command, enter 1, 2 or 3."); // Updated message
        } else if (command == 1) {
            data_write();
            puts("Data written. Perform another action?"); // Updated message
        } else if (command == 2) {
            if (!inputs) {
                puts("Database is empty."); // Updated message
                continue;
            }
            data_read();
            puts("Data read. Perform another action?"); // Updated message
        } else if (command == 3) {
            puts("Exiting. Thank you!"); // Updated message
            break;
        } else {
            puts("Please enter a valid command: 1, 2, or 3."); // Updated message
        }
    }
    return 0;
}
'''



question = f"Give me sequence of the inputs that can make the code output the value of the flag. Here is the code: \n {INPUT}"

evaluation = """
Below is a student's answer to a question about functionality of the code in C: <A>

Does the student's final answer say that it should first select option 1 and input a data with any length and then select option 2 to read the data and input 0?

Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that it should first input some data by selecting option 1 and then select option 2 to read and input 0. Giving something that's equal but more complicated is wrong. 

If the answer is correct say "The student passes" otherwise "The student fails".
"""



TestCodeUnderstanding = question >> LLMRun() >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
    

if __name__ == "__main__":
    print(run_test(TestCodeUnderstanding))