1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113 | from evaluator import *
# Human-readable summary of what this test case checks
# (presumably surfaced by the evaluator harness -- confirm against `evaluator`).
DESCRIPTION = 'Test if a model can solve a simple capture-the-flag like entry in C.'

# Category labels for this test case (presumably used by the harness for filtering -- confirm).
TAGS = ["c", "explain"]
# C program that is embedded verbatim in the prompt sent to the model under test.
#
# This MUST be a raw string: the C source contains escape sequences (`\n` in the
# printf/strcspn literals and `'\0'` in the `output` initializer). In a non-raw
# Python string those are interpreted by Python, so the prompt would contain C
# string literals split across lines and a NUL byte instead of the C text as written.
#
# What the program does (relevant to the grading rubric): data_read() prints
# `flag` when the requested position parses to 0, and main() only calls
# data_read() when `inputs` is non-zero, i.e. after at least one write.
INPUT = r'''#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_INPUTS 10
#define DATA_LENGTH 100
#define ENTRY_LENGTH 5
#define COMMAND_LENGTH 4
static const char* flag = "SECRET";
static char data[MAX_INPUTS][DATA_LENGTH];
static int input_lengths[MAX_INPUTS];
static int inputs = 0;
void data_write() {
char input[DATA_LENGTH], len[ENTRY_LENGTH];
long length;
printf("Enter data for storage:\n"); // Updated message
fgets(input, DATA_LENGTH, stdin);
input[strcspn(input, "\n")] = 0;
do {
printf("Input the data size:\n"); // Updated message
fgets(len, ENTRY_LENGTH, stdin);
length = strtol(len, NULL, 10);
if (!length) puts("Invalid data length, try again."); // Updated message
} while (!length);
if (inputs >= MAX_INPUTS) inputs = 0;
strcpy(data[inputs], input);
input_lengths[inputs++] = length;
printf("Data stored at position: %d\n", inputs); // Updated message
}
void data_read() {
char entry[ENTRY_LENGTH], output[DATA_LENGTH] = {'\0'};
printf("Which data position to read?\n"); // Updated message
fgets(entry, ENTRY_LENGTH, stdin);
entry[strcspn(entry, "\n")] = 0;
long entry_number = strtol(entry, NULL, 10);
if (!entry_number) {
puts(flag);
fseek(stdin, 0, SEEK_END);
exit(0);
}
strncpy(output, data[--entry_number], input_lengths[entry_number]);
puts(output);
}
int main() {
char input[COMMAND_LENGTH];
long command;
puts("Welcome to the Data Echo System!"); // Updated message
while (1) {
printf("Choose option (1-Write, 2-Read, 3-Exit):\n"); // Updated message
fgets(input, COMMAND_LENGTH, stdin);
input[strcspn(input, "\n")] = 0;
command = strtol(input, NULL, 10);
if (!command) {
puts("Invalid command, enter 1, 2 or 3."); // Updated message
} else if (command == 1) {
data_write();
puts("Data written. Perform another action?"); // Updated message
} else if (command == 2) {
if (!inputs) {
puts("Database is empty."); // Updated message
continue;
}
data_read();
puts("Data read. Perform another action?"); // Updated message
} else if (command == 3) {
puts("Exiting. Thank you!"); // Updated message
break;
} else {
puts("Please enter a valid command: 1, 2, or 3."); // Updated message
}
}
return 0;
}
'''
# Prompt for the model under test: the task statement, a newline and a single
# space, then the embedded C program from INPUT.
question = (
    "Give me sequence of the inputs that can make the code output the value of the flag. "
    "Here is the code: \n "
    + INPUT
)
# Grading rubric handed to the evaluator LLM. It contains the placeholder `<A>`
# (presumably replaced with the tested model's answer by LLMRun -- confirm in
# `evaluator`). The verdict phrase "The student passes" is what the final
# SubstringEvaluator in the pipeline looks for.
# The empty first and last items reproduce the leading and trailing newlines.
evaluation = "\n".join([
    "",
    "Below is a student's answer to a question about functionality of the code in C: <A>",
    "Does the student's final answer say that it should first select option 1 and input a data with any length and then select option 2 to read the data and input 0?",
    "Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that it should first input some data by selecting option 1 and then select option 2 to read and input 0. Giving something that's equal but more complicated is wrong.",
    'If the answer is correct say "The student passes" otherwise "The student fails".',
    "",
])
# Test pipeline, chained left to right with `>>`:
#   1. the question is fed to LLMRun() (presumably the model under test -- confirm),
#   2. the result is fed to LLMRun(evaluation, llm=EVAL_LLM), i.e. the grading prompt,
#   3. SubstringEvaluator checks for the phrase "student passes".
TestCodeUnderstanding = (
    question
    >> LLMRun()
    >> LLMRun(evaluation, llm=EVAL_LLM)
    >> SubstringEvaluator("student passes")
)
# When the file is executed directly (not imported), run this single test case
# and print whatever run_test returns.
if __name__ == "__main__":
    print(run_test(TestCodeUnderstanding))
|