I have set up the Docker container to run the model evaluation.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout. {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1} Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout. {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1} Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
use std::collections::HashMap;
use std::fs;
use std::io::ErrorKind;
use std::process;
use std::sync::Arc;
use std::thread;

const FILE_PATH: &str = "/tmp/file.txt";
const NUM_THREADS: usize = 4;

/// Serializes word counts as a JSON object, e.g. {"word1": 10, "word2": 5}.
/// Keys have backslashes and double quotes escaped; values print as bare
/// integers. Key order follows HashMap iteration order (unspecified), which
/// is fine because the caller merges the dicts afterwards.
fn format_json(map: &HashMap<String, usize>) -> String {
    let mut result = String::with_capacity(2 + map.len() * 16);
    result.push('{');
    for (i, (key, value)) in map.iter().enumerate() {
        if i > 0 {
            result.push_str(", ");
        }
        // Minimal JSON string escaping: backslash first, then double quote.
        let escaped_key = key.replace('\\', "\\\\").replace('"', "\\\"");
        result.push('"');
        result.push_str(&escaped_key);
        result.push_str("\": ");
        result.push_str(&value.to_string());
    }
    result.push('}');
    result
}

/// Counts whitespace-separated words in a slice. Punctuation stays attached
/// to its word ("is," and "is" are distinct), matching the spec's example.
fn count_words(text_slice: &str) -> HashMap<String, usize> {
    let mut counts = HashMap::new();
    // split_whitespace never yields empty strings, so no emptiness check needed.
    for word in text_slice.split_whitespace() {
        *counts.entry(word.to_string()).or_insert(0) += 1;
    }
    counts
}

fn main() {
    // 1. Read the whole file; Arc lets the worker threads share it without copying.
    let content = match fs::read_to_string(FILE_PATH) {
        Ok(text) => Arc::new(text),
        Err(e) => {
            match e.kind() {
                ErrorKind::NotFound => eprintln!("Error: File not found at '{}'", FILE_PATH),
                ErrorKind::PermissionDenied => {
                    eprintln!("Error: Permission denied reading file '{}'", FILE_PATH)
                }
                _ => eprintln!("Error reading file '{}': {}", FILE_PATH, e),
            }
            process::exit(1);
        }
    };

    if content.is_empty() {
        // Empty file: still emit exactly NUM_THREADS lines, one empty dict each.
        for _ in 0..NUM_THREADS {
            println!("{{}}");
        }
        return;
    }

    let total_len = content.len();
    let mut thread_handles = vec![];
    let mut chunk_start = 0;

    // 2. Divide the file into ~equal byte ranges and spawn one thread per range.
    for i in 0..NUM_THREADS {
        let content_arc = Arc::clone(&content);
        // Approximate end of this chunk, in bytes.
        let mut chunk_end = (total_len * (i + 1)) / NUM_THREADS;

        if i < NUM_THREADS - 1 && chunk_end < total_len {
            // Byte arithmetic can land inside a multi-byte UTF-8 character;
            // slicing there panics, so snap forward to the next char boundary.
            while chunk_end < total_len && !content_arc.is_char_boundary(chunk_end) {
                chunk_end += 1;
            }
            // Extend to just past the next whitespace so no word is split
            // across two chunks. Advance by the whitespace char's full UTF-8
            // width (it may be multi-byte, e.g. U+00A0).
            match content_arc[chunk_end..]
                .char_indices()
                .find(|&(_, c)| c.is_whitespace())
            {
                Some((pos, ws)) => chunk_end += pos + ws.len_utf8(),
                None => chunk_end = total_len, // no whitespace left: take the rest
            }
        } else {
            // The last chunk always runs exactly to the end of the file.
            chunk_end = total_len;
        }

        // Guard against start > end (possible for tiny files / many threads).
        let start = chunk_start.min(chunk_end);
        let end = chunk_end;

        let handle = thread::spawn(move || {
            let slice = if start < end { &content_arc[start..end] } else { "" };
            let word_counts = count_words(slice);
            // println! locks stdout per call, so each dict lands on its own line.
            println!("{}", format_json(&word_counts));
        });
        thread_handles.push(handle);

        chunk_start = chunk_end;
        // If the file is exhausted early, print empty dicts for the remaining
        // workers so the output always has exactly NUM_THREADS lines.
        if chunk_start >= total_len && i < NUM_THREADS - 1 {
            for _ in (i + 1)..NUM_THREADS {
                println!("{{}}");
            }
            break;
        }
    }

    // 3. Wait for all workers; report (but don't mask) any panicked thread.
    for handle in thread_handles {
        if let Err(e) = handle.join() {
            eprintln!("Error: A worker thread panicked: {:?}", e);
        }
    }
}
1 2 | echo "This is a test file. This file is for testing the word count program. Count count count is is." > /tmp/file.txt |
1 2 | echo "This is is is, an example" > /tmp/file.txt |
1 2 | rustc word_count.rs |
1 2 | ./word_count |
1 2 3 4 | {"This": 1} {"is": 1} {"is,": 1, "is": 1} {"an": 1, "example": 1} |
1 2 3 4 | {"This": 1, "is": 1} {"is": 1, "is,": 1} {"an": 1} {"example": 1} |
I extracted the following code from that output:
use std::collections::HashMap;
use std::fs;
use std::io::ErrorKind;
use std::process;
use std::sync::Arc;
use std::thread;

const FILE_PATH: &str = "/tmp/file.txt";
const NUM_THREADS: usize = 4;

/// Serializes word counts as a JSON object, e.g. {"word1": 10, "word2": 5}.
/// Keys have backslashes and double quotes escaped; values print as bare
/// integers. Key order follows HashMap iteration order (unspecified), which
/// is fine because the caller merges the dicts afterwards.
fn format_json(map: &HashMap<String, usize>) -> String {
    let mut result = String::with_capacity(2 + map.len() * 16);
    result.push('{');
    for (i, (key, value)) in map.iter().enumerate() {
        if i > 0 {
            result.push_str(", ");
        }
        // Minimal JSON string escaping: backslash first, then double quote.
        let escaped_key = key.replace('\\', "\\\\").replace('"', "\\\"");
        result.push('"');
        result.push_str(&escaped_key);
        result.push_str("\": ");
        result.push_str(&value.to_string());
    }
    result.push('}');
    result
}

/// Counts whitespace-separated words in a slice. Punctuation stays attached
/// to its word ("is," and "is" are distinct), matching the spec's example.
fn count_words(text_slice: &str) -> HashMap<String, usize> {
    let mut counts = HashMap::new();
    // split_whitespace never yields empty strings, so no emptiness check needed.
    for word in text_slice.split_whitespace() {
        *counts.entry(word.to_string()).or_insert(0) += 1;
    }
    counts
}

fn main() {
    // 1. Read the whole file; Arc lets the worker threads share it without copying.
    let content = match fs::read_to_string(FILE_PATH) {
        Ok(text) => Arc::new(text),
        Err(e) => {
            match e.kind() {
                ErrorKind::NotFound => eprintln!("Error: File not found at '{}'", FILE_PATH),
                ErrorKind::PermissionDenied => {
                    eprintln!("Error: Permission denied reading file '{}'", FILE_PATH)
                }
                _ => eprintln!("Error reading file '{}': {}", FILE_PATH, e),
            }
            process::exit(1);
        }
    };

    if content.is_empty() {
        // Empty file: still emit exactly NUM_THREADS lines, one empty dict each.
        for _ in 0..NUM_THREADS {
            println!("{{}}");
        }
        return;
    }

    let total_len = content.len();
    let mut thread_handles = vec![];
    let mut chunk_start = 0;

    // 2. Divide the file into ~equal byte ranges and spawn one thread per range.
    for i in 0..NUM_THREADS {
        let content_arc = Arc::clone(&content);
        // Approximate end of this chunk, in bytes.
        let mut chunk_end = (total_len * (i + 1)) / NUM_THREADS;

        if i < NUM_THREADS - 1 && chunk_end < total_len {
            // Byte arithmetic can land inside a multi-byte UTF-8 character;
            // slicing there panics, so snap forward to the next char boundary.
            while chunk_end < total_len && !content_arc.is_char_boundary(chunk_end) {
                chunk_end += 1;
            }
            // Extend to just past the next whitespace so no word is split
            // across two chunks. Advance by the whitespace char's full UTF-8
            // width (it may be multi-byte, e.g. U+00A0).
            match content_arc[chunk_end..]
                .char_indices()
                .find(|&(_, c)| c.is_whitespace())
            {
                Some((pos, ws)) => chunk_end += pos + ws.len_utf8(),
                None => chunk_end = total_len, // no whitespace left: take the rest
            }
        } else {
            // The last chunk always runs exactly to the end of the file.
            chunk_end = total_len;
        }

        // Guard against start > end (possible for tiny files / many threads).
        let start = chunk_start.min(chunk_end);
        let end = chunk_end;

        let handle = thread::spawn(move || {
            let slice = if start < end { &content_arc[start..end] } else { "" };
            let word_counts = count_words(slice);
            // println! locks stdout per call, so each dict lands on its own line.
            println!("{}", format_json(&word_counts));
        });
        thread_handles.push(handle);

        chunk_start = chunk_end;
        // If the file is exhausted early, print empty dicts for the remaining
        // workers so the output always has exactly NUM_THREADS lines.
        if chunk_start >= total_len && i < NUM_THREADS - 1 {
            for _ in (i + 1)..NUM_THREADS {
                println!("{{}}");
            }
            break;
        }
    }

    // 3. Wait for all workers; report (but don't mask) any panicked thread.
    for handle in thread_handles {
        if let Err(e) = handle.join() {
            eprintln!("Error: A worker thread panicked: {:?}", e);
        }
    }
}
Below is some student's code. I asked them to parallelize some code. Do not judge if it is correct, I have test cases for that. Logic errors are okay. So are synatx errors. Just tell me if they tried to parallelize it. use std::collections::HashMap; use std::fs; use std::io::{self, ErrorKind}; use std::sync::Arc; use std::thread; use std::process; const FILE_PATH: &str = "/tmp/file.txt"; const NUM_THREADS: usize = 4; /// Formats a HashMap<String, usize> into a JSON string manually. /// Example: {"word1": 10, "word2": 5} fn format_json(map: &HashMap<String, usize>) -> String { if map.is_empty() { return "{}".to_string(); } let mut result = String::new(); result.push('{'); let mut first = true; for (key, value) in map.iter() { if !first { result.push_str(", "); } // Basic JSON string escaping for the key (only handles quotes for simplicity) let escaped_key = key.replace('\\', "\\\\").replace('"', "\\\""); result.push('"'); result.push_str(&escaped_key); result.push_str("\": "); result.push_str(&value.to_string()); first = false; } result.push('}'); result } /// Counts words in a given text slice. fn count_words(text_slice: &str) -> HashMap<String, usize> { let mut counts = HashMap::new(); for word in text_slice.split_whitespace() { // Don't add empty strings if there are multiple spaces if !word.is_empty() { *counts.entry(word.to_string()).or_insert(0) += 1; } } counts } fn main() { // 1. 
Read the file content let content = match fs::read_to_string(FILE_PATH) { Ok(text) => Arc::new(text), // Use Arc for safe sharing across threads Err(e) => { match e.kind() { ErrorKind::NotFound => eprintln!("Error: File not found at '{}'", FILE_PATH), ErrorKind::PermissionDenied => eprintln!("Error: Permission denied reading file '{}'", FILE_PATH), _ => eprintln!("Error reading file '{}': {}", FILE_PATH, e), } process::exit(1); } }; if content.is_empty() { // Handle empty file: print empty JSON for each thread for _ in 0..NUM_THREADS { println!("{{}}"); } return; } let mut thread_handles = vec![]; let total_len = content.len(); let mut chunk_start = 0; // 2. Divide work and spawn threads for i in 0..NUM_THREADS { let content_arc = Arc::clone(&content); // Calculate the approximate end of the chunk let mut chunk_end = (total_len * (i + 1)) / NUM_THREADS; // Adjust chunk_end to the next whitespace boundary (unless it's the last chunk) if i < NUM_THREADS - 1 && chunk_end < total_len { // Find the next whitespace character at or after the calculated chunk_end if let Some(boundary_pos) = content_arc[chunk_end..].find(|c: char| c.is_whitespace()) { chunk_end += boundary_pos; // Advance past the whitespace itself if possible if chunk_end + 1 < total_len { chunk_end += 1; } } else { // No whitespace found after chunk_end, give the rest to this thread chunk_end = total_len; } } else { // Ensure the last chunk goes exactly to the end chunk_end = total_len; } // Ensure start doesn't exceed end (can happen with very small files/many threads) let current_chunk_start = std::cmp::min(chunk_start, chunk_end); let current_chunk_end = chunk_end; // Spawn a thread to process this chunk let handle = thread::spawn(move || { // Get the slice for this thread // Check bounds carefully in case of empty slices let slice = if current_chunk_start < content_arc.len() && current_chunk_start < current_chunk_end { &content_arc[current_chunk_start..current_chunk_end] } else { "" // Empty slice if 
start is out of bounds or start >= end }; // Count words in the slice let word_counts = count_words(slice); // Format the result as JSON let json_output = format_json(&word_counts); // Print the JSON output for this thread // Use println! as it's implicitly synchronized (locks stdout) println!("{}", json_output); }); thread_handles.push(handle); // Prepare for the next chunk chunk_start = chunk_end; // Optimization: If we've already processed the whole file, stop spawning threads if chunk_start >= total_len && i < NUM_THREADS -1 { // Print empty JSON for remaining threads for _ in (i+1)..NUM_THREADS { println!("{{}}"); } break; } } // 3. Wait for all threads to complete for handle in thread_handles { if let Err(e) = handle.join() { eprintln!("Error: A worker thread panicked: {:?}", e); // Depending on requirements, you might want to exit here or just report. // process::exit(1); } } } Answer either 'The student passes' or 'The student fails'.
The student passes.
Testing if the previous output contains the string student passes
: True
Running the following program:
use std::collections::HashMap;
use std::fs;
use std::io::ErrorKind;
use std::process;
use std::sync::Arc;
use std::thread;

const FILE_PATH: &str = "/tmp/file.txt";
const NUM_THREADS: usize = 4;

/// Serializes word counts as a JSON object, e.g. {"word1": 10, "word2": 5}.
/// Keys have backslashes and double quotes escaped; values print as bare
/// integers. Key order follows HashMap iteration order (unspecified), which
/// is fine because the caller merges the dicts afterwards.
fn format_json(map: &HashMap<String, usize>) -> String {
    let mut result = String::with_capacity(2 + map.len() * 16);
    result.push('{');
    for (i, (key, value)) in map.iter().enumerate() {
        if i > 0 {
            result.push_str(", ");
        }
        // Minimal JSON string escaping: backslash first, then double quote.
        let escaped_key = key.replace('\\', "\\\\").replace('"', "\\\"");
        result.push('"');
        result.push_str(&escaped_key);
        result.push_str("\": ");
        result.push_str(&value.to_string());
    }
    result.push('}');
    result
}

/// Counts whitespace-separated words in a slice. Punctuation stays attached
/// to its word ("is," and "is" are distinct), matching the spec's example.
fn count_words(text_slice: &str) -> HashMap<String, usize> {
    let mut counts = HashMap::new();
    // split_whitespace never yields empty strings, so no emptiness check needed.
    for word in text_slice.split_whitespace() {
        *counts.entry(word.to_string()).or_insert(0) += 1;
    }
    counts
}

fn main() {
    // 1. Read the whole file; Arc lets the worker threads share it without copying.
    let content = match fs::read_to_string(FILE_PATH) {
        Ok(text) => Arc::new(text),
        Err(e) => {
            match e.kind() {
                ErrorKind::NotFound => eprintln!("Error: File not found at '{}'", FILE_PATH),
                ErrorKind::PermissionDenied => {
                    eprintln!("Error: Permission denied reading file '{}'", FILE_PATH)
                }
                _ => eprintln!("Error reading file '{}': {}", FILE_PATH, e),
            }
            process::exit(1);
        }
    };

    if content.is_empty() {
        // Empty file: still emit exactly NUM_THREADS lines, one empty dict each.
        for _ in 0..NUM_THREADS {
            println!("{{}}");
        }
        return;
    }

    let total_len = content.len();
    let mut thread_handles = vec![];
    let mut chunk_start = 0;

    // 2. Divide the file into ~equal byte ranges and spawn one thread per range.
    for i in 0..NUM_THREADS {
        let content_arc = Arc::clone(&content);
        // Approximate end of this chunk, in bytes.
        let mut chunk_end = (total_len * (i + 1)) / NUM_THREADS;

        if i < NUM_THREADS - 1 && chunk_end < total_len {
            // Byte arithmetic can land inside a multi-byte UTF-8 character;
            // slicing there panics, so snap forward to the next char boundary.
            while chunk_end < total_len && !content_arc.is_char_boundary(chunk_end) {
                chunk_end += 1;
            }
            // Extend to just past the next whitespace so no word is split
            // across two chunks. Advance by the whitespace char's full UTF-8
            // width (it may be multi-byte, e.g. U+00A0).
            match content_arc[chunk_end..]
                .char_indices()
                .find(|&(_, c)| c.is_whitespace())
            {
                Some((pos, ws)) => chunk_end += pos + ws.len_utf8(),
                None => chunk_end = total_len, // no whitespace left: take the rest
            }
        } else {
            // The last chunk always runs exactly to the end of the file.
            chunk_end = total_len;
        }

        // Guard against start > end (possible for tiny files / many threads).
        let start = chunk_start.min(chunk_end);
        let end = chunk_end;

        let handle = thread::spawn(move || {
            let slice = if start < end { &content_arc[start..end] } else { "" };
            let word_counts = count_words(slice);
            // println! locks stdout per call, so each dict lands on its own line.
            println!("{}", format_json(&word_counts));
        });
        thread_handles.push(handle);

        chunk_start = chunk_end;
        // If the file is exhausted early, print empty dicts for the remaining
        // workers so the output always has exactly NUM_THREADS lines.
        if chunk_start >= total_len && i < NUM_THREADS - 1 {
            for _ in (i + 1)..NUM_THREADS {
                println!("{{}}");
            }
            break;
        }
    }

    // 3. Wait for all workers; report (but don't mask) any panicked thread.
    for handle in thread_handles {
        if let Err(e) = handle.join() {
            eprintln!("Error: A worker thread panicked: {:?}", e);
        }
    }
}
{"the": 4, "age": 2, "foolishness,": 1, "was": 4, "it": 4, "of": 4, "wisdom,": 1, "worst": 1, "best": 1, "times,": 2}
{"incredulity,": 1, "the": 4, "it": 4, "season": 1, "Light,": 1, "was": 4, "of": 3, "belief,": 1, "epoch": 2}
{"had": 1, "way": 1, "all": 2, "direct": 2, "before": 1, "going": 2, "the": 1, "other": 1, "we": 3, "to": 1, "Heaven,": 1, "nothing": 1, "were": 2, "us,": 1}
{"of": 3, "us,": 1, "winter": 1, "was": 2, "the": 2, "it": 2, "spring": 1, "season": 1, "hope,": 1, "everything": 1, "despair,": 1, "we": 1, "before": 1, "had": 1, "Darkness,": 1}
warning: unused import: `self`
--> main.rs:4:15
|
4 | use std::io::{self, ErrorKind};
| ^^^^
|
= note: `#[warn(unused_imports)]` on by default
warning: 1 warning emitted
Resulting in output: [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
Testing if the previous output equals the string [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
: True