I have set up the docker container to run the model evaluation.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout: {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1}. Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
I extracted the following code from that output:
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
Below is some student's code. I asked them to parallelize some code. Do not judge if it is correct; I have test cases for that. Logic errors are okay. So are syntax errors. Just tell me if they tried to parallelize it.

// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}

Answer either 'The student passes' or 'The student fails'.
The student passes.
Testing if the previous output contains the string student passes
: True
Running the following program:
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
{"it": 4, "the": 4, "worst": 1, "wisdom,": 1, "times,": 2, "best": 1, "of": 3, "age": 2, "was": 4}
{"was": 3, "of": 4, "incredulity,": 1, "Light,": 1, "season": 1, "foolishness,": 1, "it": 4, "epoch": 2, "the": 3, "belief,": 1}
{"it": 2, "season": 1, "everything": 1, "hope,": 1, "of": 3, "spring": 1, "Darkness,": 1, "the": 3, "winter": 1, "had": 1, "before": 1, "despair,": 1, "we": 1, "was": 3}
{"had": 1, "nothing": 1, "direct": 2, "going": 2, "us,": 2, "to": 1, "other": 1, "Heaven,": 1, "were": 2, "the": 1, "way": 1, "we": 3, "before": 1, "all": 2}
Resulting in output: [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
Testing if the previous output equals the string [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
: True
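The transcript does not show how the four per-chunk dictionaries are merged into the sorted tuple list above; the prompt says the caller will do the merging. Below is a minimal sketch of one way to do it, assuming the four JSON lines are piped in on stdin and that keys never contain a double quote (true for whitespace-separated tokens from this file). The actual harness, whose output format looks like a Python list of tuples, may work differently; the file name merge_counts.rs is hypothetical.

// merge_counts.rs (hypothetical helper, not part of the evaluated programs):
// reads the per-thread JSON lines from stdin, sums the counts per word, and
// prints them as a sorted list of (word, count) pairs.
use std::collections::BTreeMap;
use std::io::{self, BufRead};

fn main() {
    let mut totals: BTreeMap<String, u64> = BTreeMap::new();
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let line = line.expect("failed to read a line from stdin");
        // Strip the surrounding braces of one flat JSON object per line.
        let inner = line.trim().trim_start_matches('{').trim_end_matches('}');
        if inner.is_empty() {
            continue; // an empty dict ("{}") contributes nothing
        }
        // Entries look like `"word": count`; keys are assumed to contain no '"'.
        for entry in inner.split(", \"") {
            let entry = entry.trim_start_matches('"');
            if let Some((word, count)) = entry.rsplit_once("\": ") {
                let n: u64 = count.trim().parse().expect("count is not an integer");
                *totals.entry(word.to_string()).or_insert(0) += n;
            }
        }
    }
    // BTreeMap iterates in sorted key order, matching the sorted list above.
    let parts: Vec<String> = totals
        .iter()
        .map(|(word, count)| format!("('{}', {})", word, count))
        .collect();
    println!("[{}]", parts.join(", "));
}

It could then be used as, e.g., /path/to/rust_word_count | ./merge_counts (paths hypothetical).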
I have set up the docker container to run the model evaluation.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout: {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1}. Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
Below is one complete Rust program (in a single file) that reads the file /tmp/file.txt, splits its content into four chunks (each “chunk” being roughly ¼ of the file, adjusted so that words aren’t cut in half), launches four threads (one per “core”) to count word frequencies in their region, and then prints four separate JSON dictionaries (one per line). (No external crates are used.)

You can save the code to, say, rust_word_count.rs and compile it with rustc rust_word_count.rs. Then run /path/to/rust_word_count with no arguments.

------------------------------------------------------------
#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that “snap” to token (whitespace) boundaries so that we don’t cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };

    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON “by hand”.)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));
    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}
------------------------------------------------------------

Explanation:

1. The function chunk_boundaries(text, num_chunks) computes a vector of (start, end) indices (byte offsets into the file) so that words are not split.
2. Each thread processes its chunk (using split_whitespace) to count words.
3. The map_to_json() function formats a HashMap into a JSON-style dictionary.
4. The main() function wraps the file’s content in an Arc (so threads can share the data safely), spawns 4 threads (one per chunk), and prints each thread’s output on its own line.

Compile it with rustc and run; feel free to merge the JSON dictionaries afterward as needed. Enjoy!
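As a quick illustration of point 1 above (my own trace, not part of the original answer): on the prompt's example sentence the chunk ends snap forward to the next whitespace, and that forward snapping can leave the final chunk empty, in which case its thread simply prints {}. A minimal check, assuming chunk_boundaries from the program above is in scope (for instance by temporarily replacing its main):

// Hypothetical trace, not part of the original answer.
fn main() {
    // "This is is is, an example" has len 25, so avg = 25 / 4 = 6 bytes per chunk
    // before snapping forward to the next whitespace.
    let text = "This is is is, an example";
    let boundaries = chunk_boundaries(text, 4);
    assert_eq!(boundaries, vec![(0, 7), (8, 14), (15, 25), (25, 25)]);
    for (start, end) in boundaries {
        // Prints "This is", "is is,", "an example", "" in turn.
        println!("{:?}", &text[start..end]);
    }
}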
I extracted the following code from that output:
#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that “snap” to token (whitespace) boundaries so that we don’t cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };

    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON “by hand”.)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));
    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}
Below is some student's code. I asked them to parallelize some code. Do not judge if it is correct; I have test cases for that. Logic errors are okay. So are syntax errors. Just tell me if they tried to parallelize it.

#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that “snap” to token (whitespace) boundaries so that we don’t cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };

    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON “by hand”.)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));
    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}

Answer either 'The student passes' or 'The student fails'.
The student passes.
Testing if the previous output contains the string student passes
: True
Running the following program:
#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that “snap” to token (whitespace) boundaries so that we don’t cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };

    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON “by hand”.)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));
    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}
{"age": 2, "times,": 2, "the": 4, "was": 4, "worst": 1, "wisdom,": 1, "foolishness,": 1, "best": 1, "of": 4, "it": 4}
{"belief,": 1, "epoch": 2, "it": 4, "incredulity,": 1, "of": 3, "season": 2, "was": 4, "the": 4, "Light,": 1}
{"spring": 1, "of": 3, "hope,": 1, "had": 2, "was": 2, "winter": 1, "despair,": 1, "everything": 1, "before": 1, "Darkness,": 1, "we": 2, "us,": 1, "it": 2, "the": 2}
{"before": 1, "nothing": 1, "us,": 1, "going": 2, "direct": 2, "we": 2, "to": 1, "Heaven,": 1, "way": 1, "other": 1, "the": 1, "all": 2, "were": 2}
Resulting in output: [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
Testing if the previous output equals the string [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
: True