I have set up the Docker container to run the model evaluation.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout. {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1} Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
I extracted the following code from that output:
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
Below is some student's code. I asked them to parallelize some code. Do not judge if it is correct, I have test cases for that. Logic errors are okay. So are synatx errors. Just tell me if they tried to parallelize it. // This is a self-contained Rust program that reads the file "/tmp/file.txt", // splits its content into words (separated by whitespace), divides the word list // approximately into 4 equal slices, counts the frequency of words in each slice in its own thread, // and then prints 4 separate JSON dictionaries (one per line) to stdout. // Compile with: rustc yourfile.rs // Run with: /path/to/rust_word_count // // Note: This program does not use any external crates (no Cargo.toml required). use std::fs::File; use std::io::Read; use std::collections::HashMap; use std::thread; fn main() { // Read the entire file into a String. let mut file = match File::open("/tmp/file.txt") { Ok(f) => f, Err(e) => { eprintln!("Error opening file: {}", e); return; } }; let mut contents = String::new(); if let Err(e) = file.read_to_string(&mut contents) { eprintln!("Error reading file: {}", e); return; } // Split the file by whitespace into tokens. We preserve punctuation etc. // For example, "This is is is, an example" yields tokens: // ["This", "is", "is", "is,", "an", "example"] let tokens: Vec<&str> = contents.split_whitespace().collect(); let total_tokens = tokens.len(); // We want to split the token list into 4 chunks. let num_threads = 4; let mut handles = Vec::new(); // Compute base chunk size and remainder to distribute if tokens not divisible by 4. let chunk_size = total_tokens / num_threads; let remainder = total_tokens % num_threads; // start index for the first chunk let mut start = 0; for i in 0..num_threads { // Calculate end index: distribute the remainder // Each of the first `remainder` chunks gets one extra token. let extra = if i < remainder { 1 } else { 0 }; let end = start + chunk_size + extra; // Create the slice for this chunk (convert &str tokens to owned String for moving into thread) // Note: We clone each token string slice for the hashmap. let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>(); // Spawn a thread to count frequencies in this token slice. let handle = thread::spawn(move || { let mut counts = HashMap::new(); for token in token_slice { *counts.entry(token).or_insert(0) += 1; } // Return the HashMap. counts }); handles.push(handle); start = end; } // Join the threads and print each thread's JSON output on a separate line. for handle in handles { // Retrieve the counts hashmap from the thread. let counts: HashMap<String, usize> = match handle.join() { Ok(map) => map, Err(_) => { eprintln!("A thread panicked"); continue; } }; // Build a JSON dictionary as a String. // Format: {"key": value, "key2": value2, ...} // We do not worry about key ordering. let mut json_parts = Vec::new(); for (key, value) in counts { // Each key is escaped using basic formatting (we assume the key does not include special characters). let part = format!("\"{}\": {}", key, value); json_parts.push(part); } let json_output = format!("{{{}}}", json_parts.join(", ")); // Print the JSON dictionary line. println!("{}", json_output); } } Answer either 'The student passes' or 'The student fails'.
The student passes.
Testing if the previous output contains the string student passes
: True
Running the following program:
// This is a self-contained Rust program that reads the file "/tmp/file.txt",
// splits its content into words (separated by whitespace), divides the word list
// approximately into 4 equal slices, counts the frequency of words in each slice in its own thread,
// and then prints 4 separate JSON dictionaries (one per line) to stdout.
// Compile with: rustc yourfile.rs
// Run with: /path/to/rust_word_count
//
// Note: This program does not use any external crates (no Cargo.toml required).

use std::fs::File;
use std::io::Read;
use std::collections::HashMap;
use std::thread;

fn main() {
    // Read the entire file into a String.
    let mut file = match File::open("/tmp/file.txt") {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Error opening file: {}", e);
            return;
        }
    };
    let mut contents = String::new();
    if let Err(e) = file.read_to_string(&mut contents) {
        eprintln!("Error reading file: {}", e);
        return;
    }

    // Split the file by whitespace into tokens. We preserve punctuation etc.
    // For example, "This is is is, an example" yields tokens:
    // ["This", "is", "is", "is,", "an", "example"]
    let tokens: Vec<&str> = contents.split_whitespace().collect();
    let total_tokens = tokens.len();

    // We want to split the token list into 4 chunks.
    let num_threads = 4;
    let mut handles = Vec::new();

    // Compute base chunk size and remainder to distribute if tokens not divisible by 4.
    let chunk_size = total_tokens / num_threads;
    let remainder = total_tokens % num_threads;

    // start index for the first chunk
    let mut start = 0;

    for i in 0..num_threads {
        // Calculate end index: distribute the remainder
        // Each of the first `remainder` chunks gets one extra token.
        let extra = if i < remainder { 1 } else { 0 };
        let end = start + chunk_size + extra;

        // Create the slice for this chunk (convert &str tokens to owned String for moving into thread)
        // Note: We clone each token string slice for the hashmap.
        let token_slice = tokens[start..end].iter().map(|s| s.to_string()).collect::<Vec<String>>();

        // Spawn a thread to count frequencies in this token slice.
        let handle = thread::spawn(move || {
            let mut counts = HashMap::new();
            for token in token_slice {
                *counts.entry(token).or_insert(0) += 1;
            }
            // Return the HashMap.
            counts
        });
        handles.push(handle);
        start = end;
    }

    // Join the threads and print each thread's JSON output on a separate line.
    for handle in handles {
        // Retrieve the counts hashmap from the thread.
        let counts: HashMap<String, usize> = match handle.join() {
            Ok(map) => map,
            Err(_) => {
                eprintln!("A thread panicked");
                continue;
            }
        };

        // Build a JSON dictionary as a String.
        // Format: {"key": value, "key2": value2, ...}
        // We do not worry about key ordering.
        let mut json_parts = Vec::new();
        for (key, value) in counts {
            // Each key is escaped using basic formatting (we assume the key does not include special characters).
            let part = format!("\"{}\": {}", key, value);
            json_parts.push(part);
        }
        let json_output = format!("{{{}}}", json_parts.join(", "));

        // Print the JSON dictionary line.
        println!("{}", json_output);
    }
}
{"it": 4, "the": 4, "worst": 1, "wisdom,": 1, "times,": 2, "best": 1, "of": 3, "age": 2, "was": 4}
{"was": 3, "of": 4, "incredulity,": 1, "Light,": 1, "season": 1, "foolishness,": 1, "it": 4, "epoch": 2, "the": 3, "belief,": 1}
{"it": 2, "season": 1, "everything": 1, "hope,": 1, "of": 3, "spring": 1, "Darkness,": 1, "the": 3, "winter": 1, "had": 1, "before": 1, "despair,": 1, "we": 1, "was": 3}
{"had": 1, "nothing": 1, "direct": 2, "going": 2, "us,": 2, "to": 1, "other": 1, "Heaven,": 1, "were": 2, "the": 1, "way": 1, "we": 3, "before": 1, "all": 2}
Resulting in output: [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
Testing if the previous output equals the string [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
: True
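Since the prompt leaves the merging to the caller ("I'll do the merging"), the sorted tuple list above is presumably produced by the test harness rather than by the Rust program. A minimal sketch of that merge step, assuming a Python harness and using made-up stdout lines purely for illustration, could look like this:

import json

# Hypothetical captured stdout: one JSON dictionary per line, as the program prints.
stdout_lines = [
    '{"This": 1, "is": 1}',
    '{"is": 1, "is,": 1}',
    '{"an": 1, "example": 1}',
    '{}',
]

merged = {}
for line in stdout_lines:
    for word, count in json.loads(line).items():
        merged[word] = merged.get(word, 0) + count

# Sorting by key gives the deterministic [('word', count), ...] form shown above,
# even though each thread's HashMap iterates in an arbitrary order.
print(sorted(merged.items()))

As a cross-check on the chunking itself, the four dictionaries above sum to 22, 21, 21, and 21 tokens, which is consistent with an 85-token file split as 85 / 4 = 21 tokens per chunk with remainder 1, the single extra token going to the first chunk.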
I have set up the Docker container to run the model evaluation.
Write a self-contained Rust program that prints a JSON output of how often each word appears in a text file located in /tmp/file.txt. So if the file contains "This is is is, an example" then the program should print to stdout. {"This": 1, "is": 2, "is,": 1, "an": 1, "example": 1} Don't import any crates that would require a Cargo.toml file. I'll just compile it with `rustc yourfile.rs` and it should work. The program should be run by calling /path/to/rust_word_count with no arguments. Make it run in parallel on 4 cores at the same time, each job taking ~1/4 of the file. Print out 4 different json dicts, one per line. I'll do the merging. Do not import serde_json, write json output from scratch.
Below is one complete Rust program (in a single file) that reads the file /tmp/file.txt, splits its content into four chunks (each "chunk" being roughly ¼ of the file, adjusted so that words aren't cut in half), launches four threads (one per "core") to count word frequencies in their region, and then prints four separate JSON dictionaries (one per line). (No external crates are used.)

You can save the code to, say, rust_word_count.rs and compile it with

  rustc rust_word_count.rs

Then run /path/to/rust_word_count with no arguments.

------------------------------------------------------------

#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that "snap" to token (whitespace) boundaries so that we don't cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };
    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON "by hand".)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));

    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}

------------------------------------------------------------

Explanation:

1. The function chunk_boundaries(text, num_chunks) computes a vector of (start, end) indices (byte offsets into the file) so that words are not split.
2. Each thread processes its chunk (using split_whitespace) to count words.
3. The map_to_json() function formats a HashMap into a JSON-style dictionary.
4. The main() function wraps the file's content in an Arc (so threads can share the data safely), spawns 4 threads (one per chunk), and prints each thread's output on its own line.

Compile it with rustc and run; feel free to merge the JSON dictionaries afterward as needed. Enjoy!
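To make the snap-to-whitespace boundary logic described above easier to check by hand, here is a small illustrative model of chunk_boundaries (written in Python purely for brevity; it is not part of the submitted program and simply mirrors the same arithmetic on the example sentence):

def chunk_boundaries(text: str, num_chunks: int):
    # Character offsets snapped forward to whitespace so a word is never cut in half.
    n = len(text)
    avg = n // num_chunks
    bounds, start = [], 0
    for _ in range(num_chunks - 1):
        end = min(start + avg, n)
        while end < n and not text[end].isspace():
            end += 1
        bounds.append((start, end))
        # The next chunk starts at the next non-whitespace character.
        while end < n and text[end].isspace():
            end += 1
        start = end
    bounds.append((start, n))
    return bounds

text = "This is is is, an example"
print([text[s:e] for s, e in chunk_boundaries(text, 4)])
# ['This is', 'is is,', 'an example', ''] -- a short input can leave the last chunk empty,
# which the Rust version simply prints as an empty dictionary {}.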
I extracted the following code from that output:
#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that "snap" to token (whitespace) boundaries so that we don't cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };
    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON "by hand".)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));

    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}
Below is some student's code. I asked them to parallelize some code. Do not judge if it is correct, I have test cases for that. Logic errors are okay. So are synatx errors. Just tell me if they tried to parallelize it. #!/usr/bin/env rust use std::collections::HashMap; use std::fs; use std::sync::Arc; use std::thread; // Given a text and a requested number of chunks, compute chunk boundaries // that “snap†to token (whitespace) boundaries so that we don’t cut a word in half. fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> { let len = text.len(); let mut boundaries = Vec::with_capacity(num_chunks); if num_chunks == 0 { return boundaries; } // We work in bytes. (This works fine if the file is standard ASCII.) let bytes = text.as_bytes(); let avg = if num_chunks > 0 { len / num_chunks } else { len }; let mut current_start = 0; for _i in 0..(num_chunks - 1) { // Candidate end: ideally current_start plus avg. let mut candidate = current_start + avg; if candidate >= len { candidate = len; } else { // Move candidate forward until we hit whitespace, // thereby ensuring we do not break a word. while candidate < len && !bytes[candidate].is_ascii_whitespace() { candidate += 1; } } let end = candidate; boundaries.push((current_start, end)); // For the next chunk, skip any whitespace so that we start at a token. let mut new_start = candidate; while new_start < len && bytes[new_start].is_ascii_whitespace() { new_start += 1; } current_start = new_start; } // Last chunk goes from current_start to end of text. boundaries.push((current_start, len)); boundaries } // Given a HashMap, convert it to a JSON string in the format: // {"key1": count1, "key2": count2, ... } fn map_to_json(map: &HashMap<String, usize>) -> String { let mut s = String::from("{"); // We do not care about ordering. (And we write JSON “by handâ€.) let mut first = true; for (key, value) in map { if !first { s.push_str(", "); } first = false; // We assume key does not contain quotes or control characters. s.push_str(&format!("\"{}\": {}", key, value)); } s.push('}'); s } fn main() { // Read the entire file into a String. let contents = fs::read_to_string("/tmp/file.txt") .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err)); // Wrap the file contents in an Arc so that threads can share it. let contents = Arc::new(contents); // Determine approximate boundaries for 4 chunks. let boundaries = chunk_boundaries(&contents, 4); if boundaries.len() != 4 { panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len()); } // Spawn 4 threads, each processing a slice of the file. let mut handles = Vec::new(); for (start, end) in boundaries { let contents_clone = Arc::clone(&contents); // For each chunk, create a thread that counts each word. let handle = thread::spawn(move || { // Get the slice for this chunk. This might be empty. let part = &contents_clone[start..end]; let mut map = HashMap::new(); // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains). for word in part.split_whitespace() { // Increase the count for this word. *map.entry(word.to_string()).or_insert(0) += 1; } // Return the JSON string for this chunk. map_to_json(&map) }); handles.push(handle); } // Collect the results and print one JSON dictionary per line. for handle in handles { match handle.join() { Ok(json) => println!("{}", json), Err(e) => eprintln!("Error joining thread: {:?}", e), } } } Answer either 'The student passes' or 'The student fails'.
The student passes.
Testing if the previous output contains the string student passes
: True
Running the following program:
#!/usr/bin/env rust
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use std::thread;

// Given a text and a requested number of chunks, compute chunk boundaries
// that "snap" to token (whitespace) boundaries so that we don't cut a word in half.
fn chunk_boundaries(text: &str, num_chunks: usize) -> Vec<(usize, usize)> {
    let len = text.len();
    let mut boundaries = Vec::with_capacity(num_chunks);
    if num_chunks == 0 {
        return boundaries;
    }
    // We work in bytes. (This works fine if the file is standard ASCII.)
    let bytes = text.as_bytes();
    let avg = if num_chunks > 0 { len / num_chunks } else { len };
    let mut current_start = 0;
    for _i in 0..(num_chunks - 1) {
        // Candidate end: ideally current_start plus avg.
        let mut candidate = current_start + avg;
        if candidate >= len {
            candidate = len;
        } else {
            // Move candidate forward until we hit whitespace,
            // thereby ensuring we do not break a word.
            while candidate < len && !bytes[candidate].is_ascii_whitespace() {
                candidate += 1;
            }
        }
        let end = candidate;
        boundaries.push((current_start, end));
        // For the next chunk, skip any whitespace so that we start at a token.
        let mut new_start = candidate;
        while new_start < len && bytes[new_start].is_ascii_whitespace() {
            new_start += 1;
        }
        current_start = new_start;
    }
    // Last chunk goes from current_start to end of text.
    boundaries.push((current_start, len));
    boundaries
}

// Given a HashMap, convert it to a JSON string in the format:
// {"key1": count1, "key2": count2, ... }
fn map_to_json(map: &HashMap<String, usize>) -> String {
    let mut s = String::from("{");
    // We do not care about ordering. (And we write JSON "by hand".)
    let mut first = true;
    for (key, value) in map {
        if !first {
            s.push_str(", ");
        }
        first = false;
        // We assume key does not contain quotes or control characters.
        s.push_str(&format!("\"{}\": {}", key, value));
    }
    s.push('}');
    s
}

fn main() {
    // Read the entire file into a String.
    let contents = fs::read_to_string("/tmp/file.txt")
        .unwrap_or_else(|err| panic!("Could not read /tmp/file.txt: {}", err));

    // Wrap the file contents in an Arc so that threads can share it.
    let contents = Arc::new(contents);

    // Determine approximate boundaries for 4 chunks.
    let boundaries = chunk_boundaries(&contents, 4);
    if boundaries.len() != 4 {
        panic!("Unexpected boundaries length: expected 4, got {}", boundaries.len());
    }

    // Spawn 4 threads, each processing a slice of the file.
    let mut handles = Vec::new();
    for (start, end) in boundaries {
        let contents_clone = Arc::clone(&contents);
        // For each chunk, create a thread that counts each word.
        let handle = thread::spawn(move || {
            // Get the slice for this chunk. This might be empty.
            let part = &contents_clone[start..end];
            let mut map = HashMap::new();
            // Use split_whitespace() so that punctuation remains attached (e.g. "is," remains).
            for word in part.split_whitespace() {
                // Increase the count for this word.
                *map.entry(word.to_string()).or_insert(0) += 1;
            }
            // Return the JSON string for this chunk.
            map_to_json(&map)
        });
        handles.push(handle);
    }

    // Collect the results and print one JSON dictionary per line.
    for handle in handles {
        match handle.join() {
            Ok(json) => println!("{}", json),
            Err(e) => eprintln!("Error joining thread: {:?}", e),
        }
    }
}
{"age": 2, "times,": 2, "the": 4, "was": 4, "worst": 1, "wisdom,": 1, "foolishness,": 1, "best": 1, "of": 4, "it": 4}
{"belief,": 1, "epoch": 2, "it": 4, "incredulity,": 1, "of": 3, "season": 2, "was": 4, "the": 4, "Light,": 1}
{"spring": 1, "of": 3, "hope,": 1, "had": 2, "was": 2, "winter": 1, "despair,": 1, "everything": 1, "before": 1, "Darkness,": 1, "we": 2, "us,": 1, "it": 2, "the": 2}
{"before": 1, "nothing": 1, "us,": 1, "going": 2, "direct": 2, "we": 2, "to": 1, "Heaven,": 1, "way": 1, "other": 1, "the": 1, "all": 2, "were": 2}
Resulting in output: [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
Testing if the previous output equals the string [('Darkness,', 1), ('Heaven,', 1), ('Light,', 1), ('age', 2), ('all', 2), ('before', 2), ('belief,', 1), ('best', 1), ('despair,', 1), ('direct', 2), ('epoch', 2), ('everything', 1), ('foolishness,', 1), ('going', 2), ('had', 2), ('hope,', 1), ('incredulity,', 1), ('it', 10), ('nothing', 1), ('of', 10), ('other', 1), ('season', 2), ('spring', 1), ('the', 11), ('times,', 2), ('to', 1), ('us,', 2), ('was', 10), ('way', 1), ('we', 4), ('were', 2), ('winter', 1), ('wisdom,', 1), ('worst', 1)]
: True