I have setup the docker container to run the model evaluation.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
Here is a Python script that accomplishes the task you described:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | import os import math # read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # calculate target size for each merged file target_size = math.ceil(total_size / 16) # initialize merged files merged_files = [open(f'/tmp/merged_{i}', 'wb') for i in range(16)] merged_sizes = [0] * 16 # sort files by size, largest first file_paths.sort(key=os.path.getsize, reverse=True) # add files to merged files in order, always choosing the smallest one for file_path in file_paths: file_size = os.path.getsize(file_path) min_index = min(range(16), key=lambda i: merged_sizes[i]) merged_files[min_index].write(open(file_path, 'rb').read()) merged_sizes[min_index] += file_size # close merged files for merged_file in merged_files: merged_file.close() |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | import os import math # read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # calculate target size for each merged file target_size = math.ceil(total_size / 16) # initialize merged files merged_files = [open(f'/tmp/merged_{i}', 'wb') for i in range(16)] merged_sizes = [0] * 16 # sort files by size, largest first file_paths.sort(key=os.path.getsize, reverse=True) # add files to merged files in order, always choosing the smallest one for file_path in file_paths: file_size = os.path.getsize(file_path) min_index = min(range(16), key=lambda i: merged_sizes[i]) merged_files[min_index].write(open(file_path, 'rb').read()) merged_sizes[min_index] += file_size # close merged files for merged_file in merged_files: merged_file.close() |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | import os import math # read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # calculate target size for each merged file target_size = math.ceil(total_size / 16) # initialize merged files merged_files = [open(f'/tmp/merged_{i}', 'wb') for i in range(16)] merged_sizes = [0] * 16 # sort files by size, largest first file_paths.sort(key=os.path.getsize, reverse=True) # add files to merged files in order, always choosing the smallest one for file_path in file_paths: file_size = os.path.getsize(file_path) min_index = min(range(16), key=lambda i: merged_sizes[i]) merged_files[min_index].write(open(file_path, 'rb').read()) merged_sizes[min_index] += file_size # close merged files for merged_file in merged_files: merged_file.close() |
final: True Resulting in output: True
I have setup the docker container to run the model evaluation.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
Here's a Python script that does what you asked for:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read the list of file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Get the size of each file and sort the list by size in descending order file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize the merged files with zero size merged_files = [{'path': '/tmp/merged_{}'.format(i), 'size': 0} for i in range(16)] # Distribute the files among the merged files for file_path, file_size in file_sizes: # Find the merged file with the smallest size smallest_index = min(range(16), key=lambda i: merged_files[i]['size']) # Add the file size to the merged file merged_files[smallest_index]['size'] += file_size # Write the file to the merged file with open(merged_files[smallest_index]['path'], 'ab') as f: with open(file_path, 'rb') as g: f.write(g.read()) # Pad the merged files to make them approximately the same size total_size = sum(f['size'] for f in merged_files) avg_size = math.ceil(total_size / 16) for i, file_info in enumerate(merged_files): with open(file_info['path'], 'ab') as f: f.seek(file_info['size']) padding = b' ' * (avg_size - file_info['size']) f.write(padding) |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read the list of file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Get the size of each file and sort the list by size in descending order file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize the merged files with zero size merged_files = [{'path': '/tmp/merged_{}'.format(i), 'size': 0} for i in range(16)] # Distribute the files among the merged files for file_path, file_size in file_sizes: # Find the merged file with the smallest size smallest_index = min(range(16), key=lambda i: merged_files[i]['size']) # Add the file size to the merged file merged_files[smallest_index]['size'] += file_size # Write the file to the merged file with open(merged_files[smallest_index]['path'], 'ab') as f: with open(file_path, 'rb') as g: f.write(g.read()) # Pad the merged files to make them approximately the same size total_size = sum(f['size'] for f in merged_files) avg_size = math.ceil(total_size / 16) for i, file_info in enumerate(merged_files): with open(file_info['path'], 'ab') as f: f.seek(file_info['size']) padding = b' ' * (avg_size - file_info['size']) f.write(padding) |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read the list of file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Get the size of each file and sort the list by size in descending order file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize the merged files with zero size merged_files = [{'path': '/tmp/merged_{}'.format(i), 'size': 0} for i in range(16)] # Distribute the files among the merged files for file_path, file_size in file_sizes: # Find the merged file with the smallest size smallest_index = min(range(16), key=lambda i: merged_files[i]['size']) # Add the file size to the merged file merged_files[smallest_index]['size'] += file_size # Write the file to the merged file with open(merged_files[smallest_index]['path'], 'ab') as f: with open(file_path, 'rb') as g: f.write(g.read()) # Pad the merged files to make them approximately the same size total_size = sum(f['size'] for f in merged_files) avg_size = math.ceil(total_size / 16) for i, file_info in enumerate(merged_files): with open(file_info['path'], 'ab') as f: f.seek(file_info['size']) padding = b' ' * (avg_size - file_info['size']) f.write(padding) |
final: True Resulting in output: True
I have setup the docker container to run the model evaluation.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
Here's a Python script that accomplishes the task:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | import os import sys import math from collections import defaultdict # Function to read file and return its content and size def read_file(file_path): with open(file_path, 'rb') as f: content = f.read() size = len(content) return content, size # Function to write data to a file def write_to_file(file_path, data): with open(file_path, 'ab') as f: f.write(data) # Read the list of file paths file_paths = [] with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Sort files by size (largest first) file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize merged files with their file handles merged_files = defaultdict(lambda: open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab')) # Add files to the merged files total_size = 0 for path, size in file_sizes: content, size = read_file(path) target_file, target_size = min([(f, s) for f, s in merged_files.items()], key=lambda x: x[1]) merged_files[target_file].write(content) merged_files[target_file] = open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab') if target_size + size > 1.2 * total_size / 16 else target_file total_size += size # Close all merged files for file_handle in merged_files.values(): file_handle.close() |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | import os import sys import math from collections import defaultdict # Function to read file and return its content and size def read_file(file_path): with open(file_path, 'rb') as f: content = f.read() size = len(content) return content, size # Function to write data to a file def write_to_file(file_path, data): with open(file_path, 'ab') as f: f.write(data) # Read the list of file paths file_paths = [] with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Sort files by size (largest first) file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize merged files with their file handles merged_files = defaultdict(lambda: open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab')) # Add files to the merged files total_size = 0 for path, size in file_sizes: content, size = read_file(path) target_file, target_size = min([(f, s) for f, s in merged_files.items()], key=lambda x: x[1]) merged_files[target_file].write(content) merged_files[target_file] = open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab') if target_size + size > 1.2 * total_size / 16 else target_file total_size += size # Close all merged files for file_handle in merged_files.values(): file_handle.close() |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | import os import sys import math from collections import defaultdict # Function to read file and return its content and size def read_file(file_path): with open(file_path, 'rb') as f: content = f.read() size = len(content) return content, size # Function to write data to a file def write_to_file(file_path, data): with open(file_path, 'ab') as f: f.write(data) # Read the list of file paths file_paths = [] with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Sort files by size (largest first) file_sizes = [(path, os.path.getsize(path)) for path in file_paths] file_sizes.sort(key=lambda x: x[1], reverse=True) # Initialize merged files with their file handles merged_files = defaultdict(lambda: open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab')) # Add files to the merged files total_size = 0 for path, size in file_sizes: content, size = read_file(path) target_file, target_size = min([(f, s) for f, s in merged_files.items()], key=lambda x: x[1]) merged_files[target_file].write(content) merged_files[target_file] = open(f'/tmp/merged_{hash(os.times()) % 16}', 'ab') if target_size + size > 1.2 * total_size / 16 else target_file total_size += size # Close all merged files for file_handle in merged_files.values(): file_handle.close() |
Traceback (most recent call last):
File "/usr/src/app/main.py", line 35, in <module>
target_file, target_size = min([(f, s) for f, s in merged_files.items()], key=lambda x: x[1])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: min() arg is an empty sequence
Traceback (most recent call last): File "/usr/src/app/check.py", line 20, in <module> print('final: ' + str(test())) ^^^^^^ File "/usr/src/app/check.py", line 6, in test sz.append(len(open("/tmp/merged_%d" % i).read())) ^^^^^^^^^^^^^^^^^^^^^^^^^^ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/merged_0' Resulting in output: False
I have setup the docker container to run the model evaluation.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
Here is a possible solution for your problem:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # Get the number of output files num_files = 16 # Calculate the target size for each output file target_size = math.ceil(total_size / num_files) # Initialize a list to keep track of the current size of each output file file_sizes = [0] * num_files # Initialize a list to keep track of the output file handles output_files = [None] * num_files # Open all output files for i in range(num_files): output_files[i] = open(f'/tmp/merged_{i:02d}', 'wb') # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Iterate over all input files and distribute their contents to the output files for file_path in file_paths: # Find the output file with the smallest size idx = min(range(num_files), key=lambda i: file_sizes[i]) # Write the contents of the input file to the output file with open(file_path, 'rb') as f: while True: # Read a chunk of data from the input file data = f.read(1024 * 1024) if not data: break # Calculate the new size of the output file after writing the data new_size = file_sizes[idx] + len(data) # Check if the new size exceeds the target size if new_size > target_size: # If it does, distribute the remaining data to other output files for j in range(num_files): if j == idx or file_sizes[j] >= target_size: continue remaining_size = target_size - file_sizes[j] if remaining_size >= len(data): # If there is enough space in the current output file, write all remaining data output_files[j].write(data) file_sizes[j] += len(data) break else: # Otherwise, write as much data as possible and continue to the next output file output_files[j].write(data[:remaining_size]) file_sizes[j] += remaining_size data = data[remaining_size:] # If we have written all data to other output files, break the loop if not data: break # Otherwise, update the size of the current output file and continue to the next input file file_sizes[idx] = target_size break # If the new size does not exceed the target size, write the data to the output file output_files[idx].write(data) file_sizes[idx] = new_size # Close all output files for f in output_files: f.close() |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # Get the number of output files num_files = 16 # Calculate the target size for each output file target_size = math.ceil(total_size / num_files) # Initialize a list to keep track of the current size of each output file file_sizes = [0] * num_files # Initialize a list to keep track of the output file handles output_files = [None] * num_files # Open all output files for i in range(num_files): output_files[i] = open(f'/tmp/merged_{i:02d}', 'wb') # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Iterate over all input files and distribute their contents to the output files for file_path in file_paths: # Find the output file with the smallest size idx = min(range(num_files), key=lambda i: file_sizes[i]) # Write the contents of the input file to the output file with open(file_path, 'rb') as f: while True: # Read a chunk of data from the input file data = f.read(1024 * 1024) if not data: break # Calculate the new size of the output file after writing the data new_size = file_sizes[idx] + len(data) # Check if the new size exceeds the target size if new_size > target_size: # If it does, distribute the remaining data to other output files for j in range(num_files): if j == idx or file_sizes[j] >= target_size: continue remaining_size = target_size - file_sizes[j] if remaining_size >= len(data): # If there is enough space in the current output file, write all remaining data output_files[j].write(data) file_sizes[j] += len(data) break else: # Otherwise, write as much data as possible and continue to the next output file output_files[j].write(data[:remaining_size]) file_sizes[j] += remaining_size data = data[remaining_size:] # If we have written all data to other output files, break the loop if not data: break # Otherwise, update the size of the current output file and continue to the next input file file_sizes[idx] = target_size break # If the new size does not exceed the target size, write the data to the output file output_files[idx].write(data) file_sizes[idx] = new_size # Close all output files for f in output_files: f.close() |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size of all files total_size = sum(os.path.getsize(f) for f in file_paths) # Get the number of output files num_files = 16 # Calculate the target size for each output file target_size = math.ceil(total_size / num_files) # Initialize a list to keep track of the current size of each output file file_sizes = [0] * num_files # Initialize a list to keep track of the output file handles output_files = [None] * num_files # Open all output files for i in range(num_files): output_files[i] = open(f'/tmp/merged_{i:02d}', 'wb') # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Iterate over all input files and distribute their contents to the output files for file_path in file_paths: # Find the output file with the smallest size idx = min(range(num_files), key=lambda i: file_sizes[i]) # Write the contents of the input file to the output file with open(file_path, 'rb') as f: while True: # Read a chunk of data from the input file data = f.read(1024 * 1024) if not data: break # Calculate the new size of the output file after writing the data new_size = file_sizes[idx] + len(data) # Check if the new size exceeds the target size if new_size > target_size: # If it does, distribute the remaining data to other output files for j in range(num_files): if j == idx or file_sizes[j] >= target_size: continue remaining_size = target_size - file_sizes[j] if remaining_size >= len(data): # If there is enough space in the current output file, write all remaining data output_files[j].write(data) file_sizes[j] += len(data) break else: # Otherwise, write as much data as possible and continue to the next output file output_files[j].write(data[:remaining_size]) file_sizes[j] += remaining_size data = data[remaining_size:] # If we have written all data to other output files, break the loop if not data: break # Otherwise, update the size of the current output file and continue to the next input file file_sizes[idx] = target_size break # If the new size does not exceed the target size, write the data to the output file output_files[idx].write(data) file_sizes[idx] = new_size # Close all output files for f in output_files: f.close() |
Traceback (most recent call last): File "/usr/src/app/check.py", line 20, in <module> print('final: ' + str(test())) ^^^^^^ File "/usr/src/app/check.py", line 6, in test sz.append(len(open("/tmp/merged_%d" % i).read())) ^^^^^^^^^^^^^^^^^^^^^^^^^^ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/merged_0' Resulting in output: False
I have setup the docker container to run the model evaluation.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
I have a list of several thousand file paths in /tmp/files. Write a python script that merges these all together and splits the content into 16 files called /tmp/merged_%d for i in 0..15 where each is approximately the same size. To do this, sort files by size (largest first) and add files in order always to the smallest one. "/tmp/files" is a list of file paths, one per line.
Here's a Python script that does what you described:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size and number of chunks total_size = sum(os.path.getsize(f) for f in file_paths) num_chunks = 16 chunk_size = math.ceil(total_size / num_chunks) # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Initialize empty list of chunks chunks = [open(f'/tmp/merged_{i:02d}', 'wb') for i in range(num_chunks)] current_chunk_sizes = [0] * num_chunks # Add files to chunks in order for file_path in file_paths: file_size = os.path.getsize(file_path) # Find the smallest chunk that can fit the current file for i, (chunk_size, chunk) in enumerate(zip(current_chunk_sizes, chunks)): if chunk_size + file_size <= chunk_size * 2: # Write file to chunk with open(file_path, 'rb') as f: chunk.write(f.read()) # Update chunk size current_chunk_sizes[i] += file_size break # Close all chunks for chunk in chunks: chunk.close() |
I extracted the following code from that output:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size and number of chunks total_size = sum(os.path.getsize(f) for f in file_paths) num_chunks = 16 chunk_size = math.ceil(total_size / num_chunks) # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Initialize empty list of chunks chunks = [open(f'/tmp/merged_{i:02d}', 'wb') for i in range(num_chunks)] current_chunk_sizes = [0] * num_chunks # Add files to chunks in order for file_path in file_paths: file_size = os.path.getsize(file_path) # Find the smallest chunk that can fit the current file for i, (chunk_size, chunk) in enumerate(zip(current_chunk_sizes, chunks)): if chunk_size + file_size <= chunk_size * 2: # Write file to chunk with open(file_path, 'rb') as f: chunk.write(f.read()) # Update chunk size current_chunk_sizes[i] += file_size break # Close all chunks for chunk in chunks: chunk.close() |
Running the following program:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import os import math # Read file paths from /tmp/files with open('/tmp/files', 'r') as f: file_paths = [line.strip() for line in f] # Calculate total size and number of chunks total_size = sum(os.path.getsize(f) for f in file_paths) num_chunks = 16 chunk_size = math.ceil(total_size / num_chunks) # Sort files by size (largest first) file_paths.sort(key=os.path.getsize, reverse=True) # Initialize empty list of chunks chunks = [open(f'/tmp/merged_{i:02d}', 'wb') for i in range(num_chunks)] current_chunk_sizes = [0] * num_chunks # Add files to chunks in order for file_path in file_paths: file_size = os.path.getsize(file_path) # Find the smallest chunk that can fit the current file for i, (chunk_size, chunk) in enumerate(zip(current_chunk_sizes, chunks)): if chunk_size + file_size <= chunk_size * 2: # Write file to chunk with open(file_path, 'rb') as f: chunk.write(f.read()) # Update chunk size current_chunk_sizes[i] += file_size break # Close all chunks for chunk in chunks: chunk.close() |
Traceback (most recent call last): File "/usr/src/app/check.py", line 20, in <module> print('final: ' + str(test())) ^^^^^^ File "/usr/src/app/check.py", line 6, in test sz.append(len(open("/tmp/merged_%d" % i).read())) ^^^^^^^^^^^^^^^^^^^^^^^^^^ FileNotFoundError: [Errno 2] No such file or directory: '/tmp/merged_0' Resulting in output: False