#!/usr/bin/python3
"""Split a file into size-limited chunks without ever cutting a line in half."""

import os
import sys
import argparse


def split_lines(input_file, chunk_size_mb=500):
    """
    Splits a file into chunks of at most chunk_size_mb but preserves lines.

    Chunks are written next to the input file and are named
    ``<input stem>_chunk001<ext>``, ``<input stem>_chunk002<ext>``, ...
    The data is copied in binary mode, so the bytes of every line
    (including its line ending) are written unchanged and the chunks
    concatenate back to the original file. Lines are delimited by the
    newline byte; a Windows ``\\r\\n`` ending therefore stays attached to
    its line.

    A single line that is larger than the chunk size is never split; it is
    written to a chunk of its own, which then exceeds the limit. An empty
    input file produces no chunk files.

    Args:
        input_file (str): Path to the file that will be split into chunks.
        chunk_size_mb (int, optional): Maximum size of each chunk in MB.
            Must be greater than zero. Defaults to 500.

    Raises:
        ValueError: If chunk_size_mb is not greater than zero.
        OSError: If the input file cannot be read or a chunk cannot be
            written.
    """
    # A non-positive limit would silently degrade to "one file per line".
    if chunk_size_mb <= 0:
        raise ValueError(f"chunk_size_mb must be greater than zero, got {chunk_size_mb!r}")

    chunk_size_bytes = chunk_size_mb * 1024 * 1024  # Convert MB to bytes

    # splitext always returns (root, ext); ext is '' when there is no extension.
    base_name, file_extension = os.path.splitext(input_file)

    current_chunk = 0
    current_size = 0
    output_file = None

    try:
        # Binary mode: len(line) is the real on-disk size, no decoding errors
        # on non-UTF-8 data, and no newline translation.
        with open(input_file, 'rb') as infile:
            for line in infile:
                line_size = len(line)

                # Open a new chunk if none exists or this line would overflow it.
                if output_file is None or current_size + line_size > chunk_size_bytes:
                    if output_file is not None:
                        output_file.close()
                    current_chunk += 1
                    output_filename = f"{base_name}_chunk{current_chunk:03d}{file_extension}"
                    output_file = open(output_filename, 'wb')
                    # Report only after the file really exists.
                    print(f"Created {output_filename}")
                    current_size = 0

                output_file.write(line)
                current_size += line_size
    finally:
        # Close the last chunk even if reading or writing failed midway.
        if output_file is not None:
            output_file.close()


def _positive_int(value):
    """argparse type converter: parse value as an int and reject values below 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number <= 0:
        raise argparse.ArgumentTypeError(f"must be greater than zero, got {number}")
    return number


def parse_arguments(argv=None):
    """
    Parses command line arguments.

    Args:
        argv (list[str], optional): Argument list to parse instead of the
            process command line. Defaults to None, which makes argparse
            read sys.argv[1:].

    Returns:
        argparse.Namespace: The arguments passed to the command line, with
        the attributes ``input_file`` (str) and ``chunk_size_mb`` (int).
    """
    parser = argparse.ArgumentParser(
        description='Split a text file into smaller chunks but keep lines intakt.'
    )
    parser.add_argument(
        'input_file',
        type=str,
        help='The path to the input file to be split.',
    )
    parser.add_argument(
        '--chunk_size_mb',
        type=_positive_int,
        default=500,
        help='Maximum chunk size in MB (default: 500).',
    )
    return parser.parse_args(argv)


def main(argv=None):
    """
    Command line entry point: parse the arguments and split the input file.

    Args:
        argv (list[str], optional): Argument list forwarded to
            parse_arguments. Defaults to None (use the process command line).
    """
    args = parse_arguments(argv)
    split_lines(args.input_file, args.chunk_size_mb)


if __name__ == '__main__':
    main()