"""Split a file of JSON rows into valid rows and a report of invalid rows."""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, NoReturn, Optional, Tuple

EXIT_SUCCESS = 0
EXIT_FAILURE = 1


class _NonStandardConstantError(ValueError):
    """Raised when a row contains NaN, Infinity or -Infinity."""


def _reject_constant(token: str) -> NoReturn:
    """Refuse the non-standard numeric tokens that ``json.loads`` accepts by default."""
    raise _NonStandardConstantError(
        f"{token} is not a valid JSON value"
    )


def _exit_with_error(message: str) -> NoReturn:
    """Print *message* and terminate the process with ``EXIT_FAILURE``."""
    print(message)
    sys.exit(EXIT_FAILURE)


def validate_json_string(json_str: str) -> Tuple[bool, str, Optional[Any]]:
    """
    Validates if a string contains properly formatted JSON.

    The tokens ``NaN``, ``Infinity`` and ``-Infinity`` are rejected: Python's
    decoder accepts them by default, but they are not part of the JSON grammar
    and would otherwise be reported (and re-serialized) as valid.

    Args:
        json_str (str): String to validate as JSON.

    Returns:
        tuple: (is_valid, error_message, parsed_json)
            - is_valid (bool): True if valid JSON, False otherwise.
            - error_message (str): Description of the error if invalid,
              empty string if valid.
            - parsed_json: The parsed JSON value if valid (any JSON type,
              not only objects), None if invalid. A valid ``null`` row also
              yields None, so callers must test ``is_valid`` rather than
              ``parsed_json is None``.
    """
    if not json_str.strip():
        return False, "Empty line", None

    try:
        parsed_json = json.loads(json_str, parse_constant=_reject_constant)
    except (json.JSONDecodeError, _NonStandardConstantError) as e:
        return False, f"Invalid JSON format: {e}", None
    except Exception as e:
        # Last-resort guard so a single pathological row (such as a
        # RecursionError from very deeply nested input) is reported as
        # invalid instead of aborting the whole run.
        return False, f"Unexpected error: {e}", None
    return True, "", parsed_json


def process_json_file(input_path: str,
                      valid_output: str = "valid_rows.json",
                      invalid_output: str = "invalid_rows.txt") -> Tuple[int, int]:
    """
    Processes a file containing JSON rows, separating valid and invalid ones.

    Each input line is validated on its own. Valid rows are re-serialized
    compactly, one per line, into ``valid_output``; invalid rows are written
    to ``invalid_output`` together with their line number and error message.
    Blank lines count as invalid rows.

    The process exits with ``EXIT_FAILURE`` (via ``sys.exit``) after printing
    an error when the input is missing or is not a regular file, when an
    output path refers to the input file, or when both output paths refer to
    the same file.

    Args:
        input_path (str): Path to the input file.
        valid_output (str): Path for the output file containing valid JSON rows.
        invalid_output (str): Path for the output file containing invalid rows.

    Returns:
        tuple: (valid_count, invalid_count)
    """
    source = Path(input_path)
    if not source.exists():
        _exit_with_error(f"Error: Input file not found: {source}")
    if not source.is_file():
        _exit_with_error(f"Error: Input path is not a file: {source}")

    # Outputs are opened in 'w' mode, which truncates immediately. Writing to
    # the input path would destroy the data before it is read, and sharing
    # one path between both outputs would make them overwrite each other.
    resolved_source = source.resolve()
    resolved_valid = Path(valid_output).resolve()
    resolved_invalid = Path(invalid_output).resolve()
    if resolved_source in (resolved_valid, resolved_invalid):
        _exit_with_error(
            f"Error: Output file must differ from the input file: {source}"
        )
    if resolved_valid == resolved_invalid:
        _exit_with_error(
            f"Error: Valid and invalid output files must differ: {valid_output}"
        )

    valid_count = 0
    invalid_count = 0

    # Open the input first so an unreadable input does not leave freshly
    # created, empty output files behind.
    with source.open('r', encoding='utf-8') as input_file:
        with open(valid_output, 'w', encoding='utf-8') as valid_file, \
                open(invalid_output, 'w', encoding='utf-8') as invalid_file:
            for line_num, line in enumerate(input_file, 1):
                row = line.strip()
                is_valid, error, parsed_json = validate_json_string(row)

                if is_valid:
                    # Write each valid JSON value on its own line.
                    valid_file.write(
                        json.dumps(parsed_json, separators=(',', ':')) + '\n'
                    )
                    valid_count += 1
                else:
                    # Record the line number, the error and the raw content.
                    invalid_file.write(f"Line {line_num}: {error}\n")
                    invalid_file.write(f"Content: {row}\n\n")
                    invalid_count += 1

    return valid_count, invalid_count


def main() -> None:
    """Parse command-line arguments, process the file and print a summary.

    Exits with ``EXIT_FAILURE`` when at least one invalid row was found and
    with ``EXIT_SUCCESS`` otherwise.
    """
    parser = argparse.ArgumentParser(
        description='Validate JSON rows and separate valid from invalid ones.'
    )
    parser.add_argument('input_file',
                        help='Path to the input file containing JSON rows')
    parser.add_argument('--valid-output', default='valid_rows.json',
                        help='Output file for valid JSON rows (default: valid_rows.json)')
    parser.add_argument('--invalid-output', default='invalid_rows.txt',
                        help='Output file for invalid rows (default: invalid_rows.txt)')

    args = parser.parse_args()

    valid_count, invalid_count = process_json_file(
        args.input_file,
        args.valid_output,
        args.invalid_output
    )

    # Print summary
    print("\nProcessing complete:")
    print(f"✓ Valid JSON rows: {valid_count} (saved to {args.valid_output})")
    print(f"✗ Invalid rows: {invalid_count} (saved to {args.invalid_output})")

    # Exit with status based on result
    sys.exit(EXIT_FAILURE if invalid_count > 0 else EXIT_SUCCESS)


if __name__ == "__main__":
    main()