"""Filter, sort, and de-duplicate newline-delimited JSON records by 'Timestamp'.

Usage:
    python script.py INPUT_FILE OUTPUT_FILE [timestamp_threshold]

Every line of INPUT_FILE is parsed as JSON. A line is kept only when it is a
JSON object whose 'Timestamp' value is an integer and, if a threshold is
given, strictly greater than that threshold. Kept records are written to
OUTPUT_FILE one per line, in ascending 'Timestamp' order, with at most one
record per timestamp (the last one seen in the input wins). A summary of
written and skipped records is printed to stderr.
"""
import json
import sys


def filter_records(lines, timestamp_threshold=None):
    """Select, de-duplicate, and sort JSON records from an iterable of lines.

    Args:
        lines: Iterable of strings, each expected to hold one JSON document.
        timestamp_threshold: Optional integer. Records whose 'Timestamp' is
            less than or equal to it are skipped. ``None`` disables the check.

    Returns:
        A tuple ``(records, skipped_invalid, skipped_below_threshold)`` where
        ``records`` is a list of dicts sorted by ascending 'Timestamp' with
        unique timestamps, and the two counters report how many lines were
        rejected for each reason.
    """
    unique_by_timestamp = {}
    skipped_invalid = 0
    skipped_below_threshold = 0

    for line in lines:
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            skipped_invalid += 1
            continue

        # Valid JSON is not necessarily an object: lists, numbers, strings
        # and null have no .get(), so reject them instead of crashing.
        if not isinstance(record, dict):
            skipped_invalid += 1
            continue

        timestamp = record.get('Timestamp')
        # bool is a subclass of int, so JSON true/false must be excluded
        # explicitly; otherwise True would be treated as timestamp 1.
        if not isinstance(timestamp, int) or isinstance(timestamp, bool):
            skipped_invalid += 1
            continue

        # Skip records below or equal to the threshold.
        if timestamp_threshold is not None and timestamp <= timestamp_threshold:
            skipped_below_threshold += 1
            continue

        # Later lines overwrite earlier ones with the same timestamp, which
        # matches "stable sort, then keep the last duplicate".
        unique_by_timestamp[timestamp] = record

    records = [unique_by_timestamp[ts] for ts in sorted(unique_by_timestamp)]
    return records, skipped_invalid, skipped_below_threshold


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).
            Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    if argv is None:
        argv = sys.argv

    # Check if the minimum number of arguments is provided.
    if len(argv) < 3:
        print(
            "Usage: python script.py INPUT_FILE OUTPUT_FILE [timestamp_threshold]",
            file=sys.stderr,
        )
        return 1

    input_file_path = argv[1]
    output_file_path = argv[2]

    # Optional timestamp threshold.
    timestamp_threshold = None
    if len(argv) >= 4:
        try:
            timestamp_threshold = int(argv[3])
        except ValueError:
            print("Timestamp threshold must be an integer", file=sys.stderr)
            return 1

    # The input is read completely before the output is opened, so the same
    # path may safely be used for both.
    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        records, skipped_invalid, skipped_below_threshold = filter_records(
            input_file, timestamp_threshold
        )

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.writelines(json.dumps(record) + '\n' for record in records)

    # Print summary to stderr.
    total_written = len(records)
    print(
        f"Processing complete. {total_written} records written to {output_file_path}",
        file=sys.stderr,
    )
    print(
        f"Skipped {skipped_invalid} records: invalid JSON or missing/invalid Timestamp",
        file=sys.stderr,
    )
    print(
        f"Skipped {skipped_below_threshold} records: Timestamp ≤ threshold ({timestamp_threshold})",
        file=sys.stderr,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())