# Command-line interface: input path (one JSON tweet per line) and output path.
parser.add_argument("infile", help="Tweet Input file (JSON)")
parser.add_argument("outfile", help="Cleaned Tweets output file")
args = parser.parse_args()

# Log everything (DEBUG and up) to a fresh log file on every run.
logging.basicConfig(filename='tweets_cleaned.log', filemode='w',
                    level=logging.DEBUG)

# Instantiate a class that manages processes and resources
manager = Manager()

# BUG FIX: the original opened the output with 'a+' and then called
# output_fp.truncate() with no argument to "make sure output file is empty".
# In 'a+' mode the stream position starts at end-of-file, so truncate()
# truncated to the *end* — a no-op — leaving stale content from previous runs
# in place, with new output appended after it.  Opening with 'w' empties the
# file on open and still allows every subsequent write, including the final
# unicode total appended after the tweet-by-tweet processing.
with open(args.infile) as input_fp, open(args.outfile, 'w') as output_fp:
    # Each line of the input is one JSON-formatted tweet.  Iterate the file
    # object directly rather than materializing it all with readlines().
    for line in input_fp:
        # Build a job for this tweet, clean it, then validate the result.
        job = Job(line)
        job.clean_data()
        job.validate()
        if job.is_valid:
            # Hand the valid job to the manager and write back whatever it
            # yields for this tweet.
            manager.put(job)
            output_fp.write(str(manager.get()))
    # After all tweets, append the manager's unicode summary (the total
    # number of tweets containing unicode).
    output_fp.write(manager.unicode_output())