evtdict[evt] = field evtdup[evt] = [] dayno += 1 # if dayno > 128: sys.exit() # debug line = fin.readline() fin.close() writeevents() # write final day (which is only day in the pipeline call) fout.close() writedups(datestr) print "Finished" if __name__ == '__main__': if len(sys.argv) > 2: # initializations for stand-alone tests phox_utilities.init_logger('test_pipeline.log') logger = phox_utilities.logger # get a local copy for the pipeline phox_utilities.parse_config('test_config.ini') # initialize the various phox_utilities globals if len(sys.argv) > 1: datestr = sys.argv[1] else: print 'Date string is required for oneaday_formatter.py' sys.exit() main(datestr)
field = finlist[line].split("\t") thisURL = field[2][:-1] thisURL = thisURL[:MAX_URLLENGTH] # temporary to accommodate TABARI input limits thisstory = get_story(finlist[line + 1]) thisdate = get_date(field) thissource = get_source(field) if thissource == "999": print >> newout, thisURL # Adds sources not included in sources dictionary to 'newsource_results_20..' file output write_record(thissource, sourcecount, thisdate, thisURL, thisstory, fout) fin.close() fout.close() print "Finished" if __name__ == "__main__": if len(sys.argv) > 2: # initializations for stand-alone tests phox_utilities.init_logger("test_pipeline.log") logger = phox_utilities.logger # get a local copy for the pipeline phox_utilities.parse_config("test_config.ini") # initialize the various phox_utilities globals if len(sys.argv) > 1: thisday = sys.argv[1] else: phox_utilities.do_RuntimeError("No date suffix in Mongo.formatter.py") main(thisday)
import glob
import logging
import datetime
import subprocess
import phox_uploader
import phox_utilities
import mongo_formatter
import oneaday_formatter
import scraper_connection
from ftplib import FTP
from ConfigParser import ConfigParser


if __name__ == '__main__':
    # Pipeline entry point: set up logging and configuration, then read the
    # date string for this run from the first command-line argument.
    phox_utilities.init_logger('PHOX_pipeline.log')
    logger = phox_utilities.logger  # get a local copy for the pipeline
    phox_utilities.parse_config('PHOX_config.ini')  # initialize the various phox_utilities globals

    # Single-argument print(...) emits the same text as the two-item print
    # statement it replaces (both render the timestamp with str()).
    print('\nPHOX.pipeline run: %s' % datetime.datetime.now())

    if len(sys.argv) > 1:
        date_string = sys.argv[1]
        logger.info('Date string: ' + date_string + '\n')
        print('Date string: %s' % date_string)
    else:
        # A missing date string is a failed run: record it at ERROR level and
        # exit non-zero so a scheduler or wrapper script does not mistake it
        # for success (a bare sys.exit() reports status 0).
        # NOTE(review): assumes phox_utilities.logger is a standard
        # logging.Logger -- confirm.
        logger.error('Error: No date string in PHOX.pipeline')
        sys.exit(1)