def main():
    """Scrape historical PSI readings and store them, one date at a time.

    Initializes the database, then loops forever: asks ``get_date()`` for the
    next date to work on, fetches that date's page with ``get_html()``, parses
    it with ``get_datalist()`` and persists the result with
    ``write_list_to_db()``. When ``get_html()`` returns ``None`` the fetch is
    treated as failed and the iteration is retried after a one-minute pause.

    The loop has no exit condition, so this function only stops when one of
    the helpers raises or the process is terminated.
    """
    initialize_db()
    met_dept_api_url = "http://www.haze.gov.sg/haze-updates/historical-psi-readings/"

    while True:
        # Per the original author's note: the first date not yet processed
        # in the db.
        current_url_date = get_date()
        webpage_html = get_html(met_dept_api_url, current_url_date)

        # Identity check: None is a singleton, and "== None" can be fooled by
        # objects that override __eq__.
        if webpage_html is None:
            print("There was an error getting the html files, retrying in 1 minute")
            time.sleep(60)
            continue

        data_list = get_datalist(webpage_html, current_url_date)
        print("Great Stuff--writing this to the DB")
        # Per the original author's note: writing also marks the date as
        # processed in the db, which is what lets get_date() advance.
        write_list_to_db(data_list, current_url_date)
        # NOTE(review): the original layout was lost; "Done" is assumed to
        # belong to the success path (after the infinite loop it would be
        # unreachable) -- confirm.
        print("Done")
def main():
    """Scrape PSI readings from several URLs per date and store them.

    Initializes the database and builds the URL list with
    ``create_url_list()``, then loops forever: obtains a date from
    ``get_date()``, fetches the pages for that date with ``get_html()``,
    parses every returned page with ``get_datalist()`` and writes the combined
    rows with ``write_list_to_db()``. When ``get_html()`` returns ``None`` the
    fetch is treated as failed and the iteration is retried after a
    one-minute pause.

    The loop has no exit condition, so this function only stops when one of
    the helpers raises or the process is terminated.
    """
    initialize_db()
    # Per the original author's note: the hardcoded values live in
    # html_operations.py.
    met_dept_api_url_list = create_url_list()

    while True:
        # Per the original author's note: get_date() looks for a date assigned
        # to this process id and repeats every 2 seconds until there is one.
        current_url_date = get_date()
        webpage_htmls = get_html(met_dept_api_url_list, current_url_date)

        # Identity check: None is a singleton, and "== None" can be fooled by
        # objects that override __eq__.
        if webpage_htmls is None:
            print("There was an error getting the html files, retrying in 1 minute")
            time.sleep(60)
            continue

        # Accumulate the rows of every fetched page in place; the original
        # "data_list = data_list + ..." re-copied the whole list each time.
        data_list = []
        for html_output in webpage_htmls:
            data_list.extend(get_datalist(html_output, current_url_date))

        print("Great Stuff--writing this to the DB")
        write_list_to_db(data_list, current_url_date)
def get_api_readings():
    """Scrape historical PSI readings and store them, logging progress.

    Initializes the database, then loops forever: asks ``get_date()`` for the
    next date to work on, fetches that date's page with ``get_html()``, parses
    it with ``get_datalist()`` and persists the result with
    ``write_list_to_db()``. When ``get_html()`` returns ``None`` the fetch is
    treated as failed and the iteration is retried after a 30-second pause.

    The loop has no exit condition, so this function only stops when one of
    the helpers raises or the process is terminated.
    """
    initialize_db()
    met_dept_api_url = "http://www.haze.gov.sg/haze-updates/historical-psi-readings/"

    while True:
        # Per the original author's note: the first date not yet processed
        # in the db.
        current_url_date = get_date()
        # Lazy %-args: the message is only formatted if the record is emitted.
        logging.info("Getting data for %s", current_url_date)
        webpage_html = get_html(met_dept_api_url, current_url_date)

        # Identity check: None is a singleton, and "== None" can be fooled by
        # objects that override __eq__.
        if webpage_html is None:
            logging.info("There was an error getting the html files, retrying in 30 seconds")
            # Also printed, so the retry is visible on the console regardless
            # of how logging is configured.
            print("Retrying in 30 seconds")
            time.sleep(30)
            continue

        data_list = get_datalist(webpage_html, current_url_date)
        logging.info("Great Stuff--writing this to the DB")
        # Per the original author's note: writing also marks the date as
        # processed in the db, which is what lets get_date() advance.
        write_list_to_db(data_list, current_url_date)
        # NOTE(review): the original layout was lost; "Done" is assumed to
        # belong to the success path (after the infinite loop it would be
        # unreachable) -- confirm.
        logging.info("Done")