def fetch_latest_document(symbol, form_type):
    """Return the complete-submission text of the latest filing, or None.

    Asks ``fetch_document_links`` for the filing detail pages matching
    ``symbol`` and ``form_type``, loads the first one (treated as the most
    recent), and follows the first anchor on that page whose href ends in
    ``.txt`` -- the complete submission (sgml) text.

    Returns:
        Whatever ``load_url`` yields for the complete-submission link, or
        None when there are no filing links, the detail page could not be
        loaded, or the page contains no ``.txt`` link.
    """
    document_links = fetch_document_links(symbol, form_type)
    if not document_links:
        return None

    # the first link is the latest one
    detail_page = load_url(document_links[0], USER_AGENT)
    if detail_page is None:
        return None

    for node in parse_html(detail_page).xpath('//a'):
        href = node.attrib.get('href')
        if href is None:
            # anchor without a target (e.g. a named anchor); nothing to follow
            continue
        # the first url pointing to a '.txt' file is the complete submission;
        # hrefs are joined onto SEC_ROOT_URL as-is
        if path.splitext(href)[1] == '.txt':
            return load_url(''.join([SEC_ROOT_URL, href]), USER_AGENT)

    return None
def run(self):
    """Reply to ``self.sender`` with the current worded forecast for NYC.

    Loads the National Weather Service DWML feed for a fixed NYC
    latitude/longitude, gathers the text of every ``<text>`` child of each
    ``<wordedForecast>`` element, and mails the pieces joined by spaces.
    When the feed cannot be loaded (``load_url`` returns None) an apology
    message is sent instead.
    """
    # get the current weather for NYC from the National Weather Service feed
    weather_xml = load_url(
        'http://forecast.weather.gov/MapClick.php?lat=40.71980&lon=-73.99300&FcstType=dwml')
    if weather_xml is None:
        # there was an error getting the weather data
        send('NYC Weather', 'Sorry, this service is temporarily unavailable',
             recipient_list=[self.sender], sender=server_auto_email)
        return

    # parse the report from the xml and auto-reply with it as the message body
    doc = etree.fromstring(weather_xml)

    # find the human-readable text report in the xml
    report = []
    for forecast in doc.xpath('//wordedForecast'):
        # iterate the element directly; getchildren() is deprecated
        for child in forecast:
            # an empty <text/> element has text None, which would make the
            # join below raise TypeError -- skip it
            if child.tag == 'text' and child.text is not None:
                report.append(child.text)

    # send it back to the sender
    send('NYC Weather', ' '.join(report),
         recipient_list=[self.sender], sender=server_auto_email)
def run(self):
    """Reply to ``self.sender`` with the current time, as text and html.

    The time comes from the US Naval Observatory Master Clock page. The
    reply carries the raw page as its html part and a text rendering of the
    same page (with the doctype declaration removed before conversion) as
    its plain body. When the page cannot be loaded an apology message is
    sent instead.
    """
    subject = 'The Current Time'

    # get the time as an html page result from the US Naval Observatory Master Clock
    page = load_url('http://tycho.usno.navy.mil/cgi-bin/timer.pl')
    if page is None:
        # there was an error getting the time data
        send(subject, 'Sorry, this service is temporarily unavailable',
             recipient_list=[self.sender], sender=server_auto_email)
        return

    # the doctype is stripped only for the text conversion; the html part
    # below is sent untouched
    doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final"//EN>'
    plain_text = get_text_from_html(page.replace(doctype, ''))

    # auto-reply with both the text and html versions of the time report
    send(subject, plain_text, recipient_list=[self.sender], html=page,
         sender=server_auto_email)
def fetch_document_links(symbol, form_type):
    """Return links to the filing detail pages for ``symbol``/``form_type``.

    Loads the SEC search results page built from ``SEARCH_URL`` and collects
    the href of every ``<a id="documentsbutton">`` anchor, each prefixed with
    ``SEC_ROOT_URL``. Links are returned in the order they appear on the
    results page. Each link points to a document filing detail page; the
    actual document is a single link within that page.

    Returns:
        A list of url strings; empty when the search page could not be
        loaded or holds no matching anchors.
    """
    search_url = SEARCH_URL.substitute(
        symbol=symbol, form=form_type, root=SEC_ROOT_URL)
    matches = load_url(search_url, USER_AGENT)
    if matches is None:
        return []

    tree = parse_html(matches)
    return [
        ''.join([SEC_ROOT_URL, node.attrib['href']])
        for node in tree.xpath('//a[@id="documentsbutton"]')
        if 'href' in node.attrib
    ]
def run(self):
    """Reply to ``self.sender`` with the current worded forecast for NYC.

    Loads the National Weather Service DWML feed for a fixed NYC
    latitude/longitude, gathers the text of every ``<text>`` child of each
    ``<wordedForecast>`` element, and mails the pieces joined by spaces.
    When the feed cannot be loaded (``load_url`` returns None) an apology
    message is sent instead.
    """
    # get the current weather for NYC from the National Weather Service feed
    weather_xml = load_url(
        'http://forecast.weather.gov/MapClick.php?lat=40.71980&lon=-73.99300&FcstType=dwml')
    if weather_xml is None:
        # there was an error getting the weather data
        send('NYC Weather', 'Sorry, this service is temporarily unavailable',
             recipient_list=[self.sender], sender=server_auto_email)
        return

    # parse the report from the xml and auto-reply with it as the message body
    doc = etree.fromstring(weather_xml)

    # find the human-readable text report in the xml
    report = []
    for forecast in doc.xpath('//wordedForecast'):
        # iterate the element directly; getchildren() is deprecated
        for child in forecast:
            # an empty <text/> element has text None, which would make the
            # join below raise TypeError -- skip it
            if child.tag == 'text' and child.text is not None:
                report.append(child.text)

    # send it back to the sender
    send('NYC Weather', ' '.join(report),
         recipient_list=[self.sender], sender=server_auto_email)
def run(self):
    """Reply to ``self.sender`` with the current time, as text and html.

    The time comes from the US Naval Observatory Master Clock page. The
    reply carries the raw page as its html part and a text rendering of the
    same page (with the doctype declaration removed before conversion) as
    its plain body. When the page cannot be loaded an apology message is
    sent instead.
    """
    subject = 'The Current Time'

    # get the time as an html page result from the US Naval Observatory Master Clock
    page = load_url('http://tycho.usno.navy.mil/cgi-bin/timer.pl')
    if page is None:
        # there was an error getting the time data
        send(subject, 'Sorry, this service is temporarily unavailable',
             recipient_list=[self.sender], sender=server_auto_email)
        return

    # the doctype is stripped only for the text conversion; the html part
    # below is sent untouched
    doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final"//EN>'
    plain_text = get_text_from_html(page.replace(doctype, ''))

    # auto-reply with both the text and html versions of the time report
    send(subject, plain_text, recipient_list=[self.sender], html=page,
         sender=server_auto_email)