class DOCParser(HTMLParser):
    """HTML parser stage of the crawler pipeline.

    Consumes fetched documents from the ``to_parse`` queue; for every
    ``text/html`` document it feeds the markup through ``HTMLParser`` and
    enqueues each new absolute ``http://`` link it finds so the fetcher
    stage can retrieve it.
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)
        self._logger = logging.getLogger(self.__class__.__name__)
        # Message currently being parsed; set per-document in worker().
        self.msg = {}
        # local (per-document) href cache -- avoids duplicate enqueues
        self.hrefs = []
        # Only absolute http:// links and text/html bodies are handled.
        self.protocols_r = r"^http://"
        self.mimetype_r = r"^text\/html"
        self.queue = QueueManager(
            mqhost="localhost",
            mqport=61613,
            userid="",
            passwd="",
            qin="to_parse",
            qout="to_persist",
            recv_callback=self.worker,
            encode_parms=["data"],
            encoding="base64",
        )

    def start(self):
        """Begin consuming messages from the inbound queue (blocks)."""
        self.queue.subscribe()

    def handle_starttag(self, tag, attrs):
        """Enqueue the href of each new absolute http:// anchor tag."""
        attrs_h = dict(attrs)
        # .get() replaces the deprecated (Py2-only) dict.has_key().
        href = attrs_h.get("href")
        if (
            tag == "a"
            and href is not None
            # BUG FIX: the original compared with "is not" (object identity),
            # which is unreliable for strings; compare by value instead.
            and href != self.msg["url"]
            and href not in self.hrefs
            and re.search(self.protocols_r, href, re.IGNORECASE)
        ):
            self._logger.info("Found %s. Enqueuing...", href)
            try:
                self.queue.enqueue(msg={"parent": self.msg["url"], "url": href})
            except Exception:
                # Log instead of the original bare "except: pass", which
                # silently swallowed every error (even KeyboardInterrupt).
                self._logger.exception("Failed to enqueue %s", href)
            else:
                # Remember the href only once it was successfully enqueued.
                self.hrefs.append(href)

    def worker(self, msg=None):
        """Queue callback: parse one fetched-document message.

        ``msg`` is expected to carry ``url``, ``headers`` (with a
        ``content-type`` key) and ``data`` (the HTML body).
        """
        # None sentinel replaces the original shared mutable default ({}).
        msg = {} if msg is None else msg
        self.reset()
        self.msg = msg
        self.hrefs = []
        if re.search(self.mimetype_r, self.msg["headers"]["content-type"], re.IGNORECASE):
            self._logger.info("Received %s", msg["url"])
            self.feed(msg["data"])
def __init__(self, url="http://www.python.org/", timeout=10):
    """Set up the fetcher: logger, URL blacklist, cache and message queue.

    url     -- seed URL this fetcher starts from
    timeout -- per-request timeout (seconds) for the fetch call
    """
    self._logger = logging.getLogger(self.__class__.__name__)
    self.url = url
    self.timeout = timeout
    # Skip binary/document downloads that cannot be parsed for links.
    self.extensions_blacklist_r = (
        r'\.(?:jpg|jpeg|png|gif|zip|gz|rar|tar|pdf|doc|docx|ppt|pptx'
        r'|xls|xlsx|iso|mp3|wav|mid|wmv|wma|txt|scr|exe|com|bat|eml)$'
    )
    self.cache = CacheManager()
    # Consume URLs from "to_fetch", publish fetched documents to "to_parse";
    # the "data" field travels base64-encoded on the wire.
    self.queue = QueueManager(
        mqhost="localhost",
        mqport=61613,
        userid="",
        passwd="",
        qin="to_fetch",
        qout="to_parse",
        recv_callback=self.worker,
        encode_parms=["data"],
        encoding="base64",
    )
class Fetcher(object): def __init__(self, url="http://www.python.org/", timeout=10): self._logger = logging.getLogger(self.__class__.__name__) self.url = url self.extensions_blacklist_r = r'\.(?:jpg|jpeg|png|gif|zip|gz|rar|tar|pdf|doc|docx|ppt|pptx|xls|xlsx|iso|mp3|wav|mid|wmv|wma|txt|scr|exe|com|bat|eml)$' self.timeout = timeout self.cache = CacheManager() self.queue = QueueManager(mqhost="localhost", mqport=61613, userid="", passwd="", qin="to_fetch", qout="to_parse", recv_callback=self.worker, encode_parms=["data"], encoding="base64") def start(self): self.queue.subscribe() # REMOVE import simplejson QueueManager._qinst.send(simplejson.dumps({"url": self.url, "parent": "start"}), destination="/queue/to_fetch") def worker(self, msg={}): url = msg["url"] if re.search(self.extensions_blacklist_r, url, re.IGNORECASE): return try: self._logger.info("Fetching %s" % msg["url"]) req = urlopen(url, timeout=self.timeout) except Exception, e: self._logger.error("Error on trying to fetch %s" % url) self._logger.exception(e) else:
def __init__(self, *args, **kwargs):
    """Initialise parser state and hook this instance up to the queues.

    Extra positional/keyword arguments are forwarded to HTMLParser.
    """
    HTMLParser.__init__(self, *args, **kwargs)
    self._logger = logging.getLogger(self.__class__.__name__)
    self.msg = {}    # message currently being parsed
    # local (per-document) href cache
    self.hrefs = []
    # Only absolute http:// links and text/html documents are handled.
    self.protocols_r = r"^http://"
    self.mimetype_r = r"^text\/html"
    # Reads parse jobs from "to_parse", hands results to "to_persist";
    # the "data" field travels base64-encoded.
    self.queue = QueueManager(
        mqhost="localhost", mqport=61613,
        userid="", passwd="",
        qin="to_parse", qout="to_persist",
        recv_callback=self.worker,
        encode_parms=["data"],
        encoding="base64",
    )
from qmanager import QueueManager
from fuzzywuzzy import process
from hashlib import md5
import os
from ryclass import SonicAuth
import random
from pprint import pprint

# Enter Info Here -- Subsonic server connection settings (credentials redacted).
sshost = "https://subsonic.example.org"
ssuser = "******"
sspass = "******"

# Authenticate against the Subsonic server once, at import time.
# NOTE(review): this reaches into a private attribute (_ssconn) of SonicAuth;
# consider exposing the connection through a public accessor.
sauth = SonicAuth(sshost, ssuser, sspass)
ssconn = sauth._ssconn
queue = QueueManager()


@ask.launch
def start_skill():
    """Alexa launch handler: greet the user and prompt for a first command."""
    # print 'the amazon user id is ' + session.user['userId']
    text = 'Welcome to Subsonic. \
Try asking me to play a song or start a playlist'
    prompt = 'For example say, play music by Ozzy Osbourne'
    return question(text).reprompt(prompt) \
        .simple_card(title='Welcome to Subsonic!',
                     content='Try asking me to play a song')


# NOTE(review): the intent handler below is truncated in this chunk;
# its parameters and body are not visible here.
@ask.intent("AMAZON.SearchAction<object@MusicCreativeWork>",