示例#1
0
class DOCParser(HTMLParser):
    """HTML parser that extracts outbound links from queued documents.

    Messages are delivered to :meth:`worker` by a ``QueueManager`` created
    with ``qin="to_parse"`` and ``qout="to_persist"``.  For every ``<a>``
    tag whose ``href`` matches ``protocols_r`` and has not been seen in the
    current document, a ``{"parent": ..., "url": ...}`` message is enqueued.
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self, *args, **kwargs)

        self._logger = logging.getLogger(self.__class__.__name__)

        # Message currently being parsed (set by worker()).
        self.msg = {}
        # local (per-document) href cache
        self.hrefs = []

        # Only links matching this pattern are followed.
        self.protocols_r = r"^http://"
        # Only documents whose content-type matches this pattern are parsed.
        self.mimetype_r = r"^text\/html"

        self.queue = QueueManager(
            mqhost="localhost",
            mqport=61613,
            userid="",
            passwd="",
            qin="to_parse",
            qout="to_persist",
            recv_callback=self.worker,
            encode_parms=["data"],
            encoding="base64",
        )

    def start(self):
        """Subscribe to the queue so that messages reach :meth:`worker`."""
        self.queue.subscribe()

    def handle_starttag(self, tag, attrs):
        """Enqueue the target of an ``<a href=...>`` tag if it is new.

        A link is skipped when it has no value, equals the URL of the
        document being parsed, was already enqueued for this document, or
        does not match ``protocols_r``.  Enqueue failures are logged and the
        link is left out of ``hrefs`` so that a later occurrence is retried.
        """
        if tag != "a":
            return

        href = dict(attrs).get("href")
        # HTMLParser reports a valueless attribute (``<a href>``) as None,
        # which re.search() cannot handle; an empty href never matches.
        if not href:
            return

        parent_url = self.msg["url"]
        # Compare by value: two equal URL strings are rarely the same object.
        if href == parent_url or href in self.hrefs:
            return
        if not re.search(self.protocols_r, href, re.IGNORECASE):
            return

        self._logger.info("Found %s. Enqueuing...", href)

        try:
            self.queue.enqueue(msg={"parent": parent_url, "url": href})
        except Exception:
            # Best effort: keep parsing the rest of the document, but leave
            # a trace instead of swallowing the failure silently.
            self._logger.exception("Could not enqueue %s", href)
        else:
            self.hrefs.append(href)

    def worker(self, msg=None):
        """Queue callback: parse one fetched document.

        ``msg`` is indexed with ``"url"``, ``"data"`` and
        ``["headers"]["content-type"]``.  Documents whose content type does
        not match ``mimetype_r`` (or that carry no content type at all) are
        ignored.
        """
        if msg is None:
            # A fresh dict per call; a shared default would leak between calls.
            msg = {}

        self.reset()
        self.msg = msg
        self.hrefs = []

        try:
            content_type = msg["headers"]["content-type"]
        except (KeyError, TypeError):
            self._logger.warning("Skipping message without a content-type header")
            return

        if re.search(self.mimetype_r, content_type, re.IGNORECASE):
            self._logger.info("Received %s", msg["url"])
            self.feed(msg["data"])
示例#2
0
    def __init__(self, url="http://www.python.org/", timeout=10):
        """Store the crawl settings and create the cache and queue objects.

        url -- kept on the instance as ``self.url``
        timeout -- kept on the instance as ``self.timeout``
        """
        self.url = url
        self.timeout = timeout
        # URLs ending in one of these extensions are matched by this pattern.
        self.extensions_blacklist_r = r'\.(?:jpg|jpeg|png|gif|zip|gz|rar|tar|pdf|doc|docx|ppt|pptx|xls|xlsx|iso|mp3|wav|mid|wmv|wma|txt|scr|exe|com|bat|eml)$'

        self._logger = logging.getLogger(self.__class__.__name__)

        self.cache = CacheManager()

        # Reads from "to_fetch", writes to "to_parse"; incoming messages are
        # handed to self.worker.
        queue_options = dict(
            mqhost="localhost",
            mqport=61613,
            userid="",
            passwd="",
            qin="to_fetch",
            qout="to_parse",
            recv_callback=self.worker,
            encode_parms=["data"],
            encoding="base64",
        )
        self.queue = QueueManager(**queue_options)
示例#3
0
class Fetcher(object):
    def __init__(self, url="http://www.python.org/", timeout=10):
        """Initialise the fetcher's settings, cache and queue connection.

        url -- start URL, stored as ``self.url``
        timeout -- stored as ``self.timeout``
        """
        logger_name = self.__class__.__name__
        self._logger = logging.getLogger(logger_name)

        self.url, self.timeout = url, timeout
        # Pattern matching URLs that end in a blacklisted file extension.
        self.extensions_blacklist_r = r'\.(?:jpg|jpeg|png|gif|zip|gz|rar|tar|pdf|doc|docx|ppt|pptx|xls|xlsx|iso|mp3|wav|mid|wmv|wma|txt|scr|exe|com|bat|eml)$'

        self.cache = CacheManager()

        # Input queue "to_fetch", output queue "to_parse"; self.worker is
        # registered as the receive callback.
        self.queue = QueueManager(
            mqhost="localhost",
            mqport=61613,
            userid="",
            passwd="",
            qin="to_fetch",
            qout="to_parse",
            recv_callback=self.worker,
            encode_parms=["data"],
            encoding="base64",
        )

    def start(self):
        """Subscribe to the queue, then send the start URL to ``/queue/to_fetch``."""
        self.queue.subscribe()

        # REMOVE
        # (marker kept from the original: the seeding code below is flagged
        # for removal by its author)
        import simplejson

        seed_message = {"url": self.url, "parent": "start"}
        payload = simplejson.dumps(seed_message)
        QueueManager._qinst.send(payload, destination="/queue/to_fetch")
    def worker(self, msg={}):
        url = msg["url"]

        if re.search(self.extensions_blacklist_r, url, re.IGNORECASE):
            return

        try:
            self._logger.info("Fetching %s" % msg["url"])
            req = urlopen(url, timeout=self.timeout)
        except Exception, e:
            self._logger.error("Error on trying to fetch %s" % url)
            self._logger.exception(e)

        else:
示例#4
0
    def __init__(self, *args, **kwargs):
        """Initialise the base parser, per-document state and the queue.

        All positional and keyword arguments are forwarded unchanged to
        ``HTMLParser.__init__``.
        """
        HTMLParser.__init__(self, *args, **kwargs)

        logger_name = self.__class__.__name__
        self._logger = logging.getLogger(logger_name)

        # Patterns for the link protocol and the document content type.
        self.protocols_r = r"^http://"
        self.mimetype_r = r"^text\/html"

        # Per-document state: the current message and the hrefs found in it.
        self.msg = {}
        self.hrefs = []

        # Input queue "to_parse", output queue "to_persist"; self.worker is
        # registered as the receive callback.
        queue_options = dict(
            mqhost="localhost",
            mqport=61613,
            userid="",
            passwd="",
            qin="to_parse",
            qout="to_persist",
            recv_callback=self.worker,
            encode_parms=["data"],
            encoding="base64",
        )
        self.queue = QueueManager(**queue_options)
示例#5
0
from qmanager import QueueManager
from fuzzywuzzy import process
from hashlib import md5
import os
from ryclass import SonicAuth
import random
from pprint import pprint

# Subsonic connection settings.
# Each value may be supplied through the environment (SUBSONIC_HOST,
# SUBSONIC_USER, SUBSONIC_PASS) so that credentials do not have to be stored
# in this file; the literals below are the fallbacks and can still be edited
# in place.
sshost = os.environ.get("SUBSONIC_HOST", "https://subsonic.example.org")
ssuser = os.environ.get("SUBSONIC_USER", "******")
sspass = os.environ.get("SUBSONIC_PASS", "******")

sauth = SonicAuth(sshost, ssuser, sspass)
# NOTE(review): this reads an underscore-prefixed attribute of SonicAuth;
# confirm whether a public accessor exists.
ssconn = sauth._ssconn
queue = QueueManager()


@ask.launch
def start_skill():
    """Greet the user when the skill is launched.

    Registered through ``@ask.launch``.  Returns the welcome ``question``
    with a reprompt and a simple card attached.
    """
    # Adjacent literals instead of a backslash inside the string: the old
    # continuation embedded the next line's indentation in the spoken text.
    text = ('Welcome to Subsonic. '
            'Try asking me to play a song or start a playlist')

    prompt = 'For example say, play music by Ozzy Osbourne'
    return question(text).reprompt(prompt) \
        .simple_card(title='Welcome to Subsonic!',
                     content='Try asking me to play a song')


@ask.intent("AMAZON.SearchAction<object@MusicCreativeWork>",