Пример #1
0
 def __init__(self, hostname=None, logger=None):
     """Create the JSON and HTTP helpers used to reach the Call Connect service.

     :param hostname: base URL of the service; when ``None`` the local
         default ``http://127.0.0.1:5000`` is used
     :param logger: value forwarded to ``setup_console_logger`` together
         with the name ``"CallConnectHandler"``
     """
     # 127.0.0.0 is the address of the loopback *network*, not of a host,
     # so the fallback points at the loopback host 127.0.0.1 instead.
     self.__hostname = hostname if hostname is not None else "http://127.0.0.1:5000"
     self.__log = setup_console_logger(logger, "CallConnectHandler")
     # Both helpers share the same hostname and logger.
     self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
     self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
     self.__log.debug("CallConnect handler initialized for: %s", self.__hostname)
Пример #2
0
class FlaskrilioHandler:
    """A simple wrapper for the local Flaskrilio service.

    Requests are delegated to a ``JsonHandler`` and an ``HttpHandler`` that
    are both created for the same hostname.
    """

    # Fallback base URL used when no hostname is supplied. 127.0.0.0 is the
    # loopback *network* address and not a usable host, hence 127.0.0.1.
    DEFAULT_HOSTNAME = "http://127.0.0.1:5000"

    def __init__(self, hostname=None, logger=None):
        """Create the JSON and HTTP helpers for the given hostname.

        :param hostname: base URL of the service; ``None`` selects
            ``DEFAULT_HOSTNAME``
        :param logger: value forwarded to ``setup_console_logger`` together
            with the name ``"FlaskrilioHandler"``
        """
        self.__hostname = hostname if hostname is not None else self.DEFAULT_HOSTNAME
        self.__log = setup_console_logger(logger, "FlaskrilioHandler")
        self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
        self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
        self.__log.debug("Flaskrilio handler initialized for: %s", self.__hostname)

    def get_home(self):
        """Return the JSON handler's result for a GET on ``/``."""
        return self.__jh.get(endpoint="/")

    def get_twiml(self, ctx):
        """Return the HTTP handler's result for a GET on the endpoint ``ctx``.

        :param ctx: endpoint passed unchanged to the HTTP handler
        """
        self.__log.debug("Getting TwilML for an endpoint: %s", ctx)
        return self.__hh.get(endpoint=ctx)

    def get_calls(self):
        """Return the JSON handler's result for ``/calls``.

        :return: the fetched value, or an empty list when the handler
            returned ``None``
        """
        calls = self.__jh.get(endpoint="/calls")
        self.__log.debug("Got calls: %s", calls)
        return [] if calls is None else calls
Пример #3
0
    def crawl_worker(self, url):
        """
        Fetch a single url and collect the links and assets found on it.

        This is the task executed by the threadpool executor for every url it receives.

        :param url: Url to collect links and assets from; it is resolved against ``self.start_url``
        :return: tuple ``(site_map_entry, links_with_issues_entry)``.
                 ``site_map_entry`` is ``{url: {'links': ..., 'assets': ...}}`` on success and ``None`` when the
                 url is skipped or cannot be processed.
                 ``links_with_issues_entry`` is a set that contains ``url`` when no access link could be built
                 for it, and is empty otherwise.
        :raises ValueError: if ``url`` is ``None``
        """
        if url is None:
            raise ValueError("Url=%s has a None value" % url)

        site_map_entry = dict()
        links_with_issues_entry = set()

        module_logger.info("Working on url=%s", url)

        try:
            # Make a relative url into an absolute url
            access_link = LinkHandler.reconstruct_link(self.start_url, url)

            if access_link is None:
                raise ValueError("Access link value: %s" % access_link)

            same_domain = DomainRule.apply(self.start_url, access_link)

        except Exception as err:
            module_logger.warning(err)
            # Report the url to the caller, as crawl() does for links it cannot reconstruct.
            links_with_issues_entry.add(url)
            return None, links_with_issues_entry

        if not same_domain:
            module_logger.info("url=%s is not in the same domain as %s", access_link, self.start_url)
            module_logger.debug("Start url=%s site_map_entry=%s links_with_issues_entry=%s",
                                self.start_url, site_map_entry, links_with_issues_entry)
            return None, links_with_issues_entry

        module_logger.debug("Going to open access_link=%s", access_link)

        try:
            content = HttpHandler.fetch_url_content(access_link)

            if content is None:
                raise ValueError("Content of the url=%s is None" % url)

        except Exception as err:
            # Any fetch problem (including the missing content above) skips this url.
            module_logger.warning(err)
            return None, links_with_issues_entry

        links, assets = PageParser.parse_page_get_links(content)

        site_map_entry[url] = {'links': links, 'assets': assets}

        module_logger.info("Completed working on url=%s", url)

        module_logger.info("SiteMap=%s", site_map_entry)
        module_logger.info("Links with issues=%s", links_with_issues_entry)

        return site_map_entry, links_with_issues_entry
Пример #4
0
class CallConnectHandler:
    """A simple wrapper for Call Connect endpoints.

    Requests are delegated to a ``JsonHandler`` and an ``HttpHandler`` that
    are both created for the same hostname.
    """

    # Fallback base URL used when no hostname is supplied. 127.0.0.0 is the
    # loopback *network* address and not a usable host, hence 127.0.0.1.
    DEFAULT_HOSTNAME = "http://127.0.0.1:5000"

    def __init__(self, hostname=None, logger=None):
        """Create the JSON and HTTP helpers for the given hostname.

        :param hostname: base URL of the service; ``None`` selects
            ``DEFAULT_HOSTNAME``
        :param logger: value forwarded to ``setup_console_logger`` together
            with the name ``"CallConnectHandler"``
        """
        self.__hostname = hostname if hostname is not None else self.DEFAULT_HOSTNAME
        self.__log = setup_console_logger(logger, "CallConnectHandler")
        self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
        self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
        self.__log.debug("CallConnect handler initialized for: %s", self.__hostname)

    def get_new_caller_id(self):
        """POST an empty JSON object to ``/api/id`` and return the ``id`` field of the JSON reply."""
        caller_id = self.__jh.post(endpoint="/api/id", data="{}").json()['id']
        self.__log.debug("Received new Caller ID: %s", caller_id)
        return caller_id

    def get_redirect_to(self, caller_id, number):
        """POST a caller id / redirect number pair to ``/api/callers``.

        :param caller_id: value sent as ``id``
        :param number: value sent as ``redirectTo``
        :return: the object returned by the JSON handler's ``post``
        """
        payload = {"id": caller_id, "redirectTo": number}
        # json.dumps serializes the payload dict into the string sent as the request body
        redir = self.__jh.post(endpoint="/api/callers",
                               data=json.dumps(payload))
        self.__log.debug("POST %s/api/callers payload: %s", self.__hostname, payload)
        self.__log.debug("Received new redirect_to: %s", redir.json())
        return redir

    def delete_caller_id(self, callerId):
        """Issue a DELETE on ``/api/callers/<callerId>`` and return the HTTP handler's result."""
        self.__log.debug("Deleting callerId: %s", callerId)
        return self.__hh.delete(endpoint="/api/callers/%s" % callerId)

    def get_callers_details(self, callerId):
        """Issue a GET on ``/api/callers/<callerId>`` and return the JSON handler's result."""
        self.__log.debug("Getting details for callerId: %s", callerId)
        return self.__jh.get(endpoint="/api/callers/%s" % callerId)

    def get_number_pool(self):
        """Issue a GET on ``/api/pool`` and return the JSON handler's result."""
        pool = self.__jh.get(endpoint="/api/pool")
        self.__log.debug("Got Number pool: %s", pool.json())
        return pool
Пример #5
0
class CallConnectHandler:
    """A simple wrapper for Call Connect endpoints.

    Requests are delegated to a ``JsonHandler`` and an ``HttpHandler`` that
    are both created for the same hostname.
    """

    # Fallback base URL used when no hostname is supplied. 127.0.0.0 is the
    # loopback *network* address and not a usable host, hence 127.0.0.1.
    DEFAULT_HOSTNAME = "http://127.0.0.1:5000"

    def __init__(self, hostname=None, logger=None):
        """Create the JSON and HTTP helpers for the given hostname.

        :param hostname: base URL of the service; ``None`` selects
            ``DEFAULT_HOSTNAME``
        :param logger: value forwarded to ``setup_console_logger`` together
            with the name ``"CallConnectHandler"``
        """
        self.__hostname = hostname if hostname is not None else self.DEFAULT_HOSTNAME
        self.__log = setup_console_logger(logger, "CallConnectHandler")
        self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
        self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
        self.__log.debug("CallConnect handler initialized for: %s",
                         self.__hostname)

    def get_new_caller_id(self):
        """POST an empty JSON object to ``/api/id`` and return the ``id`` field of the JSON reply."""
        caller_id = self.__jh.post(endpoint="/api/id", data="{}").json()['id']
        self.__log.debug("Received new Caller ID: %s", caller_id)
        return caller_id

    def get_redirect_to(self, caller_id, number):
        """POST a caller id / redirect number pair to ``/api/callers``.

        :param caller_id: value sent as ``id``
        :param number: value sent as ``redirectTo``
        :return: the object returned by the JSON handler's ``post``
        """
        payload = {"id": caller_id, "redirectTo": number}
        # json.dumps serializes the payload dict into the string sent as the request body
        redir = self.__jh.post(endpoint="/api/callers",
                               data=json.dumps(payload))
        self.__log.debug("POST %s/api/callers payload: %s",
                         self.__hostname, payload)
        self.__log.debug("Received new redirect_to: %s", redir.json())
        return redir

    def delete_caller_id(self, callerId):
        """Issue a DELETE on ``/api/callers/<callerId>`` and return the HTTP handler's result."""
        self.__log.debug("Deleting callerId: %s", callerId)
        return self.__hh.delete(endpoint="/api/callers/%s" % callerId)

    def get_callers_details(self, callerId):
        """Issue a GET on ``/api/callers/<callerId>`` and return the JSON handler's result."""
        self.__log.debug("Getting details for callerId: %s", callerId)
        return self.__jh.get(endpoint="/api/callers/%s" % callerId)

    def get_number_pool(self):
        """Issue a GET on ``/api/pool`` and return the JSON handler's result."""
        pool = self.__jh.get(endpoint="/api/pool")
        self.__log.debug("Got Number pool: %s", pool.json())
        return pool
class FlaskrilioHandler:
    """A simple wrapper for the local Flaskrilio service.

    Requests are delegated to a ``JsonHandler`` and an ``HttpHandler`` that
    are both created for the same hostname.
    """

    # Fallback base URL used when no hostname is supplied. 127.0.0.0 is the
    # loopback *network* address and not a usable host, hence 127.0.0.1.
    DEFAULT_HOSTNAME = "http://127.0.0.1:5000"

    def __init__(self, hostname=None, logger=None):
        """Create the JSON and HTTP helpers for the given hostname.

        :param hostname: base URL of the service; ``None`` selects
            ``DEFAULT_HOSTNAME``
        :param logger: value forwarded to ``setup_console_logger`` together
            with the name ``"FlaskrilioHandler"``
        """
        self.__hostname = hostname if hostname is not None else self.DEFAULT_HOSTNAME
        self.__log = setup_console_logger(logger, "FlaskrilioHandler")
        self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
        self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
        self.__log.debug("Flaskrilio handler initialized for: %s",
                         self.__hostname)

    def get_home(self):
        """Return the JSON handler's result for a GET on ``/``."""
        return self.__jh.get(endpoint="/")

    def get_twiml(self, ctx):
        """Return the HTTP handler's result for a GET on the endpoint ``ctx``.

        :param ctx: endpoint passed unchanged to the HTTP handler
        """
        self.__log.debug("Getting TwilML for an endpoint: %s", ctx)
        return self.__hh.get(endpoint=ctx)

    def get_calls(self):
        """Return the JSON handler's result for ``/calls``.

        :return: the fetched value, or an empty list when the handler
            returned ``None``
        """
        calls = self.__jh.get(endpoint="/calls")
        self.__log.debug("Got calls: %s", calls)
        return [] if calls is None else calls
Пример #7
0
 def __init__(self, hostname=None, logger=None):
     """Create the JSON and HTTP helpers used to reach the Flaskrilio service.

     :param hostname: base URL of the service; when ``None`` the local
         default ``http://127.0.0.1:5000`` is used
     :param logger: value forwarded to ``setup_console_logger`` together
         with the name ``"FlaskrilioHandler"``
     """
     # 127.0.0.0 is the address of the loopback *network*, not of a host,
     # so the fallback points at the loopback host 127.0.0.1 instead.
     self.__hostname = hostname if hostname is not None else "http://127.0.0.1:5000"
     self.__log = setup_console_logger(logger, "FlaskrilioHandler")
     # Both helpers share the same hostname and logger.
     self.__jh = JsonHandler(hostname=self.__hostname, logger=self.__log)
     self.__hh = HttpHandler(hostname=self.__hostname, logger=self.__log)
     self.__log.debug("Flaskrilio handler initialized for: %s", self.__hostname)
Пример #8
0
    def crawl(self, start_url=None):
        """
        Single threaded webcrawler.

        Urls are taken from a LIFO queue seeded with the start url, so the most recently queued link is
        processed next. Each url is resolved against the start url, skipped when it was already visited or
        lies outside the start url's domain, then fetched and parsed for further links and assets.

        :param start_url: Starting url; when given it replaces ``self.start_url``
        :return: starting url (str), sitemap (dict), links with issues (set)
        :raises ValueError: if neither ``start_url`` nor ``self.start_url`` is set
        """
        if self.start_url is None and start_url is None:
            raise ValueError("Start url cannot be None")

        if start_url is not None:
            self.start_url = start_url

        site_map = dict()
        visited = set()
        links_with_issues = set()
        queue = LifoQueue()
        queue.put(self.start_url)

        while not queue.empty():
            next_link = queue.get()

            module_logger.info("Retrieved url=%s from queue", next_link)

            try:
                if FileExtensionRule.apply(next_link):
                    module_logger.info("Url=%s is a file asset", next_link)
                    continue

            except ValueError as err:
                # Best effort: a link the rule cannot evaluate is still processed below.
                module_logger.warning(err)

            try:
                # Create an absolute url from a relative url
                access_link = LinkHandler.reconstruct_link(self.start_url, next_link)

                if access_link is None:
                    module_logger.warning("Currently working on next_link=%s - But access link value is None,"
                                          "Something went wrong during the link construction", next_link)
                    links_with_issues.add(next_link)
                    continue

                if access_link in visited:
                    module_logger.info("Already visited url=%s, skipping", access_link)
                    continue

                if not DomainRule.apply(self.start_url, access_link):
                    module_logger.info("url=%s is not in the same domain as %s", access_link, self.start_url)
                    continue

                module_logger.info("Going to access url=%s constructed from %s", access_link, next_link)

            except ValueError as err:
                module_logger.warning(err)
                links_with_issues.add(next_link)
                continue

            except Exception:
                # exception() logs at ERROR level and appends the active traceback.
                module_logger.exception("An unexpected error during the link construction of url=%s", next_link)
                links_with_issues.add(next_link)
                continue

            try:
                content = HttpHandler.fetch_url_content(access_link)

                if content is None:
                    module_logger.warning("Unable to get content from link=%s", access_link)
                    continue

            except ValueError as err:
                # `content` is not assigned when the fetch itself raised, so only the link and error are logged.
                module_logger.warning("Link=%s has a value issue: %s", access_link, err)
                module_logger.exception(err)
                continue

            except Exception as err:
                module_logger.warning("Something unexpected happened while fetching content of the url=%s",
                                      access_link)
                module_logger.exception(err)
                continue

            # Get links and assets from HTML page
            links, assets = PageParser.parse_page_get_links(content)

            module_logger.debug("Add link=%s into already visited list", next_link)

            # Record both spellings: the skip check above tests the absolute access link,
            # while the queueing check below tests links as extracted from the page.
            visited.add(next_link)
            visited.add(access_link)

            module_logger.info("Extracted from url=%s - links=%s assets=%s", access_link, links, assets)

            for link in links:
                if link not in visited:
                    queue.put(link)

            module_logger.debug("Current link queue=%s", str(queue))
            site_map_record = {next_link: {'links': links, 'assets': assets}}

            module_logger.info("Adding record into site map=%s", site_map_record)

            site_map.update(site_map_record)

        module_logger.info("Crawling completed.")

        module_logger.info("SiteMap=%s", site_map)
        module_logger.info("Links with issues=%s", links_with_issues)

        return self.start_url, site_map, links_with_issues