def __init__(self, config=None):
    """Create a Sklik API client and log it in.

    :param config: Sklik API client configuration instance; its
        ``namespace``, ``debug``, ``username`` and ``password`` attributes
        are read here
    :raises SklikApiError: when no config is given, or when the login
        status is anything other than 200, 400 or 401
    :raises ArgumentError: when the login status is 400
    :raises AuthenticationError: when the login status is 401
    """
    # Stays None unless the login below succeeds.
    self.__session = None

    if not config:
        raise SklikApiError("No config given")

    self.__proxy = _create_server_proxy(
        config.namespace,
        verbose=config.debug,
        allow_none=True,
    )
    reply = self.__proxy.client.login(config.username, config.password)

    # Map the status code of the reply onto the matching exception.
    status = reply["status"]
    if status == 400:
        raise ArgumentError(reply["statusMessage"], reply["errors"])
    if status == 401:
        raise AuthenticationError(reply["statusMessage"])
    if status != 200:
        raise SklikApiError(reply["statusMessage"])

    self.__session = reply["session"]
def __init__(self, config=None):
    """Build a Sklik API client, keep its configuration and log in.

    Keyword arguments:
    config: Sklik API client configuration instance; its ``namespace``,
        ``username`` and ``password`` attributes are read here

    Raises SklikApiError when no config is given or the login status is
    not 200/400/401, ArgumentError on status 400 and AuthenticationError
    on status 401.
    """
    # Stays None unless the login below succeeds.
    self.__session = None

    if not config:
        raise SklikApiError("No config given")

    self.__config = config
    self.__proxy = ServerProxy(self.__config.namespace, allow_none=True)

    login_result = self.__proxy.client.login(
        self.__config.username,
        self.__config.password,
    )

    # Map the status code of the reply onto the matching exception.
    status_code = login_result["status"]
    if status_code == 400:
        raise ArgumentError(login_result["statusMessage"],
                            login_result["errors"])
    if status_code == 401:
        raise AuthenticationError(login_result["statusMessage"])
    if status_code != 200:
        raise SklikApiError(login_result["statusMessage"])

    self.__session = login_result["session"]
def __init__(self, url, parent=None):
    """Instantiates a Link object

    Args:
        url (str): The URL string either relative or absolute
        parent (Link, optional): If this is None, then the current Link
            will serve as the root, else a parent will be assigned to the
            current Link instance

    Raises:
        ArgumentError: If ``parent`` is given but is not a Link instance
    """
    # validate args
    # Compare against None explicitly, exactly like the level computation
    # below. A truthiness test let falsy non-Link values (0, "", [])
    # bypass validation and fail later with an AttributeError.
    if parent is not None:
        if not isinstance(parent, Link):
            # One-element tuple so a tuple argument cannot break formatting.
            raise ArgumentError('Link class should be instantiated with '
                                'a valid parent Link object. Got this: %s'
                                % (parent,))
        # resolve the given URL against the parent's URL
        url = URLUtils.absolute(parent.url, url)

    # link info
    self.parent = parent
    self.url = URLUtils.normalize(url)
    # NOTE(review): the domain is taken from the un-normalized URL while
    # validity and id use the normalized one - confirm this is intended.
    self.domain = URLUtils.get_domain(url)
    self.is_valid = URLUtils.is_valid(self.url)
    self.id = URLUtils.hash(self.url)
    # the root link is level 0; every child is one level below its parent
    self.level = 0 if parent is None else parent.level + 1
def add_hand(self, hand: Hand) -> None:
    """Give the player a hand, using the first free slot.

    ``hand1`` is filled when it is empty; otherwise ``hand2`` receives
    the hand.

    Raises:
        ArgumentError: If both ``hand1`` and ``hand2`` are already set.
    """
    if not self.hand1:
        self.hand1 = hand
        return

    if not self.hand2:
        self.hand2 = hand
        return

    # Both slots are taken.
    raise ArgumentError(
        "Trying to add a hand to player who already has 2 hands. A player can't have a third hand."
    )
def __del__(self):
    """Logs out of the session, if one was opened.

    Does nothing when no session is stored. Otherwise calls
    ``client.logout`` on the proxy and maps the reply status onto an
    exception: 400 -> ArgumentError, 401 -> AuthenticationError, any other
    non-200 status -> SklikApiError.

    NOTE: when Python itself invokes ``__del__`` during garbage
    collection, exceptions raised here are not propagated to any caller;
    the interpreter only reports them on stderr.
    """
    # Identity check: ``== None`` can be fooled by a custom ``__eq__``.
    if self.__session is None:
        return

    res = self.__proxy.client.logout(self.__session)

    status = res["status"]
    if status == 400:
        raise ArgumentError(res["statusMessage"], res["errors"])
    elif status == 401:
        raise AuthenticationError(res["statusMessage"])
    elif status != 200:
        raise SklikApiError(res["statusMessage"])
def _get_nodes_and_marks(self, string):
    """Split an edge description into its two nodes and the edge symbol.

    The symbols in ``self.POSSIBLE_EDGES`` are tried in order; the first
    one that occurs in ``string`` wins. Only the text before the first
    occurrence and the text between the first and second occurrence are
    used, each stripped of surrounding spaces.

    Parameters:
        string: str

    Returns:
        tuple (node_1, edge, node_2)

    Raises:
        ArgumentError if none of the possible edges occurs in the string.
    """
    for edge in self.POSSIBLE_EDGES:
        pieces = string.split(edge)
        if len(pieces) >= 2:
            left, right = pieces[0], pieces[1]
            return left.strip(' '), edge, right.strip(' ')

    message = "{} has an unrecognized edge. Possible edges: {}".format(
        string, self.POSSIBLE_EDGES)
    raise ArgumentError(message)
def set_out_of(self, node):
    """
    Put a tail mark on the side of the edge that belongs to the given node.

    Parameters:
        node: str

    Raises:
        ArgumentError if node is neither node_1 nor node_2.
    """
    endpoints = (self.node_1, self.node_2)
    if node not in endpoints:
        raise ArgumentError('Node {} not found'.format(node))

    tail = PartialAncestralGraph.TAIL
    if node == self.node_1:
        self.node_1_mark = tail
        return
    self.node_2_mark = tail
def set_into(self, node):
    """
    Put an arrowhead mark on the side of the edge that belongs to the
    given node: the left arrowhead for node_1, the right one for node_2.

    Parameters:
        node: str

    Raises:
        ArgumentError if node is neither node_1 nor node_2.
    """
    endpoints = (self.node_1, self.node_2)
    if node not in endpoints:
        raise ArgumentError('Node {} not found'.format(node))

    if node == self.node_1:
        self.node_1_mark = PartialAncestralGraph.LEFT_ARROWHEAD
        return
    self.node_2_mark = PartialAncestralGraph.RIGHT_ARROWHEAD
def out_of(self, node):
    """
    Tell whether the mark next to the given node is a tail.

    Parameters:
        node: str

    Returns:
        bool

    Raises:
        ArgumentError if node is neither node_1 nor node_2.
    """
    if node not in (self.node_1, self.node_2):
        raise ArgumentError('Node {} not found'.format(node))

    tail = PartialAncestralGraph.TAIL
    # each endpoint paired with the mark currently stored for it
    marked_endpoints = (
        (self.node_1, self.node_1_mark),
        (self.node_2, self.node_2_mark),
    )
    return any(node == endpoint and mark == tail
               for endpoint, mark in marked_endpoints)
def has_word(self, word):
    """Tells whether the given word occurs in this page's text content

    The lookup is a containment test of ``word`` against
    ``self.text_content``.

    Args:
        word (str): A non empty stop word

    Returns:
        bool: True, if the given word is present in the page; else False

    Raises:
        ArgumentError: If ``word`` is empty
    """
    if word:
        return word in self.text_content

    # an empty word is rejected
    raise ArgumentError('Param "word" cannot be empty')
def into(self, node):
    """
    Tell whether the mark next to the given node is an arrowhead: the left
    arrowhead is expected at node_1 and the right arrowhead at node_2.

    Parameters:
        node: str

    Returns:
        bool

    Raises:
        ArgumentError if node is neither node_1 nor node_2.
    """
    if node not in (self.node_1, self.node_2):
        raise ArgumentError('Node {} not found'.format(node))

    # endpoint, its current mark, and the arrowhead that counts for it
    endpoints = (
        (self.node_1, self.node_1_mark,
         PartialAncestralGraph.LEFT_ARROWHEAD),
        (self.node_2, self.node_2_mark,
         PartialAncestralGraph.RIGHT_ARROWHEAD),
    )
    return any(node == endpoint and mark == arrowhead
               for endpoint, mark, arrowhead in endpoints)
def validate_config(config):
    """Validate the metadata section of a configuration against AWS.

    Checks, in this order: the metadata name, the number of regions, that
    every region is one returned by EC2 ``describe_regions``, and that the
    EKS service can list clusters in every region.

    Args:
        config: Configuration object. The code reads
            ``config.yaml["metadata"]`` (keys ``"name"`` and ``"regions"``)
            and ``config.bastion.region``.

    Raises:
        ArgumentError: If the number of regions is not exactly 2.
        Exception: If the name is invalid, a region is not known to EC2,
            or EKS cannot be queried in a region (the underlying error is
            attached as the cause).
    """
    metadata = config.yaml["metadata"]

    # fullmatch rather than match("^...$"): "$" also matches right before
    # a trailing newline, so a name such as "abc\n" used to be accepted.
    if not re.fullmatch(r"[-A-Za-z0-9]{1,63}", metadata["name"]):
        raise Exception(
            "Name must be 1 to 63 characters long and contain only "
            "letters, numbers and hyphens")

    if len(metadata["regions"]) != 2:
        raise ArgumentError("Please specify exactly 2 regions")

    ec2 = boto3.client("ec2", region_name=config.bastion.region)
    ec2_regions = ec2.describe_regions()["Regions"]
    region_names = [val["RegionName"] for val in ec2_regions]

    for region in metadata["regions"]:
        # Checking region validity first for faster response
        if region not in region_names:
            raise Exception(f"Region \"{region}\" is not valid")
        try:
            eks = boto3.client("eks", region_name=region)
            eks.list_clusters()
        except Exception as ex:
            # keep the original failure attached for debugging
            raise Exception(
                f"Can't access EKS service in \"{region}\"") from ex
def __init__(self, link):
    """Builds a HTMLPage object by downloading the page behind a link

    Args:
        link (Link): A Link instance whose ``is_valid`` flag is set

    Raises:
        ArgumentError: If ``link`` is not a Link or is not valid
    """
    # validate args
    link_is_usable = isinstance(link, Link) and link.is_valid
    if not link_is_usable:
        raise ArgumentError(
            'HTMLPage class should be instantiated with '
            'a valid Link object. Got this: %s' % link
        )

    self.link = link

    # download the page and keep the interesting parts of the response
    reply = requests.get(link.url)
    self.status_code = reply.status_code
    raw_html = reply.text.encode('utf8')
    self.html_content = raw_html
    self.text_content = HTMLUtils.html_to_text(raw_html)
    self.encoding = reply.encoding

    # child links are collected last, once the attributes above are set
    self.child_links = self._get_all_links()
def search(self, search_code, search_type, start_url, max_level,
           stop_words=None, allowed_domains=None, persist=True):
    """Starts from the given URL, performs a Breadth First Search (BFS) or
    a Depth First Search (DFS) and continues until the queue is exhausted,
    links beyond the maximum level are no longer expanded, or one of the
    given stop words is encountered

    Args:
        search_code (str): A unique string identifying the current search
            request
        search_type (str): Allowed values are "BFS" or "DFS"
            (case-insensitive)
        start_url (str): Search starting point
        max_level (int): The maximum level until which the search should
            be performed
        stop_words (list(str), optional): The list of words which is to be
            checked for to halt the current search
        allowed_domains (list(str), optional): The list of domains to
            restrict the URLs to, while performing the search. The domain
            of ``start_url`` is always allowed; the list passed in is not
            modified
        persist (bool, optional): If True, the search results will be
            persisted in the database

    Returns:
        set(Link): The unique Link objects visited while performing the
            search

    Raises:
        ArgumentError: If arguments are faulty
        PersistenceExecuteError: For DB Query Execution issues
        PersistenceError: For any other DB related issue
    """
    from collections import deque

    # validate args
    # One-element tuples keep "%" formatting safe for tuple arguments.
    if not isinstance(search_type, str) or \
            search_type.upper() not in ('BFS', 'DFS'):
        raise ArgumentError('Param "search_type" must be either of '
                            '"bfs" or "dfs". Got: %s' % (search_type,))
    if not isinstance(start_url, str):
        # Report the offending argument itself; start_link does not exist
        # yet at this point.
        raise ArgumentError('Param "start_url" must be a string. '
                            'Got: %s' % (start_url,))
    start_link = Link(start_url)
    if not start_link.is_valid:
        raise ArgumentError('Param "start_url" must be a valid URL. '
                            'Got: %s' % start_link.url)
    if not isinstance(max_level, int):
        raise ArgumentError('Param "max_level" must be an integer. '
                            'Got: %s' % (max_level,))

    # Validation is case-insensitive, so the traversal decision must be
    # too; otherwise "bfs" would silently run as DFS.
    breadth_first = search_type.upper() == 'BFS'

    # Work on private copies so that neither a caller's list nor a shared
    # default is mutated across calls.
    stop_words = list(stop_words) if stop_words else []
    allowed_domains = list(allowed_domains) if allowed_domains else []

    # add the current domain to the list of allowed domains
    if start_link.domain not in allowed_domains:
        allowed_domains.append(start_link.domain)

    if persist:
        search_info = {
            'search_code': search_code,
            'search_type': search_type,
            'start_url': start_url,
            'max_level': max_level,
            'crawled_date_time': datetime.today()
        }
        # save job details to DB
        try:
            self.db.insert('CRAWL.INFO', search_info)
            self.db.commit()
        except PersistenceExecuteError:
            self.db.rollback()
            logging.error(
                'Insertion to CRAWL.INFO table failed for user: %s',
                self.user_id)
            raise

    # initiate the data structures
    visited = set()
    queue = deque([start_link])  # O(1) pops from either end
    stop_word_hit = False

    # perform search
    while queue and not stop_word_hit:
        # depending on whether the search type is BFS/DFS
        # retrieve from the front or the back of the queue
        link = queue.popleft() if breadth_first else queue.pop()

        if link.is_valid and link not in visited:
            # visit the current link
            visited.add(link)

            if persist:
                link_data = {
                    'search_code': search_code,
                    'id': link.id,
                    'url': link.url,
                    'level': link.level,
                    'parent_id': link.parent.id if link.parent else None
                }
                # save the link to DB
                try:
                    self.db.insert('CRAWL.DATA', link_data)
                except PersistenceExecuteError:
                    logging.error(
                        'Insertion to CRAWL.DATA table failed for user: %s',
                        self.user_id)
                    raise

            # parse current page and get all child links
            page = HTMLPage(link)
            if page.status_code == 200 and link.level < max_level:
                queue.extend(page.get_links(allowed_domains))
            logging.info(page)

            # check if the current page has one of the given stop words
            stop_word_hit = any(page.has_word(word) for word in stop_words)

    # NOTE(review): this commit also runs when persist is False, as in the
    # original code - confirm that is intended.
    self.db.commit()
    return visited