def crawl(url):
    try:
        # Fetch the page with the configured request headers
        full_url = urlparse(url)
        req = urllib.request.Request(url, headers=header)
        page = urlopen(req)
        soup = BeautifulSoup(page, 'html.parser')

        # Extract text and outgoing links from the parsed page
        text = get_text(soup)
        links = get_links(soup)

        # Keep only links allowed by robots.txt, drop duplicates, and persist the page
        allowed_links = prt.parse_and_test_links(get_base_url(full_url), links, user_agent)
        checked_links = N.check_duplicate_urls(allowed_links)
        N.check_and_save_page(url, text)

        # Hand the remaining links to the frontier for prioritization
        Frontier.prioritize_urls(checked_links, number_of_priorities)
        # Debug hooks (disabled): inspect Frontier.queue_map, Frontier.front_queues
        # and Frontier.back_queues here if needed.
    except (urllib.error.HTTPError, urllib.error.URLError):
        # Skip pages that cannot be fetched
        pass
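# crawl() above relies on module-level imports and configuration that are not shown in
# these snippets. A plausible sketch of those names, purely as an assumption for
# readability; the concrete values (user agent, priority count) are placeholders.
import urllib.error
import urllib.request
from urllib.parse import urlparse
from urllib.request import urlopen

from bs4 import BeautifulSoup

user_agent = 'my-crawler/0.1'        # assumed user-agent string
header = {'User-Agent': user_agent}  # headers passed with every urllib request
number_of_priorities = 3             # assumed number of frontier priority levels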
def __init__(self, expert):
    self.expert_mode = expert  # Boolean indicating whether the game is played in expert mode
    # Visuals for the frontiers
    self.frontiers_visual, self.frontiers_p0_rect, self.frontiers_p1_rect, self.frontiers_p2_rect = self.initializeFrontiers()
    self.cards_visual = self.initializeCards()  # Visuals for the cards
    self.frontiers = []  # List of "Frontier" objects
    for i in range(0, 9):  # Initialize the "Frontier" objects
        self.frontiers.append(Frontier(i))
    self.ClanCardDraw = Card_Draw("Clan")  # Initialize the clan card draw pile
    if expert:  # If the game is played in expert mode
        self.TacticCardDraw = Card_Draw("Tactic")  # Initialize the tactic card draw pile
        self.nTacticCardPlayed = [0, 0]  # Number of tactic cards played by each player
        self.DiscardPile = []  # List of discarded cards
    self.hands = [Hand(expert), Hand(expert)]  # Initialize both players' "Hand" objects
    self.hands[0].fillHand(self.ClanCardDraw)  # Fill the first player's hand
    self.hands[1].fillHand(self.ClanCardDraw)  # Fill the second player's hand
    # Positions of the cards in the players' hands
    self.player1_cards_rect, self.player2_cards_rect = self.initializeHandCardsRect()
    self.frontiers_sides_cards_rect = self.initializeSideCardsRect()  # Positions of the played cards
    self.winner = 0
def BreadthFirstSearch(self, initial_state):
    """
    Perform breadth-first search of the problem, starting at a given initial state.
    """
    # configure search: for BFS, we want the Frontier with the FIFO Queue
    self._frontier = Frontiers.FrontierFIFO()
    # run search
    return self._tree_search(initial_state)
def DepthFirstSearch(self, initial_state):
    """
    Perform depth-first search of the problem, starting at a given initial state.

    :param initial_state: a Problem State
    :return: SearchTerminationRecord
    """
    # configure search: for DFS, we want the Frontier with the LIFO Stack
    self._frontier = Frontiers.FrontierLIFO()
    # run search
    return self._tree_search(initial_state)
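# The Frontiers module used above is not shown in these snippets. A minimal sketch of
# what its FIFO and LIFO frontiers might look like; the add/remove/is_empty interface is
# an assumption, since the methods actually called by _tree_search are not visible here.
from collections import deque


class FrontierFIFO:
    """Queue-based frontier: nodes are expanded in the order they were added (BFS)."""

    def __init__(self):
        self._nodes = deque()

    def add(self, node):
        self._nodes.append(node)

    def remove(self):
        return self._nodes.popleft()  # oldest node first

    def is_empty(self):
        return len(self._nodes) == 0


class FrontierLIFO:
    """Stack-based frontier: the most recently added node is expanded first (DFS)."""

    def __init__(self):
        self._nodes = []

    def add(self, node):
        self._nodes.append(node)

    def remove(self):
        return self._nodes.pop()  # newest node first

    def is_empty(self):
        return len(self._nodes) == 0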
def DepthLimitedSearch(self, initial_state, limit):
    """
    Perform depth-limited search of the problem, starting at a given initial state.

    :param initial_state: a Problem State
    :param limit: the maximum allowable depth
    :return: SearchTerminationRecord
    """
    # configure search: we want the LIFO Frontier with the depth limit
    self._frontier = Frontiers.FrontierLIFO_DL(limit)
    # run search
    result = self._tree_search(initial_state)
    # another attribute to indicate whether the search was truncated by the depth
    # limit (cutoff = True), or whether the search space was shallower than the limit
    # and we searched it all (cutoff = False).
    # This is needed by Iterative Deepening, so that it knows when to stop searching deeper.
    result.cutoff = self._frontier._cutoff
    return result
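# A minimal sketch of how Iterative Deepening could be built on DepthLimitedSearch,
# using the cutoff flag described above. The loop structure and the "success" attribute
# of the returned SearchTerminationRecord are assumptions based only on these comments.
def IterativeDeepeningSearch(self, initial_state):
    limit = 0
    while True:
        result = self.DepthLimitedSearch(initial_state, limit)
        # Stop when a goal was found, or when the whole space was shallower than the
        # limit and was searched completely, so going deeper cannot help.
        if result.success or not result.cutoff:
            return result
        limit += 1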
def searchAlgorithm(problem, maze):
    depth = 0
    sol = None
    closed = []
    fringe = Frontier.Frontier()

    # Build the root node from the initial state
    initial_node = Node.Node()
    initial_node.idState = problem.initial
    initial_node.cost = 0
    initial_node.parent = None
    initial_node.action = None
    initial_node.depth = 0
    initial_node.heuristic = heuristic(problem.initial, problem.objective)
    initial_node.value = calcValue(initial_node, problem.strategy)
    fringe.insertNode(initial_node)

    while True:
        if depth == 1000000:
            print("The algorithm reached the iteration limit.")
            break
        elif fringe.isEmpty():
            print("The frontier is empty.")
            break
        else:
            currentNode = fringe.removeNode()
            if problem.goal(currentNode.idState):
                sol = solution(currentNode)
                break
            if not isIn(currentNode, closed):
                # Expand the node and push its successors onto the frontier
                neighbors = succerssorFunction(currentNode, maze.grid)
                fringe.insertList(initNodes(currentNode, neighbors, problem.objective,
                                            problem.strategy, maze.grid))
                closed.append(currentNode)
        depth += 1

    writeSolution(sol, problem.strategy, maze)
    drawSolution(sol, fringe.frontier, closed, maze, problem.strategy)
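# calcValue is called above but not shown. A hedged sketch of how it might map the
# chosen strategy onto the node value used to order the frontier; the strategy names
# and formulas here are assumptions, not the original project's definitions.
def calcValue(node, strategy):
    if strategy == "breadth":
        return node.depth                   # expand shallow nodes first (BFS-like)
    elif strategy == "depth":
        return -node.depth                  # expand deepest nodes first (DFS-like)
    elif strategy == "uniform":
        return node.cost                    # uniform-cost search
    elif strategy == "greedy":
        return node.heuristic               # greedy best-first search
    else:  # assumed default: A*
        return node.cost + node.heuristic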
def main():
    # Set up the frontier queues, seed them, and hand off to the crawler workers
    Frontier.initialize_queues_and_priorities(number_of_crawlers, number_of_priorities)
    Frontier.prioritize_urls(seed, number_of_priorities)
    Frontier.update_back_queue()
    start(number_of_crawlers)
def crawler():
    # Keep pulling URLs from the frontier's back queue until it is exhausted
    while True:
        url = Frontier.get_url_from_back_queue()
        if url is None:
            break
        crawl(url)
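# start(number_of_crawlers) is called in main() but is not shown in these snippets.
# A plausible sketch, assuming it simply runs one crawler() worker thread per crawler;
# the real implementation may differ.
import threading


def start(n_crawlers):
    threads = []
    for _ in range(n_crawlers):
        t = threading.Thread(target=crawler)
        t.start()
        threads.append(t)
    # Wait for every crawler to drain the frontier
    for t in threads:
        t.join()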
def cspace_callback(self, cspace_og):
    """
    Brief: Callback for /cspace
    Details:
    - Constructs list of frontiers from C-space
    - Publishes frontier and centroid gridcells
    - Publishes goal position of highest priority frontier centroid
    Inputs: cspace_og [OccupancyGrid]
    Return: None
    """
    # Start timing
    stopwatch = Stopwatch()

    # Build frontier list from C-space
    frontier_list = []
    m = cspace_og.info.height
    n = cspace_og.info.width
    for i in range(m):
        for j in range(n):
            if og_on_frontier_ij(i, j, cspace_og):
                # Potential frontier starting point
                start = (i, j)

                # Check against existing frontiers
                new_frontier = True
                for frontier in frontier_list:
                    if frontier.has_point(start):
                        new_frontier = False
                        break

                if new_frontier:
                    # Generate new frontier from starting point
                    frontier = Frontier(start)

                    # Expand using wavefront algorithm
                    queue = Queue()
                    queue.put(start)
                    visited = {}
                    while not queue.empty():
                        curr = queue.pop()
                        visited[curr] = True
                        i_c = curr[0]
                        j_c = curr[1]
                        for i_n in range(i_c - 1, i_c + 2):
                            for j_n in range(j_c - 1, j_c + 2):
                                next_on_frontier = og_on_frontier_ij(i_n, j_n, cspace_og)
                                next = (i_n, j_n)
                                if (next not in visited) and next_on_frontier:
                                    visited[next] = True
                                    queue.put(next)
                                    frontier.add_point(next)

                    # Add to list of frontiers
                    frontier_list.append(frontier)

    # Convert frontier list to priority queue
    frontier_points = []
    centroid_points = []
    frontier_queue = PriorityQueue()
    for frontier in frontier_list:
        # Goal position is centroid of frontier
        centroid = frontier.get_centroid()
        c_i = centroid[0]
        c_j = centroid[1]
        c_x, c_y = og_ij_to_xy(c_i, c_j, cspace_og)
        goal = PoseStamped()
        goal.pose.position = Point(c_x, c_y, 0.0)

        # Get path time cost from A*
        rospy.wait_for_service('waypoints')
        get_waypoints = rospy.ServiceProxy('waypoints', CalcWaypoints)
        resp = get_waypoints(goal, Bool(False))

        # If valid path exists:
        if resp.exception.data == 'none':
            # Add frontier to priority queue
            priority = resp.time_cost.data
            frontier_queue.put(frontier, priority)

            # Add points to gridcells
            frontier_points += frontier.get_points()
            centroid_points.append(frontier.get_centroid())

    # Check if map is complete
    if frontier_queue.empty():
        # Map complete - publish home position to goal
        if self.nav_state == 'exploring':
            goal = PoseStamped()
            goal.pose.position = Point(self.robot_initial_x, self.robot_initial_y, 0.0)
            self.pub_goal.publish(goal)
            self.pub_debug.publish('Map complete - returning home')
            self.nav_state = 'returning'
        if hypot(self.robot_initial_x - self.robot_x,
                 self.robot_initial_y - self.robot_y) < 0.2:
            self.pub_debug.publish('Returned home - ready to race')
            self.nav_state = 'racing'
    else:
        # Map incomplete - publish top priority frontier to goal
        if self.nav_state != 'racing':
            i, j = frontier_queue.pop().get_centroid()
            x, y = og_ij_to_xy(i, j, cspace_og)
            goal = PoseStamped()
            goal.pose.position = Point(x, y, 0.0)
            self.pub_goal.publish(goal)
            self.pub_debug.publish('Frontier goal published')
            self.nav_state = 'exploring'

    # Publish frontier and centroid gridcells
    og_pub_gridcells(frontier_points, 0.002, self.pub_frontiers_gc, cspace_og)
    og_pub_gridcells(centroid_points, 0.004, self.pub_centroids_gc, cspace_og)

    # Finish timing
    dur = stopwatch.stop()
    timing_msg = 'Frontier generation: ' + str(dur) + ' sec'
    self.pub_timing.publish(timing_msg)
def main():
    Frontier.add_seed(seed)
    Crawler.crawl()
import re
import string
import urllib2

from bs4 import BeautifulSoup
# Frontier is part of this project; its module path is not shown in the snippet.


class Crawler:
    '''
    classdocs
    '''
    seedUrls = [""]
    __webGrapg = {}
    __siteContents = []
    __frontier = None
    __soup = None
    baseURL = "http://mysql12.f4.htw-berlin.de/crawl/"  # base URL (could be built via urlparse)

    def __init__(self, seedUrls):
        self.__frontier = Frontier(seedUrls)

    def startCrawling(self):
        node = self.__frontier.getNode()
        while node != "":
            self.__downloadPage(node)
            node = self.__frontier.getNode()
        return (self.__webGrapg, self.__siteContents)

    def __downloadPage(self, seedUrl):
        """
        Download the webpage at the given URL via urllib2, process it with
        BeautifulSoup, and then extract its text and links.
        """
        response = urllib2.urlopen(seedUrl)  # TODO: catch urllib2.HTTPError (e.g. HTTP Error 404: Not Found)
        html = response.read()
        self.__soup = BeautifulSoup(html)  # get the html soup
        self.__getLinksFromPage(seedUrl)
        self.__getTextFromPage(seedUrl)
        response.close()  # best practice to close the response

    def __getLinksFromPage(self, seedUrl):
        """
        Extract all URLs from a webpage processed with BeautifulSoup and add them to
        the webgraph dictionary. Found links are also added to the frontier.

        :param seedUrl: the URL of the page from which the links should be extracted
        """
        links = []
        for link in self.__soup.find_all('a'):
            link = self.__validateUrl(seedUrl, link.get('href'))
            links.append(link)
            self.__frontier.setNode(link)
        self.__webGrapg.update({seedUrl: links})

    def __getTextFromPage(self, seedUrl):
        """
        Extract all text from a webpage processed with BeautifulSoup and add it to
        the site contents list.

        :param seedUrl: the URL of the page from which the text should be extracted
        """
        [s.extract() for s in self.__soup('a')]  # remove all <a> link tags
        bodyContent = self.__soup.body.get_text()
        regex = re.compile('[%s]' % re.escape(string.punctuation))  # regex matching punctuation characters
        words = regex.sub(" ", bodyContent).split()  # replace punctuation with whitespace and split into words
        self.__siteContents.append((seedUrl, words))

    def __validateUrl(self, seedUrl, url):
        # TODO: extract the base URL from seedUrl if it is missing
        return "http://mysql12.f4.htw-berlin.de/crawl/" + url
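# A minimal usage sketch of the Crawler class above. The seed URL is an assumption
# (it simply reuses the hard-coded crawl base URL), and running it requires that server
# to be reachable plus the project's Frontier class on the import path.
if __name__ == '__main__':
    crawler = Crawler(["http://mysql12.f4.htw-berlin.de/crawl/"])
    web_graph, site_contents = crawler.startCrawling()
    # web_graph maps each visited URL to the links found on that page;
    # site_contents is a list of (url, words) tuples for later indexing.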