class ThreadedNormalWorker(object):
    """Refresh not-yet-accessed ``Normals`` records using worker threads.

    Records are loaded into a LIFO queue and each worker thread drains the
    queue, running a ``NormalsSpider`` on every record's URL.
    """

    def __init__(self, print_errors=False):
        """Create the worker.

        Args:
            print_errors: Forwarded unchanged to every ``NormalsSpider``.
        """
        self.print_errors = print_errors
        self.queue = LifoQueue()

    def get_url_bulk(self):
        """Queue every ``Normals`` document with ``access_success=False``."""
        for record in Normals.objects(access_success=False):
            self.queue.put(item=record)

    def grab_from_queue(self):
        """Process queued records until the queue is empty.

        Intended to run as a thread target.  Items are fetched with a
        non-blocking get: checking ``empty()`` and then calling a blocking
        ``get()`` lets another worker steal the last item in between, which
        would leave this thread blocked forever.
        """
        from queue import Empty

        while True:
            try:
                record = self.queue.get_nowait()
            except Empty:
                return
            try:
                normals_finder = NormalsSpider(url=record.url,
                                               print_errors=self.print_errors)
                normals_finder.update_normals_data()
                print(record.url)
            finally:
                # Always account for the item, otherwise queue.join() in
                # start() never returns after a failed spider run.
                self.queue.task_done()

    def start(self, n_threads):
        """Load the work queue, start ``n_threads`` workers and wait for them.

        Args:
            n_threads: Number of worker threads to start.
        """
        self.get_url_bulk()
        for _ in range(n_threads):
            # Pass the bound method itself; calling it here would drain the
            # whole queue in the current thread and hand Thread a None target.
            thread = Thread(target=self.grab_from_queue)
            thread.start()
        self.queue.join()
def match_query(self, query):
    '''Given a search query, return a tuple containing a regex match and
    trigger object that matches the given query. If no match can be found,
    return a tuple of (None, None).

    Triggers are taken from ``self.triggers`` one at a time and tested with
    ``trigger.pattern.match(query)``.  Every trigger is put back afterwards;
    the matching trigger is re-inserted last.
    '''
    sink = LifoQueue()
    # Pre-bind both names: when self.triggers starts out empty the loop body
    # never runs and they would otherwise be undefined below.
    match = None
    trigger = None
    while not self.triggers.empty():
        candidate = self.triggers.get()
        match = candidate.pattern.match(query)
        if match:
            trigger = candidate
            break
        sink.put(candidate)
    # Restore the rejected triggers (sink is LIFO, so the original insertion
    # order is recovered).
    while not sink.empty():
        self.triggers.put(sink.get())
    # Identity test so a trigger object that happens to be falsy is still
    # put back instead of being silently dropped.
    if trigger is not None:
        self.triggers.put(trigger)
        return (match, trigger)
    return (None, None)
def print_stack(stack):
    """Print every element of ``stack`` from top to bottom.

    The stack is emptied while printing and then refilled, so its contents
    and order are unchanged when the function returns.
    """
    popped = []
    while not stack.empty():
        item = stack.get()
        print(item)
        popped.append(item)
    # Push back bottom-first so the original top ends up on top again.
    for item in reversed(popped):
        stack.put(item)
def next_greater_element(arr):
    """Print, for each value in ``arr``, the next greater value to its right.

    One line ``"<value> -> <next greater>"`` is printed per element, with
    ``-1`` for elements that have no greater value after them.  Lines are
    emitted in the order the answers become known, not in input order.
    """
    waiting = LifoQueue()  # values still looking for a greater successor
    for val in arr:
        while not waiting.empty():
            top = waiting.get()
            if val > top:
                print('{} -> {}'.format(top, val))
                continue
            # val is not greater: top keeps waiting, and so does
            # everything underneath it.
            waiting.put(top)
            break
        waiting.put(val)
    while not waiting.empty():
        print('{} -> -1'.format(waiting.get()))
def sortStack(stack):
    """Sort ``stack`` in place using one temporary stack.

    Each element popped from ``stack`` is handed to ``compareWithTempStack``
    together with both stacks; once ``stack`` is empty the temporary stack
    is poured back into it.
    """
    holding = LifoQueue()
    while not stack.empty():
        compareWithTempStack(stack.get(), stack, holding)
    while not holding.empty():
        stack.put(holding.get())
def __iter__(self):
    """Yield the nodes of ``self.tree`` in depth-first pre-order.

    A node is yielded before its children; children are visited in the
    order they appear in ``node.children``.
    """
    pending = LifoQueue()
    pending.put(self.tree.root)
    while not pending.empty():
        current = pending.get()
        yield current
        # Push right-to-left so the leftmost child is popped first.
        for child in reversed(current.children):
            pending.put(child)
def consumer(lifoIn: LifoQueue):
    """Drain ``lifoIn``, checking every item is ``"Foo"``.

    After the queue is drained, verifies that a non-blocking get raises
    ``Empty``.  Prints the current thread's name for each consumed item.
    """
    while not lifoIn.empty():
        # Thread.getName() is deprecated since Python 3.10; the ``name``
        # attribute returns the same string.
        print("Consuming", current_thread().name)
        assert (lifoIn.get() == "Foo")
    with pytest.raises(Empty):
        assert (lifoIn.get_nowait())
def dfs(start):
    """depth-first-search

    Explore 8-puzzle states depth-first from ``start`` until the goal state
    ``('0', ..., '8')`` is popped, then write the search statistics to the
    file ``output`` in the current directory.

    Args:
        start: Initial board as a hashable sequence containing ``'0'`` for
            the blank (presumably a 9-tuple of digit strings; this is what
            the goal comparison expects).

    Returns:
        None in every case; nothing is written when the goal is unreachable.
    """
    goal = ('0', '1', '2', '3', '4', '5', '6', '7', '8')
    # (blank index in parent) - (blank index in child) -> move name.
    move_for_offset = {1: "LEFT", -1: "RIGHT", 3: "UP", -3: "DOWN"}

    search = LifoQueue(maxsize=0)
    visited = set()
    # child -> parent.  Each state is pushed at most once, so a dict
    # replaces the former parallel lists and their O(n) index() scans.
    parent_of = {}
    max_fringe_size = 1
    max_search_depth = 0
    nodecount = 0

    search.put((start, 0))
    while not search.empty():
        board, depth = search.get()
        nodecount += 1
        if depth > max_search_depth:
            max_search_depth = depth
        visited.add(board)

        if board == goal:
            # Walk back to the start.  The start state has no parent entry,
            # so a start that already is the goal yields an empty path
            # instead of raising ValueError.
            path = []
            node = board
            while node in parent_of:
                parent = parent_of[node]
                move = move_for_offset.get(
                    parent.index('0') - node.index('0'))
                if move is not None:
                    path.append(move)
                node = parent
            path.reverse()
            # Context manager so the report is flushed and closed.
            with open('output', 'w') as fil:
                fil.write("path_to_goal: " + str(path))
                fil.write("\ncost_of_path: " + str(depth))
                fil.write("\nnodes_expanded: " + str(nodecount - 1))
                fil.write("\nfringe_size: " + str(search.qsize()))
                fil.write("\nmax_fringe_size: " + str(max_fringe_size))
                fil.write("\nsearch_depth: " + str(depth))
                fil.write("\nmax_search_depth: " + str(max_search_depth))
                fil.write("\nrunning_time: " + str(time.process_time()))
            return

        for neighbor in reversed(neighbors(board)):
            if neighbor not in visited:
                search.put((neighbor, depth + 1))
                parent_of[neighbor] = board
                visited.add(neighbor)
        # Only puts happen after the pop above, so the fringe is at its
        # largest for this iteration right here.
        if search.qsize() > max_fringe_size:
            max_fringe_size = search.qsize()
class GPIOHypervisor(GPIOHandler):
    """
    All operation specific I/O will be managed by this class
    """

    def __init__(self):
        LOGGER.debug(f"{self}")
        super(GPIOHypervisor, self).__init__()
        # Pending commands; holds at most four.
        self.cmd = Stack(maxsize=4)
        self.logger = logging.getLogger("GPIO Hypervisor")

    def put(self, cmd: CMD):
        """Queue ``cmd`` for the handler loop without blocking.

        The command must be forwarded to ``put_nowait``; calling it with no
        argument raised TypeError and nothing was ever queued.
        """
        self.cmd.put_nowait(cmd)

    def get_new_cmd(self):
        """Return the next pending command without blocking."""
        return self.cmd.get_nowait()

    def execute(self, cmd: CMD):
        """Drive the pins named by ``cmd`` and record their new states.

        ``cmd.get_new_states()`` and ``cmd.get_pins()`` are paired by
        position; a state of 0 maps to ``GPIO.LOW``, anything else to
        ``GPIO.HIGH``.
        """
        newStates = cmd.get_new_states()
        pins = cmd.get_pins()
        GPIO.output(
            pins,
            [GPIO.LOW if i == 0 else GPIO.HIGH for i in newStates])
        # Only pins already tracked in self.states are updated.
        for pin_states in self.states:
            for idx, pin in enumerate(pins):
                if pin_states == str(pin):
                    self.states[pin_states] = newStates[idx]

    def update_states(self, pin, output):
        """Record ``output`` as the state of ``pin`` (keyed by ``str(pin)``)."""
        self.states[str(pin)] = output

    async def handler(self):
        """Poll for commands forever and execute them.

        ``ENGINESTART`` and ``ENGINESTOP`` run fixed valve sequences; any
        other command goes through :meth:`execute`.  When no command is
        pending the coroutine yields for 0.1 s.
        """
        while 1:
            if not self.cmd.empty():
                cmd = self.get_new_cmd()
                if str(cmd) == "ENGINESTART":
                    GPIO.output(GPIOVALVES["lox_dump"]["pin"], GPIO.LOW)
                    self.update_states(GPIOVALVES["lox_dump"]["pin"], 0)
                    self.logger.info("DUMPING LOX")
                    # NOTE(review): this looks like a blocking sleep inside
                    # a coroutine, and the config key says milliseconds
                    # while sleep() normally takes seconds - confirm both
                    # are intended.
                    sleep(GPIOVALVES["engine_start_delay"]["time_ms"])
                    GPIO.output(GPIOVALVES["kerosene_dump"]["pin"], GPIO.LOW)
                    self.update_states(GPIOVALVES["kerosene_dump"]["pin"], 0)
                    self.logger.info("DUMPING KEROSENE")
                    GPIO.output(GPIOVALVES["igniter"]["pin"], GPIO.HIGH)
                elif str(cmd) == "ENGINESTOP":
                    GPIO.output(GPIOVALVES["lox_dump"]["pin"], GPIO.HIGH)
                    self.update_states(GPIOVALVES["lox_dump"]["pin"], 1)
                    self.logger.info("STOPPING LOX")
                    GPIO.output(GPIOVALVES["kerosene_dump"]["pin"], GPIO.HIGH)
                    self.update_states(GPIOVALVES["kerosene_dump"]["pin"], 1)
                    self.logger.info("STOPPING KEROSENE")
                else:
                    # general non-mission critical gpio commands
                    self.execute(cmd)
            else:
                await asyncio.sleep(0.1)

    def report(self):
        """Return the mapping of tracked pin states."""
        return self.states
class VideoStream:
    """Threaded frame reader for the CSI camera (GStreamer pipeline).

    A background thread pushes frames into a bounded LIFO queue, so
    :meth:`read` returns the most recently captured frame first.
    """

    def __init__(self, queueSize=128):
        """Open the capture and publish its properties as module globals.

        Args:
            queueSize: Maximum number of frames buffered in the queue.
        """
        global capfps
        global capwid
        global caphei
        cap = cv2.VideoCapture(
            "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)1280, height=(int)720,format=(string)NV12, framerate=(fraction)24/1 ! nvvidconv flip-method=2 ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink"
        )
        capfps = cap.get(cv2.CAP_PROP_FPS)
        capwid = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        caphei = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.stream = cap
        self.stopped = False
        # initialize the queue
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        """Start the daemon reader thread and return ``self`` for chaining."""
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        """Reader loop: capture frames until stopped or the stream ends."""
        import time

        while True:
            if self.stopped:
                return
            if self.Q.full():
                # Back off briefly instead of spinning at 100% CPU while
                # the consumer catches up.
                time.sleep(0.01)
                continue
            # read the next frame from the stream
            (grabbed, frame) = self.stream.read()
            # stop when the stream delivers no more frames
            if not grabbed:
                self.stop()
                return
            self.Q.put(frame)

    def read(self):
        """Return the newest buffered frame, blocking until one exists."""
        return self.Q.get()

    def more(self):
        """Return True if there are still frames in the queue."""
        return self.Q.qsize() > 0

    def clearQ(self):
        """Discard all buffered frames; return True if the queue is empty."""
        with self.Q.mutex:
            self.Q.queue.clear()
        # empty() takes the same mutex, so it must be called after release.
        return self.Q.empty()

    def stop(self):
        """Signal the reader thread to stop."""
        self.stopped = True
def parChecker(string):
    """Return True if the parentheses in ``string`` are balanced.

    ``"("`` opens a group; every other character is treated as a closer,
    so the input is expected to consist of parentheses only.
    """
    opened = LifoQueue()
    for symbol in string:
        if symbol == "(":
            opened.put(symbol)
        elif opened.empty():
            # A closer with nothing left to close.
            return False
        else:
            opened.get()
    return opened.empty()
class DeviceVideoStream:
    """Threaded capture wrapper that always hands out the newest frame.

    Idea from:
    https://www.pyimagesearch.com/2017/02/06/faster-video-file-fps-with-cv2-videocapture-and-opencv/
    """

    def __init__(self, device, stack_size=0):
        """Open ``device`` and create the frame stack.

        Args:
            device: Passed straight to ``cv2.VideoCapture``.
            stack_size: ``maxsize`` of the frame stack; 0 means unbounded.
        """
        self.stream = cv2.VideoCapture(device)
        # Flag polled by the reader thread to know when to quit.
        self.stopped = False
        # LIFO so the most recently captured frame is read first.
        self.stack = LifoQueue(maxsize=stack_size)

    def start(self):
        """Launch the daemon reader thread and return ``self``."""
        reader = Thread(target=self.update, args=())
        reader.daemon = True
        reader.start()
        return self

    def update(self):
        """Reader loop: push frames until stopped or the stream runs dry."""
        while not self.stopped:
            if self.stack.full():
                continue
            grabbed, frame = self.stream.read()
            if not grabbed:
                # No frame delivered: treat as end of stream.
                self.stop()
                return
            self.stack.put(frame)

    def read(self):
        """Return ``(newest_frame, frames_buffered)`` and drop older frames.

        Blocks until at least one frame is available.  ``frames_buffered``
        counts the returned frame plus the ones discarded.
        """
        newest = self.stack.get()
        buffered = self.stack.qsize() + 1
        while not self.stack.empty():
            try:
                self.stack.get(False)
            except Empty:
                continue
            else:
                self.stack.task_done()
        return newest, buffered

    def stop(self):
        """Ask the reader thread to stop."""
        self.stopped = True
def valid_parentheses(value: str) -> bool:
    """Return True if ``value`` is a correctly nested bracket string.

    Recognises ``()``, ``{}`` and ``[]``.  Any other character makes the
    string invalid, as does an odd length.
    """
    if len(value) % 2:
        return False
    closer_for = {'(': ')', '{': '}', '[': ']'}
    pending = LifoQueue(len(value))
    for symbol in value:
        if symbol in closer_for:
            pending.put(symbol)
            continue
        # Not an opener: it must close the most recent opener.
        if pending.empty() or closer_for[pending.get()] != symbol:
            return False
    return pending.empty()
def preorder_stack(my_tree):
    """Print the values of a binary tree in pre-order, without recursion.

    Nodes need ``value``, ``left`` and ``right`` attributes; ``None`` marks
    a missing child.  Passing ``None`` prints nothing.
    """
    if my_tree is None:
        return
    pending = LifoQueue()
    pending.put(my_tree)
    while not pending.empty():
        node = pending.get()
        print(node.value)
        # Right goes in first so the left subtree is printed first.
        for child in (node.right, node.left):
            if child is not None:
                pending.put(child)
def reverse_post(self):
    """Return the vertices in reverse post-order as a list.

    Consumes ``self.postorder_vertices()`` and returns its items in the
    opposite order.
    """
    ordering = list(self.postorder_vertices())
    ordering.reverse()
    return ordering
def decompose_all(root):
    """Decompose every not-yet-decomposed node reachable from ``root``.

    Walks the tree depth-first; ``decompose(node)`` is called for nodes
    whose ``decomposed`` attribute is falsy.  Children are read after the
    call, so children created by ``decompose`` are visited too.
    """
    pending = LifoQueue()
    pending.put(root)
    while not pending.empty():
        current = pending.get()
        if not current.decomposed:
            decompose(current)
        for child in current.children:
            pending.put(child)
def reverseString(self, A):
    """Return ``A`` with its characters in reverse order.

    Args:
        A: String (or any iterable of strings) to reverse.

    Returns:
        The reversed string; ``''`` for empty input.
    """
    stack = LifoQueue()
    for x in A:
        stack.put(x)
    pieces = []
    while not stack.empty():
        pieces.append(stack.get())
    # The reversed text used to be built and then discarded, so every call
    # returned None.
    return ''.join(pieces)
class QueryQueue:
    """Collects speed queries and forwards only the newest one, once a second.

    Queries are pushed with :meth:`put`; a background thread started in the
    constructor runs :meth:`send_require` forever.
    """

    def __init__(self):
        # LIFO so the most recent query is the first one taken out.
        self.queue = LifoQueue()
        self.comm_sender = CommSender()
        # NOTE(review): the thread is not marked daemon and the loop never
        # exits, so it presumably keeps the process alive - confirm intended.
        th = threading.Thread(target=self.send_require)
        th.start()

    def put(self, item):
        """Add a query to the queue."""
        self.queue.put(item)

    def send_require(self):
        """Worker loop: every second send the newest queued query, if any.

        Each round reads the first ConnInfo, QueryInfo and RoomInfo rows.
        When the connection row reports logged-out or disconnected, the
        queue is only flushed.  Otherwise the newest query is taken, the
        rest discarded, the query possibly replaced by 'standby', stored
        on the QueryInfo row and sent through ``self.comm_sender``.
        """
        while True:
            time.sleep(1)
            c = ConnInfo.objects.all()[0]
            q = QueryInfo.objects.all()[0]
            r = RoomInfo.objects.all()[0]
            # if logged out or not connected, only flush the queue
            # (both flags are compared against the string "False")
            if c.is_log == "False" or c.is_conn == "False":
                while not self.queue.empty():
                    self.queue.get()
                continue
            # otherwise take the newest item and flush the rest
            if not self.queue.empty():
                query = self.queue.get()
                while not self.queue.empty():
                    self.queue.get()
                # Override with 'standby' when the target temperature is on
                # the wrong side of the current temperature for the mode.
                m = ModeInfo.objects.all()[0]
                s = SensorInfo.objects.all()[0]
                ss = SettingInfo.objects.all()[0]
                if m.mode == 'cold' and ss.target_temp > s.current_temp:
                    query = 'standby'
                elif m.mode == 'hot' and ss.target_temp < s.current_temp:
                    query = 'standby'
                # Persist the requested speed before sending it.
                q.query_speed = query
                q.save()
                # From here on ``r`` is the response, no longer the room row.
                r = self.comm_sender.send_msg(data={'type': 'require', 'source': r.room_number, 'speed': query})
                # if query is standby, we should change to standby immediately
                if query == 'standby' and r.json()['ack_nak'] == 'ACK':
                    q.current_speed = 'standby'
                    q.query_speed = 'None'
                    q.save()
class Stack:
    """Bounded stack built on ``LifoQueue`` with an explicit size counter.

    The counter lives on the underlying queue as ``self.s.currsize`` and is
    kept equal to the number of stored elements by every operation.
    """

    def __init__(self, max_size=10):
        """Create an empty stack holding at most ``max_size`` elements."""
        self.s = LifoQueue()
        self.s.currsize = 0
        self.s.maxsize = max_size

    def size(self):
        """Return the number of elements reported by the underlying queue."""
        return self.s.qsize()

    def push(self, elem):
        """Push ``elem``; print a message and return -1 if the stack is full."""
        if self.s.currsize == self.s.maxsize:
            print("Stack is full!")
            return -1
        else:
            self.s.put(elem)
            self.s.currsize += 1

    def pop(self):
        """Pop, print and return the top element.

        Exits the interpreter via ``exit(0)`` when the stack is empty.
        """
        if self.s.currsize == 0:
            print("Stack is empty!")
            exit(0)
        else:
            self.s.currsize -= 1
            elem = self.s.get()
            print("Element popped: ", elem)
            return elem

    def _take(self):
        """Remove and return the top element silently, updating the counter."""
        self.s.currsize -= 1
        return self.s.get()

    def reverse(self):
        """Reverse the stack in place (recursively)."""
        if not self.s.empty():
            # _take keeps the counter in step.  Raw get() calls here made
            # every later push() inflate the counter until push() reported
            # "Stack is full!" and dropped elements.
            temp = self._take()
            self.reverse()
            self.insert_below(temp)

    def insert_below(self, item):
        """Insert ``item`` at the bottom of the stack."""
        if self.s.empty():
            self.push(item)
        else:
            temp = self._take()
            self.insert_below(item)
            print("TEMP, ", temp)
            self.push(temp)

    def currsize(self):
        """Return the tracked element count."""
        return self.s.currsize
def dlst(start, limit):
    """depth-limited-search

    Depth-first search over 8-puzzle states that only expands neighbours
    whose ``dist`` value is below ``mindist + limit``, where ``mindist`` is
    the smallest ``dist`` value seen so far.

    Args:
        start: Initial board as a hashable sequence containing ``'0'``.
        limit: Allowed slack above the best ``dist`` value seen so far.

    Returns:
        * Goal found: ``(nodes_expanded, max_search_depth, max_fringe_size,
          cost, fringe_size, path)``.
        * ``False`` once 181440 nodes have been popped.
        * Fringe exhausted: ``(nodes_expanded, max_search_depth,
          max_fringe_size)``.
    """
    goal = ('0', '1', '2', '3', '4', '5', '6', '7', '8')
    # (blank index in parent) - (blank index in child) -> move name.
    move_for_offset = {1: "LEFT", -1: "RIGHT", 3: "UP", -3: "DOWN"}
    node_budget = 181440

    search = LifoQueue(maxsize=0)
    visited = set()
    nodecount = 0
    # child -> parent.  States are pushed at most once, so a dict replaces
    # the parallel lists and their O(n) index() scans.
    parent_of = {}
    max_search_depth = 0
    max_fringe_size = 1
    mindist = dist(start)

    search.put((start, 0))
    while not search.empty():
        board, depth = search.get()
        nodecount += 1
        if depth > max_search_depth:
            max_search_depth = depth
        visited.add(board)

        if board == goal:
            # The start state has no parent entry, so a start that already
            # is the goal gives an empty path instead of raising ValueError.
            path = []
            node = board
            while node in parent_of:
                parent = parent_of[node]
                move = move_for_offset.get(
                    parent.index('0') - node.index('0'))
                if move is not None:
                    path.append(move)
                node = parent
            path.reverse()
            return (nodecount - 1, max_search_depth, max_fringe_size,
                    depth, search.qsize(), path)

        for neighbor in reversed(neighbors(board)):
            if neighbor in visited:
                continue
            # Evaluate dist once per neighbour (assumes it has no side
            # effects).
            neighbor_dist = dist(neighbor)
            if neighbor_dist < (mindist + limit):
                if neighbor_dist < mindist:
                    mindist = neighbor_dist
                search.put((neighbor, depth + 1))
                visited.add(neighbor)
                parent_of[neighbor] = board
        if search.qsize() > max_fringe_size:
            max_fringe_size = search.qsize()
        if nodecount >= node_budget:
            return False
    return (nodecount - 1, max_search_depth, max_fringe_size)
class VideoStream:
    """Threaded frame reader for capture device 0.

    A background thread pushes frames into a bounded LIFO queue, so
    :meth:`read` returns the most recently captured frame first.
    """

    def __init__(self, queueSize=128):
        """Open the capture and publish its properties as module globals.

        Args:
            queueSize: Maximum number of frames buffered in the queue.
        """
        global capfps
        global capwid
        global caphei
        cap = cv2.VideoCapture(0)
        capfps = cap.get(cv2.CAP_PROP_FPS)
        capwid = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        caphei = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.stream = cap
        self.stopped = False
        # initialize the queue
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        """Start the daemon reader thread and return ``self`` for chaining."""
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        """Reader loop: capture frames until stopped or the stream ends."""
        import time

        while True:
            if self.stopped:
                return
            if self.Q.full():
                # Back off briefly instead of spinning at 100% CPU while
                # the consumer catches up.
                time.sleep(0.01)
                continue
            # read the next frame from the stream
            (grabbed, frame) = self.stream.read()
            # stop when the stream delivers no more frames
            if not grabbed:
                self.stop()
                return
            self.Q.put(frame)

    def read(self):
        """Return the newest buffered frame, blocking until one exists."""
        return self.Q.get()

    def more(self):
        """Return True if there are still frames in the queue."""
        return self.Q.qsize() > 0

    def clearQ(self):
        """Discard all buffered frames; return True if the queue is empty."""
        with self.Q.mutex:
            self.Q.queue.clear()
        # empty() takes the same mutex, so it must be called after release.
        return self.Q.empty()

    def stop(self):
        """Signal the reader thread to stop."""
        self.stopped = True
def balanced_brackets(stri):
    """Check that the brackets ``()[]{}`` in ``stri`` are balanced.

    Non-bracket characters are ignored.

    Args:
        stri: Text to check.

    Returns:
        ``'Success'`` when every bracket is matched.  Otherwise the 1-based
        position of the first closing bracket that has no matching opener;
        if there is none, the 1-based position of the first opening bracket
        that is never closed.
    """
    opener_for = {')': '(', ']': '[', '}': '{'}
    openers = set(opener_for.values())
    stack = LifoQueue()
    for index, char in enumerate(stri, start=1):
        if char in openers:
            # Store the bracket itself with its position.  Pushing
            # stri[index] stored the following character (index is 1-based)
            # and overran the string on a trailing opener.
            stack.put((char, index))
        elif char in opener_for:
            if stack.empty():
                return index
            top, _ = stack.get()
            if top != opener_for[char]:
                return index
    if stack.empty():
        return 'Success'
    # The earliest unclosed opener sits at the bottom of the stack.
    first_open = None
    while not stack.empty():
        _, first_open = stack.get()
    return first_open
def print_q(q):
    """Print every item of ``q`` in retrieval order, then put them back.

    Items are re-inserted in the reverse of the order they were taken out,
    which restores a LIFO queue to its original state.
    """
    taken = []
    while not q.empty():
        item = q.get()
        taken.append(item)
        print(item)
    for item in reversed(taken):
        q.put(item)
def dft_print(self, node):
    """Print the values of the subtree rooted at ``node`` depth-first.

    A node is printed before its children, left subtree before right.
    """
    pending = LifoQueue()
    pending.put(node)
    while not pending.empty():
        current = pending.get()
        print(current.value)
        # Right is pushed first so that left is popped (printed) first.
        for child in (current.right, current.left):
            if child is not None:
                pending.put(child)
def zigzagLevelOrder(self, root: TreeNode) -> List[List[int]]:
    """Return the node values level by level in zigzag order.

    Level 0 is read left-to-right, level 1 right-to-left, and so on.  It is
    a breadth-first traversal implemented with two stacks: the current
    level is popped from one stack while the next level is pushed onto a
    fresh one, which reverses the direction on every level.

    Args:
        root: Root of the binary tree, or None.

    Returns:
        One list of values per level; ``[]`` for an empty tree.
    """
    queue = LifoQueue()
    dic = {}  # depth -> values in visiting order
    if root is None:
        # The declared return type is a list, so return an empty list
        # rather than an empty dict.
        return []
    # Handle the root, then seed the stack with its children.
    dic[0] = [root.val]
    queue.put(childTuple(root.left, 1, True))
    queue.put(childTuple(root.right, 1, True))
    while not queue.empty():
        tmp_queue = LifoQueue()
        while not queue.empty():
            node = queue.get()
            if node.node is None:
                continue
            dic.setdefault(node.depth, []).append(node.node.val)
            # The push order alternates per level so the pop order of the
            # next level runs in the opposite direction.
            if node.is_start_right:
                tmp_queue.put(
                    childTuple(node.node.right, node.depth + 1, False))
                tmp_queue.put(
                    childTuple(node.node.left, node.depth + 1, False))
            else:
                tmp_queue.put(
                    childTuple(node.node.left, node.depth + 1, True))
                tmp_queue.put(
                    childTuple(node.node.right, node.depth + 1, True))
        queue = tmp_queue
    return [dic[depth] for depth in sorted(dic)]
def gather_leaves(root):
    """Return every leaf (node without children) reachable from ``root``.

    The tree is walked depth-first with an explicit stack; leaves appear in
    the order they are popped, i.e. later siblings before earlier ones.
    """
    found = []
    pending = LifoQueue()
    pending.put(root)
    while not pending.empty():
        current = pending.get()
        kids = current.children
        if len(kids) == 0:
            found.append(current)
        for kid in kids:
            pending.put(kid)
    return found
def __iter__(self):
    """Yield the nodes of ``self.tree`` in reverse level order.

    The whole tree is first walked breadth-first (children taken right to
    left); the recorded visiting order is then yielded backwards, so the
    deepest level comes first and the root last.
    """
    frontier = Queue()
    frontier.put(self.tree.root)
    visited_order = []
    while not frontier.empty():
        current = frontier.get()
        visited_order.append(current)
        for child in reversed(current.children):
            frontier.put(child)
    yield from reversed(visited_order)
def checkBalanced(expr):
    """Return True if ``()``, ``{}`` and ``[]`` in ``expr`` are balanced.

    Characters that are not brackets are ignored.
    """
    opener_for = {'}': '{', ']': '[', ')': '('}
    pending = LifoQueue()
    for symbol in expr:
        if symbol in ('(', '{', '['):
            pending.put(symbol)
        elif symbol in opener_for:
            # A closer must match the most recently opened bracket.
            if pending.empty() or pending.get() != opener_for[symbol]:
                return False
    return pending.empty()
def find_ancestor_help(node_name, node_names, adjacency_matrix, node2idx,
                       idx2node):
    """Collect ``node_name`` and all of its ancestors in a directed graph.

    Args:
        node_name: Node to start from; it is included in the result.
        node_names: All node names; only its length is used.
        adjacency_matrix: Indexed as ``adjacency_matrix[a, b]``; a value of
            1 means there is an edge a -> b (a along the height, b along
            the width).
        node2idx: Mapping from node name to matrix index.
        idx2node: Mapping from matrix index to node name.

    Returns:
        An ``OrderedSet`` of node names in the order they were reached.
    """
    ancestors = OrderedSet()
    # Unbounded on purpose: an index can be pushed more than once before it
    # is first popped, so a stack capped at len(node_names) could fill up
    # and block forever in put().
    nodes_to_visit = LifoQueue()
    nodes_to_visit.put(node2idx[node_name])
    while not nodes_to_visit.empty():
        child = nodes_to_visit.get()
        if idx2node[child] in ancestors:
            # Stale duplicate: its parents were handled on the first visit.
            continue
        ancestors.add(idx2node[child])
        for i in range(len(node_names)):
            if (idx2node[i] not in ancestors
                    and adjacency_matrix[i, child] == 1):
                nodes_to_visit.put(i)
    return ancestors
def depth_first_search(graph, start_node):
    """Mark every node reachable from ``start_node`` as explored.

    Nodes are marked when they are discovered (before being expanded), so
    each node enters the stack at most once.
    """
    start_node.mark_as_explored()
    frontier = LifoQueue()
    frontier.put(start_node)
    while not frontier.empty():
        current = frontier.get()
        for edge in graph.get_node_edges(current):
            neighbour = edge.get_destination(current)
            if neighbour.is_explored():
                continue
            neighbour.mark_as_explored()
            frontier.put(neighbour)
def findParenEnd(text: str, start: int):
    """Return the index of the ``)`` closing the group opened at ``start``.

    ``start`` is taken to be the position of an opening parenthesis;
    scanning begins at the following character and honours nesting.
    """
    depth = 1  # the parenthesis at `start` is already open
    position = start
    while depth:
        position += 1
        symbol = text[position]
        if symbol == "(":
            depth += 1
        elif symbol == ")":
            depth -= 1
    return position
def dividedBy2(dec):
    """Return the binary representation of the integer ``dec`` as a string.

    Uses repeated division by 2, collecting remainders on a stack.

    Args:
        dec: Integer to convert.  Negative values get a leading ``'-'``.

    Returns:
        The binary digits, e.g. ``'11101001'`` for 233 and ``'0'`` for 0.
    """
    if dec == 0:
        # The division loop never runs for 0 and used to return ''.
        return '0'
    sign = '-' if dec < 0 else ''
    dec = abs(dec)
    remstack = LifoQueue()
    while dec > 0:
        remstack.put(dec % 2)
        dec = dec // 2
    digits = []
    while not remstack.empty():
        digits.append(str(remstack.get()))
    return sign + ''.join(digits)
def nodes_dfs_preorder(self):
    """Yield the tree's nodes depth-first, each node before its children.

    Children are visited in the order they appear in ``node.children``.
    Yields nothing when ``self.root()`` returns None.
    """
    # The emptiness check used to test an undefined name `root`; ask the
    # tree for its root instead.
    root = self.root()
    if root is None:
        return
    stack = LifoQueue()
    stack.put(root)
    while not stack.empty():
        node = stack.get()
        # pre-visit strategy
        yield node
        for child in reversed(node.children):
            stack.put(child)
def inorder_walk(a_root_node: BSTreeNode):
    """Yield the nodes of a binary search tree in in-order, iteratively.

    For every node the left subtree is yielded first, then the node, then
    the right subtree.  A falsy root yields nothing.
    """
    ancestors = LifoQueue()
    cursor = a_root_node
    while cursor or not ancestors.empty():
        # Descend as far left as possible, remembering the way back.
        while cursor:
            ancestors.put(cursor)
            cursor = cursor.left_child
        visited = ancestors.get()
        yield visited
        cursor = visited.right_child
def index(self, conf):
    """Crawl from ``conf['url']`` and yield one ``MemoryFile`` per page.

    Links are followed depth-first.  A link is queued only if its hostname
    is listed in ``conf['allowed_domains']``, it does not match the
    ``conf['ignore']`` pattern (when one is set) and it has not been seen
    before.  Only links starting with ``/`` are resolved against the page
    URL; other relative links have no hostname and are skipped.

    Args:
        conf: Mapping with the keys ``allowed_domains`` (comma-separated
            host names), ``url`` (start URL) and ``ignore`` (regular
            expression; may be empty).

    Yields:
        ``MemoryFile`` objects holding each page's raw content, with
        ``url``, ``mimetype``, ``size`` and ``modified`` set.
    """
    urls = LifoQueue()
    allowed_domains = conf['allowed_domains'].split(',')
    start = conf['url']
    ignore = re.compile(conf['ignore'])
    found = {start}
    urls.put(start)
    # Context manager so the session's connections are released when the
    # generator finishes or is closed early.
    with requests.session() as session:
        while not urls.empty():
            url = urls.get()
            r = session.get(url)
            for link in BeautifulSoup(r.content, 'lxml').find_all('a'):
                link_href = link.get('href')
                if not link_href:
                    continue
                if link_href.startswith('/'):
                    link_href = urljoin(url, link_href)
                parsed = urlparse(link_href)
                if parsed.hostname not in allowed_domains:
                    continue
                if conf['ignore'] and ignore.match(link_href):
                    continue
                if link_href not in found:
                    found.add(link_href)
                    urls.put(link_href)
            file = MemoryFile(r.content)
            file.url = url
            file.mimetype = 'text/html'
            file.size = 0
            file.modified = None
            yield file
# Demo of the three queue flavours: fill each with ten random letters, then
# drain it and print what comes out.
# NOTE: the name `str` shadows the builtin for the rest of this module.
str = "abcdefghijk"
import random

# Queue (first in, first out)
q = Queue(10)
for _ in range(10):
    q.put(random.choice(str))
print("size=", q.qsize())
while not q.empty():
    print(q.get())
    q.task_done()

# Lifo Queue (last in, first out)
print("-" * 10, "lifo_queue", "-" * 10)
lifoq = LifoQueue(10)
for _ in range(10):
    lifoq.put_nowait(random.choice(str))
while not lifoq.empty():
    print(lifoq.get_nowait())
    lifoq.task_done()

# Priority Queue (smallest item first)
print("-" * 10, "priority queue", "-" * 10)
pq = PriorityQueue(10)
for _ in range(10):
    pq.put_nowait(random.choice(str))
while not pq.empty():
    print(pq.get_nowait())
    pq.task_done()
class Grid:
    """Rectangular lattice of intersections with a movable position.

    The grid has ``rows + 1`` by ``columns + 1`` intersections.  The
    position starts at the top-left corner (0, 0); the end is the
    bottom-right corner.  Moves are recorded on a stack so they can be
    undone with :meth:`backtrack`.
    """

    def __init__(self, columns=2, rows=2, allowed_paths=None, verbose=False):
        """Build the grid.

        Args:
            columns: Number of cells horizontally.
            rows: Number of cells vertically.
            allowed_paths: Directions that may be used anywhere on the
                grid.  Defaults to all four directions; ``None`` replaces
                the former mutable list default, which was shared between
                every instance.
            verbose: Print a message whenever a move or backtrack fails.
        """
        if allowed_paths is None:
            allowed_paths = [Path.up, Path.right, Path.down, Path.left]
        self._columns = columns
        self._rows = rows
        self._allowed_paths = allowed_paths
        self._verbose = verbose
        self._pos_x = 0
        self._pos_y = 0
        self._move_history = LifoQueue()
        self._last_move = None
        self._create_grid_matrix()

    def _create_grid_matrix(self):
        """Create one ``Intersection`` per lattice point.

        Each intersection gets the allowed directions that do not lead off
        the grid.
        """
        self._grid_matrix = []
        for r in range(0, self._rows + 1):
            self._grid_matrix.append([])
            for c in range(0, self._columns + 1):
                open_paths = []
                if Path.up in self._allowed_paths and r > 0:
                    open_paths.append(Path.up)
                if Path.right in self._allowed_paths and c < self._columns:
                    open_paths.append(Path.right)
                if Path.down in self._allowed_paths and r < self._rows:
                    open_paths.append(Path.down)
                if Path.left in self._allowed_paths and c > 0:
                    open_paths.append(Path.left)
                self._grid_matrix[r].append(Intersection(open_paths))

    def get_intersection(self):
        """Return the intersection at the current position."""
        return self._grid_matrix[self._pos_y][self._pos_x]

    def get_open_paths(self):
        """Return the open paths of the current intersection."""
        return self._grid_matrix[self._pos_y][self._pos_x].get_open_paths()

    def backtrack(self):
        """Undo the most recent move.

        The current intersection is reset before stepping back.

        Returns:
            True if a move was undone; False if the history is empty or
            the recorded move is not one of the four directions.
        """
        if self._move_history.empty():
            if self._verbose:
                print('! No more paths to backtrack from.')
            return False
        self._last_move = self._move_history.get()
        self.get_intersection().reset()
        # Step in the direction opposite to the recorded move.
        if self._last_move == Path.up:
            self._pos_y += 1
        elif self._last_move == Path.right:
            self._pos_x -= 1
        elif self._last_move == Path.down:
            self._pos_y -= 1
        elif self._last_move == Path.left:
            self._pos_x += 1
        else:
            if self._verbose:
                print('! Unable to backtrack anymore.')
            return False
        return True

    def move(self, path=None):
        """Move one step along ``path``, or along the first open path.

        The move undone by the preceding :meth:`backtrack`, if any, is
        removed from the open paths first so it is not taken again.

        Args:
            path: Direction to move in; ``None`` picks the first open path.

        Returns:
            True if the position changed, False if no move was possible.
        """
        open_paths = self.get_open_paths()
        if self._last_move is not None and self._last_move in open_paths:
            open_paths.remove(self._last_move)
        self._last_move = None
        if path is None:
            if len(open_paths) > 0:
                path = open_paths[0]
            else:
                if self._verbose:
                    print('! No more open paths to move into.')
                return False
        elif path not in open_paths:
            if self._verbose:
                print('! Unable to move {}.'.format(path.name))
            return False
        self.get_intersection().use_path(path)
        self._move_history.put(path)
        if path == Path.up:
            self._pos_y -= 1
        elif path == Path.right:
            self._pos_x += 1
        elif path == Path.down:
            self._pos_y += 1
        elif path == Path.left:
            self._pos_x -= 1
        return True

    def is_at_start(self):
        """Return True when the position is the top-left corner."""
        return self._pos_x == self._pos_y == 0

    def is_at_end(self):
        """Return True when the position is the bottom-right corner."""
        return self._pos_y == self._rows and self._pos_x == self._columns

    def _to_string(self):
        """Render the grid as ASCII art.

        ``x`` marks the current position, ``^ > v <`` the path used at an
        intersection and ``+`` an intersection with no used path.
        """
        COL_WIDTH = 3
        ROW_HEIGHT = 1
        output = ''
        for r in range(0, self._rows + 1):
            # Row of intersections joined by horizontal segments.
            for c in range(0, self._columns + 1):
                if self._pos_y == r and self._pos_x == c:
                    area = 'x'
                else:
                    path = self._grid_matrix[r][c].get_used_path()
                    if path == Path.up:
                        area = '^'
                    elif path == Path.right:
                        area = '>'
                    elif path == Path.down:
                        area = 'v'
                    elif path == Path.left:
                        area = '<'
                    else:
                        area = '+'
                if c < self._columns:
                    area += ''.rjust(COL_WIDTH, '-')
                output += area
            # Vertical segments between this row and the next.
            if r < self._rows:
                for h in range(0, ROW_HEIGHT):
                    output += os.linesep
                    for c in range(0, self._columns + 1):
                        output += '|'
                        if c < self._columns:
                            output += ''.rjust(COL_WIDTH, ' ')
            if r < self._rows:
                output += os.linesep
        return output

    def __str__(self):
        return self._to_string()

    def __repr__(self):
        return self._to_string()
class CrawlerType2(BaseCrawler):
    """Crawler for sites organised as artists -> artist pages -> songs.

    Work is kept as task dicts on a shared LIFO queue.  ``type`` selects
    the handler: 0 = artist listing, 1 = artist's first page, 2 = further
    artist page, 3 = single song.
    """

    def __init__(self, name, start_url, list_of_urls, number_of_threads,
                 delayed_request=False, max_allowed_error=10):
        """
        :param name: Passed to ``BaseCrawler``
        :param start_url: Passed to ``BaseCrawler``; prefix of every
            requested URL
        :param list_of_urls: URLs (relative to start_url) that seed each
            crawl cycle
        :param number_of_threads: Passed to ``BaseCrawler``
        :param delayed_request: Passed to ``BaseCrawler`` as
            ``delay_request``
        :param max_allowed_error: Passed to ``BaseCrawler`` as ``max_err``
        """
        super().__init__(name, start_url, number_of_threads,
                         delay_request=delayed_request,
                         max_err=max_allowed_error)
        self.url_list = list_of_urls
        self.task_queue = LifoQueue()

    def run(self):
        """
        Function to be called by subclasses to start crawler.
        Repeats crawl cycles forever and never returns.
        """
        while True:
            # Crawl cycle starts
            print_util.print_info(
                'Starting crawl with {0}'.format(
                    self.name
                ),
                Colors.BLACK
            )
            # Add URLs to task queue
            for url in self.url_list:
                self.task_queue.put(
                    {
                        'type': 0,
                        'url': url,
                        'n_errors': 0
                    }
                )
            # Start all threads
            threads = []
            for n in range(1, self.number_of_threads + 1):
                temp_thread = Thread(
                    target=self.threader,
                    args=(n,)
                )
                threads.append(temp_thread)
                temp_thread.start()
            # Wait for threads to finish
            for temp_thread in threads:
                temp_thread.join()
            # Crawl cycle ends

    def threader(self, thread_id):
        """
        Worker function: process tasks until the queue is empty.
        A failing task is re-queued with its error count incremented.
        :param thread_id: As usual
        """
        from queue import Empty

        while True:
            # Non-blocking get: testing empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # leaving this thread (and run()'s join) blocked forever.
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                return
            if task['n_errors'] >= self.max_allowed_errors:
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id,
                        task
                    )
                )
                continue
            print_util.print_info(
                '{0} --> New task : {1}'.format(
                    thread_id,
                    task
                )
            )
            try:
                if task['type'] == 0:
                    self.get_artists(
                        thread_id,
                        task['url']
                    )
                elif task['type'] == 1:
                    self.get_artist(
                        thread_id,
                        task['url'],
                        task['artist']
                    )
                elif task['type'] == 2:
                    self.get_songs_from_page(
                        thread_id,
                        task['url'],
                        task['artist']
                    )
                elif task['type'] == 3:
                    self.get_song(
                        thread_id,
                        task['url'],
                        task['song'],
                        task['artist']
                    )
                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(
                        thread_id,
                        task
                    ),
                    Colors.GREEN
                )
            except Exception as e:
                # Worker boundary: log, count the failure and retry later.
                print_util.print_error(
                    '{0} --> Error : {1}'.format(
                        thread_id,
                        e
                    )
                )
                task['n_errors'] += 1
                self.task_queue.put(task)

    def get_artists(self, thread_id, url):
        """
        Method to get artists from a URL and queue one task per artist
        :param thread_id: As usual
        :param url: As usual
        """
        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)
        artists_with_url = self.get_artist_with_url(raw_html)
        for artist_url, artist in artists_with_url:
            self.task_queue.put(
                {
                    'type': 1,
                    'url': artist_url,
                    'artist': artist,
                    'n_errors': 0
                }
            )

    def get_artist(self, thread_id, url, artist):
        """
        Get songs for artist from URL in two parts:
            1. Get songs from first page (:param url)
            2. Add all other pages to task queue
        :param thread_id: As usual
        :param url: URL of the artist's first page
        :param artist: Artist name
        """
        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)
        pages = self.get_pages_for_artist(raw_html)
        # Add all songs from current page
        for song_url, song in self.get_songs(raw_html):
            self.task_queue.put(
                {
                    'type': 3,
                    'url': song_url,
                    'song': song,
                    'artist': artist,
                    'n_errors': 0
                }
            )
        # Add rest of pages in task queue
        for page in pages[1:]:
            self.task_queue.put(
                {
                    'type': 2,
                    'url': page,
                    'artist': artist,
                    'n_errors': 0
                }
            )

    def get_songs_from_page(self, thread_id, url, artist):
        """
        Get songs from other pages of artist
        :param thread_id: As usual
        :param url: As usual
        :param artist: As usual
        """
        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)
        for song_url, song in self.get_songs(raw_html):
            self.task_queue.put(
                {
                    'type': 3,
                    'url': song_url,
                    'song': song,
                    'artist': artist,
                    'n_errors': 0
                }
            )

    def get_song(self, thread_id, url, song, artist):
        """
        Get song from a URL and save it, unless it is already stored
        :param thread_id: As usual
        :param url: As usual
        :param song: As usual
        :param artist: Artist of song
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} --> Song {1} already exists. Skipping.'.format(
                    thread_id,
                    song
                )
            )
            # Actually skip: without this return the song was fetched and
            # saved again right after the "Skipping" warning.
            return
        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)
        album, lyrics, lyricist, additional_artists = self.get_song_details(
            raw_html
        )
        # Note: additional_artists are artist(s) featured in the song
        db_operations.save(
            song,
            url,
            album,
            url,
            self.start_url,
            lyrics,
            additional_artists + [artist, ],
            [artist, ],
            lyricist
        )

    def get_song_details(self, raw_html):
        """
        User overrides this method to get details about a song
        :param raw_html: HTML code of web page
        :return: Song details as (album, lyrics, lyricists,
            additional artists)
        """
        return (
            'album',
            'lyrics',
            [
                'lyricist1',
                'lyricist2'
            ],
            [
                'additional_artist1',
                'additional_artist2',
            ]
        )

    def get_artist_with_url(self, raw_html):
        """
        User overrides this method to get all artists with URL from a web page
        :param raw_html: HTML code of web page
        :return: Artists with URLs, as (url, artist) pairs
        """
        return [
            ('url1', 'artist1'),
            ('url2', 'artist2')
        ]

    def get_pages_for_artist(self, raw_html):
        """
        Get a list of pages for an artist from given HTML code
        :param raw_html: HTML code of web page
        :return: List of URLs
        """
        return [
            'url1',
            'url2'
        ]

    def get_songs(self, raw_html):
        """
        User overrides this function to get songs with URL from page's HTML
        :param raw_html: HTML code for web page
        :return: Songs with URLs, as (url, song) pairs
        """
        return [
            ('url1', 'song1'),
            ('url2', 'song2')
        ]
class CrawlerType0(BaseCrawler):
    """Crawler for sites organised as movie listing -> movie -> songs.

    Work is kept as task dicts on a shared LIFO queue.  ``type`` selects
    the handler: 0 = movie listing, 1 = movie page, 2 = single song.
    """

    def __init__(self, name, start_url, list_of_url, number_of_threads,
                 max_err=10, delay_request=False):
        """
        Crawler for the websites of type 0.
        :param name: Passed to ``BaseCrawler``
        :param start_url: Passed to ``BaseCrawler``; prefix of every
            requested URL
        :param list_of_url: List of URLs to start with.
        :param number_of_threads: Passed to ``BaseCrawler``
        :param max_err: Passed to ``BaseCrawler``
        :param delay_request: Passed to ``BaseCrawler``
        """
        # Constructor for BaseCrawler
        super().__init__(name, start_url, number_of_threads, max_err,
                         delay_request)
        # Initialize data members
        self.task_queue = LifoQueue()
        self.url_list = list_of_url

    def threader(self, thread_id):
        """
        Worker function: process tasks until the queue is empty.
        A failing task is re-queued with its error count incremented.
        :param thread_id: Assigned ID of thread.
        """
        from queue import Empty

        while True:
            # Non-blocking get: testing empty() and then calling a blocking
            # get() lets another worker take the last task in between,
            # leaving this thread (and run()'s join) blocked forever.
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                return
            if task['n_errors'] >= self.max_allowed_errors:
                # Too many errors
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id,
                        task
                    )
                )
                continue
            # Log the task
            print_util.print_info(
                '{0} --> New task : {1}'.format(
                    thread_id,
                    task
                )
            )
            try:
                # Call corresponding function
                if task['type'] == 0:
                    self.get_movies(
                        thread_id,
                        task['url']
                    )
                elif task['type'] == 1:
                    self.download_movie(
                        thread_id,
                        task['url'],
                        task['movie']
                    )
                elif task['type'] == 2:
                    self.download_song(
                        thread_id,
                        task['url'],
                        task['song'],
                        task['movie'],
                        task['movie_url']
                    )
                # Log success
                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(
                        thread_id,
                        task
                    ),
                    Colors.GREEN
                )
            except Exception as e:
                # Worker boundary: log, count the failure and retry later.
                print_util.print_error(
                    '{0} --> Error : {1}'.format(
                        thread_id,
                        e
                    )
                )
                task['n_errors'] += 1  # Increment number of errors
                self.task_queue.put(task)  # Put back in queue

    def run(self):
        """
        Function to be called by subclasses to start crawler.
        Repeats crawl cycles forever and never returns.
        """
        while True:
            # Crawl cycle start
            print_util.print_info(
                'Starting new crawl with {0}.'.format(
                    self.name
                ),
                Colors.BLACK
            )
            # Add all URLs to task queue
            for url in self.url_list:
                self.task_queue.put(
                    {
                        'type': 0,
                        'url': url,
                        'n_errors': 0  # No errors initially
                    }
                )
            # Start all threads
            threads = []  # List for all threads
            for n in range(1, self.number_of_threads + 1):
                temp_thread = Thread(
                    target=self.threader,  # Worker function
                    args=(n,)  # Pass thread id as argument
                )
                threads.append(temp_thread)
                temp_thread.start()
            for temp_thread in threads:
                temp_thread.join()
            # Crawl cycle ends

    def download_movie(self, thread_id, url, movie):
        """
        Method to get all songs from a movie website.
        Queues one task per song unless the stored song count already
        equals the number of songs found on the page.
        :param thread_id: As usual
        :param url: URL of movie
        :param movie: Name of movie
        """
        movie_website = self.start_url + url
        raw_html = open_request(movie_website, delayed=self.delay_request)
        song_with_url = self.get_songs_with_url(raw_html)
        # No new songs added
        if db_operations.number_of_songs(self.start_url, url) == len(
                song_with_url):
            db_operations.update_last_crawl(self.start_url, url)
            print_util.print_warning(
                '{0} --> Movie {1} contains no new songs. Skipping.'.format(
                    thread_id,
                    movie
                )
            )
            return
        # Add all songs
        for song_url, song in song_with_url:
            self.task_queue.put(
                {
                    'type': 2,
                    'url': song_url,
                    'song': song,
                    'movie': movie,
                    'movie_url': url,
                    'n_errors': 0
                }
            )

    def download_song(self, thread_id, url, song, movie, movie_url):
        """
        Method to get song details from website and save them.
        :param thread_id: As usual
        :param url: URL of song
        :param song: Name of song
        :param movie: Name of movie
        :param movie_url: URL of movie
        """
        # Song already exists
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} -> Song {1} already exists. Skipping.'.format(
                    thread_id,
                    song
                )
            )
            return
        # Get HTML
        song_url_ = self.start_url + url
        song_html = open_request(song_url_, delayed=self.delay_request)
        lyrics, singers, music_by, lyricist = self.get_song_details(song_html)
        # Save in database
        db_operations.save(
            song=song,
            song_url=url,
            movie=movie,
            movie_url=movie_url,
            start_url=self.start_url,
            lyrics=lyrics,
            singers=singers,
            director=music_by,
            lyricist=lyricist
        )

    def get_movies(self, thread_id, url):
        """
        Get movie list from website and queue one task per movie
        :param thread_id: As usual
        :param url: URL of website from which movies are to be fetched
        """
        # Get website HTML
        website = self.start_url + url
        raw_html = open_request(website, delayed=self.delay_request)
        # Add movies to task queue
        movies_with_url = self.get_movies_with_url(raw_html)
        for url, movie in movies_with_url:
            self.task_queue.put(
                {
                    'type': 1,
                    'url': url,
                    'movie': movie,
                    'n_errors': 0
                }
            )

    def get_movies_with_url(self, raw_html):
        """
        Gets all movies' details from HTML code.
        User overrides this method to get list of movies from raw html
        :param raw_html: HTML code of web page
        :return: Movies with their URL, as (url, movie) pairs
        """
        return [('foobar.com', 'Foo Bar')]

    def get_songs_with_url(self, raw_html):
        """
        User overrides this method to get list of songs from raw html
        :param raw_html: HTML code of web page
        :return: Songs with URL, as (url, song) pairs
        """
        return [('foobar.com', 'Foo Bar')]

    def get_song_details(self, raw_html):
        """
        User overrides this method to get details for a song from raw html
        :param raw_html: HTML code of web page
        :return: Structured song details as (lyrics, singers,
            music director, lyricist)
        """
        return (
            'lyrics',
            [
                'singer1',
                'singer2'
            ],
            'music director',
            'lyricist'
        )
class CrawlerType1(BaseCrawler):
    """
    Crawler for sites laid out as: artist listing page -> artist page (albums
    with their songs) -> song page.

    Work is driven by a shared LIFO queue of task dicts. Every task has a
    'type' (0 = artist listing page, 1 = artist page, 2 = song page), a 'url',
    an 'n_errors' retry counter, plus the extra fields that type needs.

    Subclasses override get_artists_with_url, get_albums_with_songs and
    get_song_details to parse the actual site; the versions here return
    placeholder data.
    """

    def __init__(self, name, start_url, list_of_url, number_of_threads,
                 delay_request=False, max_allowed_errors=3):
        """
        :param name: Crawler name, forwarded to BaseCrawler
        :param start_url: Base URL of the site, forwarded to BaseCrawler;
            task URLs are appended to it before fetching
        :param list_of_url: URLs of the artist listing pages that seed every
            crawl cycle
        :param number_of_threads: Number of worker threads started per cycle
        :param delay_request: Forwarded to BaseCrawler
        :param max_allowed_errors: Forwarded to BaseCrawler as ``max_err``
            (presumably stored as ``self.max_allowed_errors``, which
            ``threader`` uses as the retry limit -- confirm in BaseCrawler)
        """
        super().__init__(
            name,
            start_url,
            number_of_threads=number_of_threads,
            delay_request=delay_request,
            max_err=max_allowed_errors
        )
        self.url_list = list_of_url
        self.task_queue = LifoQueue()

    def run(self):
        """
        Run crawl cycles back to back.

        Each cycle seeds the task queue with one type-0 task per URL in
        ``self.url_list``, starts ``self.number_of_threads`` workers running
        ``threader`` and waits for all of them to finish. The loop has no exit
        condition, so this method does not return.
        """
        while True:
            # Crawl cycle starts
            print_util.print_info(
                'Starting new crawl with {0}'.format(self.name),
                Colors.BLACK
            )

            # Add all URLs to task queue
            for url in self.url_list:
                self.task_queue.put(
                    {
                        'type': 0,
                        'url': url,
                        'n_errors': 0
                    }
                )

            # Start all threads; thread ids are 1-based and only used in logs
            threads = []
            for n in range(1, self.number_of_threads + 1):
                temp_thread = Thread(
                    target=self.threader,
                    args=(n,)
                )
                threads.append(temp_thread)
                temp_thread.start()

            for temp_thread in threads:
                temp_thread.join()
            # Crawl cycle ends

    def threader(self, thread_id):
        """
        Worker function: pull tasks off the queue until it is empty.

        A task that raises is re-queued with its 'n_errors' incremented; a
        task whose 'n_errors' has reached ``self.max_allowed_errors`` is
        dropped with a warning.

        :param thread_id: Identifier of this worker, used only in log output
        """
        # Local import so this class does not depend on the module's import
        # block already exposing queue.Empty.
        from queue import Empty

        while True:
            # A separate empty() check followed by a blocking get() is racy:
            # another worker can take the last task in between, leaving this
            # worker blocked forever and run() stuck in join(). A non-blocking
            # get makes "check and take" a single step.
            try:
                task = self.task_queue.get_nowait()
            except Empty:
                # NOTE: a worker stops as soon as it sees an empty queue, even
                # if another worker is still running a task that will enqueue
                # more work; the workers still alive pick that work up.
                return

            if task['n_errors'] >= self.max_allowed_errors:
                print_util.print_warning(
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id,
                        task
                    )
                )
                continue

            print_util.print_info(
                '{0} --> New task : {1}'.format(
                    thread_id,
                    task
                )
            )

            try:
                if task['type'] == 0:
                    self.get_artists(
                        thread_id,
                        task['url']
                    )
                elif task['type'] == 1:
                    self.get_artist_albums(
                        thread_id,
                        task['url'],
                        task['artist']
                    )
                elif task['type'] == 2:
                    self.get_song(
                        thread_id,
                        task['url'],
                        task['song'],
                        task['album'],
                        task['album_url'],
                        task['artist']
                    )
                print_util.print_info(
                    '{0} --> Task complete : {1}'.format(
                        thread_id,
                        task
                    ),
                    Colors.GREEN
                )
            except Exception as e:
                # Any failure counts against the task and puts it back on the
                # queue for another attempt.
                print_util.print_error(
                    '{0} --> Error : {1}'.format(
                        thread_id,
                        e
                    )
                )
                task['n_errors'] += 1
                self.task_queue.put(task)

    def get_artists(self, thread_id, url):
        """
        Fetch an artist listing page and queue one type-1 task per artist.

        :param thread_id: Identifier of the calling worker (unused here)
        :param url: URL of the listing page, appended to ``self.start_url``
        """
        website = self.start_url + url
        raw_html = open_request(website, delayed=self.delay_request)

        artists_with_url = self.get_artists_with_url(raw_html)

        for artist_url, artist in artists_with_url:
            self.task_queue.put(
                {
                    'type': 1,
                    'url': artist_url,
                    'artist': artist,
                    'n_errors': 0
                }
            )

    def get_artist_albums(self, thread_id, url, artist):
        """
        Fetch an artist page and queue one type-2 task per song found on it.

        :param thread_id: Identifier of the calling worker (unused here)
        :param url: URL of the artist page; joined to ``self.start_url`` with
            a '/' (unlike the other fetch methods, which concatenate directly)
        :param artist: Artist name, carried along into every song task
        """
        website = self.start_url + '/' + url
        raw_html = open_request(website, delayed=self.delay_request)

        albums_with_songs = self.get_albums_with_songs(raw_html)

        for album, song_with_url in albums_with_songs:
            for song_url, song in song_with_url:
                self.task_queue.put(
                    {
                        'type': 2,
                        'song': song,
                        'url': song_url,
                        'album': album,
                        # The artist page URL doubles as the album URL.
                        'album_url': url,
                        'artist': artist,
                        'n_errors': 0
                    }
                )

    def get_song(self, thread_id, url, song, album, album_url, artist):
        """
        Fetch a song page and save the song, unless it is already stored.

        The album is saved in the database's movie fields, and the artist is
        saved as singers, director and lyricist alike.

        :param thread_id: Identifier of the calling worker, used in log output
        :param url: URL of the song page, appended to ``self.start_url``
        :param song: Song title
        :param album: Album name
        :param album_url: URL of album (same as artist) on the website
        :param artist: Artist name
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} -> Song {1} already exists. Skipping'.format(
                    thread_id,
                    song
                )
            )
            return

        song_website = self.start_url + url
        song_html = open_request(song_website, delayed=self.delay_request)
        lyrics = self.get_song_details(song_html)

        db_operations.save(
            song=song,
            song_url=url,
            movie=album,
            movie_url=album_url,
            start_url=self.start_url,
            lyrics=lyrics,
            singers=artist,
            director=artist,
            lyricist=artist
        )

    def get_artists_with_url(self, raw_html):
        """
        Get artist list from HTML code. Placeholder meant to be overridden.

        :param raw_html: Web page HTML code
        :return: List of (artist URL, artist name) tuples
        """
        return [('a.com', 'a'), ]

    def get_albums_with_songs(self, raw_html):
        """
        Get all songs with albums for an artist. Placeholder meant to be
        overridden.

        :param raw_html: Web page HTML code
        :return: List of (album name, [(song URL, song title), ...]) tuples
        """
        return [
            (
                'album1',
                [
                    ('url1', 'song1'),
                    ('url2', 'song2')
                ]
            ),
            (
                'album2',
                [
                    ('url3', 'song3'),
                    ('url4', 'song4')
                ]
            )
        ]

    def get_song_details(self, song_html):
        """
        Get lyrics of the song from webpage. Placeholder meant to be
        overridden.

        :param song_html: Song page HTML code
        :return: Lyrics of the song
        """
        return 'la la la la'
class UnhandledExceptionHandler(Singleton):
    """
    This class implements functionality to catch and log exceptions in a block of code, and also execute a set of
    teardown handlers intended to shut down the application gracefully and do any desired cleanup. It is implemented
    as a singleton because the teardown handlers can have global effects (e.g., stopping the event loop).

    This class is intended to be used as a context manager:
    >>> unhandled_exception_handler = UnhandledExceptionHandler.singleton()
    >>> with unhandled_exception_handler:
    >>>     # code which may throw an exception goes here!
    """

    # Exit code raised (via SystemExit) on the main thread when the first handled exception was an Exception.
    HANDLED_EXCEPTION_EXIT_CODE = 1
    # Exit code raised (via SystemExit) on the main thread when a teardown callback itself raised.
    EXCEPTION_DURING_TEARDOWN_EXIT_CODE = 2

    def __init__(self) -> None:
        """
        Create the callback stack and bookkeeping state, and install this object's teardown signal handler for
        SIGTERM and SIGINT.
        """
        super().__init__()
        # Serializes the teardown section of __exit__ across threads.
        self._handling_lock = Lock()
        self._teardown_callback_stack = LifoQueue()  # we execute callbacks in the reverse order that they were added
        self._logger = log.get_logger(__name__)
        # FIFO queue of exceptions seen by __exit__; the main thread inspects the oldest one.
        self._handled_exceptions = Queue()
        self._teardown_callback_raised_exception = False

        # Set up a handler to be called when process receives SIGTERM or SIGINT.
        # Note: this will raise if called on a non-main thread, but we should NOT work around that here. (That could
        # prevent the teardown handler from ever being registered!) Calling code should be organized so that this
        # singleton is only ever initialized on the main thread.
        signal.signal(signal.SIGTERM, self._application_teardown_signal_handler)
        signal.signal(signal.SIGINT, self._application_teardown_signal_handler)

    def add_teardown_callback(self, callback, *callback_args, **callback_kwargs) -> None:
        """
        Add a callback to be executed in the event of application teardown. Callbacks are executed in the reverse
        order that they were added.

        :param callback: The method callback to execute
        :type callback: callable
        :param callback_args: args to be passed to the callback function
        :type callback_args: list
        :param callback_kwargs: kwargs to be passed to the callback function
        :type callback_kwargs: dict
        """
        self._teardown_callback_stack.put((callback, callback_args, callback_kwargs))

    def _application_teardown_signal_handler(self, sig, frame):
        """
        A signal handler that will trigger application teardown by raising AppTeardown.

        :param sig: Signal number of the received signal
        :type sig: int
        :param frame: The interrupted stack frame
        :type frame: frame
        :raises AppTeardown: always
        """
        # Only the two signals registered in __init__ are expected here; any other value would raise KeyError below.
        signal_names = {
            signal.SIGTERM: 'SIGTERM',
            signal.SIGINT: 'SIGINT',
        }
        # NOTE(review): '{}' placeholders assume the project logger does str.format-style interpolation -- confirm.
        self._logger.info('{} signal received. Triggering teardown.', signal_names[sig])
        raise AppTeardown

    def __enter__(self) -> None:
        """
        Enables this to be used as a context manager. No special handling is needed on enter.

        Nothing is returned, so ``with handler as x`` binds ``x`` to None.
        """
        pass

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        """
        Enables this to be used as a context manager. If an exception was raised during the execution block (inside
        the "with" statement) then exc_value will be set to the exception object.

        There are four situations in which we can go through this method:
        1. Exception, on main thread
            - The exception is logged and in some cases (e.g., SystemExit) may be immediately reraised.
            - Teardown callbacks are executed.
            - Example: A KeyboardInterrupt exception raised because user presses ctrl-c / sends SIGINT signal
        2. Exception, not on main thread
            - The exception is logged and in some cases may be passed to the main thread to be reraised.
            - Teardown callbacks are executed.
            - Example: Any unhandled exception that is raised on a SafeThread
        3. Normal exit, on main thread
            - We check to see if there was an exception that we need to reraise on the main thread. In almost all
              cases we will *not* reraise an exception on the main thread since it has already been logged and
              teardown callbacks have already been executed on the thread that raised the exception.
            - Teardown callbacks are *not* executed.
            - Example: A SystemExit exception raised by sys.exit() is passed from a SafeThread to the main thread to
              make Python set the exit code.
        4. Normal exit, not on main thread
            - Do nothing! All is well.

        :param exc_type: Type of the exception raised inside the "with" block, or None
        :param exc_value: The exception raised inside the "with" block, or None
        :param traceback: Traceback of that exception, or None
        :return: Always True, which suppresses exc_value on the current thread
        :raises SystemExit: on the main thread only -- either the handled SystemExit itself, or a new one carrying
            HANDLED_EXCEPTION_EXIT_CODE or EXCEPTION_DURING_TEARDOWN_EXIT_CODE
        """
        if exc_value:
            # An exception occurred during execution, so run the teardown callbacks. We use a lock here since multiple
            # threads could raise exceptions at the same time and we only want to execute these once.
            with self._handling_lock:
                if not isinstance(exc_value, (SystemExit, AppTeardown, KeyboardInterrupt)):
                    # It is not very useful to log the SystemExit exception since it is raised by sys.exit(), and thus
                    # application exit is completely expected.
                    self._logger.exception('Unhandled exception handler caught exception.')

                # Draining the stack means a later exception (on any thread) finds it empty and runs nothing.
                while not self._teardown_callback_stack.empty():
                    callback, args, kwargs = self._teardown_callback_stack.get()
                    self._logger.debug('Executing teardown callback: {}', callback)
                    try:
                        callback(*args, **kwargs)
                    except:  # pylint: disable=bare-except
                        # Also catch any exception that occurs during a teardown callback and log it.
                        self._teardown_callback_raised_exception = True
                        self._logger.exception('Exception raised by teardown callback {}', callback)

                # Record the exception so the main thread can decide whether to reraise / set the exit code.
                self._handled_exceptions.put(exc_value)

        if current_thread() is main_thread():
            # The usage of this class on the main thread is a special case since only exceptions raised on the main
            # thread may affect the exit code of the overall application. Any unhandled exceptions raised on child
            # threads will only interrupt execution on that particular thread.
            #
            # This main-thread-only code path serves to ensure that exceptions raised on child threads during a `with
            # unhandled_exception_handler` block will also raise an exception on the main thread upon exit of the main
            # thread's `with unhandled_exception_handler` block. This ensures we will set a failing exit code even if
            # an exception is raised on a child thread.
            #
            # Note: this only works for child threads protected by the UnhandledExceptionHandler (e.g., an instance of
            # a SafeThread).
            #
            # We check the self._handled_exceptions queue to see if there was an exception that we want to reraise. We
            # only care about the first exception on the queue -- it was the first caught exception so it "wins".
            if not self._handled_exceptions.empty():
                handled_exception = self._handled_exceptions.get()

                # We reraise SystemExit on the main thread -- this specific exception is how Python controls setting
                # the process exit code, and that only works if raised on the main thread.
                if isinstance(handled_exception, SystemExit):
                    raise handled_exception

                # We also want to make sure the process exit code is set non-zero if the UnhandledExceptionHandler
                # handled any Exception at all. (Note: this does not include AppTeardown or KeyboardInterrupt, which
                # both inherit from BaseException.)
                if isinstance(handled_exception, Exception):
                    raise SystemExit(self.HANDLED_EXCEPTION_EXIT_CODE)

            # If an exception was raised while executing one of the teardown callbacks, also make sure to exit with a
            # non-zero exit code. This is only reached when neither raise above fired.
            if self._teardown_callback_raised_exception:
                raise SystemExit(self.EXCEPTION_DURING_TEARDOWN_EXIT_CODE)

        # Returning True from this method tells Python not to re-raise the exc_value exception on the current thread.
        return True