def find_path_astar(start, stop):
    """A* search from article `start` to article `stop` over Wikipedia links.

    Edge cost is uniform (1 per link); the heuristic is distance_heuristic().
    Prints the ancestor chain of each expanded node, and calls
    unwrap_path(parent, start, stop) once the goal is reached.
    Returns None — results are emitted via side effects (print / unwrap_path),
    matching the original contract.
    """
    global article_data

    visited = set()              # sets give O(1) membership vs. O(n) list scans
    unvisited = {start}          # frontier membership mirror of the heap
    parent = {start: "None"}     # string "None" sentinel — expected by unwrap_path
    gscore = {start: 0}          # best known link-count from start
    fscore = MinHeap()           # (article, g + h) priority queue
    fscore.insert((start, distance_heuristic(start, stop)))

    while unvisited:
        curr = fscore.extract()[0]
        # The heap can hold stale duplicates (we re-insert on improvement);
        # skip nodes already expanded instead of relying on an exception.
        if curr in visited:
            continue
        print(ancestor_chain(parent, curr))
        if curr == stop:
            unwrap_path(parent, start, stop)
            break
        unvisited.discard(curr)
        visited.add(curr)

        try:
            curr_links = get_page_links(curr)["parse"]["links"]
        except (KeyError, TypeError):
            # Malformed or missing API response — skip this node
            # (article_data[curr] is deliberately kept, as in the original).
            continue

        for link_entry in curr_links:
            # Only follow existing ("blue") links: the API marks them with
            # an "exists" key whose value is the empty string.
            if link_entry.get("exists") != "":
                continue
            link = link_entry.get("*")
            if link is None or not is_article(link) or link in visited:
                continue
            tentative_gscore = gscore[curr] + 1
            if link not in unvisited:
                unvisited.add(link)
            elif tentative_gscore >= gscore[link]:
                continue  # existing route to `link` is at least as good
            parent[link] = curr
            gscore[link] = tentative_gscore
            fscore.insert((link, tentative_gscore + distance_heuristic(link, stop)))

        # Free the cached page data for the node we just expanded;
        # pop() with a default avoids KeyError if it was never cached.
        article_data.pop(curr, None)
# NOTE(review): `Counting` is used below but not imported in this chunk —
# presumably defined or imported elsewhere in the file; confirm before
# running this section standalone.
from minheap import MinHeap
import sys

#get the count for every job and state
# argv[1]: input data file; count() yields per-job counts, per-state counts,
# and the grand total of certified applications.
count = Counting(sys.argv[1])
job_count, state_count, sum_count = count.count()

#find the top_10 jobs
# A bounded min-heap of capacity 10 retains only the 10 largest counts seen.
job_heap = MinHeap(10)
job_fhandle = open(sys.argv[2],'w')
for key,value in job_count.items():
    job_heap.add(key,value)
job_fhandle.write('TOP_OCCUPATIONS;NUMBER_CERTIFIED_APPLICATIONS;PERCENTAGE\n')
result = dict()
# Drain up to 10 entries; lstrip/rstrip with '""' treats the argument as a
# character set, so this strips surrounding double-quote characters.
for i in range(min(10,len(job_count))):
    key,value = job_heap.extract()
    result[key.lstrip('""').rstrip('""')] = value
# Two-pass sort: alphabetical first, then by count descending — Python's
# sort is stable, so count ties come out in name order.
result = sorted(result.items(), key = lambda item:item[0])
result.sort(key = lambda x:x[1], reverse = True)
for item in result:
    key = item[0]
    value = item[1]
    # Share of all certified applications, rounded to one decimal place.
    p = round(value / sum_count * 100.0, 1)
    s = key + ';' + str(value) + ';' + str(p) + '%' + '\n'
    job_fhandle.write(s)

#find the top_10 states
# Same top-10 pattern for states; the heap is drained and the file handles
# are presumably closed later in the file (beyond this chunk) — verify.
state_heap = MinHeap(10)
state_fhandle = open(sys.argv[3],'w')
for key,value in state_count.items():
    state_heap.add(key,value)