def fulltext(self):
    """Compute the new and deleted bibcodes for each type of error from the
    most recent list of bibcodes compared with the previous most recent list.
    Results are stored in variables that are then used in report.py."""
    for err in conf['FULLTEXT_ERRORS']:
        err_msg = "_".join(err.split('"')[1].split()).replace(
            '-', '_').replace(']', '').replace('[', '')
        dir = conf['AIR_DATA_DIRECTORY'] + "ft/" + err_msg + '/'
        # get the 2 most recent files
        files = sorted(glob.glob(dir + '*.txt'), key=sorter, reverse=True)
        sort(files[0])
        sort(files[1])
        remove_duplicates(files[0])
        remove_duplicates(files[1])
        ft_start = files[1]
        ft_end = files[0]
        ft_new = dir + "new.tsv"
        self.values['new_ft_' + err_msg] = comm(ft_end, ft_start, ft_new)
        ft_fixed = dir + "fixed.tsv"
        self.values['fixed_ft_' + err_msg] = comm(ft_start, ft_end, ft_fixed)
def solve(objective: str):
    """Brute-force algorithm for solving puzzles. Analogous to DFS."""
    global main_game
    states = [GameStatus(main_game)]
    print("Currently inspected:", end='\n')
    while True:
        print('\r%010d' % GameStatus.count, end='', flush=True)
        # Get top status from stack
        try:
            status = states.pop()
        except IndexError:
            steps = []
            break
        # Set 'active' main_game to popped state
        main_game = deepcopy(status.game)
        # Check win condition
        if win_conditions[objective]():
            steps = status.steps
            break
        # expand possibilities
        for card in deepcopy(remove_duplicates(main_game.player.hand)):
            # Check if there is enough mana
            if card.mana_cost <= main_game.player.mana:
                # Get list of valid targets
                if hasattr(card, 'is_valid_target'):
                    valid_targets = [
                        ct for ct in remove_duplicates(main_game.characters)
                        if card.is_valid_target(ct)
                    ]
                else:
                    valid_targets = [None]
                for target in valid_targets:
                    # Play card
                    main_game.player.play_card(card, target=target)
                    new_status = GameStatus(main_game, status.steps)
                    new_status.steps.append({
                        'card': type(card),
                        'target': type(target)
                    })
                    states.append(new_status)
                    # Reset to previous status (to test with other targets)
                    main_game = deepcopy(status.game)
    print('\n')
    return steps
def update_picture_database(album_name):
    """
    Updates the picture database whose field name is 'album_name'.
    Performs a Twitter REST API search with 'album_name' as a hashtag and
    retrieves the new pictures from the results to be added to the Picture
    database.
    """
    hashtag = "#" + album_name
    # Each entry of the 'tweets_result' list has the following contents:
    # [media_url, favorite_count, user_screen_name, tweet_id]
    tweets_result = search(hashtag)

    # remove results that already exist in the database
    pivots = ["tweet_id", "src_url"]
    print "Result tweets have %d results BEFORE duplicate removal." % len(tweets_result)
    for pivot in pivots:
        if len(tweets_result) <= 0:
            print "No new tweets were found."
            break
        tweets_result = remove_duplicates(tweets_result, pivot)
    print "Result tweets have %d results AFTER duplicate removal." % len(tweets_result)

    # Modify picture contents. The modifications are:
    # 1. set the new picture name to be stored
    # 2. extract image contents from the url
    if len(tweets_result) > 0:
        new_pictures = []
        for i in range(len(tweets_result)):
            # set the new img name to be stored
            src = tweets_result[i][0]
            new_img_name = generate_img_filename(src)
            # extract the img content and put it in a new file
            new_img_file = get_img_content(src, new_img_name)
            new_pictures.append(new_img_file)

        # upload the new_pictures list to the S3 bucket
        upload_to_bucket(new_pictures, album_name)
        print "New '%s' album images have been uploaded to the S3 bucket." % album_name

        # upload contents to the database
        i = 0
        album = Album.objects.get(name=album_name)  # get the album object
        if album is not None:
            for img in tweets_result:
                pic = Picture(
                    album=album,
                    url=new_pictures[i][0],
                    src_url=img[0],
                    like_count=img[1],
                    owner=img[2],
                    tweet_id=img[3],
                )
                pic.save()
                print "%s saved to database." % img[0]
                i += 1
            # fire off signal
            abcd = Picture(album=album, url=new_pictures[0], like_count=img[1],
                           owner=img[2], tweet_id=img[3])
            send_email.send(sender=Picture, instance=abcd)
    return
def choose_best_location(locations):
    """Uses Bing Web Search to see if a tagged location is a real location"""
    unique_locations = remove_duplicates(locations)
    key = "2b18d2bbb30f4a82a53845076a562986"
    search_url = "https://api.cognitive.microsoft.com/bing/v7.0/search"
    for location in unique_locations:
        # Prevent exceeding the request quota
        time.sleep(0.3)
        response = requests.get(
            search_url,
            headers={"Ocp-Apim-Subscription-Key": key},
            params={
                "q": " ".join(location),
                "textDecorations": True,
                "textFormat": "HTML",
                "count": 3,
            },
        )
        response.raise_for_status()
        search_results = response.json()
        x = response.headers
        search_items = search_results["webPages"]["value"]
        for item in search_items:
            if "www.cmu.edu" in item["url"]:
                return " ".join(location)
    return None
def hyperNeighborhood(phog, ortholog_type, threshold, taxon_id):
    edges = set()
    hyper_neighbors = phog.get_hyper_neighbors(ortholog_type, threshold)
    if taxon_id:
        taxon = NCBITaxonomy.objects.get(id__exact=taxon_id)
    nodes = {}
    nodes[phog.get_accession(ortholog_type, threshold)] = {}
    nodes[phog.get_accession(ortholog_type, threshold)]['description'] \
        = phog.get_description(ortholog_type, threshold)
    if taxon_id:
        nodes[phog.get_accession(ortholog_type, threshold)]['genes_from_taxon'] \
            = remove_duplicates([leaf.sequence_header.identifier()
                                 for leaf in phog.get_contained_leaves_from_taxon(
                                     taxon, ortholog_type, threshold)])
    for type in hyper_neighbors.keys():
        for neighbor in hyper_neighbors[type]:
            nodes[neighbor.get_accession(ortholog_type, threshold)] = {}
            nodes[neighbor.get_accession(ortholog_type, threshold)]['description'] \
                = neighbor.get_description(ortholog_type, threshold)
            if taxon_id:
                nodes[neighbor.get_accession(ortholog_type, threshold)]['genes_from_taxon'] \
                    = remove_duplicates([leaf.sequence_header.identifier()
                                         for leaf in neighbor.get_contained_leaves_from_taxon(
                                             taxon, ortholog_type, threshold)])
    for type in hyper_neighbors.keys():
        for hyper_neighbor in hyper_neighbors[type]:
            edges.add(((phog.get_accession(ortholog_type, threshold),
                        hyper_neighbor.get_accession(ortholog_type, threshold)), type))
            neighbors_of_hyper_neighbors \
                = hyper_neighbor.get_hyper_neighbors(ortholog_type, threshold)
            if type in neighbors_of_hyper_neighbors:
                relevant_neighbors \
                    = neighbors_of_hyper_neighbors[type] & hyper_neighbors[type]
                for neighbor in relevant_neighbors:
                    edges.add(
                        ((hyper_neighbor.get_accession(ortholog_type, threshold),
                          neighbor.get_accession(ortholog_type, threshold)), type))
    ret = {}
    ret['query'] = phog.get_accession(ortholog_type, threshold)
    ret['edges'] = list(edges)
    ret['nodes'] = nodes
    _writer = json.JsonWriter()
    # print ret
    return _writer.write(ret)
def test(tbl='logs'):
    '''
    >>> db, d = test('location')
    for row in d.dict:
        sql = db.is_duplicate('location', row)
        if sql:
            db.query(sql)
    '''
    from utils import unicode_csv_reader, replace_txt, remove_duplicates
    import tablib

    db = SimpleDB(os.path.join(os.getcwd(), 'test.db'))
    if tbl == 'logs':
        f_name = 'test_eternity.csv'
        headers = ('day', 'start_time', 'stop_time', 'duration',
                   'parent', 'activity', 'note', 'tags')
    elif tbl == 'location':
        f_name = 'test_gps.csv'
        headers = ('latitude', 'longitude', 'elevation', 'timestamp')
    else:
        raise Exception, "tbl must be 'logs' or 'location'"

    # get data
    with open(os.path.join(os.getcwd(), f_name), 'r') as f:
        #d = list(set([tuple(row) for row in unicode_csv_reader(f)]))
        d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])
    data = tablib.Dataset(*d, headers=headers)

    # TODO - adjust the replace_txt() function to accept OrderedDicts,
    # since the order of replacement is important.

    # replacement dicts
    parent_dict = {
        u'Media>': u'MEDIA',
        u'MISC - Real Life>': u'REAL_LIFE',
        u'Basic Routine>Meals & Snacks>': u'BASIC',
        u'Basic Routine>': u'BASIC',
        u'Salubrious Living>': u'HEALTH',
    }
    activity_dict = {
        u'RL - MISC - Home': u'HOME',
        u'RL - MISC - Outside': u'OUTSIDE',
        u'へんたい': u'HENTAI',
        u'アニメ': u'ANIME',
        u'Grocery Shopping': u'GROCERY-SHOPPING',
        u'Restaurant': u'RESTAURANT',
        u'Shower & Bathroom': u'SHOWER-BATHROOM'
    }

    # test for duplicates in the data (skip the first row to avoid headers)
    for row in data.dict[1:]:
        if tbl == 'logs':
            row['parent'] = replace_txt(row['parent'], parent_dict)
            row['activity'] = replace_txt(row['activity'], activity_dict)
        sql = db.is_duplicate(tbl, row)
        if sql:
            db.query(sql)
def set_popup(self, device_error):
    # Remove duplicate ids
    ids = utils.remove_duplicates(self.popup_id)
    for id in ids:
        query = {'id': id}
        set = {'$push': {"derror": {'$each': device_error}}}
        self.__client.sh.devices.update(query, set)
    self.popup_id.clear()
def pipeline(self, img):
    # Each detection is ordered in terms of priority. A detection higher on
    # the list will override a similar detection lower down.

    # Initial detection performed on the entire image.
    transposed, padhw, shavedim, resized = preprocess(img, shave=False)
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes = process_output(yolo_output, threshold=0.20, padhw=padhw,
                           shaved=False, shavedim=shavedim)

    # Perform detection on each box in the stored previous boxes.
    for box in self.boxes:
        offset = 150
        shavedim = [
            box.y1 - offset, box.y2 + offset, box.x1 - offset, box.x2 + offset
        ]
        transposed, padhw, shavedim, resized = preprocess(
            img, shave=True, shavedim=shavedim)
        yolo_output = model.predict(np.array([transposed]))[0]
        boxes2 = process_output(yolo_output, threshold=0.35, padhw=padhw,
                                shaved=True, shavedim=shavedim)
        boxes += boxes2

    # Last detection performed on the picture shaved to just the highway ahead.
    transposed, padhw, shavedim, resized = preprocess(img, shave=True,
                                                      shavedim=(350, 500, 500, 1000))
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes2 = process_output(yolo_output, threshold=0.30, padhw=padhw,
                            shaved=True, shavedim=shavedim)
    boxes += boxes2

    # Remove duplicate boxes.
    boxes = remove_duplicates(boxes, img)
    boxes = self.apply_threshold(boxes)
    if (len(boxes) < len(self.boxes)) and self.dropped < 3:
        self.dropped += 1
        boxes = self.boxes
    else:
        self.dropped = 0
    drawn = draw_boxes(boxes, img)
    self.boxes = boxes
    return drawn
def filterAndDropSystematics(self, include='.*', exclude=None, verbose=False):
    nBefore = len(self.systematics)
    anyFilter = include or exclude
    toBeExcluded = filterWithRegexp(self.systematics, exclude) if exclude else []
    systs = ['NOM'] if 'NOM' in self.systematics else []
    if include:
        systs += filterWithRegexp(self.systematics, include)
    if exclude:
        systs = [s for s in systs if toBeExcluded and s not in toBeExcluded]
    self.systematics = systs if anyFilter else self.systematics
    self.systematics = remove_duplicates(self.systematics)
    nAfter = len(self.systematics)
    if verbose:
        print "%s : dropped %d systematics, left with %s" % (self.name, nBefore - nAfter, str(self.systematics))
    assert self.systematics.count('NOM') == 1 or not nBefore, \
        "%s : 'NOM' required %s" % (self.name, str(self.systematics))
def args2params(args):
    """Parses command-line arguments into the 'params' structure."""
    root_dir = os.path.dirname(__file__)
    params = {}
    if args.RANDOM_STATE:
        params['random_state'] = args.RANDOM_STATE
    else:
        params['random_state'] = np.random.randint(2**15 - 1)
    params['learners'] = utils.remove_duplicates(
        [ml for ml in args.LEARNERS.split(',')])  # learners
    params['preps'] = utils.remove_duplicates(
        [prep for prep in args.PREP.split(',')])  # pre-processors
    params['search'] = args.SEARCH
    if params['search'] == 'random':
        if args.PREP:
            params['model_dir'] = 'ml/random_search_preprocessing/'
        else:
            params['model_dir'] = 'ml/random_search/'
    else:
        params['model_dir'] = 'ml/grid_search/'
    params['model_dir'] = os.path.join(root_dir, params['model_dir'])
    if not os.path.isdir(params['model_dir']):
        raise RuntimeError("Models' directory '%s' does not exist"
                           % (params['model_dir']))
    params['input_file'] = args.INPUT_FILE if os.path.isabs(
        args.INPUT_FILE) else os.path.join(root_dir, args.INPUT_FILE)
    params['dataset'] = params['input_file'].split('/')[-1].split('.csv')[0]
    params['results_path'] = '/'.join([args.RDIR, params['dataset']]) + '/'
    params['n_trials'] = args.N_TRIALS
    params['n_combos'] = args.N_COMBOS
    params['label'] = args.LABEL
    params['lsf'] = args.LSF
    params['queue'] = args.QUEUE
    params['n_jobs'] = args.N_JOBS
    params['m'] = args.M
    return params
def test_remove(self):
    l = [
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
    ]
    self.assertEqual(remove_duplicates(l), [
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
    ])
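# The snippets in this collection all assume a shared `remove_duplicates`
# helper. A minimal sketch consistent with the expected output of test_remove
# above (first occurrence kept, original order preserved); each project's
# actual helper may differ:
def remove_duplicates(items):
    seen = []
    result = []
    for item in items:
        # Equality-based membership test, so unhashable items also work.
        if item not in seen:
            seen.append(item)
            result.append(item)
    return result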
def __init__(self, options):
    self.options = options
    self.targets = OrderedDict()
    self.buildEnv = buildEnv = BuildEnv(options, self.targets)

    _targets = {}
    targetDef = options.targets
    self.add_targets(targetDef, _targets)
    dependencies = self.order_dependencies(_targets, targetDef)
    dependencies = list(remove_duplicates(dependencies))

    for dep in dependencies:
        if self.options.build_deps_only and dep == targetDef:
            continue
        self.targets[dep] = _targets[dep]
def prepare_sources(self):
    if self.options.skip_source_prepare:
        print("SKIP")
        return

    toolchain_sources = (tlc.source for tlc in self.buildEnv.toolchains if tlc.source)
    for toolchain_source in toolchain_sources:
        print("prepare sources for toolchain {} :".format(toolchain_source.name))
        toolchain_source.prepare()

    sources = (dep.source for dep in self.targets.values() if not dep.skip)
    sources = remove_duplicates(sources, lambda s: s.__class__)
    for source in sources:
        print("prepare sources {} :".format(source.name))
        source.prepare()
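# Several call sites (prepare_sources above, the Netflix scraper and the
# reddit comment harvester below) pass a key function as a second argument.
# A minimal sketch of that keyed variant, assuming the same
# first-occurrence-wins semantics as the plain version; the actual helpers
# may differ:
def remove_duplicates(iterable, key_function=None):
    if key_function is None:
        key_function = lambda item: item
    seen = set()
    result = []
    for item in iterable:
        key = key_function(item)
        # Keys must be hashable (classes, strings, and tuples all are).
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result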
def filterAndDropSystematics(self, include='.*', exclude=None, verbose=False):
    "include and exclude can be either a regex, a single value, or a list"
    nBefore = len(self.systematics)

    def is_regex(exp):
        return exp and '*' in exp

    def is_list(exp):
        return type(exp) == list

    def is_literal_list(exp):
        return (exp and ',' in exp)

    def is_single_value(exp):
        return exp and len(exp)

    def str_to_list(exp):
        return eval("['{0}']".format(exp))

    print 'type include ', type(include), ' type()==list: ', (type(include) == list)
    toBeIncluded = (
        [s for s in self.systematics if s in include] if is_list(include) else
        [s for s in self.systematics if s in str_to_list(include)] if is_literal_list(include) else
        filterWithRegexp(self.systematics, include) if is_regex(include) else
        str_to_list(include) if is_single_value(include) else
        self.systematics)
    toBeExcluded = (
        [s for s in self.systematics if s in exclude] if is_list(exclude) else
        [s for s in self.systematics if s in str_to_list(exclude)] if is_literal_list(exclude) else
        filterWithRegexp(self.systematics, exclude) if is_regex(exclude) else
        str_to_list(exclude) if is_single_value(exclude) else
        [])
    self.systematics = remove_duplicates(
        [s for s in toBeIncluded if s not in toBeExcluded])
    nAfter = len(self.systematics)
    if verbose:
        print "%s : dropped %d systematics, left with %s" % (
            self.name, nBefore - nAfter, str(self.systematics))
    assert self.systematics.count('NOM') == 1 or not nBefore, \
        "%s : 'NOM' required %s" % (self.name, str(self.systematics))
def extract_edges(bw, corners):
    # Extract the contour.
    # TODO: probably should have this passed in from the puzzle, since it
    # already does this. It was done this way b/c the contours don't
    # correspond to the correct pixel locations in this cropped version
    # of the image.
    (_, cnts, _) = cv2.findContours(bw.copy(), cv2.RETR_LIST,
                                    cv2.CHAIN_APPROX_NONE)
    if len(cnts) != 1:
        raise Exception('Found incorrect number of contours.')
    contour = cnts[0]
    contour = utils.remove_duplicates(contour)

    # Out of all of the found corners, find the closest points in the contour;
    # these will become the endpoints of the edges.
    for i in range(len(corners)):
        best = 10000000000
        closest_point = contour[0]
        for j in range(len(contour)):
            d = utils.distance(corners[i], contour[j])
            if d < best:
                best = d
                closest_point = contour[j]
        corners[i] = closest_point

    # We need the beginning of the vector to correspond to the beginning of an edge.
    contour = utils.rotate(contour, utils.find_first_in(contour, corners))

    # assert(corners[0]!=corners[1] && corners[0]!=corners[2] && corners[0]!=corners[3] && corners[1]!=corners[2] &&
    #        corners[1]!=corners[3] && corners[2]!=corners[3]);
    # std::vector<std::vector<cv::Point>::iterator> sections;
    sections = utils.find_all_in(contour, corners)

    # Make the corners go in the correct order
    for i in range(4):
        corners[i] *= sections[i]

    # assert(corners[1]!=corners[0] && corners[0]!=corners[2] && corners[0]!=corners[3] && corners[1]!=corners[2] &&
    #        corners[1]!=corners[3] && corners[2]!=corners[3]);
    edge1 = edge.create_edge(contour, sections[0], sections[1])
    edge2 = edge.create_edge(contour, sections[1], sections[2])
    edge3 = edge.create_edge(contour, sections[2], sections[3])
    edge4 = edge.create_edge(contour, sections[3], len(contour))
    return (edge1, edge2, edge3, edge4)
def start_scrapp(netflixInstance, loginEvent=None, loadedEvent=None, queue=None):
    # loginEvent.wait()
    # TODO: optimize this loop
    movies_sources = []
    for i, so in enumerate(['az', 'za']):
        netflixInstance.driver.get(config.MAIN_URL + '/browse/genre/34399?so=' + so)
        print('STATUS: Finding all movies on netflix...[' + str(i+1) + '/2] ', end='', flush=True)
        scroll_page_until_ends(netflixInstance.driver, .9)
        print('OK')
        print('STATUS: Saving innerHTML of all movies...[' + str(i+1) + '/2] ', end='', flush=True)
        # for each slider-item, save its html code
        movies_sources += list(
            map(lambda p: bs(p.get_attribute('innerHTML'), 'html.parser'),
                netflixInstance.driver.find_elements_by_class_name('slider-item'))
        )
        print('OK')

    # Removing duplicates and sorting by name
    all_sources = utils.remove_duplicates(movies_sources, lambda s: s.find('a')['aria-label'])
    all_sources = sorted(all_sources, key=lambda s: s.find('a')['aria-label'])
    print("TOTAL OF MOVIES = " + str(len(all_sources)))
    print('STATUS: Starting to get information... ')

    if not os.path.exists(config.FOLDER_NAME):
        os.mkdir(config.FOLDER_NAME)

    # For each slider-item, run a thread to get the movie's information.
    # If the number of active threads is greater than max_threads,
    # wait until one of the active threads is released.
    total_len = len(all_sources)
    for i, s_movie in enumerate(all_sources):
        t = Thread(
            target=retrieve_movie,
            args=(s_movie, total_len),
            name='MOVIE_' + utils.safe_movie_name(s_movie.find('a')['aria-label'])
        )
        t.start()
        while active_count() > config.MAX_THREADS:
            sleep(1)
def make_tetrahedron(N=50):
    corners = list(map(np.array, [
        (1, 0, -2**(-.5)),
        (-1, 0, -2**(-.5)),
        (0, 1, 2**(-.5)),
        (0, -1, 2**(-.5))]))
    verts, I = make_tesselated_triangle(corners[1], corners[2], corners[0], 0, N)
    v, i = make_tesselated_triangle(corners[1], corners[3], corners[2], len(verts), N)
    verts += v
    I += i
    v, i = make_tesselated_triangle(corners[3], corners[0], corners[2], len(verts), N)
    verts += v
    I += i
    v, i = make_tesselated_triangle(corners[0], corners[3], corners[1], len(verts), N)
    verts += v
    I += i
    return utils.remove_duplicates(verts, I)
def effective_net_values(net_values, dates, code):
    """
    Args:
        net_values: net values of the fund, as a list
        dates: date of each net value
        code: code of the fund
    Returns:
        effective net values and dates of the fund, as 2 lists
    """
    # sort by date
    net_values.reverse()
    dates.reverse()
    effective_values = []
    effective_dates = []
    # Filter effective net values
    for i in range(len(net_values)):
        if net_values[i] != 1:
            effective_values.extend(net_values[i:])
            effective_dates.extend(dates[i:])
            break
    if not utils.is_date_ascending(effective_dates):
        logger.info("{} dates are not ascending".format(code))
        return [], []
    if utils.has_duplicates(effective_dates):
        if not utils.is_duplicates_identical(effective_dates, effective_values):
            logger.info(
                "{} is deleted because it contains the same dates with different net values"
                .format(code))
            return [], []
        else:
            effective_dates, effective_values = utils.remove_duplicates(
                effective_dates, effective_values)
    length = len(effective_values)
    for i in range(1, length - 1):
        ratio = effective_values[i] / effective_values[i + 1]
        if ratio < 0.5 or ratio > 2:
            logger.info("{} on {} changed {}: {}".format(
                code, [effective_dates[i], effective_dates[i + 1]],
                1 - effective_values[i] / effective_values[i + 1],
                [effective_values[i], effective_values[i + 1]]))
            return [], []
    return effective_values, effective_dates
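# effective_net_values above unpacks two lists from remove_duplicates, i.e. a
# variant that deduplicates a paired (date, value) series together. A minimal
# sketch under that assumption (the first occurrence of each date wins); the
# project's utils module may implement it differently:
def remove_duplicates(dates, values):
    seen = set()
    unique_dates, unique_values = [], []
    for date, value in zip(dates, values):
        if date not in seen:
            seen.add(date)
            unique_dates.append(date)
            unique_values.append(value)
    return unique_dates, unique_values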
def match_stances(stances, member):
    """Filters the member's stances, keeping only those that match a stance
    in stances. The member's stances consist of personal stances
    (member.credo), voting record stances (member.stances), and group
    stances (member.pro_rel_stances).

    Keyword arguments:
        stances -- the list of stances to filter the member stances by
        member  -- the member whose stances will be filtered

    Return:
        A list of all member stances found in stances. The list has
        duplicates removed.
    """
    matches = []
    member_stances = member.credo + member.stances + member.pro_rel_stances
    for stance in stances:
        filter_fun = lambda member_stance: stance.match(member_stance)
        matches += filter(filter_fun, member_stances)
    return remove_duplicates(matches)
def train(self, dataset):
    """Uses a third of the dataset examples for validation and the rest for
    training. Once it has been trained, it holds a SetOfRules obtained by
    converting into rules the DecisionTree produced by a DecisionTreeLearner
    trained on the same training examples. The rules are then pruned
    according to their accuracy on the validation examples."""
    examples = dataset.examples
    total_size = len(examples)
    validation_size = total_size // 3
    training_size = total_size - validation_size
    dataset.examples = examples[:training_size]
    self.validation_examples = examples[training_size:total_size]
    super().train(dataset)
    self.set_of_rules = SetOfRules(dataset, self.tree)
    self.input_names = remove_all(self.attr_names[self.target], self.attr_names)
    self.set_of_rules.rules = remove_duplicates(
        [self.prune(rule) for rule in self.set_of_rules.rules])
    dataset.examples = examples
def tag_with_stanford_tagger(email, original_email, stanford_tags):
    """
    Uses the Stanford tagger as a last resort since it takes a long time
    to process
    """
    names = []
    i = 0
    # Get names from tagged text
    while i < len(stanford_tags):
        name = []
        # Chunk names
        while stanford_tags[i][1] == "PERSON":
            if i + 1 >= len(stanford_tags):
                name.append(stanford_tags[i][0])
                i += 1
                break
            name.append(stanford_tags[i][0])
            i += 1
        i += 1
        if name:
            names.append(name)
    unique_names = remove_duplicates(names)
    if len(unique_names) > 0:
        names = find_speaker_from_names(unique_names, original_email)
        for name in names:
            email = tag_speaker_using_name(name, email)
    return email
def Copying(N, NSet, NNeighbor, PSet, PNeighbor):
    initNodes = NSet + NNeighbor + 1
    G = nx.complete_graph(initNodes)
    for i in range(initNodes, N):
        # Random subset of existing nodes
        randomSet = random.sample(G.nodes(), NSet)
        # Creating the set of neighbors of the random subset
        neighbors = []
        for j in randomSet:
            for neighbor in G.adjacency_list()[j]:
                neighbors.append(neighbor)
        neighbors = utils.remove_duplicates(neighbors)
        # Trimming the neighborhood if it is too large
        if len(neighbors) > NNeighbor:
            neighbors = random.sample(neighbors, NNeighbor)
        G.add_node(i)
        # Forming edges with the subset
        for node in randomSet:
            if random.random() < PSet:
                G.add_edge(i, node)
        # Forming edges with the neighborhood
        for node in neighbors:
            if random.random() < PNeighbor:
                G.add_edge(i, node)
    return G
test_image = cv2.imread('test_images/test3.jpg')
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
plt.imshow(test_image[200:600, 400:1200])
test_image.shape

processed, padhw, shavedim, resized = preprocess(test_image, shave=True)
plt.imshow(resized)
padhw

prediction = model.predict(np.array([processed]))[0]
boxes = process_output(prediction, padhw=padhw, shaved=True)
len(boxes)
boxes = remove_duplicates(boxes, test_image)
img = draw_boxes(boxes, test_image)
plt.figure(figsize=[10, 10])
plt.imshow(img)


class Pipeline:
    def __init__(self):
        self.boxes = []
        self.dropped = 0
        self.history = deque(maxlen=8)
        self.first_frames = True

    def apply_threshold(self, boxes):
        if len(boxes) == 0 and len(self.history) > 0:
            self.history.popleft()
# Binarize the image
ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Find the contours
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Simplify the contours with the algorithm described in the article
contours = u.contour_sifting(contours)

""" EXTRA """
# Remove the duplicate contours
contours = u.remove_duplicates(contours)

# Sort the contours by area
contours_ordered = u.contours_order_by_area(contours)
# Try to extract the FIPs of the QR code, given the sorted contours
contours = u.get_qr_fips(contours_ordered)

# Make a copy of the image
img_qr = img.copy()
# Draw the contours
cv2.drawContours(img_qr, contours, -1, (0, 255, 0), 2)

# Check whether we have contours that satisfy the given constraints
if len(contours) <= 2:
for i, filename in enumerate(choices):
    print("\t{0}. {1}".format(i, filename))
else:
    raise Exception, "\tNO FILES FOUND"

print('\ntype the number of the file you want to import')
FILENUM = int(raw_input('>>> ')[0])
chosen_file = choices[FILENUM]

#-----------------------------------------------------------------------------
# DUMP DATA TO DATABASE
#-----------------------------------------------------------------------------

# get data
with open(chosen_file, 'r') as f:
    d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])

# validate and insert the data
if table_name == 'logs':
    insertEternity(d)
elif table_name == 'location':
    insertLocation(d)
else:
    raise Exception, 'neither logs nor location'

#-----------------------------------------------------------------------------
# VALIDATION AND FORMATTING
#-----------------------------------------------------------------------------
]
xlp = [
    'PG', 'PEP', 'KO', 'WMT', 'MO', 'PM', 'COST', 'CL', 'GIS', 'STZ',
    'CLX', 'KMB'
]
xlv = [
    'JNJ', 'UNH', 'PFE', 'MRK', 'ABT', 'BMY', 'TMO', 'AMGN', 'CI', 'ANTM',
    'GILD', 'CVS'
]
xlf = [
    'BRK.B', 'JPM', 'BAC', 'WFC', 'C', 'GS', 'SPGI', 'CME', 'NLK', 'ICE',
    'AXP'
]
smh = [
    'TSM', 'INTC', 'NVDA', 'ASML', 'AVGO', 'TXN', 'AMD', 'QCOM', 'XLNX',
    'SWKS', 'MU'
]
xtl = [
    'BAND', 'VG', 'TMUS', 'ANET', 'CIEN', 'LITE', 'CCOI', 'FFIV', 'CSCO',
    'JNPR', 'UI'
]
xlu = [
    'NEE', 'D', 'DUK', 'SO', 'AEP', 'EXC', 'SRE', 'WEC', 'ED', 'ES', 'PEG',
    'FE', 'AWK'
]
iyr = [
    'AMT', 'PLD', 'CCI', 'AQIX', 'DLR', 'PSA', 'WELL', 'SPG', 'O', 'PSA',
    'BXP', 'CBRE'
]
tot_list = u.remove_duplicates(
    list(itertools.chain.from_iterable(all_watchlist)))
    product_info.append('availability-missing-data')
    q.put(product_info)


if __name__ == '__main__':
    n_pages = 2
    startTime = time.time()

    # Searching the bestsellers, we will scrape all of the bestseller links first
    p_links = []
    for i in range(1, (n_pages + 1)):
        tmp_p_links = get_bestsellers_links(i)
        p_links += tmp_p_links
    p_links = utils.remove_duplicates(p_links)

    df_bestsellers_links = pd.DataFrame({'Product link page': p_links})
    df_bestsellers_links.to_csv(csv_directory + 'amazon_bestseller_products_links.csv',
                                index=True, encoding='utf-8')

    # Now we have an array (and a .csv file) with all bestseller links, and what
    # we want to do is get the product info. I'll be using threading for more speed.
    m = Manager()
    q = m.Queue()
    p = {}
    for i in range(0, len(p_links)):
        logger.debug("starting thread {}".format(i))
        p[i] = threading.Thread(target=get_product_data, args=(p_links[i], q))
        p[i].start()

    # Join process
f = open(file_name, 'r', encoding='ANSI')
soup = BeautifulSoup(f, 'html.parser')
overview = soup.find('table', attrs={'rules': 'all'})
rows = overview.find_all('tr')
content = []
for row in rows:
    row_content = []
    elements = row.find_all('td')
    for i in range(len(elements)):
        element = elements[i]
        raw = element.text.strip()
        clean_string = clean(raw, i)
        row_content.append(clean_string)
    if not all('' == s or s.isspace() for s in row_content):
        content.append(row_content)
headers = content.pop(0)
content = remove_duplicates(content)
df = pandas.DataFrame(content, columns=headers)
os.chdir("../csv")
df.to_csv(file_name.split('.')[0] + '.csv', index=False, encoding='ANSI')
os.chdir("../xls")
def listLinksInComment(url, name, type_):
    from guis import progressBG
    from reddit import reddit_request
    from utils import clean_str, remove_duplicates, is_filtered
    from default import comments_link_filter

    log('listLinksInComment:%s:%s' % (type_, url))
    post_title = ''
    li = []
    global harvest

    # ShowOnlyCommentsWithlink=False
    # if type_=='linksOnly':
    #     ShowOnlyCommentsWithlink=True

    # url='https://np.reddit.com/r/videos/comments/64j9x7/doctor_violently_dragged_from_overbooked_cia/dg2pbtj/?st=j1cbxsst&sh=2d5daf4b'
    # url=url.split('?', 1)[0]+'.json'+url.split('?', 1)[1]
    # url='https://www.reddit.com/r/Music/comments/4k02t1/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/' + '.json'
    # Only get up to "https://www.reddit.com/r/Music/comments/4k02t1"; do not
    # include "/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/", because
    # we'll have a problem when it looks like this:
    # "https://www.reddit.com/r/Overwatch/comments/4nx91h/ever_get_that_feeling_déjà _vu/"
    # url=re.findall(r'(.*/comments/[A-Za-z0-9]+)',url)[0]
    # UPDATE: you need to convert this: https://www.reddit.com/r/redditviewertesting/comments/4x8v1k/test_test_what_is_déjà_vu/
    # to this: https://www.reddit.com/r/redditviewertesting/comments/4x8v1k/test_test_what_is_d%C3%A9j%C3%A0_vu/
    # Use the safe='' argument in quote_plus to encode only the weird chars part.
    url = urllib.quote_plus(url, safe=':/?&')
    if '?' in url:
        url = url.split('?', 1)[0] + '.json?' + url.split('?', 1)[1]
    else:
        url += '.json'

    xbmc_busy()
    loading_indicator = progressBG('Loading...')
    loading_indicator.update(0, 'Retrieving comments')
    content = reddit_request(url)
    loading_indicator.update(10, 'Parsing')
    if not content:
        loading_indicator.end()
        return
    try:
        xbmc_busy()
        content = json.loads(content)

        # harvest links in the post text (just 1)
        r_linkHunter(content[0]['data']['children'])

        # submitter=content[0]['data']['children'][0]['data']['author']
        submitter = clean_str(content, [0, 'data', 'children', 0, 'data', 'author'])

        # The post title is provided in the json; we'll just use that instead
        # of messages from addLink().
        # post_title=content[0]['data']['children'][0]['data']['title']
        post_title = clean_str(content, [0, 'data', 'children', 0, 'data', 'title'])

        # harvest links in the post itself
        r_linkHunter(content[1]['data']['children'])
        # for i, h in enumerate(harvest):
        #     log('  %d %s %.4d -%s link[%s]' % (i, h[7].ljust(8)[:8], h[0], h[3].ljust(20)[:20], h[2]))

        comments_count_orig = len(harvest)

        # remove duplicate links
        def k2(x):
            return (x[2], x[3])
        harvest = remove_duplicates(harvest, k2)
        comments_count_rd = len(harvest)
        loading_indicator.update(15, 'Removed %d duplicates' % (comments_count_orig - comments_count_rd))

        c_threads = []
        q_liz = Queue()
        comments_count = len(harvest)
        filtered_posts = 0
        for idx, h in enumerate(harvest):
            comment_score = h[0]
            link_url = h[2]
            if comment_score < int_CommentTreshold:
                log('  comment score %d < %d, skipped' % (comment_score, int_CommentTreshold))
                filtered_posts += 1
                continue
            if is_filtered(comments_link_filter, link_url):
                log('  [{0}] is hidden by comments_link_filter'.format(link_url))
                filtered_posts += 1
                continue
            # count how many times we're hitting the same domain
            domain, domain_count = count_links_from_same_domain_comments(link_url)
            delay = compute_anti_dos_delay(domain, domain_count)
            # have threads process each comment post
            t = threading.Thread(target=reddit_comment_worker,
                                 args=(idx, h, q_liz, submitter, delay),
                                 name='#t%.2d' % idx)
            c_threads.append(t)
            t.start()

        # loading_indicator.update(20, 'Filtered %d comments' % (filtered_posts))
        log(repr(domains_d))

        # check the queue to determine progress
        break_counter = 0  # to avoid an infinite loop
        expected_listitems = (comments_count - filtered_posts)
        if expected_listitems > 0:
            loading_indicator.set_tick_total(expected_listitems)
            last_queue_size = 0
            while q_liz.qsize() < expected_listitems:
                if break_counter >= 100:
                    break
                # each change in the queue size gets a tick on our progress track
                if last_queue_size < q_liz.qsize():
                    items_added = q_liz.qsize() - last_queue_size
                    loading_indicator.tick(items_added, 'Parsing')
                else:
                    break_counter += 1
                last_queue_size = q_liz.qsize()
                xbmc.sleep(50)

        # wait for all threads to finish before collecting the list items
        for idx, t in enumerate(c_threads):
            # log('  joining %s' % t.getName())
            t.join(timeout=20)
        xbmc_busy(False)

        # compare the number of entries to the returned results
        if q_liz.qsize() != expected_listitems:
            log('some threads did not return a listitem. total comments:%d expecting(%d) but only got(%d)' % (comments_count, expected_listitems, q_liz.qsize()))
            # for t in threads: log('isAlive %s %s' % (t.getName(), repr(t.isAlive())))

        li = [liz for idx, liz in sorted(q_liz.queue)]
        # log(repr(li))
        with q_liz.mutex:
            q_liz.queue.clear()
    except Exception as e:
        log('  ' + str(e))

    # it is important to close xbmcgui.DialogProgressBG
    loading_indicator.end()

    # This portion is abandoned for now. The initial plan was a textbox with
    # auto-height in a grouplist to mimic the comment tree, but I cannot
    # figure out how links can be followed.
    from guis import comments_GUI2
    ui = comments_GUI2('view_464_comments_grouplist.xml', addon_path,
                       defaultSkin='Default', defaultRes='1080i', listing=li, id=55)
    # ui = comments_GUI2('aaa.xml', addon_path, defaultSkin='Default', defaultRes='1080i', listing=li, id=55)
    ui.title_bar_text = post_title
    ui.doModal()
    del ui
    return
def generate(self, lemma, parse, allow_form_override=True, context=None):
    answers = []
    stems = None
    accent_override = None
    is_enclitic = False
    ending_override = None

    if lemma in self.lexicon:
        if allow_form_override:
            answer = self.lexicon[lemma].get("forms", {}).get(parse)
            if answer:
                return answer
        stems = self.regex_list(lemma, parse, context)
        if "." in parse:
            accents = self.lexicon[lemma].get("accents", {}).get(parse.split(".")[0])
            if accents == "enclitic":
                is_enclitic = True
            else:
                accent_override = accents
        ending_override = self.lexicon[lemma].get("endings", {}).get(parse)

    if stems is None:
        return
    else:
        stems = stems.split("/")

    if parse not in stemming_rules:
        return

    for stem in stems:
        stem = debreath(stem)
        pairs = stemming_rules[parse]
        while isinstance(pairs, dict) and "ref" in pairs:
            if pairs["ref"] in stemming_rules:
                pairs = stemming_rules[pairs["ref"]]
            else:
                # @@@ raise error?
                return
        base_endings = []
        default = []
        for rule in pairs:
            s1, s234, s5 = rule.split("|")
            s2, s34 = s234.split(">")
            s3, s4 = s34.split("<")

            if stem.endswith(strip_accents(s1 + s2)):
                if s2:
                    base = stem[:-len(s2)]
                else:
                    base = stem
            else:
                continue

            if ending_override:
                ending_list = ending_override.split("/")
            else:
                ending_list = [s3 + s5]

            if s1 + s2:
                base_endings.append((base, ending_list))
            else:
                default.append((base, ending_list))

        # only use default if there are no other options
        if len(base_endings) == 0 and len(default) > 0:
            base_endings = default

        for base, ending_list in base_endings:
            for ending in ending_list:
                if accent(ending):
                    answers.append((base + ending).replace("|", ""))
                elif is_enclitic:
                    answers.append(make_oxytone(base + ending).replace("|", ""))
                else:
                    if parse[2] == "P":
                        if accent_override:
                            answers.append(persistent(base + ending, accent_override))
                        elif parse == "AAP.NSM" and ending == "ων":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "AAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "PAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse[0:3] == "AAP" and parse != "AAP.NSM":
                            # calculate NSM
                            nsms = self.generate(lemma, "AAP.NSM", context=context)
                            nsms = nsms.split("/")
                            for nsm in nsms:
                                if nsm.endswith(("ών", "ούς")):
                                    answers.append(persistent(base + ending, nsm))
                                else:
                                    answers.append(persistent(base + ending, lemma))
                        elif parse[0:3] == "PAP" and parse != "PAP.NSM":
                            # calculate NSM
                            nsms = self.generate(lemma, "PAP.NSM").split("/")
                            for nsm in nsms:
                                nsm = strip_length(nsm)
                                answers.append(persistent(base + ending, nsm))
                        else:
                            answers.append(recessive(base + ending, default_short=True))
                    elif parse[0:3] in ["AAN", "XAN", "XMN", "XPN"]:
                        answers.append(on_penult(base + ending, default_short=True))
                    elif parse[0:3] == "PAN" and stem.endswith("!"):
                        answers.append(on_penult(base + ending, default_short=True))
                    else:
                        answers.append(recessive(base + ending, default_short=True))

    return "/".join(remove_duplicates(rebreath(w) for w in answers))