def fulltext(self):
    """Compute the new and deleted bibcodes for each type of error from the
    most recent list of bibcodes compared with the previous most recent list.
    Results are stored in variables that are then used in report.py."""
    for err in conf['FULLTEXT_ERRORS']:
        err_msg = "_".join(err.split('"')[1].split()).replace(
            '-', '_').replace(']', '').replace('[', '')
        dir = conf['AIR_DATA_DIRECTORY'] + "ft/" + err_msg + '/'
        # get the 2 most recent files
        files = sorted(glob.glob(dir + '*.txt'), key=sorter, reverse=True)
        sort(files[0])
        sort(files[1])
        remove_duplicates(files[0])
        remove_duplicates(files[1])
        ft_start = files[1]
        ft_end = files[0]
        ft_new = dir + "new.tsv"
        self.values['new_ft_' + err_msg] = comm(ft_end, ft_start, ft_new)
        ft_fixed = dir + "fixed.tsv"
        self.values['fixed_ft_' + err_msg] = comm(ft_start, ft_end, ft_fixed)
def solve(objective: str):
    """Brute-force algorithm for solving puzzles. Analogous to DFS."""
    global main_game
    states = [GameStatus(main_game)]
    print("Currently inspected:", end='\n')
    while True:
        print('\r%010d' % GameStatus.count, end='', flush=True)
        # Get top status from stack
        try:
            status = states.pop()
        except IndexError:
            steps = []
            break
        # Set 'active' main_game to popped state
        main_game = deepcopy(status.game)
        # Check win condition
        if win_conditions[objective]():
            steps = status.steps
            break
        # expand possibilities
        for card in deepcopy(remove_duplicates(main_game.player.hand)):
            # Check if there is enough mana
            if card.mana_cost <= main_game.player.mana:
                # Get list of valid targets
                if hasattr(card, 'is_valid_target'):
                    valid_targets = [
                        ct for ct in remove_duplicates(main_game.characters)
                        if card.is_valid_target(ct)
                    ]
                else:
                    valid_targets = [None]
                for target in valid_targets:
                    # Play card
                    main_game.player.play_card(card, target=target)
                    new_status = GameStatus(main_game, status.steps)
                    new_status.steps.append({
                        'card': type(card),
                        'target': type(target)
                    })
                    states.append(new_status)
                    # Reset to previous status (to test with other targets)
                    main_game = deepcopy(status.game)
    print('\n')
    return steps
def update_picture_database(album_name):
    """
    Updates the picture database whose field name is 'album_name'.
    Performs a Twitter REST API search with 'album_name' as a hashtag and
    retrieves the new pictures from the results to be added to the Picture
    database.
    """
    hashtag = "#" + album_name
    # Each entry of the 'tweets_result' list has the following contents:
    # [media_url, favorite_count, user_screen_name, tweet_id]
    tweets_result = search(hashtag)

    # remove results that already exist in the database
    pivots = ["tweet_id", "src_url"]
    print "Result tweets have %d results BEFORE duplicate removal." % len(tweets_result)
    for pivot in pivots:
        if len(tweets_result) <= 0:
            print "No new tweets were found."
            break
        tweets_result = remove_duplicates(tweets_result, pivot)
    print "Result tweets have %d results AFTER duplicate removal." % len(tweets_result)

    # Modify picture contents. The modifications are:
    # 1. set the new picture name to be stored
    # 2. extract image contents from the url
    if len(tweets_result) > 0:
        new_pictures = []
        for i in range(len(tweets_result)):
            # set the new img name to be stored
            src = tweets_result[i][0]
            new_img_name = generate_img_filename(src)
            # extract the img content and put it in a new file
            new_img_file = get_img_content(src, new_img_name)
            new_pictures.append(new_img_file)

        # upload the new_pictures list to the S3 bucket
        upload_to_bucket(new_pictures, album_name)
        print "New '%s' album images have been uploaded to the S3 bucket." % album_name

        # upload contents to the database
        i = 0
        album = Album.objects.get(name=album_name)  # get the album object
        if album is not None:
            for img in tweets_result:
                pic = Picture(
                    album=album,
                    url=new_pictures[i][0],
                    src_url=img[0],
                    like_count=img[1],
                    owner=img[2],
                    tweet_id=img[3],
                )
                pic.save()
                print "%s saved to database." % img[0]
                i += 1
            # fire off signal
            abcd = Picture(album=album, url=new_pictures[0], like_count=img[1],
                           owner=img[2], tweet_id=img[3])
            send_email.send(sender=Picture, instance=abcd)
    return
def choose_best_location(locations):
    """Uses Bing Web Search to see if a tagged location is a real location"""
    unique_locations = remove_duplicates(locations)
    key = "2b18d2bbb30f4a82a53845076a562986"
    search_url = "https://api.cognitive.microsoft.com/bing/v7.0/search"
    for location in unique_locations:
        # Prevent exceeding the request quota
        time.sleep(0.3)
        response = requests.get(
            search_url,
            headers={"Ocp-Apim-Subscription-Key": key},
            params={
                "q": " ".join(location),
                "textDecorations": True,
                "textFormat": "HTML",
                "count": 3,
            },
        )
        response.raise_for_status()
        search_results = response.json()
        x = response.headers
        search_items = search_results["webPages"]["value"]
        for item in search_items:
            if "www.cmu.edu" in item["url"]:
                return " ".join(location)
    return None
def hyperNeighborhood(phog, ortholog_type, threshold, taxon_id):
    edges = set()
    hyper_neighbors = phog.get_hyper_neighbors(ortholog_type, threshold)
    if taxon_id:
        taxon = NCBITaxonomy.objects.get(id__exact=taxon_id)
    nodes = {}
    nodes[phog.get_accession(ortholog_type, threshold)] = {}
    nodes[phog.get_accession(ortholog_type, threshold)]['description'] \
        = phog.get_description(ortholog_type, threshold)
    if taxon_id:
        nodes[phog.get_accession(ortholog_type, threshold)]['genes_from_taxon'] \
            = remove_duplicates([leaf.sequence_header.identifier()
                                 for leaf in phog.get_contained_leaves_from_taxon(
                                     taxon, ortholog_type, threshold)])
    for type in hyper_neighbors.keys():
        for neighbor in hyper_neighbors[type]:
            nodes[neighbor.get_accession(ortholog_type, threshold)] = {}
            nodes[neighbor.get_accession(ortholog_type, threshold)]['description'] \
                = neighbor.get_description(ortholog_type, threshold)
            if taxon_id:
                nodes[neighbor.get_accession(ortholog_type, threshold)]['genes_from_taxon'] \
                    = remove_duplicates([leaf.sequence_header.identifier()
                                         for leaf in neighbor.get_contained_leaves_from_taxon(
                                             taxon, ortholog_type, threshold)])
    for type in hyper_neighbors.keys():
        for hyper_neighbor in hyper_neighbors[type]:
            edges.add(((phog.get_accession(ortholog_type, threshold),
                        hyper_neighbor.get_accession(ortholog_type, threshold)), type))
            neighbors_of_hyper_neighbors \
                = hyper_neighbor.get_hyper_neighbors(ortholog_type, threshold)
            if type in neighbors_of_hyper_neighbors:
                relevant_neighbors \
                    = neighbors_of_hyper_neighbors[type] & hyper_neighbors[type]
                for neighbor in relevant_neighbors:
                    edges.add(
                        ((hyper_neighbor.get_accession(ortholog_type, threshold),
                          neighbor.get_accession(ortholog_type, threshold)), type))
    ret = {}
    ret['query'] = phog.get_accession(ortholog_type, threshold)
    ret['edges'] = list(edges)
    ret['nodes'] = nodes
    _writer = json.JsonWriter()
    # print ret
    return _writer.write(ret)
def test(tbl='logs'):
    '''
    >>> db, d = test('location')
    for row in d.dict:
        sql = db.is_duplicate('location', row)
        if sql:
            db.query(sql)
    '''
    from utils import unicode_csv_reader, replace_txt, remove_duplicates
    import tablib

    db = SimpleDB(os.path.join(os.getcwd(), 'test.db'))
    if tbl == 'logs':
        f_name = 'test_eternity.csv'
        headers = ('day', 'start_time', 'stop_time', 'duration',
                   'parent', 'activity', 'note', 'tags')
    elif tbl == 'location':
        f_name = 'test_gps.csv'
        headers = ('latitude', 'longitude', 'elevation', 'timestamp')
    else:
        raise Exception, "tbl must be 'logs' or 'location'"

    # get data
    with open(os.path.join(os.getcwd(), f_name), 'r') as f:
        #d = list(set([tuple(row) for row in unicode_csv_reader(f)]))
        d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])
    data = tablib.Dataset(*d, headers=headers)

    # TODO - adjust the replace_txt() function to accept OrderedDicts,
    # since the order of replacement is important.

    # replacement dicts
    parent_dict = {
        u'Media>': u'MEDIA',
        u'MISC - Real Life>': u'REAL_LIFE',
        u'Basic Routine>Meals & Snacks>': u'BASIC',
        u'Basic Routine>': u'BASIC',
        u'Salubrious Living>': u'HEALTH',
    }
    activity_dict = {
        u'RL - MISC - Home': u'HOME',
        u'RL - MISC - Outside': u'OUTSIDE',
        u'へんたい': u'HENTAI',
        u'アニメ': u'ANIME',
        u'Grocery Shopping': u'GROCERY-SHOPPING',
        u'Restaurant': u'RESTAURANT',
        u'Shower & Bathroom': u'SHOWER-BATHROOM'
    }

    # test for duplicates in the data (skip the first row to avoid headers)
    for row in data.dict[1:]:
        if tbl == 'logs':
            row['parent'] = replace_txt(row['parent'], parent_dict)
            row['activity'] = replace_txt(row['activity'], activity_dict)
        sql = db.is_duplicate(tbl, row)
        if sql:
            db.query(sql)
def set_popup(self, device_error):
    # Remove duplicate ids
    ids = utils.remove_duplicates(self.popup_id)
    for id in ids:
        query = {'id': id}
        set = {'$push': {"derror": {'$each': device_error}}}
        self.__client.sh.devices.update(query, set)
    self.popup_id.clear()
def pipeline(self, img):
    # Each detection is ordered in terms of priority. A detection higher on
    # the list will override a similar detection lower down.

    # Initial detection performed on the entire image.
    transposed, padhw, shavedim, resized = preprocess(img, shave=False)
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes = process_output(yolo_output, threshold=0.20, padhw=padhw,
                           shaved=False, shavedim=shavedim)

    # Perform detection on each box in the stored previous boxes.
    for box in self.boxes:
        offset = 150
        shavedim = [
            box.y1 - offset, box.y2 + offset, box.x1 - offset, box.x2 + offset
        ]
        transposed, padhw, shavedim, resized = preprocess(
            img, shave=True, shavedim=shavedim)
        yolo_output = model.predict(np.array([transposed]))[0]
        boxes2 = process_output(yolo_output, threshold=0.35, padhw=padhw,
                                shaved=True, shavedim=shavedim)
        boxes += boxes2

    # Last detection performed on the picture shaved to just the highway ahead.
    transposed, padhw, shavedim, resized = preprocess(img, shave=True,
                                                      shavedim=(350, 500, 500, 1000))
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes2 = process_output(yolo_output, threshold=0.30, padhw=padhw,
                            shaved=True, shavedim=shavedim)
    boxes += boxes2

    # Remove duplicate boxes.
    boxes = remove_duplicates(boxes, img)
    boxes = self.apply_threshold(boxes)
    if (len(boxes) < len(self.boxes)) and self.dropped < 3:
        self.dropped += 1
        boxes = self.boxes
    else:
        self.dropped = 0
    drawn = draw_boxes(boxes, img)
    self.boxes = boxes
    return drawn
def filterAndDropSystematics(self, include='.*', exclude=None, verbose=False):
    nBefore = len(self.systematics)
    anyFilter = include or exclude
    toBeExcluded = filterWithRegexp(self.systematics, exclude) if exclude else []
    systs = ['NOM'] if 'NOM' in self.systematics else []
    if include:
        systs += filterWithRegexp(self.systematics, include)
    if exclude:
        systs = [s for s in systs if toBeExcluded and s not in toBeExcluded]
    self.systematics = systs if anyFilter else self.systematics
    self.systematics = remove_duplicates(self.systematics)
    nAfter = len(self.systematics)
    if verbose:
        print "%s : dropped %d systematics, left with %s" % (self.name, nBefore - nAfter, str(self.systematics))
    assert self.systematics.count('NOM') == 1 or not nBefore, \
        "%s : 'NOM' required %s" % (self.name, str(self.systematics))
def args2params(args):
    """Parses command-line arguments into the 'params' structure."""
    root_dir = os.path.dirname(__file__)
    params = {}
    if args.RANDOM_STATE:
        params['random_state'] = args.RANDOM_STATE
    else:
        params['random_state'] = np.random.randint(2**15 - 1)
    params['learners'] = utils.remove_duplicates(
        [ml for ml in args.LEARNERS.split(',')])  # learners
    params['preps'] = utils.remove_duplicates(
        [prep for prep in args.PREP.split(',')])  # pre-processors
    params['search'] = args.SEARCH
    if params['search'] == 'random':
        if args.PREP:
            params['model_dir'] = 'ml/random_search_preprocessing/'
        else:
            params['model_dir'] = 'ml/random_search/'
    else:
        params['model_dir'] = 'ml/grid_search/'
    params['model_dir'] = os.path.join(root_dir, params['model_dir'])
    if not os.path.isdir(params['model_dir']):
        raise RuntimeError("Models' directory '%s' does not exist"
                           % (params['model_dir']))
    params['input_file'] = args.INPUT_FILE if os.path.isabs(
        args.INPUT_FILE) else os.path.join(root_dir, args.INPUT_FILE)
    params['dataset'] = params['input_file'].split('/')[-1].split('.csv')[0]
    params['results_path'] = '/'.join([args.RDIR, params['dataset']]) + '/'
    params['n_trials'] = args.N_TRIALS
    params['n_combos'] = args.N_COMBOS
    params['label'] = args.LABEL
    params['lsf'] = args.LSF
    params['queue'] = args.QUEUE
    params['n_jobs'] = args.N_JOBS
    params['m'] = args.M
    return params
def test_remove(self):
    l = [
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
    ]
    self.assertEqual(remove_duplicates(l), [
        Unit(10, 10, KEY_PRESS_UNIT_TYPE),
        Unit(10, 11, KEY_PRESS_UNIT_TYPE),
        Unit(11, 10, KEY_RELEASE_UNIT_TYPE),
    ])
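# The snippets in this collection all assume a shared `remove_duplicates`
# helper. A minimal sketch consistent with the expected output of test_remove
# above (first occurrence kept, original order preserved); each project's
# actual helper may differ:
def remove_duplicates(items):
    seen = []
    result = []
    for item in items:
        # Equality-based membership test, so unhashable items also work.
        if item not in seen:
            seen.append(item)
            result.append(item)
    return result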
def __init__(self, options):
    self.options = options
    self.targets = OrderedDict()
    self.buildEnv = buildEnv = BuildEnv(options, self.targets)

    _targets = {}
    targetDef = options.targets
    self.add_targets(targetDef, _targets)
    dependencies = self.order_dependencies(_targets, targetDef)
    dependencies = list(remove_duplicates(dependencies))

    for dep in dependencies:
        if self.options.build_deps_only and dep == targetDef:
            continue
        self.targets[dep] = _targets[dep]
def prepare_sources(self):
    if self.options.skip_source_prepare:
        print("SKIP")
        return

    toolchain_sources = (tlc.source for tlc in self.buildEnv.toolchains if tlc.source)
    for toolchain_source in toolchain_sources:
        print("prepare sources for toolchain {} :".format(toolchain_source.name))
        toolchain_source.prepare()

    sources = (dep.source for dep in self.targets.values() if not dep.skip)
    sources = remove_duplicates(sources, lambda s: s.__class__)
    for source in sources:
        print("prepare sources {} :".format(source.name))
        source.prepare()
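# Several call sites (prepare_sources above, the Netflix scraper and the
# reddit comment harvester below) pass a key function as a second argument.
# A minimal sketch of that keyed variant, assuming the same
# first-occurrence-wins semantics as the plain version; the actual helpers
# may differ:
def remove_duplicates(iterable, key_function=None):
    if key_function is None:
        key_function = lambda item: item
    seen = set()
    result = []
    for item in iterable:
        key = key_function(item)
        # Keys must be hashable (classes, strings, and tuples all are).
        if key not in seen:
            seen.add(key)
            result.append(item)
    return result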
def filterAndDropSystematics(self, include='.*', exclude=None, verbose=False):
    "include and exclude can be either a regex, a single value, or a list"
    nBefore = len(self.systematics)

    def is_regex(exp):
        return exp and '*' in exp

    def is_list(exp):
        return type(exp) == list

    def is_literal_list(exp):
        return (exp and ',' in exp)

    def is_single_value(exp):
        return exp and len(exp)

    def str_to_list(exp):
        return eval("['{0}']".format(exp))

    print 'type include ', type(include), ' type()==list: ', (type(include) == list)
    toBeIncluded = (
        [s for s in self.systematics if s in include] if is_list(include) else
        [s for s in self.systematics if s in str_to_list(include)] if is_literal_list(include) else
        filterWithRegexp(self.systematics, include) if is_regex(include) else
        str_to_list(include) if is_single_value(include) else
        self.systematics)
    toBeExcluded = (
        [s for s in self.systematics if s in exclude] if is_list(exclude) else
        [s for s in self.systematics if s in str_to_list(exclude)] if is_literal_list(exclude) else
        filterWithRegexp(self.systematics, exclude) if is_regex(exclude) else
        str_to_list(exclude) if is_single_value(exclude) else
        [])
    self.systematics = remove_duplicates(
        [s for s in toBeIncluded if s not in toBeExcluded])
    nAfter = len(self.systematics)
    if verbose:
        print "%s : dropped %d systematics, left with %s" % (
            self.name, nBefore - nAfter, str(self.systematics))
    assert self.systematics.count('NOM') == 1 or not nBefore, \
        "%s : 'NOM' required %s" % (self.name, str(self.systematics))
def extract_edges(bw, corners):
    # Extract the contour.
    # TODO: probably should have this passed in from the puzzle, since it
    # already does this. It was done this way b/c the contours don't
    # correspond to the correct pixel locations in this cropped version
    # of the image.
    (_, cnts, _) = cv2.findContours(bw.copy(), cv2.RETR_LIST,
                                    cv2.CHAIN_APPROX_NONE)
    if len(cnts) != 1:
        raise Exception('Found incorrect number of contours.')
    contour = cnts[0]
    contour = utils.remove_duplicates(contour)

    # Out of all of the found corners, find the closest points in the contour;
    # these will become the endpoints of the edges.
    for i in range(len(corners)):
        best = 10000000000
        closest_point = contour[0]
        for j in range(len(contour)):
            d = utils.distance(corners[i], contour[j])
            if d < best:
                best = d
                closest_point = contour[j]
        corners[i] = closest_point

    # We need the beginning of the vector to correspond to the beginning of an edge.
    contour = utils.rotate(contour, utils.find_first_in(contour, corners))

    # assert(corners[0]!=corners[1] && corners[0]!=corners[2] && corners[0]!=corners[3] && corners[1]!=corners[2] &&
    #        corners[1]!=corners[3] && corners[2]!=corners[3]);
    # std::vector<std::vector<cv::Point>::iterator> sections;
    sections = utils.find_all_in(contour, corners)

    # Make the corners go in the correct order
    for i in range(4):
        corners[i] *= sections[i]

    # assert(corners[1]!=corners[0] && corners[0]!=corners[2] && corners[0]!=corners[3] && corners[1]!=corners[2] &&
    #        corners[1]!=corners[3] && corners[2]!=corners[3]);
    edge1 = edge.create_edge(contour, sections[0], sections[1])
    edge2 = edge.create_edge(contour, sections[1], sections[2])
    edge3 = edge.create_edge(contour, sections[2], sections[3])
    edge4 = edge.create_edge(contour, sections[3], len(contour))
    return (edge1, edge2, edge3, edge4)
def start_scrapp(netflixInstance, loginEvent=None, loadedEvent=None, queue=None):
    # loginEvent.wait()
    # TODO: optimize this loop
    movies_sources = []
    for i, so in enumerate(['az', 'za']):
        netflixInstance.driver.get(config.MAIN_URL + '/browse/genre/34399?so=' + so)
        print('STATUS: Finding all movies on netflix...[' + str(i+1) + '/2] ', end='', flush=True)
        scroll_page_until_ends(netflixInstance.driver, .9)
        print('OK')
        print('STATUS: Saving innerHTML of all movies...[' + str(i+1) + '/2] ', end='', flush=True)
        # for each slider-item, save its html code
        movies_sources += list(
            map(lambda p: bs(p.get_attribute('innerHTML'), 'html.parser'),
                netflixInstance.driver.find_elements_by_class_name('slider-item'))
        )
        print('OK')

    # Removing duplicates and sorting by name
    all_sources = utils.remove_duplicates(movies_sources, lambda s: s.find('a')['aria-label'])
    all_sources = sorted(all_sources, key=lambda s: s.find('a')['aria-label'])
    print("TOTAL OF MOVIES = " + str(len(all_sources)))
    print('STATUS: Starting to get information... ')

    if not os.path.exists(config.FOLDER_NAME):
        os.mkdir(config.FOLDER_NAME)

    # For each slider-item, run a thread to get the movie's information.
    # If the number of active threads is greater than max_threads,
    # wait until one of the active threads is released.
    total_len = len(all_sources)
    for i, s_movie in enumerate(all_sources):
        t = Thread(
            target=retrieve_movie,
            args=(s_movie, total_len),
            name='MOVIE_' + utils.safe_movie_name(s_movie.find('a')['aria-label'])
        )
        t.start()
        while active_count() > config.MAX_THREADS:
            sleep(1)
def make_tetrahedron(N=50):
    corners = list(map(np.array, [
        (1, 0, -2**(-.5)),
        (-1, 0, -2**(-.5)),
        (0, 1, 2**(-.5)),
        (0, -1, 2**(-.5))]))
    verts, I = make_tesselated_triangle(corners[1], corners[2], corners[0], 0, N)
    v, i = make_tesselated_triangle(corners[1], corners[3], corners[2], len(verts), N)
    verts += v
    I += i
    v, i = make_tesselated_triangle(corners[3], corners[0], corners[2], len(verts), N)
    verts += v
    I += i
    v, i = make_tesselated_triangle(corners[0], corners[3], corners[1], len(verts), N)
    verts += v
    I += i
    return utils.remove_duplicates(verts, I)
def effective_net_values(net_values, dates, code):
    """
    Args:
        net_values: net values of the fund, as a list
        dates: date of each net value
        code: code of the fund
    Returns:
        effective net values and dates of the fund, as 2 lists
    """
    # sort by date
    net_values.reverse()
    dates.reverse()
    effective_values = []
    effective_dates = []
    # Filter effective net values
    for i in range(len(net_values)):
        if net_values[i] != 1:
            effective_values.extend(net_values[i:])
            effective_dates.extend(dates[i:])
            break
    if not utils.is_date_ascending(effective_dates):
        logger.info("{} dates are not ascending".format(code))
        return [], []
    if utils.has_duplicates(effective_dates):
        if not utils.is_duplicates_identical(effective_dates, effective_values):
            logger.info(
                "{} is deleted because it contains the same dates with different net values"
                .format(code))
            return [], []
        else:
            effective_dates, effective_values = utils.remove_duplicates(
                effective_dates, effective_values)
    length = len(effective_values)
    for i in range(1, length - 1):
        ratio = effective_values[i] / effective_values[i + 1]
        if ratio < 0.5 or ratio > 2:
            logger.info("{} on {} changed {}: {}".format(
                code, [effective_dates[i], effective_dates[i + 1]],
                1 - effective_values[i] / effective_values[i + 1],
                [effective_values[i], effective_values[i + 1]]))
            return [], []
    return effective_values, effective_dates
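# effective_net_values above unpacks two lists from remove_duplicates, i.e. a
# variant that deduplicates a paired (date, value) series together. A minimal
# sketch under that assumption (the first occurrence of each date wins); the
# project's utils module may implement it differently:
def remove_duplicates(dates, values):
    seen = set()
    unique_dates, unique_values = [], []
    for date, value in zip(dates, values):
        if date not in seen:
            seen.add(date)
            unique_dates.append(date)
            unique_values.append(value)
    return unique_dates, unique_values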
def match_stances(stances, member):
    """Filters the member's stances, keeping only those that match a stance
    in stances. The member's stances consist of personal stances
    (member.credo), voting record stances (member.stances), and group
    stances (member.pro_rel_stances).

    Keyword arguments:
        stances -- the list of stances to filter the member stances by
        member  -- the member whose stances will be filtered

    Return:
        A list of all member stances found in stances. The list has
        duplicates removed.
    """
    matches = []
    member_stances = member.credo + member.stances + member.pro_rel_stances
    for stance in stances:
        filter_fun = lambda member_stance: stance.match(member_stance)
        matches += filter(filter_fun, member_stances)
    return remove_duplicates(matches)
def train(self, dataset):
    """Uses a third of the dataset examples for validation and the rest for
    training. Once it has been trained, it holds a SetOfRules obtained by
    converting into rules the DecisionTree produced by a DecisionTreeLearner
    trained on the same training examples. The rules are then pruned
    according to their accuracy on the validation examples."""
    examples = dataset.examples
    total_size = len(examples)
    validation_size = total_size // 3
    training_size = total_size - validation_size
    dataset.examples = examples[:training_size]
    self.validation_examples = examples[training_size:total_size]
    super().train(dataset)
    self.set_of_rules = SetOfRules(dataset, self.tree)
    self.input_names = remove_all(self.attr_names[self.target], self.attr_names)
    self.set_of_rules.rules = remove_duplicates(
        [self.prune(rule) for rule in self.set_of_rules.rules])
    dataset.examples = examples
def tag_with_stanford_tagger(email, original_email, stanford_tags):
    """
    Uses the Stanford tagger as a last resort since it takes a long time
    to process
    """
    names = []
    i = 0
    # Get names from tagged text
    while i < len(stanford_tags):
        name = []
        # Chunk names
        while stanford_tags[i][1] == "PERSON":
            if i + 1 >= len(stanford_tags):
                name.append(stanford_tags[i][0])
                i += 1
                break
            name.append(stanford_tags[i][0])
            i += 1
        i += 1
        if name:
            names.append(name)
    unique_names = remove_duplicates(names)
    if len(unique_names) > 0:
        names = find_speaker_from_names(unique_names, original_email)
        for name in names:
            email = tag_speaker_using_name(name, email)
    return email
def Copying(N, NSet, NNeighbor, PSet, PNeighbor):
    initNodes = NSet + NNeighbor + 1
    G = nx.complete_graph(initNodes)
    for i in range(initNodes, N):
        # Random subset of existing nodes
        randomSet = random.sample(G.nodes(), NSet)
        # Creating the set of neighbors of the random subset
        neighbors = []
        for j in randomSet:
            for neighbor in G.adjacency_list()[j]:
                neighbors.append(neighbor)
        neighbors = utils.remove_duplicates(neighbors)
        # Trimming the neighborhood if it is too large
        if len(neighbors) > NNeighbor:
            neighbors = random.sample(neighbors, NNeighbor)
        G.add_node(i)
        # Forming edges with the subset
        for node in randomSet:
            if random.random() < PSet:
                G.add_edge(i, node)
        # Forming edges with the neighborhood
        for node in neighbors:
            if random.random() < PNeighbor:
                G.add_edge(i, node)
    return G
test_image = cv2.imread('test_images/test3.jpg')
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
plt.imshow(test_image[200:600, 400:1200])
test_image.shape

processed, padhw, shavedim, resized = preprocess(test_image, shave=True)
plt.imshow(resized)
padhw

prediction = model.predict(np.array([processed]))[0]
boxes = process_output(prediction, padhw=padhw, shaved=True)
len(boxes)
boxes = remove_duplicates(boxes, test_image)
img = draw_boxes(boxes, test_image)
plt.figure(figsize=[10, 10])
plt.imshow(img)


class Pipeline:
    def __init__(self):
        self.boxes = []
        self.dropped = 0
        self.history = deque(maxlen=8)
        self.first_frames = True

    def apply_threshold(self, boxes):
        if len(boxes) == 0 and len(self.history) > 0:
            self.history.popleft()
# Binarize the image
ret, thresh = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Find the contours
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Simplify the contours with the algorithm described in the article
contours = u.contour_sifting(contours)

""" EXTRA """
# Remove the duplicate contours
contours = u.remove_duplicates(contours)

# Sort the contours by area
contours_ordered = u.contours_order_by_area(contours)
# Try to extract the FIPs of the QR code, given the sorted contours
contours = u.get_qr_fips(contours_ordered)

# Make a copy of the image
img_qr = img.copy()
# Draw the contours
cv2.drawContours(img_qr, contours, -1, (0, 255, 0), 2)

# Check whether we have contours that satisfy the given constraints
if len(contours) <= 2:
for i, filename in enumerate(choices):
    print("\t{0}. {1}".format(i, filename))
else:
    raise Exception, "\tNO FILES FOUND"

print('\ntype the number of the file you want to import')
FILENUM = int(raw_input('>>> ')[0])
chosen_file = choices[FILENUM]

#-----------------------------------------------------------------------------
# DUMP DATA TO DATABASE
#-----------------------------------------------------------------------------

# get data
with open(chosen_file, 'r') as f:
    d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])

# validate and insert the data
if table_name == 'logs':
    insertEternity(d)
elif table_name == 'location':
    insertLocation(d)
else:
    raise Exception, 'neither logs nor location'

#-----------------------------------------------------------------------------
# VALIDATION AND FORMATTING
#-----------------------------------------------------------------------------
]
xlp = [
    'PG', 'PEP', 'KO', 'WMT', 'MO', 'PM', 'COST', 'CL', 'GIS', 'STZ',
    'CLX', 'KMB'
]
xlv = [
    'JNJ', 'UNH', 'PFE', 'MRK', 'ABT', 'BMY', 'TMO', 'AMGN', 'CI', 'ANTM',
    'GILD', 'CVS'
]
xlf = [
    'BRK.B', 'JPM', 'BAC', 'WFC', 'C', 'GS', 'SPGI', 'CME', 'NLK', 'ICE',
    'AXP'
]
smh = [
    'TSM', 'INTC', 'NVDA', 'ASML', 'AVGO', 'TXN', 'AMD', 'QCOM', 'XLNX',
    'SWKS', 'MU'
]
xtl = [
    'BAND', 'VG', 'TMUS', 'ANET', 'CIEN', 'LITE', 'CCOI', 'FFIV', 'CSCO',
    'JNPR', 'UI'
]
xlu = [
    'NEE', 'D', 'DUK', 'SO', 'AEP', 'EXC', 'SRE', 'WEC', 'ED', 'ES', 'PEG',
    'FE', 'AWK'
]
iyr = [
    'AMT', 'PLD', 'CCI', 'AQIX', 'DLR', 'PSA', 'WELL', 'SPG', 'O', 'PSA',
    'BXP', 'CBRE'
]
tot_list = u.remove_duplicates(
    list(itertools.chain.from_iterable(all_watchlist)))
    product_info.append('availability-missing-data')
    q.put(product_info)


if __name__ == '__main__':
    n_pages = 2
    startTime = time.time()

    # Searching the bestsellers, we will scrape all of the bestseller links first
    p_links = []
    for i in range(1, (n_pages + 1)):
        tmp_p_links = get_bestsellers_links(i)
        p_links += tmp_p_links
    p_links = utils.remove_duplicates(p_links)

    df_bestsellers_links = pd.DataFrame({'Product link page': p_links})
    df_bestsellers_links.to_csv(csv_directory + 'amazon_bestseller_products_links.csv',
                                index=True, encoding='utf-8')

    # Now we have an array (and a .csv file) with all bestseller links, and what
    # we want to do is get the product info. I'll be using threading for more speed.
    m = Manager()
    q = m.Queue()
    p = {}
    for i in range(0, len(p_links)):
        logger.debug("starting thread {}".format(i))
        p[i] = threading.Thread(target=get_product_data, args=(p_links[i], q))
        p[i].start()

    # Join process
f = open(file_name, 'r', encoding='ANSI')
soup = BeautifulSoup(f, 'html.parser')
overview = soup.find('table', attrs={'rules': 'all'})
rows = overview.find_all('tr')
content = []
for row in rows:
    row_content = []
    elements = row.find_all('td')
    for i in range(len(elements)):
        element = elements[i]
        raw = element.text.strip()
        clean_string = clean(raw, i)
        row_content.append(clean_string)
    if not all('' == s or s.isspace() for s in row_content):
        content.append(row_content)
headers = content.pop(0)
content = remove_duplicates(content)
df = pandas.DataFrame(content, columns=headers)
os.chdir("../csv")
df.to_csv(file_name.split('.')[0] + '.csv', index=False, encoding='ANSI')
os.chdir("../xls")
def listLinksInComment(url, name, type_):
    from guis import progressBG
    from reddit import reddit_request
    from utils import clean_str, remove_duplicates, is_filtered
    from default import comments_link_filter

    log('listLinksInComment:%s:%s' % (type_, url))
    post_title = ''
    li = []
    global harvest

    # ShowOnlyCommentsWithlink=False
    # if type_=='linksOnly':
    #     ShowOnlyCommentsWithlink=True

    # url='https://np.reddit.com/r/videos/comments/64j9x7/doctor_violently_dragged_from_overbooked_cia/dg2pbtj/?st=j1cbxsst&sh=2d5daf4b'
    # url=url.split('?', 1)[0]+'.json'+url.split('?', 1)[1]
    # url='https://www.reddit.com/r/Music/comments/4k02t1/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/' + '.json'
    # Only get up to "https://www.reddit.com/r/Music/comments/4k02t1"; do not
    # include "/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/", because
    # we'll have a problem when it looks like this:
    # "https://www.reddit.com/r/Overwatch/comments/4nx91h/ever_get_that_feeling_déjà _vu/"
    # url=re.findall(r'(.*/comments/[A-Za-z0-9]+)',url)[0]
    # UPDATE: you need to convert this: https://www.reddit.com/r/redditviewertesting/comments/4x8v1k/test_test_what_is_déjà_vu/
    # to this: https://www.reddit.com/r/redditviewertesting/comments/4x8v1k/test_test_what_is_d%C3%A9j%C3%A0_vu/
    # Use the safe='' argument in quote_plus to encode only the weird chars part.
    url = urllib.quote_plus(url, safe=':/?&')
    if '?' in url:
        url = url.split('?', 1)[0] + '.json?' + url.split('?', 1)[1]
    else:
        url += '.json'

    xbmc_busy()
    loading_indicator = progressBG('Loading...')
    loading_indicator.update(0, 'Retrieving comments')
    content = reddit_request(url)
    loading_indicator.update(10, 'Parsing')
    if not content:
        loading_indicator.end()
        return
    try:
        xbmc_busy()
        content = json.loads(content)

        # harvest links in the post text (just 1)
        r_linkHunter(content[0]['data']['children'])

        # submitter=content[0]['data']['children'][0]['data']['author']
        submitter = clean_str(content, [0, 'data', 'children', 0, 'data', 'author'])

        # The post title is provided in the json; we'll just use that instead
        # of messages from addLink().
        # post_title=content[0]['data']['children'][0]['data']['title']
        post_title = clean_str(content, [0, 'data', 'children', 0, 'data', 'title'])

        # harvest links in the post itself
        r_linkHunter(content[1]['data']['children'])
        # for i, h in enumerate(harvest):
        #     log('  %d %s %.4d -%s link[%s]' % (i, h[7].ljust(8)[:8], h[0], h[3].ljust(20)[:20], h[2]))

        comments_count_orig = len(harvest)

        # remove duplicate links
        def k2(x):
            return (x[2], x[3])
        harvest = remove_duplicates(harvest, k2)
        comments_count_rd = len(harvest)
        loading_indicator.update(15, 'Removed %d duplicates' % (comments_count_orig - comments_count_rd))

        c_threads = []
        q_liz = Queue()
        comments_count = len(harvest)
        filtered_posts = 0
        for idx, h in enumerate(harvest):
            comment_score = h[0]
            link_url = h[2]
            if comment_score < int_CommentTreshold:
                log('  comment score %d < %d, skipped' % (comment_score, int_CommentTreshold))
                filtered_posts += 1
                continue
            if is_filtered(comments_link_filter, link_url):
                log('  [{0}] is hidden by comments_link_filter'.format(link_url))
                filtered_posts += 1
                continue
            # count how many times we're hitting the same domain
            domain, domain_count = count_links_from_same_domain_comments(link_url)
            delay = compute_anti_dos_delay(domain, domain_count)
            # have threads process each comment post
            t = threading.Thread(target=reddit_comment_worker,
                                 args=(idx, h, q_liz, submitter, delay),
                                 name='#t%.2d' % idx)
            c_threads.append(t)
            t.start()

        # loading_indicator.update(20, 'Filtered %d comments' % (filtered_posts))
        log(repr(domains_d))

        # check the queue to determine progress
        break_counter = 0  # to avoid an infinite loop
        expected_listitems = (comments_count - filtered_posts)
        if expected_listitems > 0:
            loading_indicator.set_tick_total(expected_listitems)
            last_queue_size = 0
            while q_liz.qsize() < expected_listitems:
                if break_counter >= 100:
                    break
                # each change in the queue size gets a tick on our progress track
                if last_queue_size < q_liz.qsize():
                    items_added = q_liz.qsize() - last_queue_size
                    loading_indicator.tick(items_added, 'Parsing')
                else:
                    break_counter += 1
                last_queue_size = q_liz.qsize()
                xbmc.sleep(50)

        # wait for all threads to finish before collecting the list items
        for idx, t in enumerate(c_threads):
            # log('  joining %s' % t.getName())
            t.join(timeout=20)
        xbmc_busy(False)

        # compare the number of entries to the returned results
        if q_liz.qsize() != expected_listitems:
            log('some threads did not return a listitem. total comments:%d expecting(%d) but only got(%d)' % (comments_count, expected_listitems, q_liz.qsize()))
            # for t in threads: log('isAlive %s %s' % (t.getName(), repr(t.isAlive())))

        li = [liz for idx, liz in sorted(q_liz.queue)]
        # log(repr(li))
        with q_liz.mutex:
            q_liz.queue.clear()
    except Exception as e:
        log('  ' + str(e))

    # it is important to close xbmcgui.DialogProgressBG
    loading_indicator.end()

    # This portion is abandoned for now. The initial plan was a textbox with
    # auto-height in a grouplist to mimic the comment tree, but I cannot
    # figure out how links can be followed.
    from guis import comments_GUI2
    ui = comments_GUI2('view_464_comments_grouplist.xml', addon_path,
                       defaultSkin='Default', defaultRes='1080i', listing=li, id=55)
    # ui = comments_GUI2('aaa.xml', addon_path, defaultSkin='Default', defaultRes='1080i', listing=li, id=55)
    ui.title_bar_text = post_title
    ui.doModal()
    del ui
    return
def generate(self, lemma, parse, allow_form_override=True, context=None):
    answers = []
    stems = None
    accent_override = None
    is_enclitic = False
    ending_override = None

    if lemma in self.lexicon:
        if allow_form_override:
            answer = self.lexicon[lemma].get("forms", {}).get(parse)
            if answer:
                return answer
        stems = self.regex_list(lemma, parse, context)
        if "." in parse:
            accents = self.lexicon[lemma].get("accents", {}).get(parse.split(".")[0])
            if accents == "enclitic":
                is_enclitic = True
            else:
                accent_override = accents
        ending_override = self.lexicon[lemma].get("endings", {}).get(parse)

    if stems is None:
        return
    else:
        stems = stems.split("/")

    if parse not in stemming_rules:
        return

    for stem in stems:
        stem = debreath(stem)
        pairs = stemming_rules[parse]
        while isinstance(pairs, dict) and "ref" in pairs:
            if pairs["ref"] in stemming_rules:
                pairs = stemming_rules[pairs["ref"]]
            else:
                # @@@ raise error?
                return
        base_endings = []
        default = []
        for rule in pairs:
            s1, s234, s5 = rule.split("|")
            s2, s34 = s234.split(">")
            s3, s4 = s34.split("<")

            if stem.endswith(strip_accents(s1 + s2)):
                if s2:
                    base = stem[:-len(s2)]
                else:
                    base = stem
            else:
                continue

            if ending_override:
                ending_list = ending_override.split("/")
            else:
                ending_list = [s3 + s5]

            if s1 + s2:
                base_endings.append((base, ending_list))
            else:
                default.append((base, ending_list))

        # only use default if there are no other options
        if len(base_endings) == 0 and len(default) > 0:
            base_endings = default

        for base, ending_list in base_endings:
            for ending in ending_list:
                if accent(ending):
                    answers.append((base + ending).replace("|", ""))
                elif is_enclitic:
                    answers.append(make_oxytone(base + ending).replace("|", ""))
                else:
                    if parse[2] == "P":
                        if accent_override:
                            answers.append(persistent(base + ending, accent_override))
                        elif parse == "AAP.NSM" and ending == "ων":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "AAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "PAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse[0:3] == "AAP" and parse != "AAP.NSM":
                            # calculate NSM
                            nsms = self.generate(lemma, "AAP.NSM", context=context)
                            nsms = nsms.split("/")
                            for nsm in nsms:
                                if nsm.endswith(("ών", "ούς")):
                                    answers.append(persistent(base + ending, nsm))
                                else:
                                    answers.append(persistent(base + ending, lemma))
                        elif parse[0:3] == "PAP" and parse != "PAP.NSM":
                            # calculate NSM
                            nsms = self.generate(lemma, "PAP.NSM").split("/")
                            for nsm in nsms:
                                nsm = strip_length(nsm)
                                answers.append(persistent(base + ending, nsm))
                        else:
                            answers.append(recessive(base + ending, default_short=True))
                    elif parse[0:3] in ["AAN", "XAN", "XMN", "XPN"]:
                        answers.append(on_penult(base + ending, default_short=True))
                    elif parse[0:3] == "PAN" and stem.endswith("!"):
                        answers.append(on_penult(base + ending, default_short=True))
                    else:
                        answers.append(recessive(base + ending, default_short=True))

    return "/".join(remove_duplicates(rebreath(w) for w in answers))