Пример #1
0
def resolve_names_in_file(filename='authorsUnresolvedAff.csv',
                          exceptions_filename='exceptions_alias.csv'):
    """Interactively map unresolved affiliation names to known institutions.

    Affiliations already present in ``exceptions_filename`` are skipped.
    For every other affiliation in the hard-coded slice of the list returned
    by ``victor_exceptions(filename)``, the name is first normalised with
    ``simple_mistakes``; if the result is a known institution, alias or
    label it is resolved automatically, otherwise a Bing search is run and
    the user is prompted for a correction (with tab completion over all
    known names).  Each non-empty answer is appended, lower-cased, to
    ``exceptions_filename`` as an ``old:new`` row.

    Args:
        filename: file handed to ``victor_exceptions`` to obtain the
            year/title/author/affiliation/email lists.
        exceptions_filename: ':'-delimited CSV of already resolved names;
            read at start-up (if it exists) and appended to.
    """
    already_parsed = {}
    if os.path.isfile(exceptions_filename):
        # Read the file that was actually checked above, not a hard-coded
        # path.  newline='' is what the csv module expects for its files.
        with open(exceptions_filename, 'rt', newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=':'):
                if not row:
                    # A blank line yields an empty row; nothing to unpack.
                    continue
                old, new = row
                already_parsed[old] = new

    (i_g_dict, g_i_dict, g_city_dict, g_country_dict, alias_g_dict,
     labels_g_dict) = utils.consolidated_names()
    all_insti = list(i_g_dict.keys())
    alias_names = list(alias_g_dict.keys())
    labels_names = list(labels_g_dict.keys())
    all_names = all_insti + alias_names + labels_names
    # Set copy for O(1) membership tests inside the loop; the completer
    # still receives the ordered list.
    known_names = set(all_names)

    completer = MyCompleter(all_names)
    # No delimiters: the whole input line is completed as a single token.
    readline.set_completer_delims('')
    readline.set_completer(completer.complete)
    readline.parse_and_bind('tab: complete')

    # Hard-coded split of the work list between two people.
    VICTOR = False
    if VICTOR:
        start_pt = 0
        end_pt = 1000
    else:
        start_pt = 1000
        end_pt = 2167

    year_list, title_list, author_list, n_aff_list, email_list = victor_exceptions(
        filename)
    with open(exceptions_filename, 'at', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=':')
        # idx is the position in the full lists, not in the slice.
        for idx, noidea in enumerate(n_aff_list[start_pt:end_pt], start_pt):
            if noidea in already_parsed:
                continue
            testname = simple_mistakes(noidea)
            if testname in i_g_dict:
                # Already a canonical institution name.
                uinput = testname
            elif testname in known_names:
                # Alias or label: go through its key in g_i_dict to reach
                # the institution name, trying aliases before labels.
                try:
                    uinput = g_i_dict[alias_g_dict[testname]]
                except KeyError:
                    uinput = g_i_dict[labels_g_dict[testname]]
            else:
                a_noidea = author_list[idx] + '| \n'
                y_noidea = year_list[idx] + '| '
                t_noidea = title_list[idx] + '| '
                bing.search(noidea + ' ' + author_list[idx], 3)
                print(idx)
                uinput = input(
                    y_noidea + t_noidea + a_noidea + noidea +
                    ' | Correction :  ')
            # An empty answer means "skip for now": nothing is recorded.
            if uinput != '':
                already_parsed[noidea] = uinput.lower()
                writer.writerow([noidea, uinput.lower()])
Пример #2
0
def victor_exceptions_2018(filename='affiliationsNotInDB_2018.csv',
                           exceptions_filename='exceptions_alias.csv'):
    """Interactively resolve the 2018 affiliations missing from the database.

    Reads the first column of ``filename`` (skipping its first row and
    lower-casing every value), then walks a hard-coded slice of that list.
    Affiliations already present in ``exceptions_filename`` are skipped.
    The rest are normalised with ``simple_mistakes``; known institutions,
    aliases and labels are resolved automatically, otherwise a Bing search
    is run and the user is prompted for a correction (with tab completion
    over all known names).  Each non-empty answer is appended, lower-cased,
    to ``exceptions_filename`` as an ``old:new`` row.

    Args:
        filename: ','-delimited CSV whose first column holds the
            affiliation strings; its first row is skipped.
        exceptions_filename: ':'-delimited CSV of already resolved names;
            read at start-up (if it exists) and appended to.
    """
    already_parsed = {}
    if os.path.isfile(exceptions_filename):
        # Read the file that was actually checked above, not a hard-coded
        # path.  newline='' is what the csv module expects for its files.
        with open(exceptions_filename, 'rt', newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=':'):
                if not row:
                    # A blank line yields an empty row; nothing to unpack.
                    continue
                old, new = row
                already_parsed[old] = new

    (i_g_dict, g_i_dict, g_city_dict, g_country_dict, alias_g_dict,
     labels_g_dict) = utils.consolidated_names()
    all_insti = list(i_g_dict.keys())
    alias_names = list(alias_g_dict.keys())
    labels_names = list(labels_g_dict.keys())
    all_names = all_insti + alias_names + labels_names
    # Set copy for O(1) membership tests inside the loop; the completer
    # still receives the ordered list.
    known_names = set(all_names)

    completer = MyCompleter(all_names)
    # No delimiters: the whole input line is completed as a single token.
    readline.set_completer_delims('')
    readline.set_completer(completer.complete)
    readline.parse_and_bind('tab: complete')

    # Hard-coded window of affiliations to process in this run.
    start_pt = 0
    end_pt = 65

    with open(filename, 'rt', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the first row
        # Blank lines produce empty rows; skip them instead of failing
        # on row[0].
        aff_list = [row[0].lower() for row in reader if row]

    with open(exceptions_filename, 'at', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=':')
        # idx is the position in the full list, not in the slice.
        for idx, noidea in enumerate(aff_list[start_pt:end_pt], start_pt):
            if noidea in already_parsed:
                continue
            testname = simple_mistakes(noidea)
            if testname in known_names:
                print('We know this name')
                if testname in i_g_dict:
                    # Already a canonical institution name.
                    uinput = testname
                else:
                    # Alias or label: go through its key in g_i_dict to
                    # reach the institution name, aliases before labels.
                    try:
                        uinput = g_i_dict[alias_g_dict[testname]]
                    except KeyError:
                        uinput = g_i_dict[labels_g_dict[testname]]
            else:
                bing.search(noidea, 3)
                print(idx)
                uinput = input(noidea + ' | Correction :  ')
            # An empty answer means "skip for now": nothing is recorded.
            if uinput != '':
                already_parsed[noidea] = uinput.lower()
                writer.writerow([noidea, uinput.lower()])
Пример #3
0
def search(request):
    """Render the search page, re-ranking Bing results by Alexa rank.

    When the request carries a ``query`` GET parameter, the search is
    recorded, up to 25 Bing results are fetched, each one is annotated with
    its Alexa rank and its original Bing position, and the list is sorted
    on the Alexa value in descending order.  For every result a
    ``BingRank`` and a ``PopRank`` row is saved and a tuple is built for
    the template containing: the ``/click/`` redirect URL, the remaining
    result fields, the quoted original URL, and the counts of positive and
    negative votes for links starting with that URL.

    Without a ``query`` parameter the template is rendered with an empty
    context.
    """
    if 'query' not in request.GET:
        return render(request, 'search/search.html', {})

    q = request.GET['query']
    Search(query=q).save()  # record search and timestamp
    results = bing.search(q, count=25)

    # Append (alexa_rank, bing_rank) to every result tuple.
    # NOTE(review): the indices 3 and 4 used below assume each Bing result
    # has exactly three fields -- confirm against bing.search.
    rankedResults = []
    for bing_rank, result in enumerate(results, 1):
        t = int(getAlexaRank(result[0]))
        rankedResults.append(result + (t, bing_rank))

    rankedResults.sort(key=lambda r: r[3], reverse=True)

    # The query goes into a URL query string, so it must be escaped just
    # like the target URL; a raw '&', '#', '+' or space would corrupt it.
    quoted_query = quote(q)

    finalResults = []
    for rank, result in enumerate(rankedResults, 1):
        link = result[0]
        quoted_link = quote(link)
        # for redirect
        url = '/click/?url=%s&rank=%d&search=%s' % (
            quoted_link, rank, quoted_query)
        BingRank(search=q, url=link, rank=result[4]).save()
        PopRank(search=q, url=link, rank=rank).save()

        votes = Vote.objects.filter(link__startswith=link)
        up_votes = votes.filter(vote=True).count()
        down_votes = votes.filter(vote=False).count()

        # add original url on the end for voting mechanism
        finalResults.append(
            (url,) + result[1:] + (quoted_link, up_votes, down_votes))

    context = {'results': finalResults, 'query': q}
    return render(request, 'search/search.html', context)
Пример #4
0
def search_profile(keywords):
    """Return the first Bing hit whose URL contains ``/in/``, or None.

    The keywords are joined with spaces and suffixed with ``' Linkedin'``
    to form the search query.
    """
    profile_path = re.compile(r'\/in\/')
    search_terms = ' '.join(keywords) + ' Linkedin'

    hits = bing.search(search_terms)
    # First matching hit wins; None when nothing matches.
    return next((hit for hit in hits if profile_path.search(hit['url'])),
                None)
Пример #5
0
def keywords_search(site, keywords=None):
    """Search Bing for ``site`` and find the first URL matching each keyword.

    Args:
        site: value stored under ``'Domain'`` in the result and passed to
            ``_keywords_query`` to build the query.
        keywords: iterable of keywords; each one is used as a
            case-insensitive regular-expression pattern against the result
            snippets.  Defaults to no keywords.

    Returns:
        A dict mapping every keyword to the URL of the first result whose
        snippet matches it (``''`` when none does), plus ``'_count'`` (the
        number of results iterated) and ``'Domain'`` (``site``).
    """
    # None instead of a mutable [] default, so no list object is shared
    # between calls.
    if keywords is None:
        keywords = []

    query = _keywords_query(site, keywords)

    result = {k: '' for k in keywords}
    result['_count'] = 0
    result['Domain'] = site

    for r in bing.search(query):
        # Looked up once per result rather than once per keyword; a missing
        # or None snippet is treated as empty text.
        snippet = r.get('snippet') or ''
        for k in keywords:
            # Only the first matching result is kept for each keyword.
            if not result[k] and re.search(k, snippet, re.I):
                result[k] = r.get('url')
        result['_count'] += 1

    return result
Пример #6
0
from bing import search

# Read one search phrase per line; the context manager closes the file
# handle, which the bare open() in a comprehension never did.
with open('keywords.txt') as keywords_file:
    keywords = [line.rstrip('\n') for line in keywords_file]

# Print the results returned for each phrase (num=1, start=0).
for words in keywords:
    for url in search(words, num=1, start=0):
        print(url)