import csv
import os
import readline

import bing
import utils

# MyCompleter, simple_mistakes and victor_exceptions are assumed to be
# defined elsewhere in this module.


def resolve_names_in_file(filename='authorsUnresolvedAff.csv',
                          exceptions_filename='exceptions_alias.csv'):
    # Load previously resolved affiliations so the same string is never asked twice.
    already_parsed = {}
    if os.path.isfile(exceptions_filename):
        with open(exceptions_filename, 'rt') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=':')
            for row in spamreader:
                old, new = row
                already_parsed[old] = new

    (i_g_dict, g_i_dict, g_city_dict, g_country_dict,
     alias_g_dict, labels_g_dict) = utils.consolidated_names()
    all_insti = list(i_g_dict.keys())
    alias_names = list(alias_g_dict.keys())
    labels_names = list(labels_g_dict.keys())
    all_names = all_insti + alias_names + labels_names

    # Tab-complete over every known institution name, alias and label.
    completer = MyCompleter(all_names)
    readline.set_completer_delims('')
    readline.set_completer(completer.complete)
    readline.parse_and_bind('tab: complete')

    # Split the workload between two annotators.
    VICTOR = False
    if VICTOR:
        start_pt, end_pt = 0, 1000
    else:
        start_pt, end_pt = 1000, 2167

    year_list, title_list, author_list, n_aff_list, email_list = \
        victor_exceptions(filename)

    with open(exceptions_filename, 'at') as csvfile:
        writer = csv.writer(csvfile, delimiter=':')
        new_aff_list = n_aff_list[start_pt:end_pt]
        for id_prev, noidea in enumerate(new_aff_list):
            idx = id_prev + start_pt
            if noidea not in already_parsed:
                testname = simple_mistakes(noidea)
                if testname in all_names:
                    # Known name: resolve aliases/labels to the canonical institution.
                    if testname in all_insti:
                        uinput = testname
                    else:
                        try:
                            uinput = g_i_dict[alias_g_dict[testname]]
                        except KeyError:
                            uinput = g_i_dict[labels_g_dict[testname]]
                else:
                    # Unknown name: open a Bing search for context, then ask the user.
                    a_noidea = author_list[idx] + '| \n'
                    y_noidea = year_list[idx] + '| '
                    t_noidea = title_list[idx] + '| '
                    bing.search(noidea + ' ' + author_list[idx], 3)
                    print(idx)
                    uinput = input(y_noidea + t_noidea + a_noidea + noidea +
                                   ' | Correction : ')
                if uinput != '':
                    already_parsed[noidea] = uinput.lower()
                    writer.writerow([noidea, uinput.lower()])
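# Hypothetical sketch: MyCompleter. Both resolvers in this file use a
# MyCompleter class that is not shown. A minimal sketch, assuming it follows
# the standard readline completer protocol (complete(text, state) returns the
# state-th match for the current input); the substring-matching strategy is
# an assumption, not the original code.
class MyCompleter:
    def __init__(self, options):
        self.options = sorted(options)
        self.matches = []

    def complete(self, text, state):
        if state == 0:
            # First call for this prefix: rebuild the candidate list.
            if text:
                self.matches = [o for o in self.options
                                if text.lower() in o.lower()]
            else:
                self.matches = self.options[:]
        return self.matches[state] if state < len(self.matches) else None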
def victor_exceptions_2018(filename='affiliationsNotInDB_2018.csv',
                           exceptions_filename='exceptions_alias.csv'):
    # Load previously resolved affiliations so the same string is never asked twice.
    already_parsed = {}
    if os.path.isfile(exceptions_filename):
        with open(exceptions_filename, 'rt') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=':')
            for row in spamreader:
                old, new = row
                already_parsed[old] = new

    (i_g_dict, g_i_dict, g_city_dict, g_country_dict,
     alias_g_dict, labels_g_dict) = utils.consolidated_names()
    all_insti = list(i_g_dict.keys())
    alias_names = list(alias_g_dict.keys())
    labels_names = list(labels_g_dict.keys())
    all_names = all_insti + alias_names + labels_names

    # Tab-complete over every known institution name, alias and label.
    completer = MyCompleter(all_names)
    readline.set_completer_delims('')
    readline.set_completer(completer.complete)
    readline.parse_and_bind('tab: complete')

    start_pt = 0
    end_pt = 65

    # Read the unresolved affiliations, skipping the header row.
    with open(filename, 'rt') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        aff_list = []
        for ii, row in enumerate(reader):
            if ii != 0:
                aff_list.append(row[0].lower())

    with open(exceptions_filename, 'at') as csvfile:
        writer = csv.writer(csvfile, delimiter=':')
        new_aff_list = aff_list[start_pt:end_pt]
        for id_prev, noidea in enumerate(new_aff_list):
            idx = id_prev + start_pt
            if noidea not in already_parsed:
                testname = simple_mistakes(noidea)
                if testname in all_names:
                    print('We know this name')
                    if testname in all_insti:
                        uinput = testname
                    else:
                        try:
                            uinput = g_i_dict[alias_g_dict[testname]]
                        except KeyError:
                            uinput = g_i_dict[labels_g_dict[testname]]
                else:
                    bing.search(noidea, 3)
                    print(idx)
                    uinput = input(noidea + ' | Correction : ')
                if uinput != '':
                    already_parsed[noidea] = uinput.lower()
                    writer.writerow([noidea, uinput.lower()])
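# Hypothetical sketch: simple_mistakes. This helper is only referenced above;
# judging from its use it cleans an affiliation string before lookup. The
# substitutions below are illustrative guesses, not the original rules.
import re


def simple_mistakes(name):
    name = name.lower().strip()
    name = re.sub(r'\s+', ' ', name)        # collapse runs of whitespace
    name = re.sub(r'[.,;]+$', '', name)     # drop trailing punctuation
    name = name.replace('univ.', 'university')  # expand one common abbreviation
    return name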
from urllib.parse import quote

from django.shortcuts import render

# Search, BingRank, PopRank, Vote, bing and getAlexaRank are assumed to be
# imported at module level from the app's models and helpers.


def search(request):
    if 'query' in request.GET:
        q = request.GET['query']
        Search(query=q).save()  # record search and timestamp

        results = bing.search(q, count=25)

        # Augment each Bing result with its Alexa rank and original Bing position.
        rankedResults = []
        for bing_rank, result in enumerate(results, 1):
            t = int(getAlexaRank(result[0]))
            rankedResults.append(result + (t, bing_rank))
        # Re-order by the Alexa rank value, largest first.
        rankedResults.sort(key=lambda r: r[3], reverse=True)

        finalResults = []
        for rank, result in enumerate(rankedResults, 1):
            # Wrap the result URL in a /click/ redirect so clicks can be logged.
            url = '/click/?url=%s&rank=%d&search=%s' % (quote(result[0]), rank, q)
            BingRank(search=q, url=result[0], rank=result[4]).save()
            PopRank(search=q, url=result[0], rank=rank).save()
            # Append the original URL on the end for the voting mechanism,
            # along with the current up/down vote counts.
            finalResults.append(
                (url,) + result[1:] + (quote(result[0]),)
                + (Vote.objects.filter(link__startswith=result[0])
                       .filter(vote=True).count(),)
                + (Vote.objects.filter(link__startswith=result[0])
                       .filter(vote=False).count(),))

        context = {'results': finalResults, 'query': q}
    else:
        context = {}
    return render(request, 'search/search.html', context)
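# Hypothetical sketches of the four models the view assumes. Their fields are
# inferred from how the view uses them; field types, sizes and the timestamp
# field name are assumptions, not the original definitions.
from django.db import models


class Search(models.Model):
    query = models.CharField(max_length=255)
    timestamp = models.DateTimeField(auto_now_add=True)  # "record search and timestamp"


class BingRank(models.Model):
    search = models.CharField(max_length=255)
    url = models.URLField()
    rank = models.IntegerField()   # original Bing position


class PopRank(models.Model):
    search = models.CharField(max_length=255)
    url = models.URLField()
    rank = models.IntegerField()   # position after Alexa re-ranking


class Vote(models.Model):
    link = models.URLField()
    vote = models.BooleanField()   # True = upvote, False = downvote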
import re

import bing


def search_profile(keywords):
    # Search Bing for the keywords plus "Linkedin" and return the first
    # result whose URL looks like a LinkedIn profile (contains "/in/").
    query = ' '.join(keywords)
    query += ' Linkedin'
    in_regex = re.compile(r'/in/')
    for r in bing.search(query):
        if in_regex.search(r['url']):
            return r
    return None
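# Usage sketch; the names are illustrative. Assumes, as the function itself
# does, that bing.search yields dicts with at least a 'url' key.
profile = search_profile(['Jane', 'Doe', 'Acme Corp'])
if profile is not None:
    print(profile['url'])
else:
    print('No LinkedIn profile found in the results.')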
def keywords_search(site, keywords=None):
    # Avoid a mutable default argument; uses the re and bing imports above.
    keywords = keywords or []
    query = _keywords_query(site, keywords)
    result = {k: '' for k in keywords}
    result['_count'] = 0
    result['Domain'] = site
    for r in bing.search(query):
        snippet = r.get('snippet', '')
        for k in keywords:
            if not result[k] and re.search(k, snippet, re.I):
                # Record the first URL whose snippet mentions this keyword.
                result[k] = r.get('url')
                result['_count'] += 1
    return result
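# Hypothetical sketch: _keywords_query. This private helper is not shown.
# A plausible stand-in that restricts the search to the target domain with
# Bing's site: operator; the original query format may differ.
def _keywords_query(site, keywords):
    return 'site:{} {}'.format(site, ' '.join(keywords))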
from bing import search

# One keyword phrase per line; print the top search result for each.
with open('keywords.txt') as f:
    keywords = [line.rstrip('\n') for line in f]

for words in keywords:
    for url in search(words, num=1, start=0):
        print(url)