def execute_query(scopus_queries):
    """Run every Scopus search string and collect the EIDs that were found.

    :param scopus_queries: object exposing ``search_strings``, an iterable of
        Scopus query strings
    :return: a ``set`` with the EIDs returned by all searches (no duplicates)
    """
    # Accumulate directly into a set: duplicates are dropped as they arrive and
    # the collection is not re-copied for every search, which the former
    # ``eids = eids + search.get_eids()`` list concatenation did.
    eids = set()
    for search_string in scopus_queries.search_strings:
        search = scopus.ScopusSearch(search_string, refresh=True)
        print(search)
        eids.update(search.get_eids())
    return eids
def retrieve_publications_sample(project_id, query_id):
    """Return a sample of a project's publications as a JSON response.

    Reads two optional query parameters from the current request:
    ``session`` (defaults to ``'default_session_'``) and ``sample_size``
    (defaults to ``100``).

    :param project_id: the ID of the current project
    :param query_id: not used inside this function
    :return: a 200 response whose body is the JSON-encoded Scopus search
        results for the sampled EIDs
    :raises ValueError: if ``sample_size`` is present but not an integer
    """
    session_id = request.args.get('session')
    if session_id is None:
        session_id = 'default_session_'

    # Apply the default BEFORE converting: int(None) raises TypeError, so the
    # default of 100 could never take effect when the parameter was missing.
    raw_sample_size = request.args.get('sample_size')
    sample_size = 100 if raw_sample_size is None else int(raw_sample_size)

    # Reuse the EID list stored for this session; if loading fails for any
    # reason, fall back to generating a fresh sample (deliberate best-effort).
    # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        random_sample_eids = eids_service.load_eid_list(project_id, session_id)
    except Exception:
        random_sample_eids = generate_sample_publication_list(
            project_id, sample_size, session_id)

    search_string = utils.generate_scopus_search_from_eid_list(
        random_sample_eids)
    search = scopus.ScopusSearch(search_string, refresh=True,
                                 project_id=project_id)
    sample_publications_json = json.dumps(search.results, cls=PropertyEncoder)
    return Response(sample_publications_json, status=200,
                    mimetype='application/json')
def prepare_fig(w=1, h=None):
    """Create and return a cleared matplotlib figure of size ``(6 * w, 3 * h)``.

    ``h`` falls back to ``w`` when it is not given. The same size is also
    written to seaborn's ``figure.figsize`` rc setting.
    """
    height = w if h is None else h
    size = (6 * w, 3 * height)
    sns.set(rc={'figure.figsize': size})
    figure = plt.figure(figsize=size)
    plt.clf()
    return figure


def top_k(mapping, k=10):
    """Return at most ``k`` keys of ``mapping``, ordered by descending value."""
    # Sort ascending and then reverse (rather than sorting with reverse=True)
    # so that keys with equal values keep the same relative order as before.
    ranked = sorted(mapping.keys(), key=lambda key: mapping[key])
    ranked.reverse()
    return ranked[:k]


# Let pandas display up to 250 rows.
pd.set_option('display.max_rows', 250)

# Fetch the EIDs matching `query` and shuffle them with a fixed seed.
eids = scopus.ScopusSearch(query).get_eids()
random.seed(0)
random.shuffle(eids)

# Progress bar with one step per EID.
bar = widgets.IntProgress(min=0, max=len(eids), description='Loading')
display(bar)
print('query: {} ({} results)'.format(query, len(eids)))

# Retrieve the full abstract record for every EID, advancing the bar each time.
papers = []
for eid in eids:
    papers.append(scopus.AbstractRetrieval(eid, view='FULL'))
    bar.value += 1
    bar.description = str(bar.value)

print('scopus returned {} results'.format(len(papers)))
def query_execution(project_id):
    """
    executes the defined and saved queries in scopus and stores the EIDs found

    While running, the project is flagged as collecting and the status
    "EIDS_COLLECTING" is saved; afterwards the project flags are updated and
    the status "EIDS_COLLECTED" is saved. One EID list is saved per query
    (file prefix ``<search_id>_``) plus one combined, de-duplicated list.

    :param project_id: the ID of the current project
    :return: an empty response with a status of 204 when the queries were
        executed successfully
    """
    app.logger.info('project {}: running queries'.format(project_id))

    # read the saved Scopus search strings from disk
    scopus_queries = query_service.load_scopus_queries(project_id)

    # retrieve the project from disk, set the booleans and save the project
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # prepare the status file
    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    # collect into sets so that duplicates (within a query and across queries)
    # are dropped as they arrive
    eids = set()
    for index, search_strings in enumerate(scopus_queries.search_strings):
        individual_eids = set()
        for search_string in search_strings:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, index, search_string))
            search = scopus.ScopusSearch(search_string, refresh=True,
                                         field='eid', view='STANDARD')
            # read the results once; they are None when nothing was found,
            # in which case there is nothing to log or to collect
            results = search.results
            if results is None:
                continue
            app.logger.info(
                'project {}: result search {} - {} entries found'.format(
                    project_id, index, len(results)))
            for result in results:
                eids.add(result.eid)
                individual_eids.add(result.eid)
        # persist the EIDs of this individual query under its own prefix
        eids_service.save_eid_list(
            project_id=project_id,
            eids=individual_eids,
            prefix=(str(scopus_queries.search_ids[index]) + '_'))

    # print the results to the command line for logging
    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(eids)))

    # persist the combined EIDs to file
    eids_service.save_eid_list(project_id=project_id, eids=eids)

    # store the total number of results in the relevance measures and save
    # them to disk
    relevance_measure = RelevanceMeasure(number_of_search_results=len(eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # store the total number of results in the status and save it to disk
    status.total = relevance_measure.number_of_search_results
    status_service.save_status(project_id, status)

    # set the final status and save it to disk
    status = Status("EIDS_COLLECTED")
    status_service.save_status(project_id, status)

    # set the project booleans and save the project
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    # 204 means "No Content": the response carries no body. The former dict
    # argument was not a valid body (it would have been iterated as its keys).
    return Response(status=204)