Exemplo n.º 1
0
def getPrecision(project_id):
    """
    calculates the precision as the share of judged sample entries that are flagged as relevant and saves it
    together with the underlying counts in the relevance measure of the project
    :param project_id: the ID of the current project
    :return: a JSON formatted relevance measure object.
    """
    # the number of collected EIDs is stored as the number of search results
    collected_eids = eids_service.load_eid_list(project_id)

    # continue with the stored measure, or start a fresh one if none is stored
    measure = relevance_measure_service.load_relevance_measure(project_id)
    if measure is None:
        measure = RelevanceMeasure()
    measure.number_of_search_results = len(collected_eids)

    judgements = eids_service.load_judgement_file(project_id)
    app.logger.info('project {}: loaded judgements'.format(project_id))

    # count the sample entries and those among them judged as relevant
    sample_size = len(judgements)
    relevant_count = sum(1 for entry in judgements if entry.isRelevant)
    measure.number_sample_entries = sample_size
    measure.number_positive_sample_entries = relevant_count

    # an empty sample yields a precision of 0 instead of a division by zero
    measure.precision = relevant_count / sample_size if sample_size > 0 else 0

    relevance_measure_service.save_relevance_measures(project_id, measure)
    app.logger.info(
        'project {}: calculated relevance measure precision'.format(
            project_id))
    return jsonify(measure)
Exemplo n.º 2
0
def check_test_eids(project_id):
    """
    calculates the recall by comparing the list of EIDs retrieved from the query against the list of test EIDs
    (loaded with the 'test_' prefix), as obtained from the survey
    :param project_id: the ID of the current project
    :return: a JSON formatted relevance measure object.
    """
    test_eids = eids_service.load_eid_list(project_id, 'test_')
    app.logger.info('project {}: loaded test eids'.format(project_id))

    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)
    relevance_measure.number_test_entries = len(test_eids)

    # look the test EIDs up in a set so that each membership test is cheap
    # (assumes EIDs are hashable, e.g. strings - TODO confirm)
    collected_eids = set(eids)
    relevance_measure.number_test_entries_found = sum(
        1 for test_eid in test_eids if test_eid in collected_eids)

    # guard on the actual divisor: an empty test list must not raise a
    # ZeroDivisionError; without search results nothing is found, so the
    # recall is 0 in that case as well
    if relevance_measure.number_test_entries > 0:
        relevance_measure.recall = relevance_measure.number_test_entries_found / relevance_measure.number_test_entries
    else:
        relevance_measure.recall = 0
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    app.logger.info(
        'project {}: calculated relevance measure recall'.format(project_id))
    return jsonify(relevance_measure)
def check_sample_eids(project_id):
    """
    counts the judged sample entries and those among them flagged as relevant and saves both counts in the
    relevance measure of the project
    :param project_id: the ID of the current project
    :return: the attributes of the relevance measure object as JSON
    """
    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)
    judgement_list = eids_service.load_judgement_file(project_id)
    # NOTE(review): the measure is written by attribute above and by key
    # below; the key access is kept as it was - confirm that RelevanceMeasure
    # supports item assignment
    relevance_measure['number_sample_entries'] = len(judgement_list)
    # count from zero on every call; adding to the previously stored value
    # would inflate the number each time the sample is checked again
    relevance_measure['number_positive_sample_entries'] = sum(
        1 for judgement in judgement_list if judgement['isRelevant'])
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    return jsonify(relevance_measure.__dict__)
def load_relevance_measure(project_id):
    """
    loads the relevance measure of a project from its relevance_measures.json file
    :param project_id: the ID of the current project
    :return: a RelevanceMeasure built from the stored values, or an empty dict if the file does not exist or does
     not contain valid JSON
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    path_to_file = location + '/out/' + project_id + '/relevance_measures.json'
    # open() has to be inside the try block: it is the call that raises
    # FileNotFoundError when no measure has been saved for the project yet
    try:
        with open(path_to_file) as json_file:
            stored_values = json.load(json_file)
    except (FileNotFoundError, JSONDecodeError):
        # NOTE(review): the callers visible in this file test the result with
        # "is None"; the empty dict is kept so that the return value does not
        # change - confirm which fallback the callers actually expect
        return {}
    return RelevanceMeasure(**stored_values)
def check_test_eids(project_id):
    """
    calculates the recall as the share of test EIDs (loaded with the 'test_' prefix) that are contained in the
    collected EIDs and saves it together with the underlying counts
    :param project_id: the ID of the current project
    :return: the attributes of the relevance measure object as JSON
    """
    # load test eids
    test_eids = eids_service.load_eid_list(project_id, 'test_')

    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)
    relevance_measure.number_test_entries = len(test_eids)

    # count from zero on every call instead of adding to the stored value;
    # the set makes each membership test cheap
    # (assumes EIDs are hashable, e.g. strings - TODO confirm)
    collected_eids = set(eids)
    relevance_measure.number_test_entries_found = sum(
        1 for test_eid in test_eids if test_eid in collected_eids)

    # guard on the actual divisor: an empty test list must not raise a
    # ZeroDivisionError; without search results nothing is found, so the
    # recall is 0 in that case as well
    if relevance_measure.number_test_entries > 0:
        relevance_measure.recall = relevance_measure.number_test_entries_found / relevance_measure.number_test_entries
    else:
        relevance_measure.recall = 0
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    return jsonify(relevance_measure.__dict__)
def query_execution(project_id):
    """
    runs the saved Scopus queries of a project, stores the EIDs found per query group and in total, and updates
    the relevance measure, the status and the project flags accordingly
    :param project_id: the ID of the current project
    :return: a Response built from {"status": "FINISHED"} with a status of 204
    """
    app.logger.info('project {}: running queries'.format(project_id))
    # the Scopus search strings that were saved for this project
    stored_queries = query_service.load_scopus_queries(project_id)

    # mark the project as "collection in progress" and persist that
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # write the initial status
    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    # EIDs of all searches; duplicates are removed after the loop
    all_eids = []

    for position, string_group in enumerate(stored_queries.search_strings):
        group_eids = []
        for query_text in string_group:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, position, query_text))
            search = scopus.ScopusSearch(query_text,
                                         refresh=True,
                                         field='eid',
                                         view='STANDARD')
            hits = search.results
            if hits is None:
                # nothing found for this search string
                continue
            app.logger.info(
                'project {}: result search {} - {} entries found'.format(
                    project_id, position, len(hits)))
            found_eids = [hit.eid for hit in hits]
            all_eids.extend(found_eids)
            group_eids.extend(found_eids)
        # each group of search strings gets its own file, prefixed with the
        # search id at the same position
        group_prefix = str(stored_queries.search_ids[position]) + '_'
        eids_service.save_eid_list(project_id=project_id,
                                   eids=set(group_eids),
                                   prefix=group_prefix)

    # a set drops the EIDs that were returned by more than one search
    unique_eids = set(all_eids)

    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(unique_eids)))

    # persist the complete list of EIDs
    eids_service.save_eid_list(project_id=project_id, eids=unique_eids)

    # a new relevance measure that holds only the total number of results
    relevance_measure = RelevanceMeasure(
        number_of_search_results=len(unique_eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # store the total in the current status ...
    status.total = relevance_measure.number_of_search_results
    status_service.save_status(project_id, status)

    # ... and then save a new status that marks the collection as done
    status = Status("EIDS_COLLECTED")
    status_service.save_status(project_id, status)

    # update the project flags and persist the project
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)