def test_rabbit_matchable_name(self, mocked_func1, mocked_func2):
        '''
        The return value of the function that creates the matchable name is being mocked.
        With this test we ensure that the function create_matchable_name is actually being used.

        Both mocks return the same fake name. Rabbit is run on the main
        record and then on a freshly inserted record carrying the heavily
        modified author name; both records must end up with the same
        personid in aidPERSONIDPAPERS.
        '''
        pid_query = "select personid from aidPERSONIDPAPERS where bibrec=%s"

        mocked_func1.return_value = 'Fake Name'
        mocked_func2.return_value = mocked_func1.return_value

        rabbit([self.main_bibrec], verbose=True)

        first_pid = run_sql(pid_query, (self.main_bibrec, ))[0][0]

        second_marcxml_record = get_new_marc_for_test(
            'Rabbit Test Paper', author_name=self.heavily_modified_name)
        second_bibrec = get_bibrec_for_record(second_marcxml_record,
                                              opt_mode='insert')
        # Registered so the inserted record gets cleaned up later.
        self.bibrecs_to_clean.append(second_bibrec)

        rabbit([second_bibrec], verbose=True)

        second_pid = run_sql(pid_query, (second_bibrec, ))[0][0]
        # assertEqual: the assertEquals alias is deprecated.
        self.assertEqual(first_pid, second_pid)
        def test_rabbit_add_inspireID():
            '''
            An inspire id is added to an author artificially. Then, a record is uploaded with a
            heavily modified name of the person + the same inspire ID. Despite the fact that the name
            is totally different, due to the fact that there is an inspire ID in place,
            the entry shall not change.

            The external id is removed again in a finally clause, so that a
            failing assertion does not leave it attached to the author.
            '''
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record, author_name=self.author_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)
            personid_to_test = get_authors_by_name(self.author_name)[0]

            #PERSONID_EXTERNAL_IDENTIFIER_MAP.values() TODO
            add_external_id_to_author(personid_to_test, 'INSPIREID',
                                      self.ext_id)
            try:
                self.main_marcxml_record = get_modified_marc_for_test(
                    self.main_marcxml_record,
                    author_name=self.heavily_modified_name,
                    ext_id=self.ext_id)
                self.main_bibrec = get_bibrec_for_record(
                    self.main_marcxml_record, opt_mode='replace')
                rabbit([self.main_bibrec], verbose=True)
                # assertEqual: the assertEquals alias is deprecated.
                self.assertEqual(
                    personid_to_test,
                    get_authors_by_name(self.heavily_modified_name)[0])
            finally:
                # Always undo the artificial id, even when the check fails.
                _remove_external_id_from_author(personid_to_test, 'INSPIREID',
                                                self.ext_id)
        def test_rabbit_heavily_modify_author():
            '''
            The author's name is modified heavily. This means that the modified string is significantly
            different from the original. After the run of rabbit, the name in
            aidPERSONIDDATA SHOULD change, since this is a heavy modification.
            The number of person ids must stay the same.
            '''
            number_of_personids_before = get_count_of_pids()
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.heavily_modified_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)

            previous_bibref_value = self.current_bibref_value
            self.current_bibref_value = get_bibref_value_for_name(
                self.heavily_modified_name)
            number_of_personids_after = get_count_of_pids()

            # assertEqual/assertNotEqual: the *Equals aliases are deprecated.
            self.assertNotEqual(previous_bibref_value,
                                self.current_bibref_value)
            self.assertTrue(
                person_in_aidpersonidpapers(self.heavily_modified_name,
                                            self.main_bibrec))
            self.assertTrue(
                person_in_aidpersoniddata(self.heavily_modified_name))
            self.assertFalse(person_in_aidpersoniddata(self.author_name))
            self.assertFalse(
                person_in_aidpersonidpapers(self.slightly_modified_author_name,
                                            self.main_bibrec))
            self.assertEqual(number_of_personids_before,
                             number_of_personids_after)
 def test_rabbit_heavily_modify_coauthors():
     '''
     The co-authors' names are replaced by the heavily modified ones while
     the main author's name is kept. After the run of rabbit the bibref of
     the author must be unchanged, the set of co-author bibrefs must
     differ, the number of person ids must stay the same, and the author
     and every modified co-author must be present in the aidPERSON* tables.
     '''
     number_of_personids_before = get_count_of_pids()
     self.main_marcxml_record = get_modified_marc_for_test(
         self.main_marcxml_record,
         author_name=self.author_name,
         co_authors_names=self.heavily_mod_co_authors_names)
     self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                              opt_mode='replace')
     rabbit([self.main_bibrec], verbose=True)

     previous_bibref_value_of_author = self.current_bibref_value_of_author
     # Snapshot the old values before they are overwritten in place below.
     previous_bibrefs_of_coauthors = deepcopy(
         self.current_bibref_values_of_coauthors)
     for index, _ in enumerate(self.current_bibref_values_of_coauthors):
         self.current_bibref_values_of_coauthors[
             index] = get_bibref_value_for_name(
                 self.heavily_mod_co_authors_names[index])
     number_of_personids_after = get_count_of_pids()

     self.assertTrue(
         person_in_aidpersonidpapers(self.author_name,
                                     self.main_bibrec))
     # assertEqual/assertNotEqual: the *Equals aliases are deprecated.
     self.assertEqual(previous_bibref_value_of_author,
                      self.current_bibref_value_of_author)
     self.assertNotEqual(set(previous_bibrefs_of_coauthors),
                         set(self.current_bibref_values_of_coauthors))
     self.assertEqual(number_of_personids_after,
                      number_of_personids_before)
     self.assertTrue(
         person_in_aidpersonidpapers(self.author_name,
                                     self.main_bibrec))
     self.assertTrue(person_in_aidpersoniddata(self.author_name))
     for coauthor_name in self.heavily_mod_co_authors_names:
         self.assertTrue(
             person_in_aidpersonidpapers(coauthor_name,
                                         self.main_bibrec))
         self.assertTrue(person_in_aidpersoniddata(coauthor_name))
 def setUp(self):
     '''
     Prepare the fixture: take a free author id, attach a fixed ORCID to
     it, insert a test paper carrying that ORCID as identifier, run rabbit
     on the new record and populate the partial MARC caches for it.
     '''
     orcid = '1234-1234-1234-1234'
     self._pid = get_free_author_id()
     self._orcid = orcid
     add_orcid_id_to_author(self._pid, orcid)

     test_marc = get_new_marc_for_test(
         'Orcid test paper',
         author_name='Author, SomeAuthor',
         identifiers=['ORCID:' + orcid])
     record_id = get_bibrec_for_record(test_marc, opt_mode="insert")
     self._rec = record_id

     rabbit([record_id])
     populate_partial_marc_caches([record_id])
Exemplo n.º 6
0
def rabbit_with_log(papers, check_invalid_papers, log_comment, partial=False):
    """
    Run rabbit on the given papers and record the run in the user log.

    The log timestamp is taken before rabbit starts. The logged action is
    'PID_UPDATE_PARTIAL' when `partial` is true, 'PID_UPDATE' otherwise.
    """
    from invenio.bibauthorid_rabbit import rabbit

    extid_personids = _get_personids_to_update_extids(papers)
    started_at = get_sql_time()
    rabbit(papers, check_invalid_papers, extid_personids)

    log_action = 'PID_UPDATE_PARTIAL' if partial else 'PID_UPDATE'
    insert_user_log('daemon', '-1', log_action, 'bibsched', 'status',
                    comment=log_comment, timestamp=started_at)
 def test_rabbit_add_new_paper_with_one_author():
     '''
     Rabbit is run on a record with a new author.
     Checks that the author's name shows up in both aidPERSONIDPAPERS
     (for this record) and aidPERSONIDDATA.
     '''
     rabbit([self.main_bibrec], verbose=True)

     # saved for following tests
     self.current_bibref_value = get_bibref_value_for_name(self.author_name)

     in_papers = person_in_aidpersonidpapers(self.author_name,
                                             self.main_bibrec)
     self.assertTrue(in_papers)
     in_data = person_in_aidpersoniddata(self.author_name)
     self.assertTrue(in_data)
        def test_rabbit_mark_record_as_deleted():
            '''
            The record (author plus heavily modified co-authors) is uploaded
            in 'delete' mode. After the run of rabbit the number of person
            ids must have dropped by five.
            '''
            number_of_personids_before = get_count_of_pids()
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.author_name,
                co_authors_names=self.heavily_mod_co_authors_names)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='delete')
            rabbit([self.main_bibrec], verbose=True)

            number_of_personids_after = get_count_of_pids()
            # assertEqual: the assertEquals alias is deprecated.
            self.assertEqual(number_of_personids_before - 5,
                             number_of_personids_after)
Exemplo n.º 9
0
def safe_disambiguation_iteration():
    """
    Run one tortoise pass surrounded by consistency checks.

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    The checks before and after tortoise are plain assert statements.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    tortoise()
    assert duplicated_tortoise_results_exist()
Exemplo n.º 10
0
def safe_disambiguation_iteration():
    """
    Run one tortoise pass surrounded by consistency checks.

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    The checks before and after tortoise are plain assert statements.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    tortoise()
    assert duplicated_tortoise_results_exist()
Exemplo n.º 11
0
def safe_disambiguation_iteration():
    """
    Run one tortoise pass surrounded by consistency checks.

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair. The checks before
    and after tortoise are plain assert statements.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    tortoise()
    assert check_results()
Exemplo n.º 12
0
def safe_disambiguation_iteration():
    """
    Run one tortoise pass surrounded by consistency checks.

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair. The checks before
    and after tortoise are plain assert statements.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    tortoise()
    assert check_results()
 def test_rabbit_mark_record_as_deleted():
     '''
     A record is deleted. Rabbit should understand that and remove the author from the aidPERSON* tables.
     The number of person ids must drop by exactly one.
     '''
     number_of_personids_before = get_count_of_pids()
     # Only on INSPIRE sites is the MARC rebuilt with the heavily modified
     # name and the external id before the delete upload.
     if config.CFG_INSPIRE_SITE:
         self.main_marcxml_record = get_modified_marc_for_test(
             self.main_marcxml_record,
             author_name=self.heavily_modified_name,
             ext_id=self.ext_id)
     self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                              opt_mode='delete')
     rabbit([self.main_bibrec], verbose=True)
     number_of_personids_after = get_count_of_pids()
     # assertEqual: the assertEquals alias is deprecated.
     self.assertEqual(number_of_personids_before - 1,
                      number_of_personids_after)
    def test_m_names_cache(self, mocked_func, mocked_destroy):
        '''
        For this test we check whether a value is in the cache: after the
        run of rabbit, the matchable name of the author must map to a
        truthy entry in M_NAME_PIDS_CACHE.
        '''
        def do_nothing(*args, **kwargs):
            '''
            A function that does nothing. It substitutes the destroy_mnames_pids_cache,
            so that we can actually take a snapshot of the cache for our test.
            Accepts any arguments so it can stand in regardless of how the
            mocked function is called.
            '''
            pass

        mocked_func.return_value = [9999]
        # Install the function itself. Calling it here (do_nothing()) would
        # assign its return value, None, and install no replacement at all.
        mocked_destroy.side_effect = do_nothing
        rabbit([self.main_bibrec], verbose=True)
        m_name = create_matchable_name(self.author_name)
        self.assertTrue(invenio.bibauthorid_rabbit.M_NAME_PIDS_CACHE[m_name])
Exemplo n.º 15
0
def rabbit_with_log(papers, check_invalid_papers, log_comment, partial=False):
    """
    Run rabbit on the given papers and record the run in the user log.

    The log timestamp is taken before rabbit starts. The logged action is
    'PID_UPDATE_PARTIAL' when `partial` is true, 'PID_UPDATE' otherwise.
    """
    from invenio.bibauthorid_rabbit import rabbit

    extid_personids = _get_personids_to_update_extids(papers)
    started_at = get_db_time()
    rabbit(papers, check_invalid_papers, extid_personids)

    log_action = 'PID_UPDATE_PARTIAL' if partial else 'PID_UPDATE'
    insert_user_log('daemon', '-1', log_action, 'bibsched', 'status',
                    comment=log_comment, timestamp=started_at)
Exemplo n.º 16
0
def test_accuracy():
    """
    Run a pure tortoise pass surrounded by consistency checks and return
    the result of matched_claims().

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )
    from invenio.bibauthorid_merge import matched_claims

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    tortoise(pure=True)
    assert check_results()

    claims = matched_claims()
    return claims
Exemplo n.º 17
0
def test_accuracy():
    """
    Run a pure tortoise pass surrounded by consistency checks and return
    the result of matched_claims().

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )
    from invenio.bibauthorid_merge import matched_claims

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    tortoise(pure=True)
    assert duplicated_tortoise_results_exist()

    claims = matched_claims()
    return claims
Exemplo n.º 18
0
def test_accuracy():
    """
    Run a pure tortoise pass surrounded by consistency checks and return
    the result of matched_claims().

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )
    from invenio.bibauthorid_merge import matched_claims

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    tortoise(pure=True)
    assert duplicated_tortoise_results_exist()

    claims = matched_claims()
    return claims
Exemplo n.º 19
0
def test_accuracy():
    """
    Run a pure tortoise pass surrounded by consistency checks and return
    the result of matched_claims().

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )
    from invenio.bibauthorid_merge import matched_claims

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    tortoise(pure=True)
    assert check_results()

    claims = matched_claims()
    return claims
        def test_rabbit_remove_author_from_paper():
            '''
            The author field of the record is removed.
            Tests whether the author is actually removed by rabbit: the
            number of person ids drops by one and the author's name is gone
            from both aidPERSONIDPAPERS and aidPERSONIDDATA.
            '''
            number_of_personids_before = get_count_of_pids()
            # No author_name is passed when rebuilding the MARC.
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')

            rabbit([self.main_bibrec], verbose=True)
            number_of_personids_after = get_count_of_pids()
            # assertEqual: the assertEquals alias is deprecated.
            self.assertEqual(number_of_personids_before,
                             number_of_personids_after + 1)
            self.assertFalse(
                person_in_aidpersonidpapers(self.author_name,
                                            self.main_bibrec))
            self.assertFalse(person_in_aidpersoniddata(self.author_name))
        def test_rabbit_add_new_paper_with_four_coauthors():
            '''
            Rabbit is run on the main record. Checks that the author and
            every co-author are present in aidPERSONIDPAPERS (for this
            record) and aidPERSONIDDATA, and stores their bibref values
            on the test instance.
            '''
            rabbit([self.main_bibrec], verbose=True)

            self.current_bibref_value_of_author = get_bibref_value_for_name(
                self.author_name)
            author_in_papers = person_in_aidpersonidpapers(self.author_name,
                                                           self.main_bibrec)
            self.assertTrue(author_in_papers)
            self.assertTrue(person_in_aidpersoniddata(self.author_name))

            # One (ext_id, 'i') entry for the author, then one None per co-author.
            author_identifier = (self.ext_id, 'i')
            self.current_bdentifiers = (
                [author_identifier] + [None] * len(self.co_authors_names))

            coauthor_bibrefs = []
            self.current_bibref_values_of_coauthors = coauthor_bibrefs
            for coauthor_name in self.co_authors_names:
                coauthor_bibrefs.append(
                    get_bibref_value_for_name(coauthor_name))
                coauthor_in_papers = person_in_aidpersonidpapers(
                    coauthor_name, self.main_bibrec)
                self.assertTrue(coauthor_in_papers)
                self.assertTrue(person_in_aidpersoniddata(coauthor_name))
 def test_rabbit_add_author_again():
     '''
     The author field of the record is re-added.
     Tests whether the author is added again to aidPERSONIDPAPERS and aidPERSONIDDATA:
     the bibref value must be the same as before and the number of
     person ids must grow by one.
     '''
     number_of_personids_before = get_count_of_pids()
     self.main_marcxml_record = get_modified_marc_for_test(
         self.main_marcxml_record, author_name=self.author_name)
     self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                              opt_mode='replace')
     rabbit([self.main_bibrec], verbose=True)

     previous_bibref_value = self.current_bibref_value
     self.current_bibref_value = get_bibref_value_for_name(
         self.author_name)
     number_of_personids_after = get_count_of_pids()

     # assertEqual: the assertEquals alias is deprecated.
     self.assertEqual(previous_bibref_value, self.current_bibref_value)
     self.assertEqual(number_of_personids_after,
                      number_of_personids_before + 1)
     self.assertTrue(
         person_in_aidpersonidpapers(self.author_name,
                                     self.main_bibrec))
     self.assertTrue(person_in_aidpersoniddata(self.author_name))
Exemplo n.º 23
0
def safe_merger():
    """
    Run merge_static surrounded by consistency checks.

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    The associations are backed up before merging, and the personids are
    compared afterwards with "/tmp/merge_diff" as argument.
    """
    from invenio.bibauthorid_merge import merge_static
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        merger_errors_exist,
        repair_author_paper_associations,
        back_up_author_paper_associations,
        compare_personids,
    )

    assert duplicated_tortoise_results_exist()

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    back_up_author_paper_associations()
    merge_static()
    assert check_author_paper_associations()
    assert merger_errors_exist()
    compare_personids("/tmp/merge_diff")
Exemplo n.º 24
0
def safe_merger():
    """
    Run merge_static surrounded by consistency checks.

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair. The personids are
    copied before merging and compared afterwards with "/tmp/merge_diff"
    as argument.
    """
    from invenio.bibauthorid_merge import merge_static
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        check_merger,
        repair_personid,
        copy_personids,
        compare_personids,
    )

    assert check_results()

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    copy_personids()
    merge_static()
    assert check_personid_papers()
    assert check_merger()
    compare_personids("/tmp/merge_diff")
        def test_rabbit_remove_coauthors_from_paper():
            '''
            The record is re-uploaded with the main author only (no
            co-author names are passed). After the run of rabbit the number
            of person ids must have dropped by four, the author must still
            be present in the aidPERSON* tables and none of the co-authors
            may be.
            '''
            number_of_personids_before = get_count_of_pids()
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record, author_name=self.author_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)
            number_of_personids_after = get_count_of_pids()
            # assertEqual: the assertEquals alias is deprecated.
            self.assertEqual(number_of_personids_before,
                             number_of_personids_after + 4)

            self.assertTrue(
                person_in_aidpersonidpapers(self.author_name,
                                            self.main_bibrec))
            self.assertTrue(person_in_aidpersoniddata(self.author_name))

            for coauthor_name in self.co_authors_names:
                self.assertFalse(
                    person_in_aidpersonidpapers(coauthor_name,
                                                self.main_bibrec))
                self.assertFalse(person_in_aidpersoniddata(coauthor_name))
Exemplo n.º 26
0
def safe_merger():
    """
    Run merge_static surrounded by consistency checks.

    When the author-paper association check fails, the associations are
    repaired first, with a rabbit([]) run before and after the repair.
    The associations are backed up before merging, and the personids are
    compared afterwards with "/tmp/merge_diff" as argument.
    """
    from invenio.bibauthorid_merge import merge_static
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        merger_errors_exist,
        repair_author_paper_associations,
        back_up_author_paper_associations,
        compare_personids,
    )

    assert duplicated_tortoise_results_exist()

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    assert check_author_paper_associations()
    back_up_author_paper_associations()
    merge_static()
    assert check_author_paper_associations()
    assert merger_errors_exist()
    compare_personids("/tmp/merge_diff")
Exemplo n.º 27
0
def safe_merger():
    """
    Run merge_static surrounded by consistency checks.

    When the personid-papers check fails, personid is repaired first,
    with a rabbit([]) run before and after the repair. The personids are
    copied before merging and compared afterwards with "/tmp/merge_diff"
    as argument.
    """
    from invenio.bibauthorid_merge import merge_static
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        check_merger,
        repair_personid,
        copy_personids,
        compare_personids,
    )

    assert check_results()

    papers_ok = check_personid_papers()
    if not papers_ok:
        rabbit([])
        repair_personid()
        rabbit([])

    assert check_personid_papers()
    copy_personids()
    merge_static()
    assert check_personid_papers()
    assert check_merger()
    compare_personids("/tmp/merge_diff")
    def test_m_names_transformations(self):
        '''
        In this test we define two functions and then use them
        as the functions that generate mnames (everything after the first
        entry of M_NAME_FUNCTIONS is replaced by them). After the run of
        rabbit both must have been called.

        The original content of M_NAME_FUNCTIONS is restored afterwards so
        the replacement does not leak into other tests.
        '''
        def m_name_func_1(name):
            m_name_func_1.has_been_called = True
            return invenio.bibauthorid_rabbit.M_NAME_FUNCTIONS[0](name)

        m_name_func_1.has_been_called = False

        def m_name_func_2(name):
            m_name_func_2.has_been_called = True
            return invenio.bibauthorid_rabbit.M_NAME_FUNCTIONS[0](name)

        m_name_func_2.has_been_called = False

        # Snapshot of the module-level list, taken before it is modified.
        original_functions = list(invenio.bibauthorid_rabbit.M_NAME_FUNCTIONS)
        invenio.bibauthorid_rabbit.M_NAME_FUNCTIONS[1:] = [
            m_name_func_1, m_name_func_2
        ]
        try:
            rabbit([self.main_bibrec], verbose=True)
        finally:
            # In-place restore keeps the identity of the list object.
            invenio.bibauthorid_rabbit.M_NAME_FUNCTIONS[:] = original_functions
        self.assertTrue(m_name_func_1.has_been_called)
        self.assertTrue(m_name_func_2.has_been_called)
        def test_rabbit_claim_record():
            '''
            The test record is artificially being claimed. Then, the name of the author is being modified:
                i) slightly
                A slight modification of a claimed record should have the same behavior as before:
                    Name changes in aidPERSONIDPAPERS but not in aidPERSONIDDATA.
                ii) heavily
                Due to the fact that the paper is claimed the canonical name should NOT change in aidPERSONIDDATA.
            '''
            number_of_personids_before = get_count_of_pids()
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record, author_name=self.author_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)

            claim_test_paper(self.main_bibrec)

            # i) slight modification of the claimed record
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.slightly_modified_author_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)

            number_of_personids_after = get_count_of_pids()
            # assertEqual: the assertEquals alias is deprecated.
            self.assertEqual(number_of_personids_before,
                             number_of_personids_after)
            self.assertTrue(
                person_in_aidpersonidpapers(self.slightly_modified_author_name,
                                            self.main_bibrec))
            self.assertFalse(
                person_in_aidpersonidpapers(self.author_name,
                                            self.main_bibrec))
            self.assertTrue(is_test_paper_claimed(self.main_bibrec, 100))

            # ii) heavy modification of the claimed record
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.heavily_modified_name)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)

            self.assertTrue(
                person_in_aidpersonidpapers(self.heavily_modified_name,
                                            self.main_bibrec))
            self.assertFalse(
                person_in_aidpersonidpapers(self.slightly_modified_author_name,
                                            self.main_bibrec))
            self.assertFalse(is_test_paper_claimed(self.main_bibrec, 100))
        def test_rabbit_claim_record():
            '''
            The test record (author plus co-authors) is claimed. Then the
            names of the co-authors are modified:
                i) slightly
                The number of person ids must not change and the paper must
                still be reported as claimed.
                ii) heavily
                The heavily modified co-authors must be present in the
                aidPERSON* tables and the paper must no longer be reported
                as claimed.
            NOTE(review): the 700 passed to is_test_paper_claimed is presumably
            the MARC tag of the co-author field -- confirm against the helper.
            '''
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.author_name,
                co_authors_names=self.co_authors_names)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)
            claim_test_paper(self.main_bibrec)

            # i) slight modification of the co-authors
            number_of_personids_before = get_count_of_pids()
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.author_name,
                co_authors_names=self.slightly_mod_co_authors_names)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)
            number_of_personids_after = get_count_of_pids()
            # assertEqual: the assertEquals alias is deprecated.
            self.assertEqual(number_of_personids_before,
                             number_of_personids_after)
            self.assertTrue(is_test_paper_claimed(self.main_bibrec, 700))
            self.assertTrue(
                person_in_aidpersonidpapers(self.author_name,
                                            self.main_bibrec))
            self.assertTrue(person_in_aidpersoniddata(self.author_name))
            for coauthor_name in self.slightly_mod_co_authors_names:
                self.assertTrue(
                    person_in_aidpersonidpapers(coauthor_name,
                                                self.main_bibrec))
            for coauthor_name in self.co_authors_names:
                self.assertTrue(person_in_aidpersoniddata(coauthor_name))

            # ii) heavy modification of the co-authors
            self.main_marcxml_record = get_modified_marc_for_test(
                self.main_marcxml_record,
                author_name=self.author_name,
                co_authors_names=self.heavily_mod_co_authors_names)
            self.main_bibrec = get_bibrec_for_record(self.main_marcxml_record,
                                                     opt_mode='replace')
            rabbit([self.main_bibrec], verbose=True)

            self.assertTrue(
                person_in_aidpersonidpapers(self.author_name,
                                            self.main_bibrec))
            self.assertTrue(person_in_aidpersoniddata(self.author_name))
            for coauthor_name in self.heavily_mod_co_authors_names:
                self.assertTrue(
                    person_in_aidpersonidpapers(coauthor_name,
                                                self.main_bibrec))
                self.assertTrue(person_in_aidpersoniddata(coauthor_name))
            self.assertFalse(is_test_paper_claimed(self.main_bibrec, 700))
            float(i) / total_updates,
            '%s out of %s (%s)' % (str(i), str(total_updates), str(bibref)))
        try:
            name = get_name_by_bibref(bibref)
        except AssertionError, error:
            if "A bibref must have exactly one name" in error.message:
                records_for_rabbit.add(bibref[1])
            else:
                raise error
        else:
            m_name = create_matchable_name(name)
            run_sql(
                "update aidPERSONIDPAPERS set name=%s, m_name=%s where bibref_table=%s "
                "and bibref_value=%s ", (name, m_name, bibref[0], bibref[1]))
    if records_for_rabbit:
        rabbit(records_for_rabbit)
    logger.update_status(1., 'Finished')

    run_sql(
        "alter table aidPERSONIDDATA modify  data varchar(255) not null default '' "
    )


def estimate():
    """
    Return a rough running-time estimate, in seconds.

    Two SQL queries are budgeted for every row of aidPERSONIDPAPERS, each one
    assumed to cost 2ms in a standard production environment (this already
    includes some safety margin).
    """
    seconds_per_query = 0.002
    row_count = run_sql("select count(*) from aidPERSONIDPAPERS")[0][0]
    return seconds_per_query * (row_count * 2)
Exemplo n.º 32
0
    def setUpClass(cls):

        if cls.run_exec:
            return
        cls.run_exec = True
        cls.verbose = 0
        cls.logger = setup_loggers()
        cls.logger.info('Setting up regression tests...')
        task_set_task_param('verbose', cls.verbose)

        cls.authors = {
            'author1': {
                'name': 'authoraaaaa authoraaaab',
                'inspireID': 'INSPIRE-FAKE_ID1'
            },
            'author2': {
                'name': 'authorbbbba authorbbbbb',
                'inspireID': 'INSPIRE-FAKE_ID2'
            },
            'author3': {
                'name': 'authorcccca authorccccb',
                'inspireID': 'INSPIRE-FAKE_ID3'
            },
            'author4': {
                'name': 'authordddda authorddddb',
                'inspireID': 'INSPIRE-FAKE_ID4'
            },
            'author5': {
                'name': 'authoreeeea authoreeeeb',
                'inspireID': 'INSPIRE-FAKE_ID5'
            },
            'author6': {
                'name': 'authorffffa authorffffb',
                'inspireID': 'INSPIRE-FAKE_ID6'
            },
            'author7': {
                'name': 'authorgggga authorggggb',
                'inspireID': 'INSPIRE-FAKE_ID7'
            },
            'author8': {
                'name': 'authorhhhha authorhhhhb',
                'inspireID': 'INSPIRE-FAKE_ID8'
            },
            'author9': {
                'name': 'authoriiiia authoriiiib',
                'inspireID': 'INSPIRE-FAKE_ID9'
            },
            'author10': {
                'name': 'authorjjjja authorjjjjb',
                'inspireID': 'INSPIRE-FAKE_ID10'
            },
            'author11': {
                'name': 'authorkkkka authorkkkkb',
                'inspireID': 'INSPIRE-FAKE_ID11'
            },
            'author12': {
                'name': 'authorlllla authorllllb',
                'inspireID': 'INSPIRE-FAKE_ID12'
            },
            'author13': {
                'name': 'authormmmma authormmmmb',
                'inspireID': 'INSPIRE-FAKE_ID13'
            },
            'author14': {
                'name': 'authornnnna authornnnnb',
                'inspireID': 'INSPIRE-FAKE_ID14'
            },
            'author15': {
                'name': 'authorooooa authoroooob',
                'inspireID': 'INSPIRE-FAKE_ID15'
            },
            'author16': {
                'name': 'authorppppa authorppppb',
                'inspireID': 'INSPIRE-FAKE_ID16'
            },
            'author17': {
                'name': 'authorqqqqa authorqqqqb',
                'inspireID': 'INSPIRE-FAKE_ID17'
            },
            'author18': {
                'name': 'authorrrrra authorrrrrb',
                'inspireID': 'INSPIRE-FAKE_ID18'
            },
            'author19': {
                'name': 'authorssssa authorssssb',
                'inspireID': 'INSPIRE-FAKE_ID19'
            }
        }
        cls.marc_xmls = dict()
        cls.bibrecs = dict()
        cls.pids = dict()
        cls.bibrefs = dict()

        def set_up_test_hoover_inertia():
            '''
            Fixture 'paper1': a 'Test Paper' record carrying author1's name.
            No identifier arguments are passed for this record.
            '''
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                cls.authors['author1']['name'],
                limit_to_collections=True)
            cls.marc_xmls['paper1'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper1'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper1'] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_duplication():
            '''
            Fixture 'paper2': a 'Test Paper' record carrying author2's name
            together with author2's inspireID, passed as an (id, 'i') pair.
            '''
            author = cls.authors['author2']
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                author['name'],
                None, ((author['inspireID'], 'i'), ),
                limit_to_collections=True)
            cls.marc_xmls['paper2'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper2'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper2'] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_an_unclaimed_paper():
            '''
            Fixture 'paper3': a 'Test Paper' record carrying author3's name
            together with author3's inspireID, passed as an (id, 'i') pair.
            '''
            author = cls.authors['author3']
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                author['name'],
                None, ((author['inspireID'], 'i'), ),
                limit_to_collections=True)
            cls.marc_xmls['paper3'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper3'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper3'] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_a_claimed_paper():
            '''
            Fixture 'paper4': a 'Test Paper' record carrying author4's name
            together with author4's inspireID, passed as an (id, 'i') pair.
            '''
            author = cls.authors['author4']
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                author['name'],
                None, ((author['inspireID'], 'i'), ),
                limit_to_collections=True)
            cls.marc_xmls['paper4'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper4'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper4'] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_unclaimed_papers_with_different_inspireID():
            '''
            Fixtures 'paper5' and 'paper6': two 'Test Paper' records that both
            carry author5's name but different inspireIDs (author5's on
            paper5, author6's on paper6).
            '''
            # (paper key, author providing the name, author providing the id)
            specs = (('paper5', 'author5', 'author5'),
                     ('paper6', 'author5', 'author6'))
            for paper, name_from, id_from in specs:
                marc_xml = get_new_marc_for_test(
                    'Test Paper',
                    cls.authors[name_from]['name'],
                    None, ((cls.authors[id_from]['inspireID'], 'i'), ),
                    limit_to_collections=True)
                cls.marc_xmls[paper] = marc_xml

                bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
                cls.bibrecs[paper] = bibrec
                # The stored MARC is replaced by what add_001_field returns
                # for the freshly inserted bibrec.
                cls.marc_xmls[paper] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_a_claimed_paper_and_unclaimed_paper_with_different_inspireID():
            '''
            Fixtures 'paper7' and 'paper8': two 'Test Paper' records that both
            carry author7's name but different inspireIDs (author7's on
            paper7, author8's on paper8).
            '''
            # (paper key, author providing the name, author providing the id)
            specs = (('paper7', 'author7', 'author7'),
                     ('paper8', 'author7', 'author8'))
            for paper, name_from, id_from in specs:
                marc_xml = get_new_marc_for_test(
                    'Test Paper',
                    cls.authors[name_from]['name'],
                    None, ((cls.authors[id_from]['inspireID'], 'i'), ),
                    limit_to_collections=True)
                cls.marc_xmls[paper] = marc_xml

                bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
                cls.bibrecs[paper] = bibrec
                # The stored MARC is replaced by what add_001_field returns
                # for the freshly inserted bibrec.
                cls.marc_xmls[paper] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_claimed_papers_with_different_inspireID():
            '''
            Fixtures 'paper9' and 'paper10': two 'Test Paper' records that both
            carry author9's name but different inspireIDs (author2's on
            paper9, author10's on paper10).
            '''
            # (paper key, author providing the name, author providing the id)
            # NOTE(review): paper9 takes its inspireID from author2, whereas
            # the sibling fixtures pair paperN with authorN -- confirm this is
            # intentional and not a copy/paste slip.
            specs = (('paper9', 'author9', 'author2'),
                     ('paper10', 'author9', 'author10'))
            for paper, name_from, id_from in specs:
                marc_xml = get_new_marc_for_test(
                    'Test Paper',
                    cls.authors[name_from]['name'],
                    None, ((cls.authors[id_from]['inspireID'], 'i'), ),
                    limit_to_collections=True)
                cls.marc_xmls[paper] = marc_xml

                bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
                cls.bibrecs[paper] = bibrec
                # The stored MARC is replaced by what add_001_field returns
                # for the freshly inserted bibrec.
                cls.marc_xmls[paper] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_vacuum_an_unclaimed_paper_with_an_inspire_id_from_a_claimed_paper():
            '''
            Fixtures 'paper11' and 'paper12': two 'Test Paper' records with
            different author names (author11, author12) that share author11's
            inspireID.
            '''
            # (paper key, author providing the name, author providing the id)
            specs = (('paper11', 'author11', 'author11'),
                     ('paper12', 'author12', 'author11'))
            for paper, name_from, id_from in specs:
                marc_xml = get_new_marc_for_test(
                    'Test Paper',
                    cls.authors[name_from]['name'],
                    None, ((cls.authors[id_from]['inspireID'], 'i'), ),
                    limit_to_collections=True)
                cls.marc_xmls[paper] = marc_xml

                bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
                cls.bibrecs[paper] = bibrec
                # The stored MARC is replaced by what add_001_field returns
                # for the freshly inserted bibrec.
                cls.marc_xmls[paper] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_vacuum_a_claimed_paper_with_an_inspire_id_from_a_claimed_paper():
            '''
            Fixtures 'paper13' and 'paper14': two 'Test Paper' records with
            different author names (author13, author14) that share author13's
            inspireID.
            '''
            # (paper key, author providing the name, author providing the id)
            specs = (('paper13', 'author13', 'author13'),
                     ('paper14', 'author14', 'author13'))
            for paper, name_from, id_from in specs:
                marc_xml = get_new_marc_for_test(
                    'Test Paper',
                    cls.authors[name_from]['name'],
                    None, ((cls.authors[id_from]['inspireID'], 'i'), ),
                    limit_to_collections=True)
                cls.marc_xmls[paper] = marc_xml

                bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
                cls.bibrecs[paper] = bibrec
                # The stored MARC is replaced by what add_001_field returns
                # for the freshly inserted bibrec.
                cls.marc_xmls[paper] = add_001_field(marc_xml, bibrec)

        def set_up_test_hoover_assign_one_inspire_id_from_hepnames_record():
            '''
            Fixture 'paper15': a record built with
            get_new_hepnames_marc_for_test from author15's name and author15's
            inspireID, passed as an (id, 'i') pair.
            '''
            author = cls.authors['author15']
            marc_xml = get_new_hepnames_marc_for_test(
                author['name'],
                ((author['inspireID'], 'i'), ))
            cls.marc_xmls['paper15'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper15'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper15'] = add_001_field(marc_xml, bibrec)

        def set_up_duplicated_unclaimed_signature():
            '''
            Fixture 'paper16': a 'Test Paper' record for author16's name, with
            a one-element tuple holding author17's name as third argument and
            author16's (inspireID, 'i') pair supplied twice.
            '''
            id_pair = (cls.authors['author16']['inspireID'], 'i')
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                cls.authors['author16']['name'],
                (cls.authors['author17']['name'], ),
                (id_pair, id_pair),
                limit_to_collections=True)
            cls.marc_xmls['paper16'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper16'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper16'] = add_001_field(marc_xml, bibrec)

        def set_up_duplicated_claimed_signature():
            '''
            Fixture 'paper18': a 'Test Paper' record for author18's name, with
            a one-element tuple holding author19's name as third argument and
            author18's (inspireID, 'i') pair supplied twice.
            '''
            id_pair = (cls.authors['author18']['inspireID'], 'i')
            marc_xml = get_new_marc_for_test(
                'Test Paper',
                cls.authors['author18']['name'],
                (cls.authors['author19']['name'], ),
                (id_pair, id_pair),
                limit_to_collections=True)
            cls.marc_xmls['paper18'] = marc_xml

            bibrec = get_bibrec_for_record(marc_xml, opt_mode='insert')
            cls.bibrecs['paper18'] = bibrec
            # The stored MARC is replaced by what add_001_field returns for
            # the freshly inserted bibrec.
            cls.marc_xmls['paper18'] = add_001_field(marc_xml, bibrec)

        set_up_test_hoover_inertia()
        set_up_test_hoover_duplication()
        set_up_test_hoover_assign_one_inspire_id_from_an_unclaimed_paper()
        set_up_test_hoover_assign_one_inspire_id_from_a_claimed_paper()
        set_up_test_hoover_assign_one_inspire_id_from_unclaimed_papers_with_different_inspireID(
        )
        set_up_test_hoover_assign_one_inspire_id_from_a_claimed_paper_and_unclaimed_paper_with_different_inspireID(
        )
        set_up_test_hoover_assign_one_inspire_id_from_claimed_papers_with_different_inspireID(
        )
        set_up_test_hoover_vacuum_an_unclaimed_paper_with_an_inspire_id_from_a_claimed_paper(
        )
        set_up_test_hoover_vacuum_a_claimed_paper_with_an_inspire_id_from_a_claimed_paper(
        )
        set_up_test_hoover_assign_one_inspire_id_from_hepnames_record()
        set_up_duplicated_unclaimed_signature()
        set_up_duplicated_claimed_signature()

        cls.bibrecs_to_clean = [cls.bibrecs[key] for key in cls.bibrecs]
        rabbit(sorted([cls.bibrecs[key] for key in cls.bibrecs]),
               verbose=False)

        for key in cls.authors:
            try:
                temp = set()
                cls.bibrefs[key] = get_bibref_value_for_name(
                    cls.authors[key]['name'])
                temp = run_sql(
                    "select personid from aidPERSONIDPAPERS where bibref_value=%s and bibrec=%s and name=%s",
                    (cls.bibrefs[key], cls.bibrecs[key.replace(
                        'author', 'paper')], cls.authors[key]['name']))
                cls.pids[key] = temp[0][0] if temp else ()
            except KeyError as e:
                print e

        claim_test_paper(cls.bibrecs['paper4'])
        claim_test_paper(cls.bibrecs['paper7'])
        claim_test_paper(cls.bibrecs['paper9'])
        claim_test_paper(cls.bibrecs['paper10'])
        claim_test_paper(cls.bibrecs['paper11'])
        claim_test_paper(cls.bibrecs['paper13'])
        claim_test_paper(cls.bibrecs['paper14'])
        claim_test_paper(cls.bibrecs['paper18'])
        tmp_claimed_exception = invenio.bibauthorid_hoover.DuplicateClaimedPaperException
        tmp_unclaimed_exception = invenio.bibauthorid_hoover.DuplicateUnclaimedPaperException

        class MockClaimedException(
                invenio.bibauthorid_hoover.DuplicateClaimedPaperException):
            '''
            Subclass of DuplicateClaimedPaperException that counts how many
            times it is instantiated, using the module-level global `dupl`.
            '''
            def __init__(self, message, pid, signature, present_signatures):
                # `dupl` must already exist at module level: it is only
                # incremented here, never initialised.
                global dupl
                super(MockClaimedException,
                      self).__init__(message, pid, signature,
                                     present_signatures)
                dupl += 1

        class MockUnclaimedException(
                invenio.bibauthorid_hoover.DuplicateUnclaimedPaperException):
            '''
            Subclass of DuplicateUnclaimedPaperException that stores the
            `_pid` it was constructed with in the module-level global `pid`.
            '''
            def __init__(self, message, _pid, signature, present_signatures):
                # The parameter is named `_pid` so that it does not clash with
                # the global `pid` declared on the next line.
                global pid
                super(MockUnclaimedException,
                      self).__init__(message, _pid, signature,
                                     present_signatures)
                # Each instantiation overwrites the previously recorded value.
                pid = _pid

        invenio.bibauthorid_hoover.DuplicateClaimedPaperException = MockClaimedException
        invenio.bibauthorid_hoover.DuplicateUnclaimedPaperException = MockUnclaimedException
        hoover(list(set(cls.pids[key] for key in cls.pids if cls.pids[key])))
        invenio.bibauthorid_hoover.DuplicateClaimedPaperException = tmp_claimed_exception
        invenio.bibauthorid_hoover.DuplicateUnclaimedPaperException = tmp_unclaimed_exception
        print "dupl", dupl