Python collect_personid_papers示例，bibauthorid_dbinterface.collect_personid_papers Python示例

示例#1

0

显示文件

文件： bibauthorid_personid_maintenance.py 项目： kaplun/Invenio-OpenAIRE

        def check_paper(self):
            """Drop personid rows of one paper whose bibref no longer exists.

            ``self.paper`` is read as a pair: ``self.paper[0]`` is the paper
            identifier and ``self.paper[1]`` is an iterable of rows which the
            debug message below formats as
            ``(id, personid, tag, data, flag, lcul)``.

            A row is stale when its ``data`` field (``row[3]``) is not one of
            the ``"100:<ref>,<paper>"`` / ``"700:<ref>,<paper>"`` strings built
            from the paper's current authors and coauthors.  Stale rows are
            deleted; when exactly one other row exists for the same person on
            this paper, the deleted row's flag and lcul are copied onto it.

            Afterwards the canonical names and name string sets of every
            person found in ``self.paper[1]`` are refreshed and
            ``close_connection()`` is called.
            """
            if bconfig.TABLES_UTILS_DEBUG:
                print(" -> processing paper = %s" % (self.paper[0],))

            bibrefs100 = dbinter.get_authors_from_paper(self.paper[0])
            bibrefs700 = dbinter.get_coauthors_from_paper(self.paper[0])
            bibrecreflist = frozenset(
                ["100:%s,%s" % (str(i[0]), self.paper[0]) for i in bibrefs100]
                + ["700:%s,%s" % (str(i[0]), self.paper[0]) for i in bibrefs700]
            )
            # Fetched on first need only; None means "not fetched yet".
            pid_rows_lazy = None

            # Finally, if a bibrec/ref pair is in the authornames table but not
            # in this list, that name of that paper no longer exists and must
            # be removed from the table. The new one will be added by the
            # update procedure in the future; keeping this entry would be
            # risky because the garbage collector may decide to kill the
            # bibref in the bibX0x table.
            for row in self.paper[1]:
                if row[3] not in bibrecreflist:
                    # Compare against None, not truthiness: an empty result
                    # is a valid cached answer and must not trigger the same
                    # query again for every further stale row.
                    if pid_rows_lazy is None:
                        pid_rows_lazy = dbinter.collect_personid_papers(paper=(self.paper[0],), person=personid_q)

                    # Ids of the other rows of the same person on this paper
                    # (presumably the same column layout as `row` -- TODO
                    # confirm against collect_personid_papers).
                    other_bibrefs = [b[0] for b in pid_rows_lazy if b[1] == row[1] and b[3] != row[3]]
                    dbinter.delete_personid_by_id(int(row[0]))
                    if bconfig.TABLES_UTILS_DEBUG:
                        print("*   deleting record with missing bibref: \
                               id = %s, personid = %s, tag = %s, data = %s, flag = %s, lcul = %s" % row)
                        print("found %d other records with the same personid and bibrec" % len(other_bibrefs))
                    if len(other_bibrefs) == 1:
                        # We have one and only one substitute, so we can
                        # switch them: hand the flag and lcul over to it.
                        dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
                        if bconfig.TABLES_UTILS_DEBUG:
                            print("updating id=%d with flag=%d,lcul=%d" % (other_bibrefs[0], row[4], row[5]))

            # Each person id is wrapped in a 1-tuple, as in the original call.
            persons_to_update = set([(p[1],) for p in self.paper[1]])
            dbinter.update_personID_canonical_names(persons_to_update)
            dbinter.update_personID_names_string_set(persons_to_update, single_threaded=True, wait_finished=True)
            close_connection()

示例#2

0

显示文件

文件： bibauthorid_personid_maintenance.py 项目： robk5uj/invenio

        def check_paper(self):
            """Remove the personid rows of a paper that point to a vanished bibref.

            Reads ``self.paper[0]`` (the paper identifier) and
            ``self.paper[1]`` (the rows to check; the debug message below
            formats each one as ``(id, personid, tag, data, flag, lcul)``).

            The set of valid ``data`` values is rebuilt from the paper's
            current authors (``"100:<ref>,<paper>"``) and coauthors
            (``"700:<ref>,<paper>"``).  Every row whose ``row[3]`` is missing
            from that set is deleted.  If exactly one other row is found for
            the same person on this paper, it inherits the deleted row's flag
            and lcul.

            Finally the canonical names and name string sets of all persons
            seen in ``self.paper[1]`` are updated and ``close_connection()``
            is called.
            """
            if bconfig.TABLES_UTILS_DEBUG:
                print(" -> processing paper = %s" % (self.paper[0],))

            bibrefs100 = dbinter.get_authors_from_paper(self.paper[0])
            bibrefs700 = dbinter.get_coauthors_from_paper(self.paper[0])
            bibrecreflist = frozenset(["100:%s,%s" % (str(i[0]), self.paper[0]) for i in bibrefs100] +
                                      ["700:%s,%s" % (str(i[0]), self.paper[0]) for i in bibrefs700])
            # Cache for the personid rows of this paper; None = not loaded.
            pid_rows_lazy = None

            # Finally, if a bibrec/ref pair is in the authornames table but not
            # in this list, that name of that paper no longer exists and must
            # be removed from the table. The new one will be added by the
            # update procedure in the future; keeping this entry would be
            # risky because the garbage collector may decide to kill the
            # bibref in the bibX0x table.
            for row in self.paper[1]:
                if row[3] not in bibrecreflist:
                    # `is None` rather than `not ...`: an empty query result
                    # is falsy and would otherwise be re-fetched for each
                    # subsequent stale row.
                    if pid_rows_lazy is None:
                        pid_rows_lazy = dbinter.collect_personid_papers(paper=(self.paper[0],),
                                                                        person=personid_q)

                    # Other rows of the same person on this paper (assumes
                    # the fetched rows share the layout of `row` -- TODO
                    # confirm).
                    other_bibrefs = [b[0] for b in pid_rows_lazy if b[1] == row[1] and b[3] != row[3]]
                    dbinter.delete_personid_by_id(int(row[0]))
                    if bconfig.TABLES_UTILS_DEBUG:
                        print("*   deleting record with missing bibref: \
                               id = %s, personid = %s, tag = %s, data = %s, flag = %s, lcul = %s" % row)
                        print("found %d other records with the same personid and bibrec" % len(other_bibrefs))
                    if len(other_bibrefs) == 1:
                        # We have one and only one substitute, so we can
                        # switch them: move the flag and lcul onto it.
                        dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
                        if bconfig.TABLES_UTILS_DEBUG:
                            print("updating id=%d with flag=%d,lcul=%d" % (other_bibrefs[0], row[4], row[5]))

            # Person ids are passed on as 1-tuples, as in the original call.
            persons_to_update = set([(p[1],) for p in self.paper[1]])
            dbinter.update_personID_canonical_names(persons_to_update)
            dbinter.update_personID_names_string_set(persons_to_update, single_threaded=True, wait_finished=True)
            close_connection()

示例#3

0

显示文件

文件： bibauthorid_personid_maintenance.py 项目： robk5uj/invenio

    # Load the deleted papers and keep the first field of every returned row
    # in a frozenset, which is used for the membership test further down.
    deleted_recs = dbinter.get_deleted_papers()
    deleted_recs = frozenset(x[0] for x in deleted_recs)
    if bconfig.TABLES_UTILS_DEBUG:
        print "%d total deleted papers" % (len(deleted_recs),)

    # Optional person filter: a non-empty `personid` is converted with
    # dbinter.list_2_SQL_str using the first field of each entry; otherwise
    # None is passed on (presumably meaning "no restriction" -- TODO confirm).
    if personid:
        personid_q = dbinter.list_2_SQL_str(personid, lambda x: str(x[0]))
    else:
        personid_q = None

    # Read the personid papers page by page. `counter` and `rows_limit` are
    # passed together as `limit` (presumably offset and page size -- TODO
    # confirm against collect_personid_papers).
    counter = 0
    rows_limit = 10000000
    end_loop = False
    while not end_loop:
        task_sleep_now_if_required(True)
        papers_data = dbinter.collect_personid_papers(person=personid_q,
                                                      limit=(counter, rows_limit,))

        if bconfig.TABLES_UTILS_DEBUG:
            print "query with limit %d %d" % (counter, rows_limit)

        # A full page means there may be more rows, so advance the counter;
        # a shorter page makes this the last iteration.
        if len(papers_data) == rows_limit:
            counter += rows_limit
        else:
            end_loop = True

        # Pair every row with the result of extract_bibrec on its fourth field.
        papers_data = tuple((extract_bibrec(p[3]), p) for p in papers_data)
        to_remove = set()
        # NOTE(review): `jobs` is not used in the visible part of this snippet.
        jobs = dict()
        for p in papers_data:
            # Rows whose extracted bibrec is among the deleted papers are
            # collected by the first field of the original row (presumably
            # the row id -- TODO confirm).
            if int(p[0]) in deleted_recs:
                to_remove.add(p[1][0])

示例#4

0

显示文件

文件： bibauthorid_personid_maintenance.py 项目： Kennethhole/Invenio-1

    # Fetch the deleted papers; only the first field of each returned row is
    # kept, in a frozenset that the loop below tests membership against.
    deleted_recs = dbinter.get_deleted_papers()
    deleted_recs = frozenset(x[0] for x in deleted_recs)
    if bconfig.TABLES_UTILS_DEBUG:
        print "%d total deleted papers" % (len(deleted_recs),)

    # When `personid` is non-empty it is turned into a filter value by
    # dbinter.list_2_SQL_str (first field of each entry); otherwise the
    # filter is None (presumably "all persons" -- TODO confirm).
    if personid:
        personid_q = dbinter.list_2_SQL_str(personid, lambda x: str(x[0]))
    else:
        personid_q = None

    # Paged read of the personid papers: `limit` receives (counter,
    # rows_limit), presumably offset and page size -- TODO confirm.
    counter = 0
    rows_limit = 10000000
    end_loop = False
    while not end_loop:
        task_sleep_now_if_required(can_stop_too=False)
        papers_data = dbinter.collect_personid_papers(person=personid_q,
                                                      limit=(counter, rows_limit,))

        if bconfig.TABLES_UTILS_DEBUG:
            print "query with limit %d %d" % (counter, rows_limit)

        # Continue with the next page only if this one came back full.
        if len(papers_data) == rows_limit:
            counter += rows_limit
        else:
            end_loop = True

        # Each row becomes (extract_bibrec(<fourth field>), <original row>).
        papers_data = tuple((extract_bibrec(p[3]), p) for p in papers_data)
        to_remove = set()
        # NOTE(review): `jobs` is not used in the visible part of this snippet.
        jobs = dict()
        for p in papers_data:
            # Remember the first field of the original row (presumably its
            # id -- TODO confirm) when the row's bibrec is a deleted paper.
            if int(p[0]) in deleted_recs:
                to_remove.add(p[1][0])