def do_upgrade():
    """Add the ``m_name`` column to ``aidPERSONIDPAPERS`` and fill it in.

    The upgrade:

    1. adds a ``m_name`` VARCHAR(255) column (placed after ``name``) and an
       index ``m_name-b`` on it;
    2. reads every distinct ``(bibref_table, bibref_value)`` pair from the
       table;
    3. for each pair, looks up the name with ``get_name_by_bibref``, derives
       the matchable name with ``create_matchable_name`` and writes both
       back to all rows carrying that pair.

    Pairs for which the name lookup fails with the
    "A bibref must have exactly one name" assertion are not updated; their
    ``bibref_value`` is collected in ``records_for_rabbit`` instead.

    Raises:
        AssertionError: any assertion failure from ``get_name_by_bibref``
            other than the "exactly one name" one is propagated unchanged.
    """
    logger = Logger("Rabbit m_name upgrade script")

    # Silence warnings for the remainder of the process (e.g. those emitted
    # while altering the table).
    warnings.filterwarnings('ignore')
    run_sql(
        "alter table aidPERSONIDPAPERS add `m_name` VARCHAR(255) not null after name"
    )
    run_sql("alter table aidPERSONIDPAPERS add INDEX `m_name-b` (`m_name`)")

    # A set removes duplicate pairs so each bibref is processed only once.
    present_bibrefs = set(
        run_sql("select bibref_table, bibref_value from aidPERSONIDPAPERS"))
    total_updates = len(present_bibrefs)

    # NOTE(review): this set is filled below but not consumed anywhere in the
    # visible code -- confirm whether a follow-up step is missing.
    records_for_rabbit = set()
    for i, bibref in enumerate(present_bibrefs):
        # total_updates cannot be zero here: the loop body only runs when
        # present_bibrefs is non-empty.
        logger.update_status(
            float(i) / total_updates,
            '%s out of %s (%s)' % (str(i), str(total_updates), str(bibref)))
        try:
            name = get_name_by_bibref(bibref)
        except AssertionError as error:
            # str(error) instead of the deprecated ``error.message``.
            if "A bibref must have exactly one name" not in str(error):
                # Bare raise keeps the original traceback intact.
                raise
            records_for_rabbit.add(bibref[1])
        else:
            m_name = create_matchable_name(name)
            run_sql(
                "update aidPERSONIDPAPERS set name=%s, m_name=%s where bibref_table=%s "
                "and bibref_value=%s ", (name, m_name, bibref[0], bibref[1]))
示例#2
0
def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """List the papers of ``personid`` whose author field carries ``external_id``.

    Every row of ``aidPERSONIDPAPERS`` for ``personid`` with ``flag > -2`` is
    examined.  For each row the record is loaded, the position of the author
    field whose ``100__a``/``700__a`` value equals the signature's name is
    located, and the ``a``, ``i``, ``j``, ``k`` and ``m`` subfields at that
    position are inspected.  A subfield is reported when its value (with any
    ``'ORCID:'`` text removed) equals ``external_id``, differs from ``orcid``,
    and its raw value differs from ``inspire``.

    Args:
        personid: identifier of the person whose papers are scanned.
        external_id: identifier value to look for in the author subfields.
        orcid: identifier value to exclude (compared after removing
            ``'ORCID:'``).
        inspire: raw subfield value to exclude.

    Returns:
        A string with one ``"    <bibrec> <author> <value> \\n"`` line per
        reported subfield, or ``None`` when nothing was found.
    """
    # Parameterised query: the driver does the quoting instead of string
    # concatenation.
    result = run_sql(
        'select * from aidPERSONIDPAPERS where personid=%s and flag>-2',
        (personid,))

    report_lines = []
    # The table is expected to yield nine columns per row; only some are used.
    for _pid, table, bibref, bibrec, author, _match, _flag, _cul, _date \
            in result:
        author_name = get_name_by_bibref((table, bibref))

        # Load and scan the record once; both passes below reuse the result.
        record = AmendableRecord(get_bibrecord(bibrec))
        fields = list(record.iterfields(['{0}__%'.format(table)]))

        # Pass 1: remember the position of the (last) author field whose name
        # matches this signature.
        position = -1
        for key, value in fields:
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position < 0:
            continue

        # Pass 2: inspect the identifier-carrying subfields at that position.
        wanted_tags = tuple('{0}__{1}'.format(table, code)
                            for code in 'aijkm')
        for key, value in fields:
            if key[1] != position or key[0] not in wanted_tags:
                continue
            bare_value = value.replace('ORCID:', '')
            if bare_value == external_id and bare_value != orcid and \
                    value != inspire:
                report_lines.append(
                    "    " + " ".join([str(bibrec), author, value, '\n']))

    return "".join(report_lines) or None
def show_papers(personid, external_id=None, orcid=None, inspire=None):
    """List the papers of ``personid`` whose author field carries ``external_id``.

    Every row of ``aidPERSONIDPAPERS`` for ``personid`` with ``flag > -2`` is
    examined.  For each row the record is loaded, the position of the author
    field whose ``100__a``/``700__a`` value equals the signature's name is
    located, and the ``a``, ``i``, ``j``, ``k`` and ``m`` subfields at that
    position are inspected.  A subfield is reported when its value (with any
    ``'ORCID:'`` text removed) equals ``external_id``, differs from ``orcid``,
    and its raw value differs from ``inspire``.

    Args:
        personid: identifier of the person whose papers are scanned.
        external_id: identifier value to look for in the author subfields.
        orcid: identifier value to exclude (compared after removing
            ``'ORCID:'``).
        inspire: raw subfield value to exclude.

    Returns:
        A string with one ``"    <bibrec> <author> <value> \\n"`` line per
        reported subfield, or ``None`` when nothing was found.
    """
    # Parameterised query: the driver does the quoting instead of string
    # concatenation.
    result = run_sql(
        'select * from aidPERSONIDPAPERS where personid=%s and flag>-2',
        (personid,))

    report_lines = []
    # The table is expected to yield nine columns per row; only some are used.
    for _pid, table, bibref, bibrec, author, _match, _flag, _cul, _date \
            in result:
        author_name = get_name_by_bibref((table, bibref))

        # Load and scan the record once; both passes below reuse the result.
        record = AmendableRecord(get_bibrecord(bibrec))
        fields = list(record.iterfields(['{0}__%'.format(table)]))

        # Pass 1: remember the position of the (last) author field whose name
        # matches this signature.
        position = -1
        for key, value in fields:
            if key[0] in ('700__a', '100__a') and value == author_name:
                position = key[1]
        if position < 0:
            continue

        # Pass 2: inspect the identifier-carrying subfields at that position.
        wanted_tags = tuple('{0}__{1}'.format(table, code)
                            for code in 'aijkm')
        for key, value in fields:
            if key[1] != position or key[0] not in wanted_tags:
                continue
            bare_value = value.replace('ORCID:', '')
            if bare_value == external_id and bare_value != orcid and \
                    value != inspire:
                report_lines.append(
                    "    " + " ".join([str(bibrec), author, value, '\n']))

    return "".join(report_lines) or None
示例#4
0
def export_to_dot(cs, fname, graph_info, extra_edge=None):
    """Write a Graphviz DOT description of a cluster set to ``fname``.

    The output contains one black node per element of ``graph_info``, an
    optional green edge described by ``extra_edge``, and for every cluster in
    ``cs.clusters`` a blue chain joining its ``bibs`` plus red edges from
    each of its ``bibs`` to every ``bib`` of every cluster in its ``hate``
    collection.

    Args:
        cs: object exposing ``clusters``; each cluster exposes ``bibs`` and
            ``hate`` (an iterable of clusters).
        fname: path of the file to create or overwrite.
        graph_info: iterable; one node is written per element.
        extra_edge: optional ``(v1, v2, (prob, cert))`` triple.
    """
    from invenio.bibauthorid_dbinterface import get_name_by_bibref

    # The context manager closes the file even when a lookup or a format
    # operation raises part-way through; the original never closed it.
    with open(fname, "w") as fptr:
        fptr.write("graph wedgy {\n")
        fptr.write("    overlap=prism\n")

        # NOTE(review): the label is looked up with the enumeration index,
        # not with the graph_info element itself -- confirm this is intended.
        for idx, _bib in enumerate(graph_info):
            fptr.write('    %d [color=black label="%s"];\n' %
                       (idx, get_name_by_bibref(idx)))

        if extra_edge:
            v1, v2, (prob, cert) = extra_edge
            fptr.write(
                '    %d -- %d [color=green label="p: %.2f, c: %.2f"];\n' %
                (v1, v2, prob, cert))

        for clus in cs.clusters:
            fptr.write("    %s [color=blue];\n" %
                       " -- ".join(str(x) for x in clus.bibs))

            fptr.write("".join("    %d -- %d [color=red]\n" % (b1, b2)
                               for b1 in clus.bibs for h in clus.hate
                               for b2 in h.bibs))

        fptr.write("}")
示例#5
0
def process_authors(pids):
    """Store the affiliated records of the given authors in the temp table.

    For the authors in ``pids`` this function:

    1. fetches their paper associations from ``aidPERSONIDPAPERS``
       (rows with ``flag > -2``);
    2. resolves the name of every distinct bibref with
       ``get_name_by_bibref``;
    3. fetches ``(affiliations, year)`` for every author with
       ``get_affiliations_for_author``;
    4. for each association whose author has affiliations, appends
       ``bibrec, name, serialized affiliations, bibref_table, bibref_value,
       year`` to one flat list and hands it to ``populate_table`` for
       ``bibEDITAFFILIATIONS_tmp``.

    Args:
        pids: collection of author identifiers.

    Returns:
        The length of the flat value list passed to ``populate_table``, i.e.
        six entries per kept association (0 when nothing was kept or
        ``pids`` is empty).
    """
    pid_list = list(pids)
    if not pid_list:
        # An empty "in ( )" clause is not valid SQL; there is nothing to do.
        return 0

    # One placeholder per author so the driver does the quoting.  Values are
    # passed as strings, matching the quoted literals the query used before.
    placeholders = ' , '.join(['%s'] * len(pid_list))
    associations = set(
        run_sql("""select personid, bibref_table, bibref_value, bibrec
                                  from aidPERSONIDPAPERS
                                  where flag > -2
                                  and personid in ( %s ) """ % placeholders,
                tuple(str(p) for p in pid_list)))

    # get mapping of: (bibref_table, bibref_value) -> name
    bibref_to_name = dict()
    bibrefs = set((table, ref) for _, table, ref, _ in associations)
    for bibref in bibrefs:
        bibref_to_name[bibref] = get_name_by_bibref(bibref)
        write_message('Got name: %s for bibref: %s.' %
                      (bibref_to_name[bibref], str(bibref)))

    # get mapping of: author -> (affiliations, year)
    pid_to_affiliations = dict()
    for pid in pid_list:
        pid_to_affiliations[pid] = get_affiliations_for_author(pid)
        write_message('Got affiliations: %s for author: %s.' %
                      (pid_to_affiliations[pid], str(pid)))

    # Flat list of values, six per kept association, in column order:
    # bibrec, name, affiliations, bibref_table, bibref_value, year.
    # NOTE(review): because the list is flat, the logged count and the return
    # value are numbers of values, not numbers of rows -- kept unchanged.
    affiliated_records = list()
    for pid, table, ref, rec in associations:
        affiliations, year = pid_to_affiliations[pid]
        # we don't want to keep records which have no affiliation/s
        if not affiliations:
            continue
        affiliated_records.extend([
            rec, bibref_to_name[(table, ref)],
            serialize(affiliations), table, ref, year
        ])

    # flush data to db
    if affiliated_records:
        write_message('Populating table with %s records.' %
                      len(affiliated_records))
        populate_table('bibEDITAFFILIATIONS_tmp', [
            'bibrec', 'name', 'affiliations', 'bibref_table', 'bibref_value',
            'year'
        ],
                       affiliated_records,
                       empty_table_first=False)

    return len(affiliated_records)
    def get_message_body(self):
        """Build the multi-line text reported by this exception.

        The text names the profile (by canonical name when one exists,
        otherwise by ``self.pid``), the signature that was to be moved, the
        signatures already present and claimed, and the URL of the record of
        the signature being moved.
        """
        # Fall back to the raw person id when no canonical name is returned.
        try:
            profile_name = get_canonical_name_of_author(self.pid)[0]
        except IndexError:
            profile_name = self.pid
        profile_url = "%s/author/profile/%s" % (CFG_SITE_URL, profile_name)

        # The first two items of a signature form the bibref used for the
        # name lookup; the third is the record id.
        moved_name = get_name_by_bibref(self.signature[0:2])

        claimed = []
        for present in self.present_signatures:
            claimed.append((present, get_name_by_bibref(present[0:2])))
        claimed_text = ",".join(
            '%s (%s on record %s)' % (sig, sig_name, sig[2])
            for sig, sig_name in claimed)

        conflict_text = (
            "want to move %s (%s on record %s) to this profile but [%s] are already present and claimed"
            % (self.signature, moved_name, self.signature[2], claimed_text))
        record_url = "%s/record/%s" % (CFG_SITE_URL, self.signature[2])

        # self.message is intentionally not part of the body.
        return '\n'.join([
            'Found wrong signature claimed to profile ',
            profile_url,
            conflict_text,
            record_url,
        ])
示例#7
0
def process_authors(pids):
    """Store the affiliated records of the given authors in the temp table.

    For the authors in ``pids`` this function:

    1. fetches their paper associations from ``aidPERSONIDPAPERS``
       (rows with ``flag > -2``);
    2. resolves the name of every distinct bibref with
       ``get_name_by_bibref``;
    3. fetches ``(affiliations, year)`` for every author with
       ``get_affiliations_for_author``;
    4. for each association whose author has affiliations, appends
       ``bibrec, name, serialized affiliations, bibref_table, bibref_value,
       year`` to one flat list and hands it to ``populate_table`` for
       ``bibEDITAFFILIATIONS_tmp``.

    Args:
        pids: collection of author identifiers.

    Returns:
        The length of the flat value list passed to ``populate_table``, i.e.
        six entries per kept association (0 when nothing was kept or
        ``pids`` is empty).
    """
    pid_list = list(pids)
    if not pid_list:
        # An empty "in ( )" clause is not valid SQL; there is nothing to do.
        return 0

    # One placeholder per author so the driver does the quoting.  Values are
    # passed as strings, matching the quoted literals the query used before.
    placeholders = ' , '.join(['%s'] * len(pid_list))
    associations = set(
        run_sql("""select personid, bibref_table, bibref_value, bibrec
                                  from aidPERSONIDPAPERS
                                  where flag > -2
                                  and personid in ( %s ) """ % placeholders,
                tuple(str(p) for p in pid_list)))

    # get mapping of: (bibref_table, bibref_value) -> name
    bibref_to_name = dict()
    bibrefs = set((table, ref) for _, table, ref, _ in associations)
    for bibref in bibrefs:
        bibref_to_name[bibref] = get_name_by_bibref(bibref)
        write_message('Got name: %s for bibref: %s.' %
                      (bibref_to_name[bibref], str(bibref)))

    # get mapping of: author -> (affiliations, year)
    pid_to_affiliations = dict()
    for pid in pid_list:
        pid_to_affiliations[pid] = get_affiliations_for_author(pid)
        write_message('Got affiliations: %s for author: %s.' %
                      (pid_to_affiliations[pid], str(pid)))

    # Flat list of values, six per kept association, in column order:
    # bibrec, name, affiliations, bibref_table, bibref_value, year.
    # NOTE(review): because the list is flat, the logged count and the return
    # value are numbers of values, not numbers of rows -- kept unchanged.
    affiliated_records = list()
    for pid, table, ref, rec in associations:
        affiliations, year = pid_to_affiliations[pid]
        # we don't want to keep records which have no affiliation/s
        if not affiliations:
            continue
        affiliated_records.extend([
            rec, bibref_to_name[(table, ref)],
            serialize(affiliations), table, ref, year
        ])

    # flush data to db
    if affiliated_records:
        write_message('Populating table with %s records.' %
                      len(affiliated_records))
        populate_table('bibEDITAFFILIATIONS_tmp',
                       ['bibrec', 'name', 'affiliations', 'bibref_table',
                        'bibref_value', 'year'],
                       affiliated_records,
                       empty_table_first=False)

    return len(affiliated_records)
示例#8
0
def export_to_dot(cs, fname, graph_info, extra_edge=None):
    """Write a Graphviz DOT description of a cluster set to ``fname``.

    The output contains one black node per element of ``graph_info``, an
    optional green edge described by ``extra_edge``, and for every cluster in
    ``cs.clusters`` a blue chain joining its ``bibs`` plus red edges from
    each of its ``bibs`` to every ``bib`` of every cluster in its ``hate``
    collection.

    Args:
        cs: object exposing ``clusters``; each cluster exposes ``bibs`` and
            ``hate`` (an iterable of clusters).
        fname: path of the file to create or overwrite.
        graph_info: iterable; one node is written per element.
        extra_edge: optional ``(v1, v2, (prob, cert))`` triple.
    """
    from invenio.bibauthorid_dbinterface import get_name_by_bibref

    # The context manager closes the file even when a lookup or a format
    # operation raises part-way through; the original never closed it.
    with open(fname, "w") as fptr:
        fptr.write("graph wedgy {\n")
        fptr.write("    overlap=prism\n")

        # NOTE(review): the label is looked up with the enumeration index,
        # not with the graph_info element itself -- confirm this is intended.
        for idx, _bib in enumerate(graph_info):
            fptr.write('    %d [color=black label="%s"];\n' %
                       (idx, get_name_by_bibref(idx)))

        if extra_edge:
            v1, v2, (prob, cert) = extra_edge
            fptr.write(
                '    %d -- %d [color=green label="p: %.2f, c: %.2f"];\n' %
                (v1, v2, prob, cert))

        for clus in cs.clusters:
            fptr.write("    %s [color=blue];\n" %
                       " -- ".join(str(x) for x in clus.bibs))

            fptr.write("".join("    %d -- %d [color=red]\n" % (b1, b2)
                               for b1 in clus.bibs for h in clus.hate
                               for b2 in h.bibs))

        fptr.write("}")