def get_coauthors(author, tags, cache):
    """Return the set of authors sharing at least one record with AUTHOR.

    AUTHOR is either something int() accepts (its records are then taken
    from get_person_bibrecs()) or a name that is searched in the 'author'
    field.  TAGS is handed unchanged to get_authors_from_record().
    CACHE maps an author to an already computed result; it is consulted
    first and filled in after a successful computation.  A falsy AUTHOR
    yields an empty set that is not stored in CACHE.
    """
    if author in cache:
        return cache[author]

    coauthors = set()

    # Nothing to look up for a missing author: only do the work (and
    # remember the result) when we were really given one.
    if author:
        try:
            matching_recids = get_person_bibrecs(int(author))
        except ValueError:
            # not a numeric person id, treat it as an author name
            matching_recids = search_engine.search_pattern(p=author,
                                                           f='author')
        for recid in matching_recids:
            coauthors.update(get_authors_from_record(recid, tags))
        cache[author] = coauthors

    return coauthors
Пример #2
0
def get_coauthors(author, tags, cache):
    """Collect everybody who published a record together with AUTHOR.

    The result is a set built from get_authors_from_record(recid, TAGS)
    over all records of AUTHOR.  Results are memoised in CACHE, keyed by
    AUTHOR; a cached entry is returned as is.
    """
    # fast path: already computed for this author
    if author in cache:
        return cache[author]

    # sanity check: author may not exist (result is not cached then)
    if not author:
        return set()

    try:
        # a numeric author is a person id ...
        recids = get_person_bibrecs(int(author))
    except ValueError:
        # ... anything else is a name to search for
        recids = search_engine.search_pattern(p=author, f='author')

    collaborators = set()
    for recid in recids:
        for name in get_authors_from_record(recid, tags):
            collaborators.add(name)

    cache[author] = collaborators
    return collaborators
def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are the search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF == 'hcs' renders the HTML cite summary section by section.  With a
       REQ, every section is passed to REQ.write() as soon as it is rendered
       and '' is returned; with REQ left at None the sections are collected
       and returned as one newline-joined string.
       OF == 'xcs' returns print_citation_summary_xml() applied to the
       cited-by list of RECIDS.  Any other OF falls through and returns None.
    """
    import search_engine
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Send CHUNK to REQ, or buffer it when no REQ was given."""
            if req is None:
                html.append(chunk)
            else:
                req.write(chunk)

        # 1) hcs prologue:
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if colldef:
                d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
            else:
                # an empty definition means "all the given records"
                d_recids[coll] = recids
            d_total_recs[coll] = len(d_recids[coll])
        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_recid_citers = {}
        d_total_cites = {}
        d_avg_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
            for dummy_recid, lciters in d_recid_citers[coll]:
                if lciters:
                    d_total_cites[coll] += len(lciters)
            # the record count is checked as well so that the division
            # can never fail
            if d_total_cites[coll] != 0 and d_total_recs[coll] != 0:
                d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = 0
                for dummy_recid, lciters in d_recid_citers[coll]:
                    # a record without citers counts as cited zero times
                    numcites = len(lciters or ())
                    if low <= numcites <= high:
                        d_cites[coll] += 1
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 4) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if req is None:
            return "\n".join(html)
        return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)
Пример #4
0
def get_coauthors(author, tags, cache):
    """ Get all coauthors for an author

    Given author A, returns all the authors having published
    a record with author A, as a set built from
    get_authors_from_record(recid, TAGS) over the records found by
    searching A in the 'author' field.

    CACHE maps an author to an already computed result; it is consulted
    first and filled in after a search.  A falsy AUTHOR (None, '') gives
    an empty set without searching and without touching CACHE.
    """
    if author in cache:
        return cache[author]

    friends = set()

    # sanity check: author may not exist; do not hand an empty pattern
    # to the search engine
    if not author:
        return friends

    for recid in search_engine.search_pattern(p=author, f='author'):
        friends.update(get_authors_from_record(recid, tags))

    cache[author] = friends
    return friends
def summarize_records(recids,
                      of,
                      ln,
                      searchpattern="",
                      searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are the search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF == 'hcs' renders the HTML cite summary section by section.  With a
       falsy REQ the sections are collected and returned as one
       newline-joined string; otherwise each section is passed to
       REQ.write() (when REQ has such a method) and '' is returned.
       OF == 'xcs' returns print_citation_summary_xml() applied to the
       cited-by list of RECIDS.  Any other OF falls through and returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Buffer CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        # 1) hcs prologue:
        # self-citations are skipped as soon as one collection holds more
        # records than CFG_CITESUMMARY_SELFCITES_THRESHOLD
        compute_self_citations_p = True
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if colldef:
                d_recids[coll] = recids & search_engine.search_pattern(
                    p=colldef)
            else:
                # an empty definition means "all the given records"
                d_recids[coll] = recids
            d_total_recs[coll] = len(d_recids[coll])
            if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
                compute_self_citations_p = False

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_recid_citers = {}
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, number of citers)]
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_recid_citecount_l[coll] = []
            d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
            for recid, lciters in d_recid_citers[coll]:
                if lciters:
                    d_total_cites[coll] += len(lciters)
                    d_recid_citecount_l[coll].append((recid, len(lciters)))
            if d_total_recs[coll] != 0:
                d_avg_cites[coll] = (d_total_cites[coll] * 1.0 /
                                     d_total_recs[coll])
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) compute self-citations
        if compute_self_citations_p:
            emit(render_self_citations(d_recids, d_total_recs, ln))

        emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

        # 4) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = 0
                for dummy_recid, lciters in d_recid_citers[coll]:
                    # a record without citers counts as cited zero times
                    numcites = len(lciters or ())
                    if low <= numcites <= high:
                        d_cites[coll] += 1
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 5) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; a key function instead of cmp= keeps the
            # same (stable) order and also works where cmp= is unavailable
            d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                           reverse=True)
            h_factor = 0
            for dummy_recid, citecount in d_recid_citecount_l[coll]:
                # stop at the first record cited fewer times than its rank
                if citecount < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 6) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)
Пример #6
0
def summarize_records(recids,
                      of,
                      ln,
                      searchpattern="",
                      searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are the search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF == 'hcs' renders the HTML cite summary section by section.  With a
       falsy REQ the sections are collected and returned as one
       newline-joined string; otherwise each section is passed to
       REQ.write() (when REQ has such a method) and '' is returned.
       This definition handles no other OF: anything else returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Buffer CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        # 1) hcs prologue:
        # self-citations are skipped as soon as one collection holds more
        # records than CFG_CITESUMMARY_SELFCITES_THRESHOLD
        compute_self_citations = True
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if colldef:
                d_recids[coll] = recids & search_engine.search_pattern(
                    p=colldef)
            else:
                # an empty definition means "all the given records"
                d_recids[coll] = recids
            d_total_recs[coll] = len(d_recids[coll])
            if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
                compute_self_citations = False

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_recid_citers = {}
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, number of citers)]
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_recid_citecount_l[coll] = []
            d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
            for recid, lciters in d_recid_citers[coll]:
                if lciters:
                    d_total_cites[coll] += len(lciters)
                    d_recid_citecount_l[coll].append((recid, len(lciters)))
            # the record count is checked as well so that the division
            # can never fail
            if d_total_cites[coll] != 0 and d_total_recs[coll] != 0:
                d_avg_cites[coll] = (d_total_cites[coll] * 1.0 /
                                     d_total_recs[coll])
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) compute self-citations
        if compute_self_citations:
            try:
                tags = get_authors_tags()
            except (IndexError, ConfigParser.NoOptionError) as msg:
                # without the author tags there is nothing to compare:
                # report the problem and leave this section out
                register_exception(prefix="attribute " + \
                    str(msg) + " missing in config", alert_admin=True)
                compute_self_citations = False

        if compute_self_citations:
            # Citation counts with self-citations left out.  The citer
            # lists fetched for the overview are reused here instead of
            # being requested a second time.
            d_noself_cites = {}
            d_noself_avg = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_noself_cites[coll] = 0
                d_noself_avg[coll] = 0

                authors_cache = {}  # author -> coauthors, per collection
                for recid, lciters in d_recid_citers[coll]:
                    if not lciters:
                        continue
                    authors = get_authors_from_record(recid, tags)
                    if len(authors) > 20:
                        # Use collaboration names
                        collaborations = get_collaborations_from_record(
                            recid, tags)
                        for cit in lciters:
                            cit_collaborations = \
                                get_collaborations_from_record(cit, tags)
                            if not collaborations.intersection(
                                    cit_collaborations):
                                d_noself_cites[coll] += 1
                    else:
                        # Use author names
                        for cit in lciters:
                            cit_authors = get_authors_from_record(cit, tags)
                            # extend with circle of friends; iterate over a
                            # copy because the set grows inside the loop
                            for author in list(cit_authors)[:20]:
                                cit_authors.update(get_coauthors(
                                    author, tags, authors_cache))

                            if not authors.intersection(cit_authors):
                                d_noself_cites[coll] += 1

                if d_noself_cites[coll] != 0 and d_total_recs[coll] != 0:
                    d_noself_avg[coll] = (d_noself_cites[coll] * 1.0 /
                                          d_total_recs[coll])
            emit(websearch_templates.tmpl_citesummary_minus_self_cites(
                d_noself_cites, d_noself_avg, CFG_CITESUMMARY_COLLECTIONS,
                ln))

        emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

        # 4) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = 0
                for dummy_recid, lciters in d_recid_citers[coll]:
                    # a record without citers counts as cited zero times
                    numcites = len(lciters or ())
                    if low <= numcites <= high:
                        d_cites[coll] += 1
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 5) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; a key function instead of cmp= keeps the
            # same (stable) order and also works where cmp= is unavailable
            d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                           reverse=True)
            h_factor = 0
            for dummy_recid, citecount in d_recid_citecount_l[coll]:
                # stop at the first record cited fewer times than its rank
                if citecount < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 6) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''
def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are the search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF == 'hcs' renders the HTML cite summary section by section.  With a
       falsy REQ the sections are collected and returned as one
       newline-joined string; otherwise each section is passed to
       REQ.write() (when REQ has such a method) and '' is returned.
       OF == 'xcs' returns print_citation_summary_xml() applied to the
       cited-by list of RECIDS.  Any other OF falls through and returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Buffer CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        # 1) hcs prologue:
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if colldef:
                d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
            else:
                # an empty definition means "all the given records"
                d_recids[coll] = recids
            d_total_recs[coll] = len(d_recids[coll])

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_recid_citers = {}
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, number of citers)]
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_recid_citecount_l[coll] = []
            d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
            for recid, lciters in d_recid_citers[coll]:
                if lciters:
                    d_total_cites[coll] += len(lciters)
                    d_recid_citecount_l[coll].append((recid, len(lciters)))
            # the record count is checked as well so that the division
            # can never fail
            if d_total_cites[coll] != 0 and d_total_recs[coll] != 0:
                d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = 0
                for dummy_recid, lciters in d_recid_citers[coll]:
                    # a record without citers counts as cited zero times
                    numcites = len(lciters or ())
                    if low <= numcites <= high:
                        d_cites[coll] += 1
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 4) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; a key function instead of cmp= keeps the
            # same (stable) order and also works where cmp= is unavailable
            d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                           reverse=True)
            h_factor = 0
            for dummy_recid, citecount in d_recid_citecount_l[coll]:
                # stop at the first record cited fewer times than its rank
                if citecount < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 5) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)