Exemplo n.º 1
0
def w5_internal_directed_authorcite_matrix(w5):
    """Build the directed author-to-author citation count matrix.

    Rows and columns are both labelled by the sorted array of
    ``w5.all_authors()``.  For every paper in ``w5.h5.root.papers``,
    each of its authors (row) receives one increment for every author
    (column) of every cited paper, so -- unlike the DOI matrix -- a
    cell may hold a count greater than 1.

    Cited DOIs that are not keys of ``w5.dict_doi_to_authors()`` are
    ignored.

    Returns:
        A tuple ``(result, rows, cols)``: the square ``numpy.int32``
        count matrix, followed by the two (equal) sorted author-name
        arrays labelling its axes.

    """

    rows = numpy.sort(w5.all_authors())
    cols = rows.copy()
    authordict = w5.dict_doi_to_authors()
    result = numpy.zeros((rows.shape[0], cols.shape[0]),
                         dtype=numpy.int32)
    for paper in w5.h5.root.papers:
        index = paper['index']
        # NOTE(review): sindices is assumed to yield the positions of
        # the given names within the sorted array -- confirm against
        # its definition.
        rindices = list(sindices(rows, w5.h5.root.authors[index]))
        if not rindices:
            # No citing author to credit, so there is nothing to
            # increment and the cited-papers read can be skipped.
            continue
        # The cited columns do not depend on which author of this paper
        # is the citing one, so resolve them once per paper instead of
        # once per citing author.  Duplicates are kept on purpose: an
        # author cited through several papers is counted several times.
        cindices = [cindex
                    for doi in w5.h5.root.cited_papers[index]
                    if doi in authordict
                    for cindex in sindices(cols, authordict[doi])]
        for rindex in rindices:
            for cindex in cindices:
                result[rindex, cindex] += 1
    return result, rows, cols
Exemplo n.º 2
0
def w5_internal_directed_cites_matrix(w5):
    """Build the directed paper-to-paper citation adjacency matrix.

    Both axes are labelled by the sorted array of ``w5.all_dois()``.  A
    cell is set to 1 when the row paper cites the column paper and is
    left at 0 otherwise.

    An ``IndexError`` raised while filling a paper's row is not
    propagated; the paper's cited-papers entry is logged at error level
    instead and processing moves on to the next paper.

    Returns a tuple ``(result, rows, cols)``: the square ``numpy.int8``
    matrix followed by the two (equal) sorted DOI arrays labelling its
    axes.

    """

    rows = numpy.sort(w5.all_dois())
    cols = rows.copy()
    size = rows.shape[0]
    result = numpy.zeros((size, size), dtype=numpy.int8)
    for paper in w5.h5.root.papers:
        source = sindex(rows, paper['doi'])
        if source is None:
            # presumably the paper's DOI was not found among the row
            # labels -- verify against sindex
            continue
        try:
            targets = sindices(
                cols, w5.h5.root.cited_papers[paper['index']])
            for target in targets:
                result[source, target] = 1
        except IndexError:
            logging.error(w5.h5.root.cited_papers[paper['index']])
    return result, rows, cols