示例#1
0
def get_tori(identifiers, bibcodes, self_cits=None):
    """Calculate the Tori and riq indices for a set of publications.

    Args:
        identifiers: Record identifiers handed to ``get_tori_data`` (and to
            ``get_selfcitations`` when self-citations must be retrieved).
        bibcodes: Bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        self_cits: Optional self-citation data. The first element of every
            entry is treated as an iterable of bibcodes to exclude. When
            falsy, it is retrieved with ``get_selfcitations``.

    Returns:
        A tuple ``(tori, tori_ref, riq, riq_ref, tori_data)``. All four
        numbers are 0 when no Tori data is found or when the normalized
        citation records cannot be summed.
    """
    # Get additional data necessary for Tori calculation
    data = get_tori_data(identifiers)
    if len(data) == 0:
        return 0, 0, 0, 0, []
    # If we did not get self-citations, retrieve them
    if not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    self_citations = set(
        itertools.chain.from_iterable(entry[0] for entry in self_cits))
    # Flatten the per-paper citation records, dropping self-citations
    tori_data = [
        cit for cit in itertools.chain.from_iterable(
            p.rn_citation_data for p in data if p.rn_citation_data)
        if cit['bibcode'] not in self_citations and 'pubyear' in cit
    ]
    tori_data_ref = [
        cit for cit in itertools.chain.from_iterable(
            p.rn_citation_data for p in data
            if p.refereed and p.rn_citation_data)
        if cit['bibcode'] not in self_citations
    ]
    try:
        tori = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data]))
        tori_ref = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data_ref]))
    except Exception:
        # Best effort: malformed citation records yield zeroed indices, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return 0, 0, 0, 0, tori_data
    # The riq index follows from the Tori index and the year range
    this_year = datetime.now().year
    yrange = this_year - min(int(b[:4]) for b in bibcodes) + 1
    riq = int(1000.0 * sqrt(float(tori)) / float(yrange))
    # NOTE(review): a separate year range derived from ``data`` used to be
    # computed here but was never used; riq_ref has always been normalized
    # by ``yrange``. Confirm whether a refereed-only range was intended.
    riq_ref = int(1000.0 * sqrt(float(tori_ref)) / float(yrange))
    # Send the results back
    return tori, tori_ref, riq, riq_ref, tori_data
 def test_get_tori_data(self):
     '''Check that get_tori_data returns a list of MetricsModel instances'''
     from models import get_tori_data
     data = get_tori_data(testset)
     # The most important thing here is to test that it is a list
     # of MetricsModel instances
     self.assertIsInstance(data, list)
     # Assert per record so a failure names the offending class
     for record in data:
         self.assertEqual(record.__class__.__name__, 'MetricsModel')
 def test_get_tori_data(self):
     '''Check that get_tori_data returns a list of MetricsModel instances'''
     from models import get_tori_data
     records = get_tori_data(testset)
     # The result must be a plain list ...
     self.assertIsInstance(records, list)
     # ... and every entry must be a MetricsModel; checking one entry at a
     # time makes a failure report the unexpected class name
     for entry in records:
         self.assertEqual(entry.__class__.__name__, 'MetricsModel')
示例#4
0
def get_tori(identifiers, bibcodes, self_cits=None):
    """Calculate the Tori and riq indices for a set of publications.

    Args:
        identifiers: Record identifiers handed to ``get_tori_data`` (and to
            ``get_selfcitations`` when self-citations must be retrieved).
        bibcodes: Bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        self_cits: Optional self-citation data. The first element of every
            entry is treated as an iterable of bibcodes to exclude. When
            falsy, it is retrieved with ``get_selfcitations``.

    Returns:
        A tuple ``(tori, tori_ref, riq, riq_ref, tori_data)``. All four
        numbers are 0 when no Tori data is found or when the normalized
        citation records cannot be summed.
    """
    # Get additional data necessary for Tori calculation
    data = get_tori_data(identifiers)
    if len(data) == 0:
        return 0, 0, 0, 0, []
    # If we did not get self-citations, retrieve them
    if not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    self_citations = set(
        itertools.chain.from_iterable(entry[0] for entry in self_cits))
    # Now we can calculate the Tori index: flatten the per-paper citation
    # records and leave out the self-citations
    tori_data = [
        cit for cit in itertools.chain.from_iterable(
            p.rn_citation_data for p in data if p.rn_citation_data)
        if cit['bibcode'] not in self_citations and 'pubyear' in cit
    ]
    tori_data_ref = [
        cit for cit in itertools.chain.from_iterable(
            p.rn_citation_data for p in data
            if p.refereed and p.rn_citation_data)
        if cit['bibcode'] not in self_citations
    ]
    try:
        tori = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data]))
        tori_ref = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data_ref]))
    except Exception:
        # Best effort: malformed citation records yield zeroed indices, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return 0, 0, 0, 0, tori_data
    # The riq index follows from the Tori index and the year range
    this_year = datetime.now().year
    yrange = this_year - min(int(b[:4]) for b in bibcodes) + 1
    riq = int(1000.0 * sqrt(float(tori)) / float(yrange))
    # NOTE(review): a separate year range derived from ``data`` used to be
    # computed here but was never used; riq_ref has always been normalized
    # by ``yrange``. Confirm whether a refereed-only range was intended.
    riq_ref = int(1000.0 * sqrt(float(tori_ref)) / float(yrange))
    # Send the results back
    return tori, tori_ref, riq, riq_ref, tori_data
示例#5
0
def get_time_series(identifiers,
                    bibcodes,
                    data=None,
                    usagedata=None,
                    tori_data=None,
                    include_tori=True,
                    self_cits=None):
    """Build per-year time series of citation and usage indicators.

    For every year from the earliest publication year in ``bibcodes`` up to
    the current year, the i10, i100, h, g and read10 indicators (and
    optionally the Tori index) are calculated over the papers published up
    to that year and the citations received up to that year.

    Args:
        identifiers: Record identifiers used to retrieve any data that was
            not supplied by the caller.
        bibcodes: Bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        data: Optional citation records (``bibcode`` and ``citations``
            attributes). Retrieved with ``get_citations`` when falsy.
        usagedata: Optional usage records (``bibcode``, ``reads`` and
            ``author_num`` attributes). Retrieved with ``get_usage_data``
            when falsy.
        tori_data: Optional normalized citation records for the Tori index.
            Built from ``get_tori_data`` when falsy and ``include_tori`` is
            true.
        include_tori: Whether to include the ``'tori'`` series.
        self_cits: Optional self-citation data, only consulted when the
            Tori data has to be built here.

    Returns:
        A dict with the keys ``'i10'``, ``'i100'``, ``'h'``, ``'g'`` and
        ``'read10'`` (plus ``'tori'`` when ``include_tori`` is true), each
        mapping a year to the indicator value for that year.
    """
    series = {}
    i10 = {}
    i100 = {}
    h = {}
    g = {}
    r10 = {}
    tori = {}
    # Get data if nothing was supplied
    if not data:
        data = get_citations(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    if include_tori and not tori_data:
        # Self-citations are only needed to filter freshly retrieved Tori
        # data; touching them unconditionally used to raise a TypeError for
        # include_tori=False with the default self_cits=None.
        if not self_cits:
            self_cits = get_selfcitations(identifiers, bibcodes)[1]
        self_citations = set(
            itertools.chain.from_iterable(entry[0] for entry in self_cits))
        tdata = get_tori_data(identifiers)
        tori_data = [
            cit for cit in itertools.chain.from_iterable(
                p.rn_citation_data for p in tdata if p.rn_citation_data)
            if cit['bibcode'] not in self_citations and 'pubyear' in cit
        ]
    # Take a single snapshot of the current date so that all derived values
    # agree, even when the call straddles midnight on New Year's Eve
    today = datetime.now().date()
    this_year = today.year
    # Expected length of a reads histogram: one entry per year since 1996
    nentries = this_year - 1996 + 1
    pub_years = [int(b[:4]) for b in bibcodes]
    # Scale factor extrapolating the partial current year to a full year;
    # the number of elapsed days is always at least 1
    year_start = date(this_year, 1, 1)
    year_end = date(this_year, 12, 31)
    elapsed = (today - year_start).days + 1
    ndays = (year_end - year_start).days + 1
    r10_corr = float(ndays) / float(elapsed)
    for year in range(min(pub_years), this_year + 1):
        # Papers published up to and including this year
        biblist = {b for b, y in zip(bibcodes, pub_years) if y <= year}
        # Per-paper citation counts up to this year, highest first
        citations = sorted(
            (sum(1 for c in p.citations if int(c[:4]) <= year)
             for p in data if p.bibcode in biblist),
            reverse=True)
        if year < 1996:
            r10[year] = 0.0
        else:
            idx = year - 1996
            # Author-normalized reads in this year of the papers published
            # in the preceding ten years
            r10[year] = sum(
                float(p.reads[idx]) / float(p.author_num)
                for p in usagedata
                if p.bibcode in biblist and int(p.bibcode[:4]) > year - 10
                and p.reads and len(p.reads) == nentries)
        try:
            # Ranks are 1-based: h is the largest rank whose paper has at
            # least that many citations, g the largest rank whose square
            # does not exceed the cumulative citation count
            h[year] = max(
                rank for rank, n in enumerate(citations, 1) if rank <= n)
            g[year] = max(
                rank for rank, n in
                enumerate(np.cumsum(citations, axis=0), 1)
                if rank ** 2 <= n)
        except ValueError:
            # max() of an empty sequence: no paper qualifies
            h[year] = 0
            g[year] = 0
        i10[year] = len([c for c in citations if c >= 10])
        i100[year] = len([c for c in citations if c >= 100])
        if include_tori:
            tori[year] = np.sum(
                np.array([
                    r['auth_norm'] * r['ref_norm'] for r in tori_data
                    if r['pubyear'] <= year and r['cityear'] <= year
                ]))

    r10[this_year] = r10[this_year] * r10_corr
    series['i10'] = i10
    series['i100'] = i100
    series['h'] = h
    series['g'] = g
    series['read10'] = r10
    if include_tori:
        series['tori'] = tori

    return series
示例#6
0
def get_time_series(identifiers, bibcodes, data=None, usagedata=None,
                    tori_data=None, include_tori=True, self_cits=None):
    """Build per-year time series of citation and usage indicators.

    For every year from the earliest publication year in ``bibcodes`` up to
    the current year, the i10, i100, h, g and read10 indicators (and
    optionally the Tori index) are calculated over the papers published up
    to that year and the citations received up to that year.

    Args:
        identifiers: Record identifiers used to retrieve any data that was
            not supplied by the caller.
        bibcodes: Bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        data: Optional citation records (``bibcode`` and ``citations``
            attributes). Retrieved with ``get_citations`` when falsy.
        usagedata: Optional usage records (``bibcode``, ``reads`` and
            ``author_num`` attributes). Retrieved with ``get_usage_data``
            when falsy.
        tori_data: Optional normalized citation records for the Tori index.
            Built from ``get_tori_data`` when falsy and ``include_tori`` is
            true.
        include_tori: Whether to include the ``'tori'`` series.
        self_cits: Optional self-citation data, only consulted when the
            Tori data has to be built here.

    Returns:
        A dict with the keys ``'i10'``, ``'i100'``, ``'h'``, ``'g'`` and
        ``'read10'`` (plus ``'tori'`` when ``include_tori`` is true), each
        mapping a year to the indicator value for that year.
    """
    series = {}
    i10 = {}
    i100 = {}
    h = {}
    g = {}
    r10 = {}
    tori = {}
    # Get data if nothing was supplied
    if not data:
        data = get_citations(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    if include_tori and not tori_data:
        # Self-citations are only needed to filter freshly retrieved Tori
        # data; touching them unconditionally used to raise a TypeError for
        # include_tori=False with the default self_cits=None.
        if not self_cits:
            self_cits = get_selfcitations(identifiers, bibcodes)[1]
        self_citations = set(
            itertools.chain.from_iterable(entry[0] for entry in self_cits))
        tdata = get_tori_data(identifiers)
        tori_data = [
            cit for cit in itertools.chain.from_iterable(
                p.rn_citation_data for p in tdata if p.rn_citation_data)
            if cit['bibcode'] not in self_citations and 'pubyear' in cit
        ]
    # Determine the year range from a single snapshot of the current date,
    # so all derived values agree even across a New Year's Eve midnight
    today = datetime.now().date()
    this_year = today.year
    # Expected length of a reads histogram: one entry per year since 1996
    nentries = this_year - 1996 + 1
    pub_years = [int(b[:4]) for b in bibcodes]
    # Scale factor extrapolating the partial current year to a full year;
    # the number of elapsed days is always at least 1
    year_start = date(this_year, 1, 1)
    year_end = date(this_year, 12, 31)
    elapsed = (today - year_start).days + 1
    ndays = (year_end - year_start).days + 1
    r10_corr = float(ndays) / float(elapsed)
    for year in range(min(pub_years), this_year + 1):
        # Papers published up to and including this year
        biblist = {b for b, y in zip(bibcodes, pub_years) if y <= year}
        # Per-paper citation counts up to this year, highest first
        citations = sorted(
            (sum(1 for c in p.citations if int(c[:4]) <= year)
             for p in data if p.bibcode in biblist),
            reverse=True)
        if year < 1996:
            r10[year] = 0.0
        else:
            idx = year - 1996
            # Author-normalized reads in this year of the papers published
            # in the preceding ten years
            r10[year] = sum(
                float(p.reads[idx]) / float(p.author_num)
                for p in usagedata
                if p.bibcode in biblist and int(p.bibcode[:4]) > year - 10
                and p.reads and len(p.reads) == nentries)
        try:
            # Ranks are 1-based: h is the largest rank whose paper has at
            # least that many citations, g the largest rank whose square
            # does not exceed the cumulative citation count
            h[year] = max(
                rank for rank, n in enumerate(citations, 1) if rank <= n)
            g[year] = max(
                rank for rank, n in
                enumerate(np.cumsum(citations, axis=0), 1)
                if rank ** 2 <= n)
        except ValueError:
            # max() of an empty sequence: no paper qualifies
            h[year] = 0
            g[year] = 0
        i10[year] = len([c for c in citations if c >= 10])
        i100[year] = len([c for c in citations if c >= 100])
        if include_tori:
            tori[year] = np.sum(np.array([r['auth_norm'] * r['ref_norm'] for
                                          r in tori_data if
                                          r['pubyear'] <= year and
                                          r['cityear'] <= year]))

    r10[this_year] = r10[this_year] * r10_corr
    series['i10'] = i10
    series['i100'] = i100
    series['h'] = h
    series['g'] = g
    series['read10'] = r10
    if include_tori:
        series['tori'] = tori

    return series