def get_tori(identifiers, bibcodes, self_cits=None):
    """Calculate the Tori and riq indices for a set of publications.

    Args:
        identifiers: passed to ``get_tori_data`` (and to
            ``get_selfcitations`` when ``self_cits`` is not supplied).
        bibcodes: bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        self_cits: optional self-citation data. Only the first element of
            every entry is used, and it is treated as an iterable of
            citing bibcodes. When falsy it is retrieved with
            ``get_selfcitations(identifiers, bibcodes)[1]``.

    Returns:
        A tuple ``(tori, tori_ref, riq, riq_ref, tori_data)``.
        ``(0, 0, 0, 0, [])`` is returned when ``get_tori_data`` yields
        nothing, and ``(0, 0, 0, 0, tori_data)`` when the normalized
        citation entries cannot be summed.

    Raises:
        ValueError: if ``bibcodes`` is empty or a bibcode does not start
            with a four digit year.
    """
    # Get additional data necessary for Tori calculation
    data = get_tori_data(identifiers)
    if len(data) == 0:
        return 0, 0, 0, 0, []
    # If we did not get self-citations, retrieve them
    if not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    self_citations = set(
        itertools.chain.from_iterable(x[0] for x in self_cits))
    # Flatten the per-paper citation entries, dropping self-citations.
    # The all-papers list additionally requires a 'pubyear' key.
    tori_data = [
        cit
        for paper in data if paper.rn_citation_data
        for cit in paper.rn_citation_data
        if cit['bibcode'] not in self_citations and 'pubyear' in cit
    ]
    tori_data_ref = [
        cit
        for paper in data if paper.refereed and paper.rn_citation_data
        for cit in paper.rn_citation_data
        if cit['bibcode'] not in self_citations
    ]
    # Now we can calculate the Tori index
    try:
        tori = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data]))
        tori_ref = np.sum(
            np.array([r['auth_norm'] * r['ref_norm']
                      for r in tori_data_ref]))
    except Exception:
        # Best effort: entries with missing or unusable normalization
        # values yield zeroed indices. Unlike a bare ``except`` this lets
        # SystemExit and KeyboardInterrupt propagate.
        return 0, 0, 0, 0, tori_data
    # The riq index follows from the Tori index and the year range
    yrange = datetime.now().year - min(int(b[:4]) for b in bibcodes) + 1
    riq = int(1000.0 * sqrt(float(tori)) / float(yrange))
    # NOTE(review): riq_ref is normalized by the same year range as riq
    # (derived from ``bibcodes``), not by a refereed-only range - confirm
    # that this is intended.
    riq_ref = int(1000.0 * sqrt(float(tori_ref)) / float(yrange))
    # Send the results back
    return tori, tori_ref, riq, riq_ref, tori_data
def test_get_tori_data(self):
    '''get_tori_data must return a list holding only MetricsModel entries'''
    from models import get_tori_data
    records = get_tori_data(testset)
    # Only the container type and the class name of each entry are
    # checked here, not the contents of the entries
    self.assertIsInstance(records, list)
    class_names = [rec.__class__.__name__ for rec in records]
    self.assertTrue(all(name == 'MetricsModel' for name in class_names))
def test_get_tori_data(self):
    '''Check the shape of what get_tori_data returns for the test set'''
    from models import get_tori_data
    result = get_tori_data(testset)
    # The result has to be a list ...
    self.assertIsInstance(result, list)
    # ... and every entry in it has to be a MetricsModel instance
    unexpected = [
        entry for entry in result
        if entry.__class__.__name__ != 'MetricsModel'
    ]
    self.assertTrue(not unexpected)
def get_tori(identifiers, bibcodes, self_cits=None):
    """Calculate the Tori and riq indices for a set of publications.

    Args:
        identifiers: passed to ``get_tori_data`` (and to
            ``get_selfcitations`` when ``self_cits`` is not supplied).
        bibcodes: bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        self_cits: optional self-citation data. Only the first element of
            every entry is used, and it is treated as an iterable of
            citing bibcodes. When falsy it is retrieved with
            ``get_selfcitations(identifiers, bibcodes)[1]``.

    Returns:
        A tuple ``(tori, tori_ref, riq, riq_ref, tori_data)``.
        ``(0, 0, 0, 0, [])`` is returned when ``get_tori_data`` yields
        nothing, and ``(0, 0, 0, 0, tori_data)`` when the normalized
        citation entries cannot be summed.

    Raises:
        ValueError: if ``bibcodes`` is empty or a bibcode does not start
            with a four digit year.
    """
    # Get additional data necessary for Tori calculation
    data = get_tori_data(identifiers)
    if len(data) == 0:
        return 0, 0, 0, 0, []
    # If we did not get self-citations, retrieve them
    if not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    self_citations = set(
        itertools.chain.from_iterable(x[0] for x in self_cits))
    # Flatten the per-paper citation entries, dropping self-citations.
    # The all-papers list additionally requires a 'pubyear' key.
    tori_data = [
        cit
        for paper in data if paper.rn_citation_data
        for cit in paper.rn_citation_data
        if cit['bibcode'] not in self_citations and 'pubyear' in cit
    ]
    tori_data_ref = [
        cit
        for paper in data if paper.refereed and paper.rn_citation_data
        for cit in paper.rn_citation_data
        if cit['bibcode'] not in self_citations
    ]
    # Now we can calculate the Tori index
    try:
        tori = np.sum(
            np.array([r['auth_norm'] * r['ref_norm'] for r in tori_data]))
        tori_ref = np.sum(
            np.array([r['auth_norm'] * r['ref_norm']
                      for r in tori_data_ref]))
    except Exception:
        # Best effort: entries with missing or unusable normalization
        # values yield zeroed indices. Unlike a bare ``except`` this lets
        # SystemExit and KeyboardInterrupt propagate.
        return 0, 0, 0, 0, tori_data
    # The riq index follows from the Tori index and the year range
    yrange = datetime.now().year - min(int(b[:4]) for b in bibcodes) + 1
    riq = int(1000.0 * sqrt(float(tori)) / float(yrange))
    # NOTE(review): riq_ref is normalized by the same year range as riq
    # (derived from ``bibcodes``), not by a refereed-only range - confirm
    # that this is intended.
    riq_ref = int(1000.0 * sqrt(float(tori_ref)) / float(yrange))
    # Send the results back
    return tori, tori_ref, riq, riq_ref, tori_data
def get_time_series(identifiers, bibcodes, data=None, usagedata=None,
                    tori_data=None, include_tori=True, self_cits=None):
    """Build per-year time series of the citation and usage indicators.

    For every year from the earliest publication year found in
    ``bibcodes`` up to the current year, the indicators are computed over
    the papers published up to that year, counting only citations whose
    bibcode year is not later than that year.

    Args:
        identifiers: passed to the ``get_*`` retrieval helpers for any
            input that was not supplied.
        bibcodes: bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        data: optional records exposing ``bibcode`` and ``citations`` (an
            iterable of citing bibcodes). Retrieved with
            ``get_citations`` when falsy.
        usagedata: optional records exposing ``bibcode``, ``reads`` and
            ``author_num``. Retrieved with ``get_usage_data`` when falsy.
        tori_data: optional list of dicts with the keys ``auth_norm``,
            ``ref_norm``, ``pubyear`` and ``cityear``. Derived from
            ``get_tori_data`` (minus self-citations) when falsy and
            ``include_tori`` is set.
        include_tori: whether to add the ``tori`` series.
        self_cits: optional self-citation data; only the first element of
            every entry is used. Retrieved with ``get_selfcitations``
            when falsy and ``include_tori`` is set.

    Returns:
        A dict with the keys ``i10``, ``i100``, ``h``, ``g`` and
        ``read10`` (plus ``tori`` when ``include_tori`` is set), each
        mapping a year to the indicator value for that year.

    Raises:
        ValueError: if ``bibcodes`` is empty or a bibcode does not start
            with a four digit year.
    """
    series = {}
    i10 = {}
    i100 = {}
    h = {}
    g = {}
    r10 = {}
    tori = {}
    # Get data if nothing was supplied
    if not data:
        data = get_citations(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    if include_tori and not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    if include_tori and not tori_data:
        # The self-citation set is only needed to filter freshly retrieved
        # Tori data. Building it here (rather than unconditionally) keeps
        # include_tori=False working when no self_cits were supplied.
        self_citations = set(
            itertools.chain.from_iterable(x[0] for x in self_cits))
        tdata = get_tori_data(identifiers)
        tori_data = [
            cit
            for paper in tdata if paper.rn_citation_data
            for cit in paper.rn_citation_data
            if cit['bibcode'] not in self_citations and 'pubyear' in cit
        ]
    # Take a single snapshot of "now" so that every derived value refers
    # to the same day, even when running across midnight on New Year's Eve
    now = datetime.now()
    current_year = now.year
    # Reads are only used when they hold one entry per year since 1996
    Nentries = current_year - 1996 + 1
    # Determine the year range
    years = [int(b[:4]) for b in bibcodes]
    yrange = range(min(years), current_year + 1)
    # Correction factor scaling the reads of the running year to a full
    # year: days in the year divided by days elapsed (always >= 1)
    d0 = date(current_year, 1, 1)
    d1 = date(current_year, now.month, now.day)
    d2 = date(current_year, 12, 31)
    delta = (d1 - d0).days + 1
    ndays = (d2 - d0).days + 1
    r10_corr = float(ndays) / float(delta)
    for year in yrange:
        # Papers published up to and including this year
        published = set(b for b in bibcodes if int(b[:4]) <= year)
        # Per-paper citation counts up to this year, most cited first
        citations = sorted(
            (sum(1 for c in p.citations if int(c[:4]) <= year)
             for p in data if p.bibcode in published),
            reverse=True)
        if year < 1996:
            r10[year] = 0.0
        else:
            idx = year - 1996
            # Author-normalized reads in this year of the papers
            # published in the ten years up to this year
            r10[year] = sum(
                float(p.reads[idx]) / float(p.author_num)
                for p in usagedata
                if p.bibcode in published
                and int(p.bibcode[:4]) > year - 10
                and p.reads and len(p.reads) == Nentries)
        # Ranks are 1-based: h is the largest rank whose paper has at
        # least that many citations, g the largest rank whose square does
        # not exceed the citations accumulated up to that rank
        h_ranks = [i for i, n in enumerate(citations, 1) if i <= n]
        g_ranks = [
            i for i, n in enumerate(np.cumsum(citations, axis=0), 1)
            if i**2 <= n
        ]
        h[year] = max(h_ranks) if h_ranks else 0
        g[year] = max(g_ranks) if g_ranks else 0
        i10[year] = len([c for c in citations if c >= 10])
        i100[year] = len([c for c in citations if c >= 100])
        if include_tori:
            tori[year] = np.sum(
                np.array([
                    r['auth_norm'] * r['ref_norm'] for r in tori_data
                    if r['pubyear'] <= year and r['cityear'] <= year
                ]))
    # The year range is empty when every bibcode lies in a future year;
    # there is no current-year entry to correct in that case
    if current_year in r10:
        r10[current_year] = r10[current_year] * r10_corr
    series['i10'] = i10
    series['i100'] = i100
    series['h'] = h
    series['g'] = g
    series['read10'] = r10
    if include_tori:
        series['tori'] = tori
    return series
def get_time_series(identifiers, bibcodes, data=None, usagedata=None,
                    tori_data=None, include_tori=True, self_cits=None):
    """Build per-year time series of the citation and usage indicators.

    For every year from the earliest publication year found in
    ``bibcodes`` up to the current year, the indicators are computed over
    the papers published up to that year, counting only citations whose
    bibcode year is not later than that year.

    Args:
        identifiers: passed to the ``get_*`` retrieval helpers for any
            input that was not supplied.
        bibcodes: bibcodes of the publications; the first four characters
            of each are parsed as the publication year.
        data: optional records exposing ``bibcode`` and ``citations`` (an
            iterable of citing bibcodes). Retrieved with
            ``get_citations`` when falsy.
        usagedata: optional records exposing ``bibcode``, ``reads`` and
            ``author_num``. Retrieved with ``get_usage_data`` when falsy.
        tori_data: optional list of dicts with the keys ``auth_norm``,
            ``ref_norm``, ``pubyear`` and ``cityear``. Derived from
            ``get_tori_data`` (minus self-citations) when falsy and
            ``include_tori`` is set.
        include_tori: whether to add the ``tori`` series.
        self_cits: optional self-citation data; only the first element of
            every entry is used. Retrieved with ``get_selfcitations``
            when falsy and ``include_tori`` is set.

    Returns:
        A dict with the keys ``i10``, ``i100``, ``h``, ``g`` and
        ``read10`` (plus ``tori`` when ``include_tori`` is set), each
        mapping a year to the indicator value for that year.

    Raises:
        ValueError: if ``bibcodes`` is empty or a bibcode does not start
            with a four digit year.
    """
    series = {}
    i10 = {}
    i100 = {}
    h = {}
    g = {}
    r10 = {}
    tori = {}
    # Get data if nothing was supplied
    if not data:
        data = get_citations(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    if include_tori and not self_cits:
        self_cits = get_selfcitations(identifiers, bibcodes)[1]
    if include_tori and not tori_data:
        # The self-citation set is only needed to filter freshly retrieved
        # Tori data. Building it here (rather than unconditionally) keeps
        # include_tori=False working when no self_cits were supplied.
        self_citations = set(
            itertools.chain.from_iterable(x[0] for x in self_cits))
        tdata = get_tori_data(identifiers)
        tori_data = [
            cit
            for paper in tdata if paper.rn_citation_data
            for cit in paper.rn_citation_data
            if cit['bibcode'] not in self_citations and 'pubyear' in cit
        ]
    # Take a single snapshot of "now" so that every derived value refers
    # to the same day, even when running across midnight on New Year's Eve
    now = datetime.now()
    current_year = now.year
    # Reads are only used when they hold one entry per year since 1996
    Nentries = current_year - 1996 + 1
    # Determine the year range
    years = [int(b[:4]) for b in bibcodes]
    yrange = range(min(years), current_year + 1)
    # Correction factor scaling the reads of the running year to a full
    # year: days in the year divided by days elapsed (always >= 1)
    d0 = date(current_year, 1, 1)
    d1 = date(current_year, now.month, now.day)
    d2 = date(current_year, 12, 31)
    delta = (d1 - d0).days + 1
    ndays = (d2 - d0).days + 1
    r10_corr = float(ndays) / float(delta)
    for year in yrange:
        # Papers published up to and including this year
        published = set(b for b in bibcodes if int(b[:4]) <= year)
        # Per-paper citation counts up to this year, most cited first
        citations = sorted(
            (sum(1 for c in p.citations if int(c[:4]) <= year)
             for p in data if p.bibcode in published),
            reverse=True)
        if year < 1996:
            r10[year] = 0.0
        else:
            idx = year - 1996
            # Author-normalized reads in this year of the papers
            # published in the ten years up to this year
            r10[year] = sum(
                float(p.reads[idx]) / float(p.author_num)
                for p in usagedata
                if p.bibcode in published
                and int(p.bibcode[:4]) > year - 10
                and p.reads and len(p.reads) == Nentries)
        # Ranks are 1-based: h is the largest rank whose paper has at
        # least that many citations, g the largest rank whose square does
        # not exceed the citations accumulated up to that rank
        h_ranks = [i for i, n in enumerate(citations, 1) if i <= n]
        g_ranks = [
            i for i, n in enumerate(np.cumsum(citations, axis=0), 1)
            if i**2 <= n
        ]
        h[year] = max(h_ranks) if h_ranks else 0
        g[year] = max(g_ranks) if g_ranks else 0
        i10[year] = len([c for c in citations if c >= 10])
        i100[year] = len([c for c in citations if c >= 100])
        if include_tori:
            tori[year] = np.sum(
                np.array([
                    r['auth_norm'] * r['ref_norm'] for r in tori_data
                    if r['pubyear'] <= year and r['cityear'] <= year
                ]))
    # The year range is empty when every bibcode lies in a future year;
    # there is no current-year entry to correct in that case
    if current_year in r10:
        r10[current_year] = r10[current_year] * r10_corr
    series['i10'] = i10
    series['i100'] = i100
    series['h'] = h
    series['g'] = g
    series['read10'] = r10
    if include_tori:
        series['tori'] = tori
    return series