def _citation_indices(counts):
    '''Compute the citation-based indices for one set of papers.

    counts is a list of citation counts; the caller is expected to supply
    it ordered from most to least cited. Returns a dictionary with the
    keys 'h', 'g', 'i10' and 'i100' (all integers, 0 for an empty list).
    '''
    indices = {}
    # Hirsch index: the largest rank whose paper has at least as many
    # citations as its rank
    indices['h'] = max(
        [rank for rank, count in enumerate(counts, 1) if count >= rank]
        or [0])
    # g index: the largest rank whose square does not exceed the cumulative
    # number of citations up to and including that rank. Every rank is
    # checked (no early exit) so the result is the maximum qualifying rank.
    g_index = 0
    cumulative = 0
    for rank, count in enumerate(counts, 1):
        cumulative += count
        if rank ** 2 <= cumulative:
            g_index = rank
    indices['g'] = g_index
    # The number of papers with 10 or more citations (i10)
    indices['i10'] = len([count for count in counts if count >= 10])
    # The number of papers with 100 or more citations (i100)
    indices['i100'] = len([count for count in counts if count >= 100])
    return indices


def _read10(papers, year, reads_index):
    '''Sum the author-normalized reads of papers from the last 10 years.

    Only papers whose bibcode year is later than year - 10 and whose reads
    sequence has exactly year - 1996 + 1 entries contribute; for each of
    those, the entry at position reads_index is divided by the number of
    authors.
    '''
    n_entries = year - 1996 + 1
    return sum([
        float(p.reads[reads_index]) / float(p.author_num)
        for p in papers
        if int(p.bibcode[:4]) > year - 10 and p.reads
        and len(p.reads) == n_entries
    ])


def get_indicators(identifiers, data=None, usagedata=None):
    '''Calculate the indicators for a set of publications.

    identifiers is only used to retrieve data and usagedata when these are
    not supplied (or are empty). The entries of data must provide the
    attributes citation_num and refereed; the entries of usagedata must
    provide bibcode (starting with a 4-digit year), reads, author_num and
    refereed.

    Returns a tuple (ind, ind_ref) of dictionaries with the keys 'h', 'g',
    'i10', 'i100', 'm' and 'read10'; ind covers all publications, ind_ref
    only the refereed ones.
    '''
    # Get the necessary data if we did not get any
    if not data:
        data = get_indicator_data(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    year = datetime.now().year
    # Range of publication years, from the earliest paper up to and
    # including the current year. Without any usage data there is no
    # earliest year, so the range is left undefined instead of letting
    # min() fail on an empty list.
    pub_years = [int(p.bibcode[:4]) for p in usagedata]
    yrange = year - min(pub_years) + 1 if pub_years else None
    # The citation data is already ordered from most to least cited, so
    # the position in the list is the citation rank
    ind = _citation_indices([p.citation_num for p in data])
    ind_ref = _citation_indices(
        [p.citation_num for p in data if p.refereed])
    # The m index is the Hirsch index divided by the range of publication
    # years (0.0 when that range is not available). The refereed value
    # uses the same range, i.e. the one determined from all publications.
    ind['m'] = float(ind['h']) / float(yrange) if yrange else 0.0
    ind_ref['m'] = float(ind_ref['h']) / float(yrange) if yrange else 0.0
    # The read10 index is calculated from reads for papers published in
    # the last 10 years, normalized by number of authors.
    # NOTE(review): the value for all papers uses the second-to-last entry
    # of reads while the refereed value uses the last entry. This asymmetry
    # is carried over unchanged from the previous implementation; it looks
    # unintentional -- confirm which entry is meant and align the two.
    ind['read10'] = _read10(usagedata, year, -2)
    ind_ref['read10'] = _read10(
        [p for p in usagedata if p.refereed], year, -1)
    # Send results back
    return ind, ind_ref
def test_get_indicator_data(self):
    '''Check that get_indicator_data returns a list of MetricsModel objects'''
    from models import get_indicator_data
    result = get_indicator_data(testset)
    # The container type is verified first, so a wrong return type is
    # reported before the individual entries are inspected
    is_list = isinstance(result, list)
    self.assertEqual(is_list, True)
    # Every entry has to be an instance of a class named MetricsModel
    class_names = [entry.__class__.__name__ for entry in result]
    self.assertTrue(all(name == 'MetricsModel' for name in class_names))
def test_get_indicator_data(self):
    '''Indicator data must come back as a list of MetricsModel instances'''
    from models import get_indicator_data
    records = get_indicator_data(testset)
    # First make sure we received a list at all
    self.assertEqual(isinstance(records, list), True)
    # Then collect every entry whose class is not called MetricsModel;
    # the test passes only if there are none
    unexpected = [
        record for record in records
        if record.__class__.__name__ != 'MetricsModel'
    ]
    self.assertTrue(not unexpected)
def _citation_indices(counts):
    '''Compute the citation-based indices for one set of papers.

    counts is a list of citation counts; the caller is expected to supply
    it ordered from most to least cited. Returns a dictionary with the
    keys 'h', 'g', 'i10' and 'i100' (all integers, 0 for an empty list).
    '''
    indices = {}
    # Hirsch index: the largest rank whose paper has at least as many
    # citations as its rank
    indices['h'] = max(
        [rank for rank, count in enumerate(counts, 1) if count >= rank]
        or [0])
    # g index: the largest rank whose square does not exceed the cumulative
    # number of citations up to and including that rank. Every rank is
    # checked (no early exit) so the result is the maximum qualifying rank.
    g_index = 0
    cumulative = 0
    for rank, count in enumerate(counts, 1):
        cumulative += count
        if rank ** 2 <= cumulative:
            g_index = rank
    indices['g'] = g_index
    # The number of papers with 10 or more citations (i10)
    indices['i10'] = len([count for count in counts if count >= 10])
    # The number of papers with 100 or more citations (i100)
    indices['i100'] = len([count for count in counts if count >= 100])
    return indices


def _read10(papers, year, reads_index):
    '''Sum the author-normalized reads of papers from the last 10 years.

    Only papers whose bibcode year is later than year - 10 and whose reads
    sequence has exactly year - 1996 + 1 entries contribute; for each of
    those, the entry at position reads_index is divided by the number of
    authors.
    '''
    n_entries = year - 1996 + 1
    return sum([
        float(p.reads[reads_index]) / float(p.author_num)
        for p in papers
        if int(p.bibcode[:4]) > year - 10 and p.reads
        and len(p.reads) == n_entries
    ])


def get_indicators(identifiers, data=None, usagedata=None):
    '''Calculate the indicators for a set of publications.

    identifiers is only used to retrieve data and usagedata when these are
    not supplied (or are empty). The entries of data must provide the
    attributes citation_num and refereed; the entries of usagedata must
    provide bibcode (starting with a 4-digit year), reads, author_num and
    refereed.

    Returns a tuple (ind, ind_ref) of dictionaries with the keys 'h', 'g',
    'i10', 'i100', 'm' and 'read10'; ind covers all publications, ind_ref
    only the refereed ones.
    '''
    # Get the necessary data if we did not get any
    if not data:
        data = get_indicator_data(identifiers)
    if not usagedata:
        usagedata = get_usage_data(identifiers)
    year = datetime.now().year
    # Range of publication years, from the earliest paper up to and
    # including the current year. Without any usage data there is no
    # earliest year, so the range is left undefined instead of letting
    # min() fail on an empty list.
    pub_years = [int(p.bibcode[:4]) for p in usagedata]
    yrange = year - min(pub_years) + 1 if pub_years else None
    # The citation data is already ordered from most to least cited, so
    # the position in the list is the citation rank
    ind = _citation_indices([p.citation_num for p in data])
    ind_ref = _citation_indices(
        [p.citation_num for p in data if p.refereed])
    # The m index is the Hirsch index divided by the range of publication
    # years (0.0 when that range is not available). The refereed value
    # uses the same range, i.e. the one determined from all publications.
    ind['m'] = float(ind['h']) / float(yrange) if yrange else 0.0
    ind_ref['m'] = float(ind_ref['h']) / float(yrange) if yrange else 0.0
    # The read10 index is calculated from reads for papers published in
    # the last 10 years, normalized by number of authors.
    # NOTE(review): the value for all papers uses the second-to-last entry
    # of reads while the refereed value uses the last entry. This asymmetry
    # is carried over unchanged from the previous implementation; it looks
    # unintentional -- confirm which entry is meant and align the two.
    ind['read10'] = _read10(usagedata, year, -2)
    ind_ref['read10'] = _read10(
        [p for p in usagedata if p.refereed], year, -1)
    # Send results back
    return ind, ind_ref