def _get_edges_per_slot():
    """Load the per-year HepTh citation edge files.

    Returns:
        A dict keyed by year (1992 through 2003 inclusive) whose values are
        whatever ``HepReader.read_edges`` returns for that year's ``.edges``
        file under ``datasets/hepth/timeslots/``.
    """
    path_template = "datasets/hepth/timeslots/cit-HepTh-{0}.edges"
    return {
        year: HepReader.read_edges(path_template.format(year))
        for year in range(1992, 2004)
    }
# NOTE(review): this call looks like the trailing statement of a preceding
# definition that is not visible here -- confirm its intended indentation.
P.show()


def plot_data_distribution():
    """Plot the number of citation edges per time slot of the HepTh dataset.

    Reads the cleaned paper dates and the citation edges from the HepTh
    files, splits the edges into time slots with
    ``HepReader.split_to_timeslots``, prints the size of every slot, and
    draws the slot sizes against the slot boundary dates.

    The slot boundaries are the first day of January, June, September and
    December of every year from 1992 through 2003.
    """
    minyear = 1992
    maxyear = 2003

    # Plain decimal literals (1, 6, ...) instead of the Python-2-only
    # zero-prefixed forms (01, 06), which are a SyntaxError on Python 3.
    slot_months = (1, 6, 9, 12)
    x = sorted(
        datetime(year, month, 1)
        for month in slot_months
        for year in range(minyear, maxyear + 1)
    )

    dates = HepReader.read_dates("/home/stpk/dev/role-mining/datasets/hepth/cit-HepTh-dates-cleaned.txt")
    edges = HepReader.read_edges("/home/stpk/dev/role-mining/datasets/hepth/cit-HepTh.txt")
    # presumably a mapping keyed by the boundary dates in ``x`` (it is indexed
    # with them below) -- verify against HepReader.split_to_timeslots.
    slots = HepReader.split_to_timeslots(dates, edges, x)

    # Diagnostic output. Single-argument print(...) behaves the same on
    # Python 2 and Python 3.
    for slot in slots.values():
        print(len(slot))

    citats = [len(slots[t]) for t in x]
    print(citats)
    print(x)

    fig, ax = P.subplots()
    ax.xaxis_date()
    # Title is Polish for "New publications per quarter".
    P.title("Nowe publikacje na kwartal")
    # Label is Polish for "New citations".
    P.plot(x, citats, color='b', alpha=0.5, label="Nowe cytowania")
    ax.set_xticks(x)
    P.xticks(rotation=70)