示例#1
0
def plot_year_wise_publications():
    """Plot the number of publications per year as a bar chart.

    Counts the ``year`` attribute of every record yielded by ``get_data()``,
    draws one bar per year with the highest year first, saves the figure to
    ``year_wise.png`` (PNG, 300 dpi) and finally shows it.
    """
    p = palette.Palette()
    per_year = Counter(d.year for d in get_data())

    # Years are unique keys, so a single reverse sort puts the newest first.
    ordered = sorted(per_year.items(), reverse=True)
    labels = [year for year, _ in ordered]
    values = [count for _, count in ordered]
    ind = range(len(labels))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.bar(ind, values, color=p.cerulean())
    plt.xticks(ind, labels, rotation=90)
    plt.ylabel("Number of publications")
    plt.xlabel("Year (till 11 March 2019)")
    plt.tight_layout()
    # ``format`` is the savefig keyword for the output type; ``type`` is not
    # part of its signature.
    plt.savefig("year_wise.png", format="png", dpi=300)
    plt.show()
示例#2
0
def large_file_year_stats():
    """Plot publications per year (1980 onwards) from the large PubMed CSV.

    Reads ``FILE_PUBMED`` in chunks, takes the last four characters of the
    ``ShortDetails`` column as the year, keeps the values accepted by
    ``check_if_number`` that are greater than 1979, and draws one bar per
    year with the highest year first. The figure is saved to
    ``year_wise_other.png`` (PNG, 300 dpi) and then shown.
    """
    headers = ["Title", "URL", "Description", "Details", "ShortDetails", "Resource", "Type", "Identifiers", "Db",
               "EntrezUID", "Properties"]
    chunk_size = 100000

    # Only the year is used below, so keep just that column from each chunk
    # and concatenate once instead of re-copying a growing frame per chunk.
    year_parts = [
        chunk['ShortDetails'].str[-4:]
        for chunk in pd.read_csv(FILE_PUBMED, chunksize=chunk_size, names=headers, low_memory=False)
    ]
    years = pd.concat(year_parts)

    # Because ``names=`` is supplied, the first line of the file is read as
    # data; it is dropped here (presumably the file's own header row).
    years = years.iloc[1:]
    years = years[years.apply(check_if_number)]
    years = pd.to_numeric(years)
    years = years[years > 1979]

    # value_counts() yields unique years as the index; order them newest first.
    counts = years.value_counts().sort_index(ascending=False)
    labels = list(counts.index)
    values = counts.values
    ind = range(len(labels))

    plt.bar(ind, values, color=palette.Palette().magenta())
    plt.xticks(ind, labels, rotation=90)
    plt.ylabel("Number of publications")
    plt.xlabel("Year (till 11 March 2019)")
    plt.tight_layout()
    # ``format`` is the savefig keyword for the output type; ``type`` is not
    # part of its signature.
    plt.savefig("year_wise_other.png", format="png", dpi=300)
    plt.show()
示例#3
0
def author_per_year():
    """Plot the average number of authors per article for each year after 1979.

    For every record from ``get_data()`` with ``year > 1979`` the number of
    authors and the number of articles are accumulated per year; the ratio is
    drawn as one bar per year with the highest year first. The figure is
    saved to ``author_wise.png`` (PNG, 300 dpi) and then shown.
    """
    p = palette.Palette()
    author_totals = defaultdict(int)
    article_totals = defaultdict(int)
    for d in get_data():
        if d.year > 1979:
            author_totals[d.year] += len(d.authors)
            article_totals[d.year] += 1

    # Both dicts share the same keys; every key has at least one article, so
    # the division below cannot hit zero.
    labels = sorted(author_totals, reverse=True)
    values = [author_totals[y] / article_totals[y] for y in labels]
    ind = range(len(labels))

    plt.bar(ind, values, color=p.green())
    plt.xticks(ind, labels, rotation=90)
    plt.ylabel("Number of authors per article")
    plt.xlabel("Year (till 11 March 2019)")
    plt.tight_layout()
    plt.grid(axis='y', color="k", alpha=0.5, linestyle="--")
    # ``format`` is the savefig keyword for the output type; ``type`` is not
    # part of its signature.
    plt.savefig("author_wise.png", format="png", dpi=300)
    plt.show()
示例#4
0
def plot_expression_profile(gene: str, parent_structure: str = None):
    """Show a histogram of start-stage times for a gene's expression records.

    Args:
        gene: Gene identifier; passed to ``get_expression_pattern_in`` and
            shown in the plot title.
        parent_structure: Optional second argument forwarded unchanged to
            ``get_expression_pattern_in``.
    """
    p = palette.Palette()
    start_times = [
        float(d.start_stage.time)
        for d in get_expression_pattern_in(gene, parent_structure)
    ]

    plt.hist(start_times, color=p.cerulean())
    plt.ylabel("Frequency of Publications")
    plt.xlabel("hpf")
    # The plotted values are start_stage times, hence "Start Stage".
    plt.title("'{0}' Expression Start Stage".format(gene))
    plt.show()
示例#5
0
def budget_plot():
    """Show a bar chart comparing "% of GDP" for World and India.

    Reads ``FILE_BUDGET_DATA`` as CSV and discards the first four columns of
    every row. Of the remaining cells, row 0 supplies the x tick labels,
    row 1 the "India" series and row 2 the "World" series. The World bars
    are drawn first and the India bars on top of them at the same positions.
    """
    p = palette.Palette()
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation requires for file objects.
    with open(FILE_BUDGET_DATA, newline="") as f:
        data = [row[4:] for row in csv.reader(f)]

    ind = range(len(data[0]))
    world = [round(float(x), 2) for x in data[2]]
    india = [round(float(x), 2) for x in data[1]]
    plt.bar(ind, world, color=p.blue(), label="World")
    plt.bar(ind, india, color=p.orange(), label="India")
    plt.ylabel("% of GDP")
    plt.xlabel("Year")
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
               ncol=2, fancybox=True, shadow=True)
    plt.xticks(ind, data[0], rotation=90)
    plt.show()
示例#6
0
def journal_over_years(number_of_journals=3):
    """Plot the share of each year's papers held by that year's top journals.

    For every year strictly between 1999 and 2019 the journals of the records
    from ``get_data()`` are counted. The ``number_of_journals`` most common
    journals of each year are drawn as stacked horizontal bar segments whose
    width is the journal's percentage of all papers in that year. Each
    journal gets one colour, listed in the legend. The figure is saved to
    ``year_top_journals.png`` (PNG, 300 dpi) and then shown.

    Args:
        number_of_journals: How many top journals to draw per year
            (default 3, the previously hard-coded value).
    """
    p = palette.Palette(remove_white=True)
    year_data = defaultdict(Counter)
    for d in get_data():
        if 2019 > d.year > 1999:
            year_data[d.year][d.journal] += 1

    rows = []  # one (label, percentages, journal names) tuple per year
    journal_order = {}  # journals in first-seen order; drives colour choice
    for y, counts in year_data.items():
        total = sum(counts.values())
        top = counts.most_common()[:number_of_journals]
        shares = [count * 100 / total for _, count in top]
        names = [name for name, _ in top]
        for name in names:
            journal_order.setdefault(name, None)

        # A year with fewer journals than requested is padded with
        # zero-width placeholder segments so every year has the same number
        # of segments and the per-rank stacking below stays rectangular.
        missing = number_of_journals - len(top)
        shares.extend([0.0] * missing)
        names.extend([None] * missing)
        rows.append((f"{y}", shares, names))

    # Year labels are unique, so sorting on the label alone is sufficient.
    rows.sort(key=lambda row: row[0])
    year_label = [row[0] for row in rows]
    year_values = [row[1] for row in rows]
    year_journals = [row[2] for row in rows]

    color_palette = list(p)
    if len(color_palette) < len(journal_order):
        color_palette.extend(p.random(no_of_colors=len(journal_order), grade=40))
    colors = dict(zip(journal_order, color_palette))

    ind = range(len(year_label))
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Draw one layer per rank (most common first); each layer starts where
    # the previous one ended.
    left_margin = np.zeros(len(year_label))
    for rank in range(number_of_journals):
        widths = np.array([shares[rank] for shares in year_values], dtype=float)
        bar_colors = [
            "none" if names[rank] is None else colors[names[rank]]
            for names in year_journals
        ]
        ax.barh(ind, widths, left=left_margin, color=bar_colors)
        left_margin += widths

    plt.yticks(ind, year_label)

    legend_patches = [
        mpatches.Patch(color=color, label=journal)
        for journal, color in colors.items()
    ]
    plt.legend(handles=legend_patches, loc='center left', bbox_to_anchor=(1, 0.5))
    plt.xlabel("Percentage of all paper published in given year")
    plt.ylabel("Year")
    # Run the layout pass only after the labels and legend exist, so that it
    # can take them into account.
    plt.tight_layout()
    # ``format`` is the savefig keyword for the output type; ``type`` is not
    # part of its signature.
    plt.savefig("year_top_journals.png", format="png", dpi=300)
    plt.show()