def figure3d():
    """Plot the cost per JR5 download for each of the "big 7" providers.

    Cost per JR5 download = total cost per provider / number of JR5
    downloads for that provider.  The five providers present in the 1Figr
    dataset are read from their aggregator row (the row whose 'Journal'
    value equals the provider name); 'Elsevier Freedom' and
    'Elsevier Subscribed' are summed from the frames built by ``rf``.
    Costs are read from the supplementary workbook, grouped by 'Package'.

    A provider is only plotted when it has an aggregator row, a cost entry
    and a non-zero download count, so bar labels always match bar values.

    Chart Type: Bar Graph
    Y-Axis: Cost (in dollars) per JR5 Download
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider;
                        1figr_U_Virginia_edit_Supp_Data, Total cost for 2017
    X-Axis: Provider Name
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider;
                        1figr_U_Virginia_edit_Supp_Data, Total cost for 2017
    """
    data = pd.read_excel(filename,
                         sheet_name='Journals per Provider',
                         skiprows=8)

    # Providers that have their own rows in the 1Figr sheet.  The two
    # Elsevier subsets are not looked up here; they are added below.
    dataset_providers = [
        'Elsevier', 'Sage', 'Springer', 'Taylor & Francis', 'Wiley'
    ]

    stats_by_provider = []  # (provider name, JR5 downloads)
    for provider_name in dataset_providers:
        subset_by_provider = data.loc[data['Provider'] == provider_name]
        journals_data = subset_by_provider.groupby(
            'Journal', as_index=False).sum().values.tolist()
        for row in journals_data:
            # The aggregator row carries the provider name in column 0;
            # column 5 is read as its JR5 download total.
            if row[0] == provider_name:
                stats_by_provider.append((provider_name, row[5]))

    elsevier_freedom_collection = rf.make_freedom_collection_provider()
    stats_by_provider.append(
        ('Elsevier Freedom',
         elsevier_freedom_collection['Downloads JR5 2017 in 2017'].sum()))

    elsevier_subscribed_titles = rf.make_elsevier_subscribed_titles_provider()
    stats_by_provider.append(
        ('Elsevier Subscribed',
         elsevier_subscribed_titles['Downloads JR5 2017 in 2017'].sum()))

    # Reads cost data per provider from the supplementary file.  After the
    # groupby, column 0 is the package name and column 1 is read as its cost.
    cost_data = pd.read_excel('1figr_U_Virginia_edit_Supp_Data.xlsx')
    cost_rows = cost_data.groupby(['Package'],
                                  as_index=False).sum().values.tolist()
    cost_by_package = {row[0]: row[1] for row in cost_rows}

    # Build labels and values together so a provider missing from either
    # source cannot shift the remaining bars under the wrong label.
    labels = []
    cost_per_jr5_download = []
    for provider_name, jr5_downloads in stats_by_provider:
        if provider_name not in cost_by_package:
            continue
        if not jr5_downloads:
            # Cost per download is undefined without any downloads.
            continue
        labels.append(provider_name)
        cost_per_jr5_download.append(
            cost_by_package[provider_name] / jr5_downloads)

    # make plot
    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('Cost per Download, current year 2017 downloads (JR5)')
    bars = plt.bar(labels, cost_per_jr5_download, width=.8, color='green')
    plt.ylabel('Cost (dollars)')
    plt.ylim(0, 37)  # changes top and bottom limit of y axis in plot
    plt.xticks(rotation=90)

    # Annotate every bar with its dollar value, slightly above the bar top.
    for bar in bars:
        score = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2,
                 1.05 * score,
                 '${:,.2f}'.format(score),
                 ha='center',
                 va='bottom')
def _figure1g_count_titles_covering(sorted_reference_counts, target):
    """Count the leading journals taken before the running tally reaches target."""
    tally = 0
    titles = 0
    for references in sorted_reference_counts:
        if not tally < target:
            break
        titles += 1
        tally += references
    return titles


def _figure1g_title_shares(journals_data):
    """Return the (JR80, JR90, JR95, remaining) shares of a provider's titles.

    ``journals_data`` is a list of rows in which column 6 is read as the
    journal's reference count.  Each share is a fraction of the number of
    rows; the four shares are stacked by the caller, so JR90 and JR95 hold
    only the titles added on top of the previous tier.

    Raises ZeroDivisionError when ``journals_data`` is empty.
    """
    total_journals = len(journals_data)
    total_references = sum(row[6] for row in journals_data)
    # Most-referenced journals first, so each tier is the smallest leading set.
    sorted_counts = sorted((row[6] for row in journals_data), reverse=True)

    jr80_titles = _figure1g_count_titles_covering(sorted_counts,
                                                  total_references * 0.8)
    jr90_titles = _figure1g_count_titles_covering(sorted_counts,
                                                  total_references * 0.9)
    jr95_titles = _figure1g_count_titles_covering(sorted_counts,
                                                  total_references * 0.95)

    jr80_score = jr80_titles / total_journals
    jr90_score = jr90_titles / total_journals - jr80_score
    jr95_score = jr95_titles / total_journals - (jr80_score + jr90_score)
    remaining_score = 1 - (jr80_score + jr90_score + jr95_score)
    return jr80_score, jr90_score, jr95_score, remaining_score


def figure1g():
    """Plot the JR80, JR90 and JR95 title shares per provider (References).

    Covers Sage, Springer, Taylor & Francis and Wiley from the 1Figr
    dataset, plus Elsevier split into Elsevier Unmatched, Elsevier
    Subscribed and Elsevier Freedom (frames built by ``rf``).

    Plots References (articles published by your institution, referenced by
    other authors):
    JR80 = Journals which make up 80% of References
    JR90 = Journals which make up 90% of References
    JR95 = Journals which make up 95% of References

    Chart Type: Stacked Bar Graph
    Y-Axis: Percent of Total Titles
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Provider Name
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    data = pd.read_excel(filename,
                         sheet_name='Journals per Provider',
                         skiprows=8)

    # (provider name, jr80 share, jr90 share, jr95 share, remaining share)
    stats_by_provider = []

    # make stats for providers that exist in the 1Figr dataset
    for provider_name in ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']:
        subset_by_provider = data.loc[data['Provider'] == provider_name]
        journals_data = subset_by_provider.groupby(
            'Journal', as_index=False).sum().values.tolist()
        # Drop the aggregator row (the one named after the provider itself).
        # A new list is built instead of removing while iterating.
        journals_data = [
            row for row in journals_data if row[0] != provider_name
        ]
        stats_by_provider.append(
            (provider_name, ) + _figure1g_title_shares(journals_data))

    # Names are paired with the frames directly rather than written into a
    # helper column, so the frames returned by rf are left unmodified and
    # no row lookup is needed to recover the name.
    elsevier_providers = [
        ('Elsevier Unmatched', rf.make_elsevier_unmatched_provider()),
        ('Elsevier Subscribed', rf.make_elsevier_subscribed_titles_provider()),
        ('Elsevier Freedom', rf.make_freedom_collection_provider()),
    ]
    for name, provider_frame in elsevier_providers:
        journals_data = provider_frame.groupby(
            'Journal', as_index=False).sum().values.tolist()
        stats_by_provider.append(
            (name, ) + _figure1g_title_shares(journals_data))

    # make plot
    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Percentage of Titles Referenced by Provider (References)')
    plt.ylabel('Percent of total titles')
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    jr80s = mpatches.Patch(color='violet', label='JR80 titles')
    jr90s = mpatches.Patch(color='moccasin', label='JR90 titles')
    jr95s = mpatches.Patch(color='paleturquoise', label='JR95 titles')
    others = mpatches.Patch(color='silver', label='Total titles')
    plt.legend(handles=[jr80s, jr90s, jr95s, others],
               bbox_to_anchor=(1, 1))  # moves legend outside plot
    plt.xticks(rotation=45)

    # TODO: add value labels to the plotted bars
    for provider, jr80, jr90, jr95, remaining in stats_by_provider:
        # Each tier is stacked on top of the tiers below it.
        plt.bar(provider, jr80, color='violet')
        plt.bar(provider, jr90, bottom=jr80, color='moccasin')
        plt.bar(provider, jr95, bottom=(jr80 + jr90), color='paleturquoise')
        plt.bar(provider,
                remaining,
                bottom=(jr80 + jr90 + jr95),
                color='silver')
def figure2b():
    """Plot JR5 downloads as a share of JR1 downloads per provider.

    A measurement of currency.  JR5 downloads are 2017 articles downloaded
    in 2017; JR1 downloads are all years' articles downloaded in 2017.  We
    want to see what % of current articles people are downloading.

    Sage, Springer, Taylor & Francis and Wiley are read from their
    aggregator row in the 1Figr dataset (the row whose 'Journal' value
    equals the provider name).  'Elsevier Freedom Collection',
    'Elsevier Subscribed Titles' and 'Elsevier Unmatched' are summed from
    the frames built by ``rf``.  A provider is only plotted when a ratio
    could be computed for it, so bar labels always match bar values.

    Chart Type: Bar Graph
    Y-Axis: Percent of Total
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Provider Names
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    data = pd.read_excel(filename,
                         sheet_name='Journals per Provider',
                         skiprows=8)

    # Labels are collected together with the values so a provider without a
    # usable row cannot shift later bars under the wrong label.
    labels = []
    percent_jr5_of_jr1 = []

    def add_ratio(label, jr5_total, jr1_total):
        """Record jr5/jr1 for label unless there are no JR1 downloads."""
        if not jr1_total:
            # The share is undefined without any JR1 downloads.
            return
        labels.append(label)
        percent_jr5_of_jr1.append(jr5_total / jr1_total)

    for provider_name in ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']:
        subset_by_provider = data.loc[data['Provider'] == provider_name]
        journals_data = subset_by_provider.groupby(
            'Journal', as_index=False).sum().values.tolist()
        for row in journals_data:
            # Aggregator row: column 4 is read as the JR1 total and column 5
            # as the JR5 total.
            if row[0] == provider_name:
                add_ratio(provider_name, row[5], row[4])

    # make ratio of jr5 to jr1 downloads for the three Elsevier subsets
    elsevier_subsets = [
        ('Elsevier Freedom Collection',
         rf.make_freedom_collection_provider()),
        ('Elsevier Subscribed Titles',
         rf.make_elsevier_subscribed_titles_provider()),
        ('Elsevier Unmatched', rf.make_elsevier_unmatched_provider()),
    ]
    for label, subset in elsevier_subsets:
        add_ratio(label, subset['Downloads JR5 2017 in 2017'].sum(),
                  subset['Downloads JR1 2017'].sum())

    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('Percent JR5 downloads of JR1 downloads (for 2017)')
    bars = plt.bar(labels, percent_jr5_of_jr1, width=.8, color='green')
    plt.ylabel('Percent of Total')
    plt.ylim(0, 1)  # changes top and bottom limit of y axis in plot
    plt.xticks(rotation=90)
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    # Annotate every bar with its percentage, slightly above the bar top.
    for bar in bars:
        score = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2,
                 1.05 * score,
                 '{:.1%}'.format(score),
                 ha='center',
                 va='bottom')
def figure4b_references():
    """Plot the number of references per year (2008-2017) for each provider.

    Shows references made by your institution's affiliated authors, with
    Elsevier split into its Freedom, Subscribed and Unmatched subsets.
    For the Elsevier subsets every '<year>.1' column is summed over the
    whole frame; for Sage, Springer, Taylor & Francis and Wiley the value
    in the first row of the provider's rows is used.

    Chart Type: Line Graph
    Y-Axis: Number of References
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        References to journal/provider by your institution's
                        authors (as measured in Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    journals_sheet = pd.read_excel(filename,
                                   sheet_name='Journals per Provider',
                                   skiprows=8)
    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )

    years = [str(year) for year in range(2008, 2018)]
    reference_columns = [year + '.1' for year in years]

    # One list of ten yearly totals per provider, in plotting order.
    ref_by_provider = []

    # Elsevier subsets: add up the column over every row of the frame.
    for frame in elsevier_frames:
        ref_by_provider.append(
            [sum(frame[column].tolist()) for column in reference_columns])

    # Other providers: take the value from the first of the provider's rows.
    for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        publisher_rows = journals_sheet.loc[journals_sheet['Provider'] ==
                                            publisher]
        ref_by_provider.append([
            publisher_rows[column].tolist()[0] for column in reference_columns
        ])

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Number of References Made by {your_institution} Researchers by Provider'
    )
    plt.xlabel('Year')
    plt.ylabel('Number References')
    plt.ylim(0, 12000)

    # Label and line format for each series, matching ref_by_provider order.
    line_formats = (
        ('Elsevier Freedom', {'color': 'red', 'linestyle': 'dashed'}),
        ('Elsevier Subscribed', {'color': 'red'}),
        ('Elsevier Unmatched', {'color': 'black'}),
        ('Sage', {'color': 'blue'}),
        ('Springer', {'color': 'green'}),
        ('Taylor & Francis', {'color': 'purple'}),
        ('Wiley', {'color': 'orange'}),
    )
    for yearly_counts, (label, line_format) in zip(ref_by_provider,
                                                   line_formats):
        plt.plot(years, yearly_counts, label=label, **line_format)

    plt.legend()
def figure4b_percentage():
    """Plot each provider's yearly share of all references (2008-2017).

    The yearly total is built from the first row of every distinct
    provider in the 1Figr dataset.  Each plotted provider's yearly value is
    divided by that total.  Elsevier is split into its Freedom, Subscribed
    and Unmatched subsets, whose '<year>.1' columns are summed over the
    whole frame before dividing.

    Chart Type: Line Graph
    Y-Axis: Percentage of All References
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        References to journal/provider by your institution's
                        authors (as measured in Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    journals_sheet = pd.read_excel(filename,
                                   sheet_name='Journals per Provider',
                                   skiprows=8)

    years = [str(year) for year in range(2008, 2018)]
    reference_columns = [year + '.1' for year in years]

    def first_row_references(publisher):
        """Return the ten yearly values from the publisher's first row."""
        publisher_rows = journals_sheet.loc[journals_sheet['Provider'] ==
                                            publisher]
        return [
            publisher_rows[column].tolist()[0] for column in reference_columns
        ]

    # build total references for all providers by year
    yearly_totals = [0] * len(reference_columns)
    for publisher in journals_sheet['Provider'].unique():
        for position, value in enumerate(first_row_references(publisher)):
            yearly_totals[position] += value

    # Share of the yearly total per provider, in plotting order.
    ref_by_provider = []

    # build references by provider for each year
    for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        ref_by_provider.append([
            value / total for value, total in zip(
                first_row_references(publisher), yearly_totals)
        ])

    # Elsevier subsets: sum each year column over the frame, then divide.
    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )
    for frame in elsevier_frames:
        ref_by_provider.append([
            sum(frame[column].tolist()) / total
            for column, total in zip(reference_columns, yearly_totals)
        ])

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Percent of All References Made by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Percentage')
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    # Label and line format for each series, matching ref_by_provider order.
    line_formats = (
        ('Sage', {'color': 'blue'}),
        ('Springer', {'color': 'green'}),
        ('Taylor & Francis', {'color': 'purple'}),
        ('Wiley', {'color': 'orange'}),
        ('Elsevier Freedom', {'color': 'red', 'linestyle': 'dashed'}),
        ('Elsevier Subscribed', {'color': 'red'}),
        ('Elsevier Unmatched', {'color': 'black'}),
    )
    for yearly_shares, (label, line_format) in zip(ref_by_provider,
                                                   line_formats):
        plt.plot(years, yearly_shares, label=label, **line_format)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
def figure6b_percent_oa_articles():
    """Plot the yearly share of open-access papers per provider (2008-2017).

    For Sage, Springer, Taylor & Francis and Wiley the '<year>.3' value in
    the first row of the provider's rows is plotted as-is.  For the
    Elsevier Freedom, Subscribed and Unmatched subsets the share is the sum
    of the '<year>.2' column divided by the sum of the '<year>.4' column
    (per the original author's note: number of OA papers divided by total
    papers).

    Column names are derived from the year, so every year reads its own
    columns.

    Chart Type: Line Graph
    Y-Axis: Percentage of Papers
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        OA papers in 1findr per journal/provider
                        (intersection with Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)

    years = [
        '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',
        '2016', '2017'
    ]

    # holds percent papers published open access per year per provider
    percent_oa_papers_by_provider = []

    # populate % oa papers for the providers present in the 1Figr dataset
    providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']
    for provider_name in providers:
        subset_by_provider = original_1figr_dataset.loc[
            original_1figr_dataset['Provider'] == provider_name]
        percent_oa_papers_by_provider.append(
            [subset_by_provider[f'{year}.3'].tolist()[0] for year in years])

    # populate % oa papers for the Elsevier subsets
    elsevier_providers = [
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    ]
    for provider_frame in elsevier_providers:
        percent_oa_papers_by_year = []
        for year in years:
            # Both columns come from the same year; the unrolled version
            # read '2010.2' for 2011.
            oa_papers = provider_frame[f'{year}.2'].tolist()
            total_papers = provider_frame[f'{year}.4'].tolist()
            percent_oa_papers_by_year.append(
                sum(oa_papers) / sum(total_papers))
        percent_oa_papers_by_provider.append(percent_oa_papers_by_year)

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Percent of all Open Access by provider')
    plt.xlabel('Year')
    plt.ylabel('Percentage')
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.0%}'))  # formats y axis as %

    plt.plot(years,
             percent_oa_papers_by_provider[0],
             label='Sage',
             color='blue')
    plt.plot(years,
             percent_oa_papers_by_provider[1],
             label='Springer',
             color='green')
    plt.plot(years,
             percent_oa_papers_by_provider[2],
             label='Taylor & Francis',
             color='purple')
    plt.plot(years,
             percent_oa_papers_by_provider[3],
             label='Wiley',
             color='orange')
    plt.plot(years,
             percent_oa_papers_by_provider[4],
             label='Elsevier Freedom',
             color='red',
             linestyle='dashed')
    plt.plot(years,
             percent_oa_papers_by_provider[5],
             label='Elsevier Subscribed',
             color='red')
    plt.plot(years,
             percent_oa_papers_by_provider[6],
             label='Elsevier Unmatched',
             color='black')

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
def figure6b_oa_available_articles():
    """Plot the number of OA-available articles per provider (2008-2017).

    Elsevier is split into its Freedom, Subscribed and Unmatched subsets,
    whose '<year>.2' columns are summed over the whole frame.  For Sage,
    Springer, Taylor & Francis and Wiley the '<year>.2' value in the first
    row of the provider's rows is used.

    Chart Type: Line Graph
    Y-Axis: Number of Open Access Articles
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        OA papers in 1findr per journal/provider
                        (intersection with Scopus)
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    """
    journals_sheet = pd.read_excel(filename,
                                   sheet_name='Journals per Provider',
                                   skiprows=8)
    elsevier_frames = (
        rf.make_freedom_collection_provider(),
        rf.make_elsevier_subscribed_titles_provider(),
        rf.make_elsevier_unmatched_provider(),
    )

    years = [str(year) for year in range(2008, 2018)]
    oa_columns = [year + '.2' for year in years]

    # One list of ten yearly counts per provider, in plotting order.
    oa_articles_by_provider = []

    # Elsevier subsets: add up the column over every row of the frame.
    for frame in elsevier_frames:
        oa_articles_by_provider.append(
            [sum(frame[column].tolist()) for column in oa_columns])

    # Other providers: take the value from the first of the provider's rows.
    for publisher in ('Sage', 'Springer', 'Taylor & Francis', 'Wiley'):
        publisher_rows = journals_sheet.loc[journals_sheet['Provider'] ==
                                            publisher]
        oa_articles_by_provider.append(
            [publisher_rows[column].tolist()[0] for column in oa_columns])

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle('Number of OA-Available Articles')
    plt.xlabel('Year')
    plt.ylabel('Number of Articles')

    # Label and line format for each series, matching the list order above.
    line_formats = (
        ('Elsevier Freedom', {'color': 'red', 'linestyle': 'dashed'}),
        ('Elsevier Subscribed', {'color': 'red'}),
        ('Elsevier Unmatched', {'color': 'black'}),
        ('Sage', {'color': 'blue'}),
        ('Springer', {'color': 'green'}),
        ('Taylor & Francis', {'color': 'purple'}),
        ('Wiley', {'color': 'orange'}),
    )
    for yearly_counts, (label, line_format) in zip(oa_articles_by_provider,
                                                   line_formats):
        plt.plot(years, yearly_counts, label=label, **line_format)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
def figure7e():
    """Plot 2017 JR1 downloads by Domain for two Elsevier title groups.

    For every distinct value of the 'Domain' column in the
    'Journals per Provider' sheet, sums the 'Downloads JR1 2017' column of
    the Elsevier Subscribed titles and of the Elsevier Freedom Collection
    titles, then draws one dot per group per domain. Domains are ordered by
    their combined (subscribed + freedom) download total.

    Chart Type: Dot Plot/Scatter Plot
    Y-Axis: Domain
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Number of JR1 Downloads
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019

    Side effects: sets two global matplotlib rcParams (tick widths) and
    creates a new pyplot figure; nothing is returned.
    """
    original_1figr_data = pd.read_excel(filename,
                                        sheet_name='Journals per Provider',
                                        skiprows=8)
    subscribed_titles_provider = rf.make_elsevier_subscribed_titles_provider()
    freedom_collection_provider = rf.make_freedom_collection_provider()

    # Blank Domain cells come back from pandas as float NaN. Filter them out
    # while building a *new* list: removing items from a list that is being
    # iterated can silently skip elements.
    unique_domains = sorted(
        domain for domain in original_1figr_data['Domain'].unique().tolist()
        if not isinstance(domain, float))

    def jr1_downloads(provider_frame, domain):
        # Total 'Downloads JR1 2017' over the rows of one domain.
        in_domain = provider_frame['Domain'] == domain
        return provider_frame.loc[in_domain, 'Downloads JR1 2017'].sum()

    # One tuple per domain:
    # (domain name, subscribed JR1 downloads, freedom JR1 downloads)
    stats_by_domain = [
        (domain,
         jr1_downloads(subscribed_titles_provider, domain),
         jr1_downloads(freedom_collection_provider, domain))
        for domain in unique_domains
    ]

    # Order domains by combined download count (ascending); ties keep their
    # alphabetical order because sorted() is stable.
    arrangement = sorted(stats_by_domain, key=lambda row: row[1] + row[2])

    # Split the tuples into parallel lists so they are easier to plot.
    domains_list = [row[0] for row in arrangement]
    subscribed_total = [row[1] for row in arrangement]
    freedom_total = [row[2] for row in arrangement]

    # make plot
    mpl.rcParams['ytick.major.width'] = 1
    mpl.rcParams['xtick.major.width'] = 1
    plt.figure(num=None, figsize=(8, 8))
    plt.suptitle('JR1 downloads by Domain')
    plt.scatter(subscribed_total, domains_list, color='blue')
    plt.scatter(freedom_total, domains_list, color='orange')

    # Explicit legend handles, matching the two scatter colours.
    subscribed_legend_label = mpatches.Patch(color='blue',
                                             label='Elsevier Subscribed')
    freedom_legend_label = mpatches.Patch(color='orange',
                                          label='Elsevier Freedom')
    plt.xlabel('Number of JR1 Downloads')
    plt.legend(loc='lower right',
               handles=[subscribed_legend_label, freedom_legend_label])
def _percent_papers_by_year(journals, years):
    """Return one ratio per year: institution papers / total Scopus papers.

    `journals` is a DataFrame slice of the 'Journals per Provider' data. For
    each year, the column keyed by the integer year is summed and divided by
    the sum of the column keyed by the string '<year>.4' (the 'total papers
    in Scopus' column, per the caller's docstring).

    Raises ZeroDivisionError when a year's total sums to zero, e.g. for an
    empty slice.
    """
    return [
        sum(journals[year].tolist()) / sum(journals[f'{year}.4'].tolist())
        for year in years
    ]


def figure5b_percentage():
    """Plot the share of each provider's papers written by institution authors.

    Shows, per year (2008-2017), papers published by the institution's
    authors as a percentage of all papers for each provider, separating
    Elsevier Freedom, Elsevier Subscribed and Elsevier Unmatched titles out
    from Elsevier as a whole. All papers are the 'total papers in scopus'
    columns.

    Chart Type: Line Graph
    Y-Axis: Percentage of Papers
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        Total papers in Scopus per journal/provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019

    Side effects: creates a new pyplot figure; nothing is returned.
    """
    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)
    year_numbers = range(2008, 2018)

    # Percent of papers per year by institution authors, one list per
    # provider, in the same order as `line_styles` below.
    percent_papers_by_provider = []

    # Non-Elsevier providers: filter the main sheet on the Provider column.
    providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']
    for provider_name in providers:
        subset_by_provider = original_1figr_dataset.loc[
            original_1figr_dataset['Provider'] == provider_name]
        percent_papers_by_provider.append(
            _percent_papers_by_year(subset_by_provider, year_numbers))

    # Elsevier subsets: already-filtered DataFrames supplied by `rf`.
    elsevier_freedom_collection = rf.make_freedom_collection_provider()
    elsevier_subscribed_titles = rf.make_elsevier_subscribed_titles_provider()
    elsevier_unmatched_titles = rf.make_elsevier_unmatched_provider()
    elsevier_providers = [
        elsevier_freedom_collection, elsevier_subscribed_titles,
        elsevier_unmatched_titles
    ]
    for provider_frame in elsevier_providers:
        percent_papers_by_provider.append(
            _percent_papers_by_year(provider_frame, year_numbers))

    # X-axis tick labels: '2008' ... '2017'.
    years = [str(year) for year in year_numbers]

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(
        f'Percent of All Articles published by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Percentage')
    plt.gca().yaxis.set_major_formatter(
        StrMethodFormatter('{x:,.2%}'))  # formats y axis as %

    # (legend label, line style) in the order the series were appended.
    line_styles = [
        ('Sage', {'color': 'blue'}),
        ('Springer', {'color': 'green'}),
        ('Taylor & Francis', {'color': 'purple'}),
        ('Wiley', {'color': 'orange'}),
        ('Elsevier Freedom', {'color': 'red', 'linestyle': 'dashed'}),
        ('Elsevier Subscribed', {'color': 'red'}),
        ('Elsevier Unmatched', {'color': 'black'}),
    ]
    for (label, style), series in zip(line_styles,
                                      percent_papers_by_provider):
        plt.plot(years, series, label=label, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))
def figure5b_papers():
    """Plot papers per year (2008-2017) by the institution's authors.

    Shows one line per provider, separating Elsevier Freedom, Elsevier
    Subscribed and Elsevier Unmatched titles out from Elsevier as a whole.
    Papers are publications by your institution's affiliated authors.

    Chart Type: Line Graph
    Y-Axis: Number of Papers
    Y-Axis Data Source: Original 1Figr Dataset, Journals Per Provider,
                        Total papers in Scopus per journal/provider
                        Elsevier_2019, Subscribed Journal List 2019
    X-Axis: Year
    X-Axis Data Source: Original 1Figr Dataset, Journals Per Provider, Provider
                        Elsevier_2019, Subscribed Journal List 2019

    Raises IndexError if one of the non-Elsevier providers has no rows in
    the sheet, because the first row of its subset is read directly.

    Side effects: creates a new pyplot figure; nothing is returned.
    """
    original_1figr_dataset = pd.read_excel(filename,
                                           sheet_name='Journals per Provider',
                                           skiprows=8)
    elsevier_freedom_collection = rf.make_freedom_collection_provider()
    elsevier_subscribed_titles = rf.make_elsevier_subscribed_titles_provider()
    elsevier_unmatched_titles = rf.make_elsevier_unmatched_provider()

    year_numbers = range(2008, 2018)

    # Paper totals for every provider, in the same order as `line_styles`
    # below; used to make the final plot.
    papers_by_provider = []

    # Elsevier subsets: add up the per-journal counts for each year.
    elsevier_providers = [
        elsevier_freedom_collection, elsevier_subscribed_titles,
        elsevier_unmatched_titles
    ]
    for provider_frame in elsevier_providers:
        papers_by_provider.append(
            [sum(provider_frame[year].tolist()) for year in year_numbers])

    # Other providers: use the first row of the provider's subset.
    # NOTE(review): presumably that first row is the provider-level total
    # row of the sheet - confirm against the dataset layout.
    providers = ['Sage', 'Springer', 'Taylor & Francis', 'Wiley']
    for provider_name in providers:
        subset_by_provider = original_1figr_dataset.loc[
            original_1figr_dataset['Provider'] == provider_name]
        papers_by_provider.append(
            [subset_by_provider[year].tolist()[0] for year in year_numbers])

    # X-axis tick labels: '2008' ... '2017'.
    years = [str(year) for year in year_numbers]

    plt.figure(num=None, figsize=(10, 10))
    plt.suptitle(f'Number of Articles (papers) by {your_institution} Authors')
    plt.xlabel('Year')
    plt.ylabel('Number of Articles')

    # (legend label, line style) in the order the series were appended.
    line_styles = [
        ('Elsevier Freedom', {'color': 'red', 'linestyle': 'dashed'}),
        ('Elsevier Subscribed', {'color': 'red'}),
        ('Elsevier Unmatched Titles', {'color': 'black'}),
        ('Sage', {'color': 'blue'}),
        ('Springer', {'color': 'green'}),
        ('Taylor & Francis', {'color': 'purple'}),
        ('Wiley', {'color': 'orange'}),
    ]
    for (label, style), series in zip(line_styles, papers_by_provider):
        plt.plot(years, series, label=label, **style)

    plt.legend(loc='center left', bbox_to_anchor=(1, 0.8))