def main():
    """Run the section-10 t-SNE visualisation pipeline.

    Loads the ``section_10`` parameters from ``paper_config.yaml``, reads the
    paper / organisation / topic-mix / vector data, post-processes the Google
    papers, builds the combined t-SNE dataframe and renders (and saves) the
    visualisation.
    """
    with open(f"{project_dir}/paper_config.yaml", "r") as infile:
        pars = yaml.safe_load(infile)["section_10"]

    papers, papers_orgs, topic_mix, vectors = read_process_data()

    logging.info("Process google papers")
    papers_orgs_ = google_process(papers_orgs)

    logging.info("Making TSNE df")
    combi_df = make_combined_tsne_df(
        pars["activity_thresholds"],
        pars["period"],
        vectors,
        papers,
        papers_orgs_,
        pars["highlights_n"],
    )

    logging.info("Visualising Tsne")
    # Called for its side effect (save=True writes the chart); the returned
    # chart object was previously bound to an unused local.
    visualise_tsne(combi_df, save=True)


if __name__ == "__main__":
    # Keep a reference to the webdriver so it stays alive while charts save.
    driv = altair_visualisation_setup()
    alt.data_transformers.disable_max_rows()
    # Silence pandas SettingWithCopyWarning triggered by downstream slicing.
    pd.options.mode.chained_assignment = None
    main()
# %autoreload 2
# %config Completer.use_jedi = False

import pandas as pd
import numpy as np
from toolz import pipe

import statsmodels.api as sm
from statsmodels.api import add_constant
from sklearn.decomposition import PCA
import altair as alt

from narrowing_ai_research import project_dir
from narrowing_ai_research.utils.read_utils import (
    read_papers,
    read_papers_orgs,
    read_topic_mix,
    paper_orgs_processing,
)
from narrowing_ai_research.utils.altair_utils import (
    altair_visualisation_setup,
    save_altair,
)

# -
webd = altair_visualisation_setup()

# ### Read data

# +
# AI papers only, keeping the columns used downstream.
papers = (
    read_papers(keep_vars=['article_id', 'year', 'date', 'is_ai', 'citation_count'])
    .query("is_ai == True")
    .reset_index(drop=True)
)

porgs = read_papers_orgs()

# Organisation-level view of the AI papers.
orgs = (
    paper_orgs_processing(porgs, papers)
    .query("is_ai==True")
    .reset_index(drop=True)
)

tm = read_topic_mix()
# -