def main():
    """Run the t-SNE pipeline: load config and data, build the frame, plot it.

    Reads the ``section_10`` entry of ``paper_config.yaml`` found under
    ``project_dir``, loads the inputs with ``read_process_data``, passes the
    paper-organisation table through ``google_process``, builds the combined
    t-SNE dataframe and hands it to ``visualise_tsne`` with ``save=True``.

    Returns:
        None.
    """
    config_path = f"{project_dir}/paper_config.yaml"
    with open(config_path, "r") as config_file:
        params = yaml.safe_load(config_file)["section_10"]

    # The reader returns four objects; the topic mix is not used in this
    # function, so it is bound to a throwaway name.
    papers, papers_orgs, _topic_mix, vectors = read_process_data()

    logging.info("Process google papers")
    processed_orgs = google_process(papers_orgs)

    logging.info("Making TSNE df")
    activity_thresholds = params["activity_thresholds"]
    period = params["period"]
    highlights_n = params["highlights_n"]
    combi_df = make_combined_tsne_df(
        activity_thresholds,
        period,
        vectors,
        papers,
        processed_orgs,
        highlights_n,
    )

    logging.info("Visualising Tsne")
    # The returned chart object is not needed afterwards.
    visualise_tsne(combi_df, save=True)


# Script entry point: the setup below runs only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # Keep the value returned by the Altair setup helper in the module-level
    # name `driv`. It is not referenced in the visible code -- presumably the
    # plotting/saving helpers read it as a global; TODO confirm.
    driv = altair_visualisation_setup()
    # Lift Altair's default cap on the number of rows a chart may embed.
    alt.data_transformers.disable_max_rows()
    # Switch off pandas' chained-assignment (SettingWithCopy) warning.
    pd.options.mode.chained_assignment = None
    main()
# Example no. 2
# 0
# %autoreload 2
# %config Completer.use_jedi = False
from narrowing_ai_research.utils.read_utils import read_papers, read_papers_orgs, read_topic_mix, paper_orgs_processing
import pandas as pd
import numpy as np
from toolz import pipe
from narrowing_ai_research import project_dir
import statsmodels.api as sm
from statsmodels.api import add_constant
from sklearn.decomposition import PCA
import altair as alt

from narrowing_ai_research.utils.altair_utils import altair_visualisation_setup, save_altair
# -

# Run the project's Altair setup helper and keep its return value in `webd`
# (presumably a web driver used when saving charts -- TODO confirm).
webd = altair_visualisation_setup()

# ### Read data

# +
# Load the papers table asking for the listed variables, keep the rows where
# `is_ai` is True, and renumber the index from zero.
papers = (
    read_papers(
        keep_vars=["article_id", "year", "date", "is_ai", "citation_count"]
    )
    .query("is_ai == True")
    .reset_index(drop=True)
)

# Paper-organisation table as returned by the reader.
porgs = read_papers_orgs()

# Combine the organisation table with the filtered papers, then apply the
# same `is_ai` filter and index reset to the result.
orgs = (
    paper_orgs_processing(porgs, papers)
    .query("is_ai==True")
    .reset_index(drop=True)
)

# Topic mix as returned by the reader.
tm = read_topic_mix()
# -