import logging
import os
from typing import Optional, Set

import nltk
import pandas
import tqdm

from util.chart import AxisLabel, DistributionChart
from util.distribution import filter_distribution_if_in, plot_top_word_distribution, word_distribution_per_topics
from util.keyword import split_keywords, Topic
from util.log import init_local_logger, log_execution
from util.natural_language import lemmatize

# `logging.INFO` is evaluated when the module is imported, so `logging` has to
# be imported explicitly above; without it this line raises NameError.
_logger = init_local_logger(logging.INFO)


@log_execution(_logger)
def main(*, papers_details_mendeley_path: str, relevant_papers_path: str):
    """Load the paper tables and compute the per-topic word distribution.

    Args:
        papers_details_mendeley_path: Path of the CSV file with the paper
            details; its first column is used as the DataFrame index.
        relevant_papers_path: Path of the CSV file with the relevant papers.
    """
    papers_details: pandas.DataFrame = pandas.read_csv(
        papers_details_mendeley_path, index_col=0)
    relevant_papers: pandas.DataFrame = pandas.read_csv(relevant_papers_path)

    def filename(tag):
        # Build the output path for one artefact; `tag` becomes the suffix
        # after 'distribution_of_mendeley_keywords.'.
        return os.path.join('phase1_2', 'generated', f'distribution_of_mendeley_keywords.{tag}')

    # callback protocols currently are not supported by PyCharm :(
    # NOTE(review): `count_lemmas` is not defined in the visible part of this
    # module; presumably it is declared further down - confirm.
    outputs = word_distribution_per_topics(filename, count_lemmas, papers_details, relevant_papers)
import math import os import traceback from dataclasses import dataclass from enum import Enum from typing import Tuple import pandas import bibtexparser from util.bibtex_entry import id_tag_of from util.log import init_local_logger, log_execution _logger = init_local_logger(level=logging.INFO) class ForumType(Enum): JOURNAL = 'journal' CONFERENCE = 'conference' @log_execution(_logger) def main(*, raw_papers_path: str) -> Tuple[str, ...]: parser = bibtexparser.bparser.BibTexParser(common_strings=True, ignore_nonstandard_types=False) with open(raw_papers_path, 'r', encoding='utf-8') as bibfile: bibliography: bibtexparser.bibdatabase.BibDatabase = bibtexparser.load(bibfile, parser) papers = pandas.DataFrame() for entry in bibliography.get_entry_list():