def load_sentiment_dl_dataset(self):
    """Fetch the stock sentiment CSV and return its first 100 rows.

    The file is obtained through ``t.download_dataset`` and whatever that
    helper returns (presumably a local file path -- confirm against the
    helper) is handed to ``pd.read_csv``. Malformed rows are skipped with
    a warning rather than aborting the load.

    Returns:
        pandas.DataFrame: rows at positions 0..99 of the parsed CSV (fewer
        if the file is shorter).
    """
    output_file_name = 'stock.csv'
    output_folder = 'sentiment/'
    data_url = 'http://ckl-it.de/wp-content/uploads/2020/12/stock_data.csv'
    csv_source = t.download_dataset(data_url, output_file_name, output_folder)
    try:
        # pandas >= 1.3: 'warn' is the documented replacement for
        # error_bad_lines=False combined with the default warn_bad_lines=True.
        frame = pd.read_csv(csv_source, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; use the legacy keyword
        # (removed in pandas 2.0, which is why it is no longer the default path).
        frame = pd.read_csv(csv_source, error_bad_lines=False)
    return frame.iloc[0:100]
def load_pos_train_dataset_and_get_path(self):
    """Fetch the UD French GSD 2.3 POS training file.

    Returns:
        The value returned by ``t.download_dataset`` for this file
        (presumably the local path of the downloaded copy, as the method
        name suggests -- confirm against the helper).
    """
    output_file_name = 'ud_french.txt'
    output_folder = 'pos/'
    # No trailing whitespace in the URL: a stray space is not part of the
    # resource name and may be rejected or percent-encoded by HTTP clients.
    data_url = 'https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/fr/pos/UD_French/UD_French-GSD_2.3.txt'
    return t.download_dataset(data_url, output_file_name, output_folder)
def load_multi_classifier_dl_dataset(self):
    """Fetch the e2e multi-label test CSV and return its first 100 rows.

    The result of ``t.download_dataset`` (presumably a local file path --
    confirm against the helper) is parsed with ``pd.read_csv``.

    Returns:
        pandas.DataFrame: rows at positions 0..99 of the parsed CSV.
    """
    source_url = "http://ckl-it.de/wp-content/uploads/2020/12/testset_w_refs.csv"
    csv_source = t.download_dataset(source_url, 'e2e_test.csv', 'multi_classifier_dl/')
    frame = pd.read_csv(csv_source)
    return frame.iloc[:100]
def load_classifier_dl_dataset(self):
    """Fetch the news-category test CSV and return its first 15 rows.

    The result of ``t.download_dataset`` (presumably a local file path --
    confirm against the helper) is parsed with ``pd.read_csv``.

    Returns:
        pandas.DataFrame: rows at positions 0..14 of the parsed CSV.
    """
    source_url = "https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/classifier-dl/news_Category/news_category_test.csv"
    csv_source = t.download_dataset(source_url, 'news_category_test.csv', 'classifier_dl/')
    frame = pd.read_csv(csv_source)
    return frame.iloc[:15]
def load_ner_train_dataset_and_get_path(self):
    """Fetch the English NER training file used by the NER tests.

    Returns:
        The value returned by ``t.download_dataset`` for this file
        (presumably the local path of the downloaded copy, as the method
        name suggests -- confirm against the helper).
    """
    source_url = "https://github.com/patverga/torch-ner-nlp-from-scratch/raw/master/data/conll2003/eng.train"
    # NOTE(review): the local file name says "conll2008" while the URL points
    # at a conll2003 directory; the name is kept unchanged here.
    return t.download_dataset(source_url, 'conll2008.data', 'ner/')