示例#1
0
 def store_pd_frame(data_frame, identifier, session_id):
     """Pickle a data frame into the data folder of a session.

     Args:
         data_frame: Object exposing ``to_pickle(path)``.
         identifier: Identifier the pickle file path is derived from.
         session_id: Session whose data path is used as root path.
     """
     # The data folder is created first, then the target path is resolved.
     DiskStorageMisc.create_data_folder(session_id)
     session_data_root = DiskStorageMisc.get_session_data_path(session_id)
     pickle_path = DiskStorage.get_file_path_pickle(
         identifier,
         session_id,
         create_sub_dirs=1,
         root_path=session_data_root)
     data_frame.to_pickle(pickle_path)
示例#2
0
 def store_model(model, identifier, session_id):
     """Save a model into the data folder of a session.

     Args:
         model: Object exposing ``save(path)``.
         identifier: Identifier the model file path is derived from.
         session_id: Session whose data path is used as root path.
     """
     # The data folder is created first, then the target path is resolved.
     DiskStorageMisc.create_data_folder(session_id)
     session_data_root = DiskStorageMisc.get_session_data_path(session_id)
     model_path = DiskStorage.get_file_path_model(
         identifier,
         session_id,
         create_sub_dirs=1,
         root_path=session_data_root)
     model.save(model_path)
示例#3
0
 def get_file_path_h5_model(identifier,
                            session_id,
                            create_sub_dirs=0,
                            root_path=None):
     """Build the path of the h5 model file for an identifier.

     Args:
         identifier: Model identifier, resolved through
             ``DiskStorageMisc.get_identifier_path``.
         session_id: Session whose data path is the base directory.
         create_sub_dirs: Forwarded unchanged to ``get_identifier_path``.
         root_path: Forwarded unchanged to ``get_identifier_path``.

     Returns:
         The session data path joined with the resolved identifier
         followed by ``DiskStorage.h5_model_ext``.
     """
     session_data_dir = DiskStorageMisc.get_session_data_path(session_id)
     resolved_identifier = DiskStorageMisc.get_identifier_path(
         identifier, create_sub_dirs=create_sub_dirs, root_path=root_path)
     file_name = resolved_identifier + DiskStorage.h5_model_ext
     return os.path.join(session_data_dir, file_name)
示例#4
0
 def set_stopwords(session_id, stopwords):
     """Write the stopwords of a session to its JSON stopword file.

     Args:
         session_id: Session whose data path holds the stopword file.
         stopwords: Iterable of stopwords; stored as a JSON list in
             iteration order under ``sw_list_key``.
     """
     # Materialise the iterable so it is serialised as a JSON list.
     payload = {DiskStorageStopwordHandler.sw_list_key: list(stopwords)}
     target_path = os.path.join(
         DiskStorageMisc.get_session_data_path(session_id),
         DiskStorageStopwordHandler.file_name)
     DiskStorageMisc.create_data_folder(session_id)
     # ensure_ascii=False keeps non-ASCII words readable in the UTF-8 file.
     with open(target_path, 'w+', encoding='utf8') as stopword_file:
         json.dump(payload, stopword_file, ensure_ascii=False)
示例#5
0
 def set_categories(session_id, categories):
     """Write the category list of a session to its JSON category file.

     The file name is the value read from the session config under
     ``cat_id_key`` followed by ``ext_json``.

     Args:
         session_id: Session whose data path holds the category file.
         categories: Iterable of categories; stored as a JSON list in
             iteration order under ``cat_list_key``.
     """
     handler = DiskStorageCategoryListHandler
     # Materialise the iterable so it is serialised as a JSON list.
     payload = {handler.cat_list_key: list(categories)}
     session_data_dir = DiskStorageMisc.get_session_data_path(session_id)
     category_file = (SessionConfigReader.read_value(handler.cat_id_key)
                      + handler.ext_json)
     target_path = os.path.join(session_data_dir, category_file)
     DiskStorageMisc.create_data_folder(session_id)
     # ensure_ascii=False keeps non-ASCII names readable in the UTF-8 file.
     with open(target_path, 'w+', encoding='utf8') as category_json:
         json.dump(payload, category_json, ensure_ascii=False)
 def import_docs(csv_path=None):
     """Import a TenKGnad CSV corpus, store it and register its categories.

     Args:
         csv_path: Path of the CSV file. When ``None``, the path is built
             from the sessions folder, the current session id and the
             corpus id read from the session config.

     Returns:
         The identifier the data frame was stored under: the CSV file name
         up to its first dot.
     """
     if csv_path is None:
         # Default: <sessions_folder>/<session id>/<corpus id path><csv_ext>
         session_dir = os.path.join(TenKGnadImporter.sessions_folder,
                                    SessionConfigReader.get_session_id())
         configured_id = SessionConfigReader.read_value(
             TenKGnadImporter.corpus_id_key)
         corpus_file = (DiskStorageMisc.get_identifier_path(configured_id)
                        + TenKGnadImporter.csv_ext)
         csv_path = os.path.join(session_dir, corpus_file)

     category_col = TenKGnadImporter.category_name
     text_col = TenKGnadImporter.text_name

     # Headerless file, ';' separated, single-quote quoted:
     # first column is the category, second column is the text.
     corpus = pd.read_csv(
         csv_path,
         sep=';',
         quotechar='\'',
         quoting=csv.QUOTE_MINIMAL,
         header=None,
         names=[category_col, text_col])

     # Keep the plain labels before the column is rewritten below.
     raw_categories = corpus[category_col].tolist()
     # Wrap every category value in a single-element list.
     corpus[category_col] = corpus.apply(
         lambda row: [row[category_col]], axis=1)

     identifier = os.path.basename(csv_path).split('.')[0]
     Storage.store_pd_frame(corpus, identifier)
     SessionLogger.log(
         "TenKGnad Corpus ({} entries) has been imported into '{}' "
         "(columns: '{}', '{}').".format(
             len(corpus.index), identifier, category_col, text_col))

     # Register the distinct categories found in the corpus.
     distinct_categories = list(set(raw_categories))
     CategoryListHandler.set_categories(distinct_categories)
     return identifier
 def set_config(session_id, config_id, json_f):
     """Write a config object to the JSON file of a session config.

     Args:
         session_id: Session the config belongs to.
         config_id: Config identifier, resolved to a file path by
             ``DiskStorageSessionConfigReader.get_config_path``.
         json_f: JSON-serialisable object to write (indented by 4).
     """
     session_root = DiskStorageMisc.get_session_path(session_id)
     target_path = DiskStorageSessionConfigReader.get_config_path(
         session_id, config_id, create_sub_dirs=1, root_path=session_root)
     with open(target_path, 'w+', encoding='utf8') as config_file:
         json.dump(json_f, config_file, ensure_ascii=False, indent=4)
示例#8
0
 def read_stopwords(session_id):
     """Load the stopwords stored for a session.

     Args:
         session_id: Session whose data path holds the stopword file.

     Returns:
         A set built from the list stored under ``sw_list_key``, or an
         empty set when the stopword file does not exist.
     """
     source_path = os.path.join(
         DiskStorageMisc.get_session_data_path(session_id),
         DiskStorageStopwordHandler.file_name)
     if os.path.exists(source_path):
         with open(source_path, encoding='utf8') as stopword_file:
             stored = json.load(stopword_file)
         return set(stored[DiskStorageStopwordHandler.sw_list_key])
     # Nothing has been stored for this session yet.
     return set()
示例#9
0
 def read_categories(session_id):
     """Load the category list stored for a session.

     The file name is the value read from the session config under
     ``cat_id_key`` followed by ``ext_json``.

     Args:
         session_id: Session whose data path holds the category file.

     Returns:
         The value stored under ``cat_list_key``, or an empty list when
         the category file does not exist.
     """
     handler = DiskStorageCategoryListHandler
     session_data_dir = DiskStorageMisc.get_session_data_path(session_id)
     category_file = (SessionConfigReader.read_value(handler.cat_id_key)
                      + handler.ext_json)
     source_path = os.path.join(session_data_dir, category_file)
     if os.path.exists(source_path):
         with open(source_path, encoding='utf8') as category_json:
             stored = json.load(category_json)
         return stored[handler.cat_list_key]
     # Nothing has been stored for this session yet.
     return []
示例#10
0
 def list_ids(location, session_id):
     """List the identifiers of the files stored in a session location.

     Args:
         location: Path of the location, relative to the session path.
         session_id: Session whose path is the base directory.

     Returns:
         A list with one identifier per regular file directly inside the
         location, in the order returned by ``listdir``. An identifier is
         the file name without its last dot-separated part
         ("a.b.csv" -> "a.b"); a name without a dot is returned unchanged.
         Entries that are not regular files are skipped.
     """
     location_path = os.path.join(
         DiskStorageMisc.get_session_path(session_id), location)
     # rsplit('.', 1)[0] strips only the final extension; this is exactly
     # what the former hand-written part-counting loop produced.
     return [
         entry.rsplit('.', 1)[0]
         for entry in listdir(location_path)
         if os.path.isfile(os.path.join(location_path, entry))
     ]
 def get_config_path(session_id,
                     config_id,
                     create_sub_dirs=0,
                     root_path=None):
     """Return the path of the JSON file that stores a config.

     Args:
         session_id: Session the config belongs to; not used for the
             best-performing config.
         config_id: Config identifier. The special value
             ``DiskStorageSessionConfigReader.best_performing`` selects
             the best-performing config file.
         create_sub_dirs: Forwarded unchanged to ``get_identifier_path``.
         root_path: Forwarded unchanged to ``get_identifier_path``.

     Returns:
         ``best_performing_f_name + json_ext`` for the best-performing
         config; otherwise ``<sessions_dir>/<session_id>/`` joined with
         the resolved identifier path followed by ``json_ext``.
     """
     reader = DiskStorageSessionConfigReader
     if config_id == reader.best_performing:
         # json_ext is used as a file-name suffix in the branch below, so
         # it is appended here as well. The previous os.path.join(name, ext)
         # produced "<name>/<ext>", i.e. a file named like the extension
         # inside a directory named like the file.
         # NOTE(review): a best-performing config already written under
         # the old "<name>/<ext>" path has to be moved - confirm.
         return reader.best_performing_f_name + reader.json_ext

     session_path = os.path.join(reader.sessions_dir, session_id)
     identifier_path = DiskStorageMisc.get_identifier_path(
         config_id, create_sub_dirs=create_sub_dirs, root_path=root_path)
     return os.path.join(session_path, identifier_path + reader.json_ext)
示例#12
0
 def delete_location(location, session_id):
     """Hand a session location to ``delete_from_folder`` and log it.

     Args:
         location: Path of the location, relative to the session path.
         session_id: Session whose path is the base directory.
     """
     target_path = os.path.join(
         DiskStorageMisc.get_session_path(session_id), location)
     DiskStorageMisc.delete_from_folder(target_path)
     message = "Location '" + location + "' has been deleted."
     SessionLogger.log(message)
示例#13
0
 def delete_session_data(session_id):
     """Delegate to ``DiskStorageMisc.delete_session_data`` for the session.

     Args:
         session_id: Passed through unchanged.
     """
     DiskStorageMisc.delete_session_data(session_id)
示例#14
0
 def delete_from_folder(path):
     """Delegate to ``DiskStorageMisc.delete_from_folder`` for ``path``.

     Args:
         path: Passed through unchanged.
     """
     DiskStorageMisc.delete_from_folder(path)