def test_export_to_sqlite_from_list_006(self):
    """Export an in-memory list of rows to SQLite and check the created DB file.

    Runs the ``blogger_corpus``, ``prj_folder`` and ``blogger_lists`` fixture
    helpers, feeds ``self.input_list_fake_blogger_corpus`` to an ``Exporter``
    and calls ``tosqlite`` with ``self.tempdir_project_folder`` as target.
    The test then requires that at least one database file shows up in that
    folder and that every database file carries the requested name.
    """
    self.blogger_corpus()
    self.prj_folder()
    self.blogger_lists()

    exporter = Exporter(self.input_list_fake_blogger_corpus, mode=self.mode)
    dbname = "blogger_corpus"
    exporter.tosqlite(self.tempdir_project_folder, dbname, self.fieldnames)

    db_files = [
        item for item in os.listdir(self.tempdir_project_folder)
        if ".db" in item
    ]
    # Without this check the test would pass even if nothing was exported,
    # because the per-file loop below would simply never execute.
    assert db_files, (
        "No '.db' file was created in '{}'.".format(self.tempdir_project_folder)
    )
    for item in db_files:
        assert dbname in item, (
            "Unexpected database file '{}': name does not contain '{}'.".format(
                item, dbname
            )
        )
def test_export_to_sqlite_from_reader_007(self):
    """Export rows streamed from a ``Reader`` to SQLite and check the DB file.

    Runs the ``blogger_corpus``, ``prj_folder`` and ``blogger_lists`` fixture
    helpers, builds a ``Reader`` over the txt test set at
    ``self.tempdir_blogger_corp / self.txt_blogger_hightrepetativ_set`` using
    the "blogger" regex template, and hands ``reader.getlazy()`` to an
    ``Exporter``. After ``tosqlite`` the test requires that at least one
    database file exists in ``self.tempdir_project_folder`` and that every
    database file carries the requested name.
    """
    self.blogger_corpus()
    self.prj_folder()
    self.blogger_lists()

    path_to_corpus = os.path.join(
        self.tempdir_blogger_corp, self.txt_blogger_hightrepetativ_set
    )
    reader = Reader(
        path_to_corpus,
        "txt",
        send_end_file_marker=False,
        regex_template="blogger",
        mode=self.mode,
    )
    exporter = Exporter(reader.getlazy(), mode=self.mode)
    dbname = "blogger_corpus"
    exporter.tosqlite(self.tempdir_project_folder, dbname, self.fieldnames)

    db_files = [
        item for item in os.listdir(self.tempdir_project_folder)
        if ".db" in item
    ]
    # Without this check the test would pass even if nothing was exported,
    # because the per-file loop below would simply never execute.
    assert db_files, (
        "No '.db' file was created in '{}'.".format(self.tempdir_project_folder)
    )
    for item in db_files:
        assert dbname in item, (
            "Unexpected database file '{}': name does not contain '{}'.".format(
                item, dbname
            )
        )
def create_testsets_in_diff_file_formats(self, rewrite=False, abs_path_to_storage_place=False, silent_ignore = True):
    """Convert the txt blogger test sets into the other configured formats.

    This is a generator: nothing happens until it is iterated. For every
    non-"txt" entry of ``self._types_folder_names_of_testsets`` the matching
    txt test set is read with a ``Reader`` ("blogger" regex template) and
    written out with an ``Exporter`` as csv, xml, json or sqlite. Once all
    exports ran, ``make_zipfile`` is called once per successfully created
    format.

    Args:
        rewrite: When falsy, ``self._rewrite`` is used instead. When the
            effective value is truthy, an already existing target directory
            is removed before exporting; the value is also forwarded to the
            ``Exporter``.
        abs_path_to_storage_place: Base directory for the generated test
            sets. When falsy, ``self._path_to_zas_rep_tools`` is used; if
            that is falsy as well, the process exits via ``sys.exit()``.
        silent_ignore: Forwarded unchanged to the ``Exporter``.

    Yields:
        ``True`` after each export whose exporter call returned a truthy
        value, ``False`` when it returned a falsy one.

    Any ``Exception`` raised while exporting or zipping is logged through
    ``self.logger.error`` and not propagated.
    """
    if not rewrite:
        rewrite = self._rewrite
    if not abs_path_to_storage_place:
        abs_path_to_storage_place = self._path_to_zas_rep_tools

    created_sets = []
    if not abs_path_to_storage_place:
        sys.exit()

    try:
        # Make test sets for the Blogger corpus.
        for file_format, test_sets in self._types_folder_names_of_testsets.items():
            for name_of_test_set, folder_for_test_set in test_sets.items():
                if file_format == "txt":
                    # txt is the source format; there is nothing to convert.
                    continue

                abs_path_to_current_test_case = os.path.join(
                    abs_path_to_storage_place,
                    self._path_to_testsets["blogger"],
                    folder_for_test_set,
                )
                if rewrite and os.path.isdir(abs_path_to_current_test_case):
                    shutil.rmtree(abs_path_to_current_test_case)
                if not os.path.isdir(abs_path_to_current_test_case):
                    os.makedirs(abs_path_to_current_test_case)

                # NOTE(review): the source corpus is located through
                # ``self.path_to_zas_rep_tools`` (no leading underscore),
                # not through ``abs_path_to_storage_place`` -- confirm that
                # this is intentional.
                path_to_txt_corpus = os.path.join(
                    self.path_to_zas_rep_tools,
                    self._path_to_testsets["blogger"],
                    self._types_folder_names_of_testsets["txt"][name_of_test_set],
                )

                # Reader and Exporter receive the same logging/mode settings.
                shared_kwargs = dict(
                    logger_level=self._logger_level,
                    logger_traceback=self._logger_traceback,
                    logger_folder_to_save=self._logger_folder_to_save,
                    logger_usage=self._logger_usage,
                    logger_save_logs=self._logger_save_logs,
                    mode=self._mode,
                    error_tracking=self._error_tracking,
                    ext_tb=self._ext_tb,
                )
                reader = Reader(
                    path_to_txt_corpus, "txt", regex_template="blogger",
                    **shared_kwargs
                )
                exporter = Exporter(
                    reader.getlazy(), rewrite=rewrite,
                    silent_ignore=silent_ignore, **shared_kwargs
                )

                # The "small" test set is split into files of 5 rows, every
                # other test set into files of 2 rows (sqlite is not split).
                rows_limit = 5 if name_of_test_set == "small" else 2

                if file_format == "csv":
                    created_as = "csv"
                    flag = exporter.tocsv(
                        abs_path_to_current_test_case, "blogger_corpus",
                        self._columns_in_doc_table["blogger"],
                        rows_limit_in_file=rows_limit,
                    )
                elif file_format == "xml":
                    created_as = "xml"
                    flag = exporter.toxml(
                        abs_path_to_current_test_case, "blogger_corpus",
                        rows_limit_in_file=rows_limit,
                    )
                elif file_format == "json":
                    created_as = "json"
                    flag = exporter.tojson(
                        abs_path_to_current_test_case, "blogger_corpus",
                        rows_limit_in_file=rows_limit,
                    )
                elif file_format == "sqlite":
                    created_as = "sqlite"
                    flag = exporter.tosqlite(
                        abs_path_to_current_test_case, "blogger_corpus",
                        self._columns_in_doc_table["blogger"],
                    )
                else:
                    # Unknown format: nothing is exported and nothing yielded.
                    continue

                if not flag:
                    yield False
                else:
                    created_sets.append(created_as)
                    yield True

        # One archive per format that was exported at least once.
        for created_set in set(created_sets):
            path_to_set = os.path.join(
                abs_path_to_storage_place,
                self._path_to_testsets["blogger"],
                created_set,
            )
            path_to_zip = os.path.join(
                os.path.split(path_to_set)[0], created_set + ".zip"
            )
            make_zipfile(path_to_zip, path_to_set)

        self.logger.info("TestSets (diff file formats) was initialized.")
    except Exception as e:
        if self._ext_tb:
            print_exc_plus()
        self.logger.error("SubsetsCreaterError: Throw following Exception: '{}'. ".format(e), exc_info=self._logger_traceback)