示例#1
0
    def test_export_to_csv_from_reader_001(self):
        """Export Reader output to CSV and compare file count with item count.

        Passes when the number of project-folder entries whose name contains
        ".csv" equals the number of items yielded by ``reader.getlazy()``.
        """
        # Set-up helpers of the test class (presumably create the temporary
        # corpus, the project folder and the field lists -- defined elsewhere).
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        corpus_path = os.path.join(self.tempdir_blogger_corp,
                                   self.txt_blogger_hightrepetativ_set)
        reader = Reader(corpus_path,
                        "txt",
                        send_end_file_marker=False,
                        regex_template="blogger",
                        mode=self.mode)

        exporter = Exporter(reader.getlazy(), mode=self.mode)
        exporter.tocsv(self.tempdir_project_folder,
                       "blogger_corpus",
                       self.fieldnames,
                       rows_limit_in_file=1)

        # Count every entry whose name contains ".csv" (substring match).
        csv_count = sum(
            1 for entry in os.listdir(self.tempdir_project_folder)
            if ".csv" in entry)

        # A second pass over the reader supplies the expected item count.
        assert len(list(reader.getlazy())) == csv_count
示例#2
0
    def test_export_to_sqlite_from_list_006(self):
        """Export the fake blogger list to SQLite and check the file naming.

        Every project-folder entry whose name contains ".db" must also
        contain the database name.
        """
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        dbname = "blogger_corpus"
        exporter = Exporter(self.input_list_fake_blogger_corpus,
                            mode=self.mode)
        exporter.tosqlite(self.tempdir_project_folder, dbname, self.fieldnames)

        db_entries = [entry
                      for entry in os.listdir(self.tempdir_project_folder)
                      if ".db" in entry]

        # NOTE(review): this passes vacuously when no ".db" entry exists.
        assert all(dbname in entry for entry in db_entries)
示例#3
0
 def test_exporter_initialisation_with_reader_obj_001(self):
     """An Exporter built from ``reader.getlazy()`` is an Exporter instance."""
     self.blogger_corpus()

     corpus_path = os.path.join(self.tempdir_blogger_corp,
                                self.txt_blogger_hightrepetativ_set)
     reader = Reader(corpus_path,
                     "txt",
                     regex_template="blogger",
                     mode=self.mode)

     exporter = Exporter(reader.getlazy(), mode=self.mode)
     exporter.should.be.a(Exporter)
示例#4
0
    def test_export_to_sqlite_from_reader_007(self):
        """Export Reader output to SQLite and check the file naming.

        Every project-folder entry whose name contains ".db" must also
        contain the database name.
        """
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        corpus_path = os.path.join(self.tempdir_blogger_corp,
                                   self.txt_blogger_hightrepetativ_set)
        reader = Reader(corpus_path,
                        "txt",
                        send_end_file_marker=False,
                        regex_template="blogger",
                        mode=self.mode)

        dbname = "blogger_corpus"
        exporter = Exporter(reader.getlazy(), mode=self.mode)
        exporter.tosqlite(self.tempdir_project_folder, dbname, self.fieldnames)

        db_entries = [entry
                      for entry in os.listdir(self.tempdir_project_folder)
                      if ".db" in entry]

        # NOTE(review): this passes vacuously when no ".db" entry exists.
        assert all(dbname in entry for entry in db_entries)
示例#5
0
    def test_export_to_csv_from_list_000(self):
        """Export the fake blogger list to CSV and compare the counts.

        Passes when the number of project-folder entries whose name contains
        ".csv" equals the length of the input list.
        """
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        exporter = Exporter(self.input_list_fake_blogger_corpus,
                            mode=self.mode)
        exporter.tocsv(self.tempdir_project_folder,
                       "blogger_corpus",
                       self.fieldnames,
                       rows_limit_in_file=1)

        # Count every entry whose name contains ".csv" (substring match).
        csv_count = sum(
            1 for entry in os.listdir(self.tempdir_project_folder)
            if ".csv" in entry)

        assert len(self.input_list_fake_blogger_corpus) == csv_count
示例#6
0
    def test_export_to_xml_from_list_002(self):
        """Export the fake blogger list to XML and compare the counts.

        Passes when the number of project-folder entries whose name contains
        ".xml" equals the length of the input list.
        """
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        exporter = Exporter(self.input_list_fake_blogger_corpus,
                            mode=self.mode)
        exporter.toxml(self.tempdir_project_folder,
                       "blogger_corpus",
                       rows_limit_in_file=1)

        # Count every entry whose name contains ".xml" (substring match).
        xml_count = sum(
            1 for entry in os.listdir(self.tempdir_project_folder)
            if ".xml" in entry)

        assert len(self.input_list_fake_blogger_corpus) == xml_count
示例#7
0
    def test_export_to_json_from_list_004(self):
        """Export the fake blogger list to JSON and compare the counts.

        Passes when the number of project-folder entries whose name contains
        ".json" equals the length of the input list.
        """
        self.blogger_corpus()
        self.prj_folder()
        self.blogger_lists()

        exporter = Exporter(self.input_list_fake_blogger_corpus,
                            mode=self.mode,
                            rewrite=True,
                            silent_ignore=True)
        exporter.tojson(self.tempdir_project_folder,
                        "blogger_corpus",
                        rows_limit_in_file=1)

        # Count every entry whose name contains ".json" (substring match).
        json_count = sum(
            1 for entry in os.listdir(self.tempdir_project_folder)
            if ".json" in entry)

        assert len(self.input_list_fake_blogger_corpus) == json_count
示例#8
0
 def test_exporter_initialisation_with_list_000(self):
     """An Exporter built from the fake blogger list is an Exporter instance."""
     self.blogger_lists()

     source_rows = self.input_list_fake_blogger_corpus
     exporter = Exporter(source_rows, mode=self.mode)

     exporter.should.be.a(Exporter)
示例#9
0
    def create_testsets_in_diff_file_formats(self, rewrite=False, abs_path_to_storage_place=False, silent_ignore = True):
        """Export the blogger txt test sets into the other file formats.

        This is a generator: nothing is done until it is iterated. For every
        (file format, test set) pair in ``self._types_folder_names_of_testsets``
        the target folder is (re)created, the matching txt corpus is read with
        ``Reader`` and exported with ``Exporter``, and one boolean is yielded.
        The "txt" format is skipped because it is the export source. After the
        last export, each format that succeeded at least once is handed to
        ``make_zipfile``.

        Args:
            rewrite: Remove an already existing target folder before
                exporting. A falsy value falls back to ``self._rewrite``.
            abs_path_to_storage_place: Root folder under which the test sets
                are written. A falsy value falls back to
                ``self._path_to_zas_rep_tools``.
            silent_ignore: Passed through unchanged to ``Exporter``.

        Yields:
            bool: ``True`` when the export call returned a truthy value,
            ``False`` otherwise. Formats other than csv/xml/json/sqlite
            yield nothing.

        Raises:
            SystemExit: If no storage place can be determined.

        Any other exception raised while exporting or zipping is logged via
        ``self.logger.error`` and ends the generator instead of propagating.
        """
        if not rewrite:
            rewrite = self._rewrite
        if not abs_path_to_storage_place:
            abs_path_to_storage_place = self._path_to_zas_rep_tools
        if not abs_path_to_storage_place:
            # Leave a trace before terminating; the exit itself is unchanged.
            self.logger.error("No storage place for the test sets was given or configured.")
            sys.exit()

        created_sets = []
        try:
            # Make test sets for the blogger corpus.
            for file_format, test_sets in self._types_folder_names_of_testsets.items():
                if file_format == "txt":
                    # txt is the source format, there is nothing to export.
                    continue

                for name_of_test_set, folder_for_test_set in test_sets.items():
                    abs_path_to_current_test_case = os.path.join(
                        abs_path_to_storage_place,
                        self._path_to_testsets["blogger"],
                        folder_for_test_set)

                    if rewrite and os.path.isdir(abs_path_to_current_test_case):
                        shutil.rmtree(abs_path_to_current_test_case)
                    if not os.path.isdir(abs_path_to_current_test_case):
                        os.makedirs(abs_path_to_current_test_case)

                    # NOTE(review): this reads ``self.path_to_zas_rep_tools``
                    # (no leading underscore) while the default above uses
                    # ``self._path_to_zas_rep_tools`` -- confirm both exist.
                    path_to_txt_corpus = os.path.join(
                        self.path_to_zas_rep_tools,
                        self._path_to_testsets["blogger"],
                        self._types_folder_names_of_testsets["txt"][name_of_test_set])

                    # Logging/debugging options shared by Reader and Exporter.
                    shared_options = dict(
                        logger_level=self._logger_level,
                        logger_traceback=self._logger_traceback,
                        logger_folder_to_save=self._logger_folder_to_save,
                        logger_usage=self._logger_usage,
                        logger_save_logs=self._logger_save_logs,
                        mode=self._mode,
                        error_tracking=self._error_tracking,
                        ext_tb=self._ext_tb)

                    reader = Reader(path_to_txt_corpus, "txt",
                                    regex_template="blogger", **shared_options)
                    exporter = Exporter(reader.getlazy(), rewrite=rewrite,
                                        silent_ignore=silent_ignore,
                                        **shared_options)

                    # The "small" set is split into files of 5 rows, every
                    # other set into files of 2 rows (not used for sqlite).
                    rows_limit = 5 if name_of_test_set == "small" else 2

                    if file_format == "csv":
                        flag = exporter.tocsv(abs_path_to_current_test_case,
                                              "blogger_corpus",
                                              self._columns_in_doc_table["blogger"],
                                              rows_limit_in_file=rows_limit)
                    elif file_format == "xml":
                        flag = exporter.toxml(abs_path_to_current_test_case,
                                              "blogger_corpus",
                                              rows_limit_in_file=rows_limit)
                    elif file_format == "json":
                        flag = exporter.tojson(abs_path_to_current_test_case,
                                               "blogger_corpus",
                                               rows_limit_in_file=rows_limit)
                    elif file_format == "sqlite":
                        flag = exporter.tosqlite(abs_path_to_current_test_case,
                                                 "blogger_corpus",
                                                 self._columns_in_doc_table["blogger"])
                    else:
                        # Unknown format: folder was prepared, nothing to yield.
                        continue

                    if not flag:
                        yield False
                    else:
                        created_sets.append(file_format)
                        yield True

            # Zip each successfully exported format once, next to its folder.
            for created_set in set(created_sets):
                path_to_set = os.path.join(abs_path_to_storage_place,
                                           self._path_to_testsets["blogger"],
                                           created_set)
                make_zipfile(os.path.join(os.path.split(path_to_set)[0], created_set+".zip"), path_to_set)

            self.logger.info("TestSets (diff file formats) was initialized.")
        except Exception as e:
            if self._ext_tb:
                print_exc_plus()
            self.logger.error("SubsetsCreaterError: Throw following Exception: '{}'. ".format(e), exc_info=self._logger_traceback)