示例#1
0
    def test_util_file_create_if_necessary__file_read(self):
        """Create a file with default content and read it back in three ways.

        The test checks that util.file_create_if_necessary returns a true
        value for a file that was deleted just before, that
        util.file_read_all gives back the content unchanged, and that
        util.file_read_lines returns the expected lines with Lower=True
        and with Strip=True.
        """
        if not self._test_exec("test_file_create_if_necessary"):
            return

        Prg = self.Prg
        PathTmp = os.path.join(Prg["DirWork"],
                               "test_file_create_if_necessary.txt")
        # remove a leftover from an earlier run before the create call
        util.file_del(PathTmp)

        TextDefault = "Cloud\nRain\nSun\r\nSnow   "
        WasCreated = util.file_create_if_necessary(Prg,
                                                   PathTmp,
                                                   ContentDefault=TextDefault)
        self.assertTrue(WasCreated)

        _RetRead, TextRead = util.file_read_all(Prg, PathTmp)
        self.assertEqual(TextRead, TextDefault)

        # Lower=True: lowercase letters, line endings and spaces are kept
        LinesLower = util.file_read_lines(Prg, PathTmp, Lower=True)
        self.assertEqual(["cloud\n", "rain\n", "sun\r\n", "snow   "],
                         LinesLower)

        # Strip=True: original case, no surrounding whitespace
        LinesStripped = util.file_read_lines(Prg, PathTmp, Strip=True)
        self.assertEqual(["Cloud", "Rain", "Sun", "Snow"], LinesStripped)

        util.file_del(PathTmp)
示例#2
0
def file_sentence_create(Prg, FileSentencesAbsPath, Text="", FileTextAbsPath=""):
    """Write the filtered sentences of a text into FileSentencesAbsPath.

    Nothing is done if FileSentencesAbsPath already exists as a file.

    The text is taken from the Text param, except when FileTextAbsPath is
    given: then the content of that file replaces it (for testing it's
    easier to pass Text directly and not create/delete a tmpfile).

    A sentence is kept only if
     - the count of its words without numbers is at least three times the
       count of its words with numbers, and
     - the same sentence has not occurred earlier in this text.
    The kept sentences are written one per line.
    """
    if os.path.isfile(FileSentencesAbsPath):
        return

    if FileTextAbsPath:
        _ReadSuccess, Text = util.file_read_all(Prg, Fname=FileTextAbsPath, CheckIsFile=False)

    SentencesKept = []
    SentencesSeen = set()

    for Sentence in text.sentence_separator(Text):
        # word with/without nums ratio filter
        WordsHasNum, WordsWithoutNum = util.count_words_with_num(Sentence)
        FewNumbers = WordsWithoutNum >= WordsHasNum * 3

        # avoid duplications in one text; every sentence is remembered,
        # even the ones dropped by the ratio filter
        FirstOccurrence = Sentence not in SentencesSeen
        SentencesSeen.add(Sentence)

        if FewNumbers and FirstOccurrence:
            SentencesKept.append(Sentence)

    util.file_write_utf8_error_avoid(Prg, FileSentencesAbsPath, "\n".join(SentencesKept))
示例#3
0
def obj_from_file(JsonFileName, UseFilePointer=False):
    """Load the JSON document stored in JsonFileName.

    UseFilePointer=True:  the file is opened here and parsed with json.load
    UseFilePointer=False: the content is read with util.file_read_all and
                          parsed with json.loads

    Returns a (Status, Value) tuple:
      ("ok", ParsedObject)  if the content is valid JSON
      ("error", Msg)        if the JSON decoder fails; Msg contains the file
                            name and the file content, and it is printed too

    Only the JSON decoder error is handled, other exceptions are not caught
    in this function.
    """
    try:
        if UseFilePointer:
            # explicit utf-8: without it open() uses the locale's encoding,
            # so the same JSON file could be parsed differently per platform
            with open(JsonFileName, encoding="utf-8") as Fp:
                return "ok", json.load(Fp)

        _ReadStatus, FileContent = util.file_read_all(Prg={}, Fname=JsonFileName)
        return "ok", json.loads(FileContent)

    except json.JSONDecodeError:
        # read the raw content again to show what the decoder could not parse
        FileContent = util.file_read_all_simple(JsonFileName)
        Msg = f"Json decoder error: {JsonFileName}:>>{FileContent}<<"
        print(Msg)
        return "error", Msg
示例#4
0
def docs_copy_samples_into_dir(Prg, DirTarget):
    """Copy the gzipped sample texts into DirTarget without the .gz extension.

    DirTarget is created if necessary. Every file collected from
    Prg["DirTextSamples"] whose name ends with ".gz" is read with
    Gzipped=True and the text is written into DirTarget under the same
    base name, minus the trailing ".gz".
    Files with other extensions are skipped.
    """
    util.dir_create_if_necessary(Prg, DirTarget)

    GzExt = ".gz"

    # sample texts are gzipped
    for FileName in util.files_abspath_collect_from_dir(Prg["DirTextSamples"]):
        # don't duplicate other files, document info json for example
        if not FileName.endswith(GzExt):
            continue

        # cut only the trailing extension: str.replace would delete a ".gz"
        # from the middle of the name, too
        BaseName = os.path.basename(FileName)[:-len(GzExt)]

        _ReadSuccess, TextContent = util.file_read_all(Prg,
                                                       FileName,
                                                       Gzipped=True)
        FileNameSaved = os.path.join(DirTarget, BaseName)
        # if a previous file exists with same name, it overwrites
        util.file_write_utf8_error_avoid(Prg, FileNameSaved, TextContent)
示例#5
0
def file_index_create(Prg, FileIndexAbsPath, FileSentencesAbsPath, SubSentenceMultiplier=100, WordPositionMultiplier=100, ForcedWrite=False):
    """Build the word index of a sentence file and write it into FileIndexAbsPath.

    The index is written only if FileIndexAbsPath doesn't exist yet, or if
    ForcedWrite is true. A true Prg["TestExecution"] switches ForcedWrite on.

    The sentence file is read, lowercased, and runs of two or more minus
    signs are replaced with a space. Every line is split into subsentences
    with text.subsentences and each subsentence is passed to indexing()
    with this base number:

        LineNum * SubSentenceMultiplier * WordPositionMultiplier
        + SubSentenceNum * WordPositionMultiplier

    SubSentenceNum is capped at SubSentenceMultiplier - 1, the last num
    that we can represent. indexing() is defined elsewhere; it receives
    WordPositions and is expected to fill it - presumably word -> list of
    position numbers, TODO confirm.

    Output format: one '"word": [num,num,...]' entry per line between { },
    the words are escaped as JSON strings.
    """
    import json  # local import: used only to escape the words as JSON keys

    # in multicore indexing we can't see it in Prg but during tests I use single core
    if "TestExecution" in Prg and Prg["TestExecution"]:
        ForcedWrite = True

    if os.path.isfile(FileIndexAbsPath) and not ForcedWrite:
        return

    WordPositions = dict()

    _, TextAll = util.file_read_all(Prg, FileSentencesAbsPath, CheckIsFile=False)
    TextAll = TextAll.lower()

    # more than one minus: -- or --- signs: replace them
    TextAll = text.replace_regexp(TextAll, "[-][-]+", " ")

    SubSentenceNumMax = SubSentenceMultiplier - 1
    WordPositionNumMax = WordPositionMultiplier - 1

    # one sentence is in one line, it's guaranteed
    for LineNum, Line in enumerate(TextAll.split("\n")):
        LineNumMultiplied = LineNum * SubSentenceMultiplier * WordPositionMultiplier

        _, SubSentences = text.subsentences(Prg, Line)
        for SubSentenceNum, SubSentence in enumerate(SubSentences):
            # the last num that we can represent
            SubSentenceNum = min(SubSentenceNum, SubSentenceNumMax)

            LineSubWordBase = LineNumMultiplied + SubSentenceNum * WordPositionMultiplier
            indexing(WordPositions, SubSentence, LineSubWordBase, WordPositionNumMax)

    # json.dumps escapes quotes/backslashes in the word, so the output stays
    # parseable; ensure_ascii=False keeps non-ascii letters as they are
    Out = [
        f'{json.dumps(str(Word), ensure_ascii=False)}: [{",".join(map(str, Positions))}]'
        for Word, Positions in WordPositions.items()
    ]
    Content = "{\n" + "\n,".join(Out) + "\n}"

    util.file_write_with_check(Prg, Fname=FileIndexAbsPath, Content=Content)
示例#6
0
def test_exec(Args):
    """Execute all test modules with a test configuration, then exit.

    Steps:
     - create Prg with TestExecution=True; the documents dir is the
       "test_files/documents_user_dir_simulator" dir under the program's
       exec root
     - import the test modules
     - if a book from BooksForTest is missing from Prg["DirDocuments"],
       write it there from the gzipped text sample
     - call run_all_tests(Prg) in every test module, in a fixed order
     - call util_test.result_all(Prg) and exit with sys.exit(0)

    Args is not used in this function.
    """
    DirDocuments = os.path.join(config.get_dir_prg_exec_root(),
                                "test_files",
                                "documents_user_dir_simulator")
    Prg = config.prg_config_create(TestExecution=True,
                                   PrintForDeveloper=False,
                                   DirDocuments=DirDocuments)

    print("\n" * 22)
    print("##################### TEST BEGIN #####################################")

    import test_converter
    import test_document
    import test_eng
    import test_result_selectors
    import test_seeker
    import test_seeker_logic
    import test_text
    import test_tokens
    import test_ui_html
    import test_util
    import test_util_json
    import test_util_ui

    # for token testing I need a real, huge text base.
    # to avoid plus storage, I use the text samples
    BooksForTest = [
        ("_novels", "WilliamShakespeare__CompleteWorks__gutenberg_org_100-0"),
    ]
    for DirSample, BookBaseName in BooksForTest:
        PathTest = os.path.join(Prg["DirDocuments"], BookBaseName + ".txt")
        PathSource = os.path.join(Prg["DirTextSamples"], DirSample,
                                  BookBaseName + ".txt.gz")
        if os.path.isfile(PathTest):
            print("       EXISTs:", PathTest)
            continue

        print("Doesn't EXIST:", PathTest)
        _, Txt = util.file_read_all(Fname=PathSource, Gzipped=True)
        util.file_write_utf8_error_avoid(dict(), Fname=PathTest, Content=Txt)

    # the order of the execution is fixed
    TestModules = (
        test_util,
        test_util_json,
        test_document,
        test_converter,
        test_text,
        test_seeker,
        test_seeker_logic,
        test_util_ui,
        test_eng,
        test_ui_html,
        test_tokens,
        test_result_selectors,
    )
    for TestModule in TestModules:
        TestModule.run_all_tests(Prg)
    util_test.result_all(Prg)

    print("##################### TEST END #####################################")
    sys.exit(0)

    ### EXTRAS AFTER BASIC TESTS ###
    # not reached while the sys.exit(0) above is active
    # execute search from ui
    seeker.be_ready_to_seeking(Prg)
    ui_console.seek_and_display(Prg, Prg["QueryExamples"]["bird_or_cat"])
示例#7
0
 def test_file_read_all(self):
     """The value returned by util.file_read_all has to be equal to the
     known raw text of test/test_file_read_lines.txt."""
     PathSample = os.path.join(self.Prg["DirPrgParent"], "test",
                               "test_file_read_lines.txt")
     TxtWanted = "  Test Line 1\n\n  Test Line 3"
     TxtRead = util.file_read_all(self.Prg, PathSample)
     self.assertEqual(TxtWanted, TxtRead)
示例#8
0
    def test_util_file_write_append_del(self):
        """Write, append, gzip, checked write and delete on one work file.

        The steps use the same file one after the other, so the order of
        the calls is important: every step works with the state that the
        previous step left on the disk.
        """
        if not self._test_exec("test_file_write_append_del"):
            return

        Prg = self.Prg

        # writing with an empty file name has to return a false value
        self.assertFalse(util.file_write(Prg, Fname=""))

        Fname = os.path.join(Prg["DirWork"], "test_file_write.txt")

        # --- plain write + append, then read back ---
        WriteOk = util.file_write(Prg, Fname=Fname, Content="apple ")
        AppendOk = util.file_append(Prg, Fname=Fname, Content="tree")
        self.assertTrue(WriteOk)
        self.assertTrue(AppendOk)
        ReadOk, TextRead = util.file_read_all(Prg, Fname)

        self.assertTrue(ReadOk)
        self.assertEqual(TextRead, "apple tree")

        State, GzState = util.file_is_gzipped(Prg, Fname)
        self.assertEqual("file_exists", State)
        self.assertEqual("not_gzipped", GzState)

        # --- gzipped write/read with non-ascii chars ---
        Sample = "Árvíztűrő tükörfúrógép"
        WriteGzOk = util.file_write(Prg,
                                    Fname=Fname,
                                    Content=Sample,
                                    Gzipped=True)
        self.assertTrue(WriteGzOk)
        ReadGzOk, TextReadGz = util.file_read_all(Prg, Fname, Gzipped=True)
        self.assertTrue(ReadGzOk)
        self.assertEqual(TextReadGz, Sample)

        State, GzState = util.file_is_gzipped(Prg, Fname)
        self.assertEqual("file_exists", State)
        self.assertEqual("gzipped", GzState)

        # --- the bytes on the disk have to be the encoded sample ---
        util.file_write_utf8_error_avoid(Prg, Fname, Sample)
        BytesInFile = util.file_read_all_simple(Fname, "rb")
        self.assertEqual(Sample.encode(), BytesInFile)

        # --- write with check ---
        CheckedWriteOk = util.file_write_with_check(Prg, Fname, Sample)
        TextInFile = util.file_read_all_simple(Fname)
        self.assertEqual(TextInFile, Sample)
        self.assertTrue(CheckedWriteOk)

        # clear the content of the file
        util.file_write_with_check(Prg, Fname, "")

        # writing is unsuccessful because writer fun doesn't do anything
        def empty_writer_fun(Prg, Fname, Sample):
            pass

        CheckedWriteOk = util.file_write_with_check(Prg,
                                                    Fname,
                                                    Sample,
                                                    WriterFun=empty_writer_fun)
        self.assertFalse(CheckedWriteOk)

        # --- delete: first is successful, the second can't find the file ---
        DelFirst = util.file_del(Fname)
        DelSecond = util.file_del(Fname)
        self.assertTrue(DelFirst)
        self.assertFalse(DelSecond)

        State, GzState = util.file_is_gzipped(Prg, Fname)
        self.assertEqual("file_not_found", State)
        self.assertEqual("", GzState)