Пример #1
0
def own_latin1_test(inArgs):
  """ Self-test: run a Latin-1 sample through char_map.simpler_ascii().

  Every line of the sample is converted twice, once with the default
  arguments and once with a second argument of 1; lines whose default
  conversion is empty are not shown.

  :param inArgs: arguments, only echoed on the first output line
  :return: 0; an AssertionError is raised when the last shown line,
           stripped, is not "cC"
  """
  print("ARGS:", inArgs)
  print("")
  sample = """ISO8859-1 (Latin-1) text:
"c\xE3o vir\xE1 na dire\xE7\xE3o certa, abre a p\xE1gina diz \xD3scar \xE0 \xE9gua!"

\xE1\xE9\xED\xF3\xFA
\xE0....
\xC1\xC9\xCD\xD3\xDA
\xC0....
\xE3..\xF5.
\xC3..\xD5.

Cedil:
\x09\xE7\xC7
"""
  for text_line in sample.split("\n"):
    plain = char_map.simpler_ascii(text_line)
    alternate = char_map.simpler_ascii(text_line, 1)
    if plain != "":
      print("s:", plain)
      print("t:", alternate)
      last_plain = plain
      print("")
  # The final non-empty sample line is the tab + cedilla pair.
  assert last_plain.strip() == "cC"
  return 0
Пример #2
0
def dump_file(out, name, do_txc, opts=None) -> int:
    """ Dump a (text-like) file to an output stream.

    :param out: output stream; when falsy the file is read but not written
    :param name: name handed to read_txc()
    :param do_txc: flag handed to read_txc(); when truthy the data is
                   stripped and terminated by a single newline
    :param opts: options with keys "verbose", "encode-out" and "simplify";
                 DEF_DUMP_OPTS is used when None
    :return: always 0
    """
    options = DEF_DUMP_OPTS if opts is None else opts
    verbose = options["verbose"]
    kind = 1 if (do_txc or verbose > 0) else 0
    _, data, codex = read_txc(name, do_txc)
    shown = data.strip() + "\n" if do_txc else data
    if not out:
        return 0
    forced_encoding = options["encode-out"]
    # Bytes are written for any stream other than sys.stdout, and whenever
    # an explicit output encoding was requested.
    streamed = True if forced_encoding else out != sys.stdout
    out_encode = forced_encoding if forced_encoding else codex
    if options["simplify"]:
        text = char_map.simpler_ascii(shown, kind)
        payload = text.encode("ascii") if streamed else text
    else:
        payload = shown.encode(out_encode) if streamed else shown
    out.write(payload)
    return 0
Пример #3
0
def simpler_list(a, sep=None):
    """ Apply char_map.simpler_ascii() to a value or to each item of a sequence.

    :param a: a list/tuple of items, or a single value
    :param sep: optional separator; when given, the converted items of a
                list/tuple are joined with it
    :return: for a list/tuple, the list of converted items (or the joined
             string when `sep` is not None); otherwise the converted value
    """
    if not isinstance(a, (list, tuple)):
        return char_map.simpler_ascii(a)
    converted = [char_map.simpler_ascii(item) for item in a]
    return converted if sep is None else sep.join(converted)
Пример #4
0
def check_country_accs() -> bool:
    """ Print every entry of ibanpt.bank_accounts() and check its layout.

    Each entry, after char_map.simpler_ascii(), must have no leading or
    trailing newline and no run of two blanks.

    :return: True; an AssertionError is raised on the first bad entry
    """
    accounts = ibanpt.bank_accounts()
    for position, account in enumerate(accounts, start=1):
        shown = char_map.simpler_ascii(account)
        print("\n#{}/{}:\n>>>{}<<<".format(position, len(accounts), shown))
        assert shown.strip('\n') == shown
        assert shown.replace("  ", " ") == shown
    return True
Пример #5
0
def dig_throu(astr) -> tuple:
    """ Split one tab-separated record into its first three fields.

    The record, after char_map.simpler_ascii(), must hold exactly four
    tab-separated fields: IBAN, bank id, institution name and type.

    :param astr: the record text
    :return: tuple (iban, bank_id, inst_name), all as strings
    :raises ValueError: wrong number of fields, or a non-numeric
                        IBAN / bank id
    """
    fields = char_map.simpler_ascii(astr).split('\t')
    iban, bank_id, inst_name, type_of = fields
    bank_number = int(bank_id)
    assert bank_number >= 0
    int(iban)  # result unused: only checks the IBAN field is numeric
    # NOTE(review): splitting and re-joining on a single blank always
    # reproduces the input, so this check cannot fail -- confirm whether a
    # whitespace-normalizing split() was intended.
    rejoined = " ".join(type_of.split(' '))
    assert rejoined == type_of
    return (iban, bank_id, inst_name)
Пример #6
0
 def _normal_s_value(self, s):
     if isinstance(s, str):
         if self.strict_ch is not None:
             res = char_map.simpler_ascii(s)
             if res != s:
                 self._add_ref(res, s)
         else:
             res = s
     else:
         res = s
     return res
Пример #7
0
 def _convert_to_ids(self, tags) -> dict:
     """ Sort the given tags into kept and rejected keys.

     Keys for which self.is_excluded() is true are dropped. A key that
     char_map.simpler_ascii(key, 1) leaves unchanged is kept with its
     value; any other key is listed as a (converted, original) pair.

     :param tags: mapping of tag key to value
     :return: dictionary with "@id3v2" (result of valid_id3v2(tags)),
              "id3v2" (kept tags) and "id3v2:out" (rejected key pairs)
     """
     kept, rejected = dict(), list()
     dct = {
         "@id3v2": valid_id3v2(tags),
         "id3v2": kept,
         "id3v2:out": rejected,
     }
     for akey in tags:
         if self.is_excluded(akey):
             continue
         ukey = char_map.simpler_ascii(akey, 1)
         if ukey == akey:
             kept[akey] = tags[akey]
         else:
             rejected.append((ukey, akey))
     return dct
Пример #8
0
def simple_ascii(s, special=None):
    """ Similar to simpler_ascii(), but allows a few extra chars.

    With `special` left as None, A/a with ring above (0xC5, 0xE5) map to
    plain "A"/"a"; every other character goes through
    char_map.simpler_ascii() one at a time.

    :param s: text to convert
    :param special: when not None, the ring-letter mapping is switched off
    :return: the converted text
    """
    # NOTE(review): the content of `special` is never used, only whether it
    # is None -- confirm whether it was meant to supply the mapping.
    ring_letters = {0xc5: "A", 0xe5: "a"} if special is None else {}
    pieces = []
    for a_chr in s:
        mapped = ring_letters.get(ord(a_chr))
        if mapped is None:
            mapped = char_map.simpler_ascii(a_chr)
        pieces.append(mapped)
    return "".join(pieces)
Пример #9
0
def simplified(astr) -> str:
    """ Collapse runs of blanks, then apply char_map.simpler_ascii().

    When DEBUG is positive and the conversion changed the text, every
    character is printed next to its decimal code.

    :param astr: text to simplify; must be a string
    :return: the converted text
    """
    assert isinstance(astr, str)
    collapsed = astr
    # Replacing pairs repeatedly shrinks any run of blanks down to one.
    while "  " in collapsed:
        collapsed = collapsed.replace("  ", " ")
    res = char_map.simpler_ascii(collapsed)
    if DEBUG > 0 and res != collapsed:
        print(''.join(
            f"{char_map.simpler_ascii(ch)}({ord(ch)}d)" for ch in collapsed
        ))
    return res
Пример #10
0
def run_test_cat(notes, opts, d):
    """ Read the TSV files found under a directory and report on them.

    :param notes: dictionary updated in place; for every table whose
                  flowed content is changed by char_map.simpler_ascii(),
                  notes[name] receives the unconverted content
    :param opts: options; only opts["ext"] is read here
    :param d: directory path
    :return: 0 on success, 1 for an invalid path, 2 when no table was read
    """
    debug = 1
    tap = TaPath(d)
    if not tap.ok_path():
        print("Invalid path:", tap)
        return 1
    if not tap.is_dir():
        # NOTE(review): only reported, processing goes on -- confirm whether
        # an error return was intended here.
        print("Not a directory:", tap)
    if not tap.path.startswith("../"):
        tap.cd_path()
    print(f"Dir: {tap}, abs_path: {tap.abs_path}")
    ttb = ttext.TsvBase("any-db")
    if opts["ext"]:
        ttb.ext = opts["ext"]
    rel_names = ttb.scan_tsv(tap.path)
    tbl = ttb.get_multiple_subnames()
    assert not tbl
    fails = ttb.read_files(rel_names, debug=debug)
    print(f"ttb.read_files(rel_names={rel_names}) returned fails={fails}")
    if not ttb.names:
        print("No files found: {} (ext: {}).".format(d, opts["ext"]))
        return 2
    print("Tables:")
    print(expand_list(ttb.names, "\t- ", 1))
    reordered = util.strlist.dict_order(ttb.names, "z")[0]
    print(expand_list(reordered, "\t=", post=" (reverse order)\n"))
    for name in ttb.names:
        cont = ttb.get_content(name)
        tbl = ttb.get_table(name)
        rows = [line.split("\t") for line in cont]
        flown = flow_list(rows)
        # Comparing strings sidesteps a UnicodeEncodeError-based check.
        if char_map.simpler_ascii(flown) != flown:
            notes[name] = flown
        msgs = tbl[3]
        print(f"Error msgs ({type(msgs)}): {msgs}\n...\n")
    return 0
Пример #11
0
def dump_import(imp, opts, out=None) -> dict:
    """ Index imported stock rows by ISIN/symbol and group them by market.

    Each row of `imp.content` goes through char_map.simpler_ascii() and is
    reordered into the tuple (row[4], row[1], row[2], row[0], row[3]); the
    first three items are used as coin, ISIN and symbol, the last one as
    the market.

    :param imp: object whose `content` attribute iterates over the rows
    :param opts: options; "filter" (None, or the only coin to keep) and
                 "pre" (line prefix for `out`; when truthy each line also
                 gets a trailing comma)
    :param out: optional stream receiving one line per kept stock
    :return: dictionary with "list" (kept tuples), "markets" (distinct
             markets in order of appearance) and "market-isin" (short
             market name -> list of ISINs)
    """
    err = sys.stderr
    stocks = list()
    isins, symbs = dict(), dict()
    filtered = opts["filter"]
    pre = opts["pre"]
    if pre:
        post = ","
    else:
        # 'post' used to be bound only for a truthy prefix, so writing to
        # 'out' without a prefix raised UnboundLocalError.
        pre, post = "", ""
    for row in imp.content:
        alist = char_map.simpler_ascii(row)
        tup = alist[4], alist[1], alist[2], alist[0], alist[3]
        coin, isin, symb = tup[:3]
        if isin in isins:
            err.write(f"Duplicate ISIN {isin}: {isins[isin]}\n")
            continue
        shown = tup[:-1]
        if filtered is None or filtered == coin:
            if out:
                out.write(f"{pre}{shown}{post}\n")
            stocks.append(tup)
        isins[isin] = tup
        if symb == "-":
            continue
        if symb in symbs:
            # Reported only; the newer row still replaces the older one.
            err.write(f"Duplicate symbol '{symb}', ISIN {isin}: {symbs[symb]}\n")
        symbs[symb] = tup
    res = {"list": stocks,
           "markets": [],
           "market-isin": dict(),
           }
    # First pass creates one empty ISIN list per market short name...
    for stock in stocks:
        market = stock[-1]
        if market not in res["markets"]:
            res["markets"].append(market)
            res["market-isin"][short_market_name(market)] = list()
    # ...second pass fills them, keeping the order of the kept stocks.
    for stock in stocks:
        res["market-isin"][short_market_name(stock[-1])].append(stock[1])
    return res
Пример #12
0
def list_smas(param, ux_find, verbose) -> str:
    """ Look for a name within the listed directories.

    Every entry of Dirs(path).uxnames is compared, after
    char_map.simpler_ascii(), against `ux_find`; each match is printed
    without its first two characters.

    :param param: list of paths; ["."] is used when empty
    :param ux_find: name to look for
    :param verbose: when positive, a found/not found line is printed per path
    :return: the first matching name, or "" when there was none
    """
    found = ""
    for path in (param or ["."]):
        here = ""
        for ux_str in Dirs(path).uxnames:
            name = char_map.simpler_ascii(ux_str)
            if name != ux_find:
                continue
            print(name[2:])
            # NOTE(review): 'here' is only set for the very first match
            # overall, so later paths report "not found" even when they
            # matched -- confirm this is intended.
            if not found:
                found = here = name
        if verbose > 0:
            print(f"{path} {ux_find}:", "found" if here else "not found")
    return found
Пример #13
0
def dump_text(out, name, opts, debug=0) -> int:
    """ Dump one text file.

    A name ending with SPECIAL_TXC is read with the LATIN1_TEXT encoding
    and printed after char_map.simpler_ascii(). Any other file is read
    through BareText and its lines are written to `out`.

    :param out: output stream
    :param name: file name
    :param opts: options; opts["dosCR"] is appended to each written line
    :param debug: when positive, dump_bare() is called as well
    :return: always 0
    """
    if name.endswith(SPECIAL_TXC):
        with open(name, "r", encoding=LATIN1_TEXT) as file:
            print(char_map.simpler_ascii(file.read()))
        return 0

    tred = BareText(name)
    read_file = tred.utf_file_reader if _READ_AS_UTF else tred.file_reader
    is_ok = read_file()
    print(f"tred, ok?{is_ok}: {tred}")
    if is_ok:
        for line in tred.lines:
            out.write(line + opts["dosCR"] + "\n")
    print("Debug:", name)
    if debug > 0:
        dump_bare(out, tred)
    return 0
Пример #14
0
def show_table(outFile, param, showOpts, debug=0):
    """
    Write every row of every sheet to the output stream.
    :param outFile: output stream; receives ASCII bytes when the command
                    is "cat", text otherwise
    :param param: list; first item is the input path, the rest is handed
                  to ZSheets
    :param showOpts: sequence (cmd, sep, adapt, verbose)
    :param debug: when positive, column bounds are printed per sheet
    :return: None when `param` is an empty list, otherwise 0
    """
    if param == []:
        return None
    cmd, sep, adapt, verbose = showOpts
    inName = LPath(param[0]).to_os_path()
    assert inName is not None
    z = ZSheets(inName, param[1:])
    _, cont = z.sheets, z.cont
    as_bytes = cmd == "cat"
    for idx, pages in enumerate(cont, start=1):
        t = ZTable(pages)
        for y, entry in enumerate(t.cont, start=1):
            joined = t.alt_chr_separated(entry, adapt, sep)
            plain = char_map.simpler_ascii(joined)
            pre = "" if verbose <= 0 else f"row#{y}\t"
            line = f"{pre}{plain}\n"
            outFile.write(line.encode("ascii") if as_bytes else line)
        if debug > 0:
            shown = f"{inName}, {idx}/ #{len(cont)}"
            print(f"ZTable({shown}) minCol={t.minCol}, maxCol={t.maxCol}")
    return 0
Пример #15
0
def show_id3_tags(fname, exclude_tags) -> bool:
    """ Show the raw id3 tags of an audio file.

    :param fname: audio file name, handed to mpaudio.Audio()
    :param exclude_tags: when truthy, tags for which
                         mpaudio.tag_str_within() is non-empty are shown
                         as "[skipped]"
    :return: False when there are no tag ids, True once they were printed
    """
    aud = mpaudio.Audio(fname)
    if not aud.has_tag_ids():
        print("No tag ids:", fname)
        return False
    is_ok = aud.tag_ids() is not None
    print("Time (seconds):", aud.seconds(), is_ok)
    if not is_ok:
        return False
    for akey in sorted(aud.tag_ids()):
        item = aud.tag_ids()[akey]
        if exclude_tags and mpaudio.tag_str_within(akey, exclude_tags) != "":
            newstr = "[skipped]"
        else:
            newstr = item.pprint()
        print("akey:", type(item), akey, char_map.simpler_ascii(newstr, 1))
        # NOTE(review): the unused-tags report is repeated for every key;
        # it may have been meant to run once after the loop -- confirm.
        unused_tags = aud.tag_unused()
        if unused_tags:
            names = [tag for tag, _ in unused_tags]
            print("Unused tags (Latin-1 approximation):", names)
    return True
Пример #16
0
def run_main(args):
    """ Main basic module test: print the IBAN format and every bank name.

    :param args: list; the first item, if any, is the abbreviation handed
                 to IBAN.gen_format()
    :return: 0; assertions fail on a non-positive bank code or when
             check_country_accs() is falsy
    """
    aformat = CountryFormats()
    ibn = IBAN()
    abbrev = None if args == [] else args[0]
    print(f"IBAN format, abbrev='{abbrev}': {ibn.gen_format(abbrev)}")
    for bkey in banks.names.BANK_NAMES:
        yyyy, ccode = bkey[:4], bkey[4:]
        s = banks.names.get_original_name(bkey)
        original_name = char_map.simpler_ascii(s) if aformat.simple_latin1 else s
        print(f"Bank code: {bkey} (IBAN yyyy='{yyyy}', ccode={ccode}): "
              f"{original_name}")
        num = int(ccode)
        assert num > 0
    assert check_country_accs()
    return 0
Пример #17
0
def show_stocks(outFile, param, showOpts, debug):
    """ Show stocks from Excel file.

    :param outFile: text output stream, one line written per table row
    :param param: list whose first item is the input path
    :param showOpts: sequence (unused, sep, adapt, verbose)
    :param debug: when positive, column bounds are printed per sheet
    :return: always 0
    """
    _, sep, adapt, verbose = showOpts
    inName = LPath(param[0]).to_os_path()
    assert inName is not None
    z = ZSheets(inName)
    _, cont = z.sheets, z.cont
    for idx, pages in enumerate(cont, start=1):
        t = ZTable(pages)
        for y, entry in enumerate(t.cont, start=1):
            joined = t.alt_chr_separated(entry, adapt, sep)
            plain = char_map.simpler_ascii(joined)
            pre = "" if verbose <= 0 else f"row#{y}\t"
            outFile.write(f"{pre}{plain}\n")
        if debug > 0:
            shown = f"{inName}, {idx}/ #{len(cont)}"
            print(f"ZTable({shown}) minCol={t.minCol}, maxCol={t.maxCol}")
    return 0
Пример #18
0
def nodified(node) -> str:
    """ Return `node` as a "kind=lines" string.

    The lines part is node.lines after char_map.simpler_ascii(..., 1).
    """
    shown = char_map.simpler_ascii(node.lines, 1)
    return "{}={}".format(node.kind, shown)
Пример #19
0
def simpler_str(s, subst_chr="?"):
    """ Return `s` converted by char_map.simpler_ascii().

    :param s: text to convert
    :param subst_chr: accepted but not used by this function
    """
    simple = char_map.simpler_ascii(s)
    return simple
Пример #20
0
def simpler_ascii(a_chr):
    """ Module-level shortcut: hand `a_chr` to char_map.simpler_ascii(). """
    simple = char_map.simpler_ascii(a_chr)
    return simple
Пример #21
0
def try_markdown(md_file) -> int:
    """ Check the pangram quoted in a markdown file.

    The pangram is the first line starting with ">" (marker removed,
    stripped). Its letters are counted through a BareText histogram, and
    its char_map.simpler_ascii() form must equal get_pangram("pt").

    :param md_file: path of the markdown file, read as LATIN1_TEXT
    :return: 0 on success, 2 when the file does not exist
    :raises AssertionError: the pangram does not match get_pangram("pt")
    """
    # Read inside 'with' so the handle is closed; it used to stay open.
    try:
        with open(md_file, "r", encoding=LATIN1_TEXT) as file:
            lines = file.read().splitlines()
    except FileNotFoundError:
        print("Skipped test (file not there):", md_file)
        return 2
    pangram = ""
    for line in lines:
        if line.startswith(">"):
            pangram = line[1:].strip()
            break
    # The file itself is not re-read: only the pangram feeds the histogram.
    tred = BareText(md_file)
    tred.add_from_buffer(pangram)
    hist = tred.histogram
    shown = char_map.simpler_ascii(pangram)
    print(f"Pangram (len={len(pangram)}): '{shown}'")
    # Conversions of codes 128..255 do not depend on the letter at hand,
    # so they are computed once instead of once per letter.
    high_codes = {
        code: char_map.simpler_ascii(chr(code)) for code in range(128, 256)
    }
    tal = 0
    for letter in char_map.lowercase():
        upper = letter.upper()
        count = hist.seen[ord(letter)] + hist.seen[ord(upper)]
        # 'outras': occurrences of high codes that convert to this letter.
        outras = sum(
            hist.seen[code]
            for code, plain in high_codes.items() if plain == letter
        )
        suffix = f" (sum: {count+outras})" if outras else ""
        print("Letter {}: {} {}{}".format(upper, count, outras, suffix), tal)
        tal += count + outras
    count, outras, unconv = 0, 0, []
    for letter in pangram:
        num = ord(letter)
        letra = char_map.simpler_ascii(letter)
        if letra.isalpha():
            count += 1
            outras += int(num >= 128)
        elif num >= 128:
            # High code that did not convert into a letter.
            unconv.append(f"symbol={num}d, hex=0x{num:02x}")
    nunc = len(unconv)
    is_ok = shown == get_pangram("pt")
    print(f"Letter (all): {tal}+{nunc}, {count+outras} ({count}+{outras})",
          f"ok? {is_ok}")
    print(f"""
Example:
\t94 un-accented letters from pangram;
\t1 unconverted (in this case {nunc})
\t107 letters from pangram (including accented): ({count}+{outras})
""")
    if unconv:
        print("Unconverted follows:\n" + "\n".join(unconv))
    assert is_ok
    return 0
Пример #22
0
def dump_wordlist(out, err, whash, opts: dict) -> dict:
    """ Hash each word of a word list and collect per-hash statistics.

    Words come from `whash.lines` (after `whash.reader()`); each one is
    hashed with wordhash.word_hash() on its char_map.simpler_ascii() form.
    The statistics dictionary from `whash.infos.stats()` is filled and
    returned.

    :param out: output stream, only handed to word_subcalc() when
                opts["show-all"] is truthy
    :param err: stream receiving the "Invalid: ..." message
    :param whash: word-hash object; this function uses its `infos`,
                  `alpha_number()`, `reader()`, `lines`, `excl` and `fname`
    :param opts: options; only "show-all" is read here
    :return: the statistics dictionary; keys set here are 'excl', 'nwords',
             'hsh-capital', 'where', 'maxsize' and 'wthere', and the
             entries of 'stats-bysize' for 0 and 3..7. It is returned early
             (after 'nwords') when there is no word, or when the last
             word's simplified form is empty.
    """
    # pylint: disable=line-too-long
    show_all = bool(opts.get("show-all"))
    wset = whash.infos.stats()
    # Both lists are appended to below.
    # NOTE(review): they are taken before reader() and stats() is called
    # again afterwards -- presumably the same list objects; confirm.
    queue, hshing = wset['queue'], wset['hshing']
    arange = whash.alpha_number()
    assert arange >= 10, f"alpha_number() is usually 1000; at least 10; got {arange}"
    whash.reader()
    wset = whash.infos.stats()
    wset['excl'] = whash.excl
    # Lines starting with '#' are left out; the rest lose their trailing
    # newline and go through wordhash.valid_word().
    words = [
        wordhash.valid_word(word.rstrip('\n')) for word in whash.lines
        if not word.startswith('#')
    ]
    # dct: hash -> list of (word, s_word); bysize: size (3..7) -> hash -> list of s_word
    dct, bysize = dict(), dict()
    for size in range(3, 7 + 1, 1):
        bysize[size] = dict()
        for hsh in range(arange):
            bysize[size][hsh] = list()
    for hsh in range(arange):
        dct[hsh] = list()
    last = ""
    wset['nwords'] = len(words)
    for aword in words:
        # Two conversions of the same word: with and without the extra
        # argument 1; the hash is computed from the latter.
        word = char_map.simpler_ascii(aword, 1)
        s_word = char_map.simpler_ascii(aword)
        # *if* hsh in (63, 104) and word.startswith("ch") ...; cha~ vs cha'
        hsh = wordhash.word_hash(s_word)
        dct[hsh].append((word, s_word))
        last = s_word
        size = len(s_word)
        if not size:
            continue
        # Only words of 3 to 7 characters are kept per size.
        if 3 <= size <= 7:
            bysize[size][hsh].append(s_word)
        # f"{hsh:>4} {word}\n")
        #if s_word < last:
        #    err.write(f"Word '{s_word}' is not sorted alphabetically (last was '{last}')\n")
    fname = whash.fname
    # 'last' is empty when no word was processed, or when the final word's
    # simplified form is empty.
    if not last:
        err.write(f"Invalid: {fname}\n")
        return wset
    # One queue entry per hash: the words joined by ';' ('words' is rebound here).
    for hsh in range(arange):
        words = dct[hsh]
        shown = ';'.join([word for word, _ in words])
        queue.append((hsh, shown))
    # info_up ends up as [list of entries, dict hash -> list of entries].
    info_up, excluded = list(), whash.excl['must']
    info_up += [list(), dict()]
    maxsize, where = -1, 0
    # Stats
    wset['stats-bysize'][0] = 0
    for size in range(3, 7 + 1, 1):
        wset['stats-bysize'][size] = 0
    # Main loop
    for hsh in range(arange):
        idx = 0
        candidates = list()
        for size in range(3, 7 + 1, 1):
            words = bysize[size][hsh]
            if not words:
                continue
            # rest: distinct lowercase words that were not excluded;
            # up_words: (word, kind) pairs for the non-lowercase words.
            rest, up_words = list(), list()
            for word in words:
                if word.islower():
                    if not wordhash.was_excluded(word, excluded, whash.excl):
                        if word not in rest:
                            rest.append(word)
                elif upperwords.valid_uppercase_word(word):
                    up_words.append((word, 1))
                elif word[1:].islower():
                    # kind is 1 + length when only the tail is lowercase
                    up_words.append((word, 1 + len(word)))
            if not rest:
                if not up_words:
                    continue
                # No lowercase word at this size: record the non-lowercase
                # ones instead.
                for word, up_kind in up_words:
                    tofu = {
                        'hsh': hsh,
                        'size': size,
                        'word': word,
                        'kind': up_kind,
                    }
                    info_up[0].append(tofu)
                    if hsh in info_up[1]:
                        info_up[1][hsh].append(tofu)
                    else:
                        info_up[1][hsh] = [tofu]
                continue
            candidates.append((size, hsh, rest))
        if candidates:
            # Sizes are visited in ascending order, so the first candidate
            # is the one with the shortest words.
            size, hsh, rest = candidates[0]
            hshing.append(candidates[0])
            wset['stats-bysize'][size] += 1
            idx = size
            # Keep the largest chosen size and the hash it occurred at.
            if idx > maxsize:
                maxsize, where = idx, hsh
        if idx <= 0:
            # No candidate for this hash: placeholder entry, counted under size 0.
            hshing.append((0, hsh, ["(NADA)"]))
            #out.write(f"bysize:- {hsh:>4} (NADA)\n")
            wset['stats-bysize'][0] += 1
    wset['hsh-capital'] = info_up
    wset['where'], wset['maxsize'] = where, maxsize
    wset['wthere'] = dict(
    )  # wset['hshing'][915] = ['word1', 'word2', ...]; here indexes a word to its hash
    # iterate_wording() returns a message; anything but "" fails the assert.
    msg = iterate_wording(wset['hshing'], wset['wthere'])
    assert msg == "", msg
    word_sub_info_upper(info_up, wset)
    if show_all:
        word_subcalc(out, whash, hshing, wset)
    return wset
Пример #23
0
 def _from_fname(self, astr):
     """ Return the header string for a filename.

     The header is `astr` converted by char_map.simpler_ascii().
     """
     return char_map.simpler_ascii(astr)