Exemplo n.º 1
0
def check_spans(doc_fn, tag_fn, options):
    """Check that each span's recorded text matches the document text.

    Documents are read from ``doc_fn`` and, for each document, its spans
    are read from ``tag_fn``. The text of every span is compared with the
    slice of ``doc.text`` covered by the span offsets (``span.end`` is
    treated as inclusive). Mismatches and the final summary are printed to
    stdout; progress is printed to stderr every 10000 documents.

    Args:
        doc_fn: Name of the document file, opened via ``open_file``.
        tag_fn: Name of the span file, opened via ``open_file``.
        options: Passed through unchanged to ``open_file``.
    """
    doc_count, span_count, mismatches = 0, 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            span_reader = SpanReader(tag_f)
            for doc in doc_reader:
                for span in span_reader.document_spans(doc.id):
                    # span.end is inclusive, hence the + 1 for slicing
                    doc_span_text = doc.text[span.start:span.end + 1]
                    if doc_span_text != span.text:
                        dt, st = safe_str(doc_span_text), safe_str(span.text)
                        # Quote both texts separately, then show the span
                        print(f'text mismatch in {doc.id}: "{dt}" '
                              f'vs "{st}": {span}')
                        mismatches += 1
                    span_count += 1
                doc_count += 1
                if doc_count % 10000 == 0:
                    print(
                        f'processed {doc_count} documents '
                        f'({span_count} spans)',
                        file=sys.stderr)
            # The final count is taken from the reader's own line index
            # rather than from the spans matched to documents above.
            span_count, errors = span_reader.iter.index - 1, span_reader.errors
            if span_reader.current_doc_id() is not None:
                # The reader still has a pending document ID after all
                # documents were consumed.
                print(f'ERROR: extra lines in {tag_fn}')
            if mismatches or errors:
                print(f'Checked {span_count} spans, found {errors} errors '
                      f'and {mismatches} mismatches')
            else:
                print(f'OK, checked {span_count} spans')
Exemplo n.º 2
0
def cut_tags(doc_fn, tag_fn, out_fn, options):
    """Copy spans from ``tag_fn`` to ``out_fn``, remapping them per document.

    For each document read from ``doc_fn`` an offset map is requested from
    ``get_offset_map``. When it is ``None`` the document's span lines are
    copied verbatim; otherwise the spans are parsed and passed through
    ``apply_offset_map``, and only the spans it returns are written. A
    summary of removed spans is printed to stderr.

    Args:
        doc_fn: Name of the document file.
        tag_fn: Name of the input span file.
        out_fn: Name of the output span file.
        options: Passed to ``open_file`` and ``get_offset_map``.
    """
    removed, total = 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            span_reader = SpanReader(tag_f, no_type_mapping=True)
            with open_file(out_fn, 'w', options) as out_f:
                for doc_idx, doc in enumerate(doc_reader):
                    offset_map = get_offset_map(doc, options)
                    if offset_map is None:
                        # no-op, quick copy without parsing; raw lines are
                        # printed with end='' so no newline is added
                        for span in span_reader.document_lines(doc.id):
                            print(span, end='', file=out_f)
                            total += 1
                    else:
                        # need to parse, map and filter
                        spans = list(span_reader.document_spans(doc.id))
                        mapped = apply_offset_map(spans, offset_map)
                        removed += len(spans) - len(mapped)
                        total += len(spans)
                        for span in mapped:
                            print(span, file=out_f)
                    if (doc_idx+1) % 100000 == 0:
                        print(f'processed {doc_idx+1} documents',
                              file=sys.stderr)
    # Guard against empty input: no spans means nothing to divide by
    ratio = removed / total if total else 0.0
    print(f'removed {removed}/{total} spans ({ratio:.1%})',
          file=sys.stderr)
Exemplo n.º 3
0
def cut_documents(doc_fn, out_fn, options):
    """Apply ``cut_document`` to every document and write the results.

    Each document read from ``doc_fn`` is passed to ``cut_document`` and
    then printed to ``out_fn``. The values returned by ``cut_document``
    are summed and reported to stderr alongside the document count.

    Args:
        doc_fn: Name of the input document file.
        out_fn: Name of the output document file.
        options: Passed to ``open_file`` and ``cut_document``.
    """
    cut_count = 0
    # Initialised up front so the summary also works for empty input
    doc_count = 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(out_fn, 'w', options) as out_f:
            # start=1 makes doc_count the number of documents seen so far
            for doc_count, doc in enumerate(doc_reader, start=1):
                cut_count += cut_document(doc, options)
                print(doc, file=out_f)
                if doc_count % 100000 == 0:
                    print(f'processed {doc_count} documents', file=sys.stderr)
    ratio = cut_count / doc_count if doc_count else 0.0
    print(f'cut {cut_count}/{doc_count} documents ({ratio:.1%})',
          file=sys.stderr)
Exemplo n.º 4
0
def filter_tags(tag_fn, out_fn, ids, options):
    """Copy the lines of ``tag_fn`` whose first TAB field is in ``ids``.

    Matching lines are written unchanged to ``out_fn``; progress and a
    final summary go to stderr.

    Args:
        tag_fn: Name of the input file (TAB-separated lines).
        out_fn: Name of the output file.
        ids: Container supporting ``in`` with the IDs to keep.
        options: Passed through unchanged to ``open_file``.
    """
    out_count = 0
    # Initialised up front so the summary also works for an empty file
    ln = 0
    with open_file(tag_fn, 'r', options) as tag_f:
        with open_file(out_fn, 'w', options) as out_f:
            for ln, line in enumerate(tag_f, start=1):
                id_ = line.split('\t')[0]
                if id_ in ids:
                    # line keeps its own newline, so none is appended
                    print(line, file=out_f, end='')
                    out_count += 1
                if ln % 100000 == 0:
                    print(f'processed {ln}, output {out_count}',
                          file=sys.stderr)
    ratio = out_count / ln if ln else 0.0
    print(f'output {out_count}/{ln} lines ({ratio:.1%})',
          file=sys.stderr)
Exemplo n.º 5
0
def filter_documents(doc_fn, out_fn, ids, options):
    """Copy the documents of ``doc_fn`` whose ``id`` is in ``ids``.

    Matching documents are printed to ``out_fn`` (flushed after each one);
    progress and a final summary go to stderr.

    Args:
        doc_fn: Name of the input document file.
        out_fn: Name of the output document file.
        ids: Container supporting ``in`` with the document IDs to keep.
        options: Passed through unchanged to ``open_file``.
    """
    out_count = 0
    # Initialised up front so the summary also works for empty input
    doc_count = 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(out_fn, 'w', options) as out_f:
            # start=1 makes doc_count the number of documents seen so far
            for doc_count, doc in enumerate(doc_reader, start=1):
                if doc.id in ids:
                    print(doc, file=out_f, flush=True)
                    out_count += 1
                if doc_count % 100000 == 0:
                    print(f'processed {doc_count}, output {out_count}',
                          file=sys.stderr)
    ratio = out_count / doc_count if doc_count else 0.0
    print(f'output {out_count}/{doc_count} documents ({ratio:.1%})',
          file=sys.stderr)
Exemplo n.º 6
0
def convert_to_standoff(doc_fn, tag_fn, out_dir, options):
    """Convert every document and its spans via ``convert_single``.

    Documents are read from ``doc_fn``; for each one, all of its spans are
    collected from ``tag_fn`` and handed to ``convert_single`` together
    with ``out_dir``.

    Args:
        doc_fn: Name of the document file.
        tag_fn: Name of the span file.
        out_dir: Output directory passed to ``convert_single``.
        options: Passed to ``open_file`` and ``convert_single``.

    Raises:
        Exception: Whatever ``convert_single`` raises; the failure is
            reported through ``error`` first and then re-raised.
    """
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            # Read spans that include source information
            span_reader = SpanReader(tag_f, source=True)
            for doc in doc_reader:
                spans = list(span_reader.document_spans(doc.id))
                try:
                    convert_single(doc, spans, out_dir, options)
                except Exception as e:
                    # Report which document failed, then propagate
                    error(f'failed to convert {doc.id}: {e}')
                    raise
Exemplo n.º 7
0
def modify_init_rc(directory):
  """Rewrite ``init.rc`` in ``directory`` in place.

  The file is read fully, then rewritten line by line:

  * after the ``export ANDROID_ROOT`` line an ``export ANDROID_CACHE /cache``
    line is added with the same leading whitespace;
  * each ``mkdir /system``, ``mkdir /data`` and ``mkdir /cache`` line is
    followed by a copy targeting ``/raw-system``, ``/raw-data`` or
    ``/raw-cache``;
  * lines containing ``yaffs2`` are commented out with a leading ``#``;
  * all other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'init.rc')

  f = c.open_file(directory, 'init.rc', c.WRITE)
  try:
    for line in lines:
      if 'export ANDROID_ROOT' in line:
        c.write(f, line)
        c.write(f, c.whitespace(line) + "export ANDROID_CACHE /cache\n")

      elif re.search(r"mkdir /system(\s|$)", line):
        c.write(f, line)
        c.write(f, re.sub("/system", "/raw-system", line))

      elif re.search(r"mkdir /data(\s|$)", line):
        c.write(f, line)
        c.write(f, re.sub("/data", "/raw-data", line))

      elif re.search(r"mkdir /cache(\s|$)", line):
        c.write(f, line)
        c.write(f, re.sub("/cache", "/raw-cache", line))

      elif 'yaffs2' in line:
        # Prefix the line with '#' to comment it out
        c.write(f, re.sub(r"^", "#", line))

      else:
        c.write(f, line)
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 8
0
def modify_init_target_rc(directory):
  """Rewrite ``init.target.rc`` in ``directory`` in place.

  Indented ``wait``, ``check_fs`` and ``mount ext4`` lines that refer to a
  ``/dev/.../cache`` path are commented out. An indented
  ``mount_all fstab.qcom`` line that directly follows an ``on fs`` line is
  kept and followed by an ``exec`` of ``/init.dualboot.mounting.sh``. All
  other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'init.target.rc')

  previous_line = ""

  f = c.open_file(directory, 'init.target.rc', c.WRITE)
  try:
    for line in lines:
      if re.search(r"^\s+wait\s+/dev/.*/cache.*$", line):
        c.write(f, re.sub(r"^", "#", line))

      elif re.search(r"^\s+check_fs\s+/dev/.*/cache.*$", line):
        c.write(f, re.sub(r"^", "#", line))

      elif re.search(r"^\s+mount\s+ext4\s+/dev/.*/cache.*$", line):
        c.write(f, re.sub(r"^", "#", line))

      elif re.search(r"^\s+mount_all\s+fstab.qcom.*$", line) and \
          re.search(r"^on\s+fs\s*$", previous_line):
        c.write(f, line)
        c.write(f, c.whitespace(line) + "exec /sbin/busybox-static sh /init.dualboot.mounting.sh\n")

      else:
        c.write(f, line)

      # Remembered so the mount_all rule can check for a preceding "on fs"
      previous_line = line
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 9
0
def modify_fstab(directory):
  """Rewrite ``fstab.qcom`` and ``fstab.jgedlte`` in ``directory`` in place.

  In each file the ``/system``, ``/cache`` and ``/data`` entries are
  replaced by fixed lines that mount the by-name partitions at
  ``/raw-system``, ``/raw-cache`` and ``/raw-data``. If a file has no
  ``/cache`` entry, the ``/raw-cache`` line is appended at the end. All
  other lines are copied unchanged.
  """
  # Ignore all contents for Google Edition
  for fstab_name in [ 'fstab.qcom', 'fstab.jgedlte' ]:
    lines = c.get_lines_from_file(directory, fstab_name)

    has_cache_line = False

    f = c.open_file(directory, fstab_name, c.WRITE)
    try:
      for line in lines:
        if re.search(r"^/dev[a-zA-Z0-9/\._-]+\s+/system\s+.*$", line):
          c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/system /raw-system ext4 ro,errors=panic wait\n")

        elif re.search(r"^/dev[^\s]+\s+/cache\s+.*$", line):
          c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/cache /raw-cache ext4 nosuid,nodev,barrier=1 wait,check\n")
          has_cache_line = True

        elif re.search(r"^/dev[^\s]+\s+/data\s+.*$", line):
          c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/userdata /raw-data ext4 nosuid,nodev,noatime,noauto_da_alloc,discard,journal_async_commit,errors=panic wait,check,encryptable=footer\n")

        else:
          c.write(f, line)

      if not has_cache_line:
        # No /cache entry was present, so add the raw-cache mount explicitly
        c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/cache /raw-cache ext4 nosuid,nodev,barrier=1 wait,check\n")
    finally:
      # Close the file even if a write fails part-way through
      f.close()
Exemplo n.º 10
0
def modify_fstab(directory):
  """Rewrite ``fstab.qcom`` in ``directory`` in place.

  The mount points of the ``/system``, ``/cache`` and ``/data`` entries are
  renamed to ``/raw-system``, ``/raw-cache`` and ``/raw-data``; the rest of
  each entry is kept. If the file has no ``/cache`` entry, a ``/raw-cache``
  line is appended at the end. All other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'fstab.qcom')

  # For Android 4.2 ROMs
  has_cache_line = False

  f = c.open_file(directory, 'fstab.qcom', c.WRITE)
  try:
    for line in lines:
      if re.search(r"^/dev[a-zA-Z0-9/\._-]+\s+/system\s+.*$", line):
        c.write(f, re.sub(r"\s/system\s", " /raw-system ", line))

      elif re.search(r"^/dev[^\s]+\s+/cache\s+.*$", line):
        c.write(f, re.sub(r"\s/cache\s", " /raw-cache ", line))
        has_cache_line = True

      elif re.search(r"^/dev[^\s]+\s+/data\s+.*$", line):
        c.write(f, re.sub(r"\s/data\s", " /raw-data ", line))

      else:
        c.write(f, line)

    if not has_cache_line:
      # Terminate the appended entry with a newline like every other line
      c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/cache          /raw-cache       ext4    nosuid,nodev,barrier=1 wait,check\n")
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 11
0
def main():
    """Parse the command line and render the requested outputs.

    The input is loaded with ``common.open_file``, the search terms from
    ``settings.terms`` are located in it, and the resulting diagram data is
    rendered to LaTeX unless ``--tex none`` is given. With ``--passages``
    the matched passages are additionally rendered to an HTML file.
    """

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Position occurrences of search words in file and produce latex output and statistics. Default settings in settings.py-file')
    parser.add_argument('input', nargs='?',
                        help='The file to be parsed. If none is given, the settings-file is used.')
    parser.add_argument('output', nargs='?',
                        help='Full filename of the selected output.')
    parser.add_argument('--stats', '-s',
                        help='(Not implemented yet). Calculate and output statistics. Default = false.',
                        action='store_true',
                        default=False)
    parser.add_argument('--passages', '-p',
                        help='Print all passages and exceptions in search terms to file. Default = false.',
                        action='store_true',
                        default=False)
    parser.add_argument('--tex', '-t',
                        help='Create output to LaTeX file with TikZ formatted diagram. Choose whether it goes to shell, file or both. Default = shell.',
                        action='store',
                        choices=['shell', 'file', 'both', 'none'],
                        default='shell')
    # The help text states the default that is actually configured below
    parser.add_argument('--log', '-l',
                        help='Set the log level (output to shell). Default = INFO.',
                        default='INFO')

    # Parse command line arguments
    args = parser.parse_args()

    # Set log level and initiate logging module
    loglevel = args.log
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    logging.getLogger(__name__)

    # Read the string
    string = common.open_file(args.input)
    names, terms, exceptions = separate_terms(settings.terms)

    occurrences = create_occurrence_lists(terms, exceptions, string)

    output_data = prepare_diagram_data(occurrences, names, string)

    # Compare by value: identity comparison against a str literal is not
    # guaranteed to work and triggers a SyntaxWarning on Python 3.8+.
    if args.tex != 'none':
        render_tex(args.tex,
                   set_filename(args.output, '.tex'),
                   *output_data)

    if args.passages:
        # NOTE(review): the passages dump path is hard-coded; `output` is
        # only used for the LaTeX file.
        render_passages_in_html(terms,
                                exceptions,
                                string,
                                set_filename("/tmp/output_dump.html", '.html'))
Exemplo n.º 12
0
def modify_MSM8960_lpm_rc(directory):
  """Rewrite ``MSM8960_lpm.rc`` in ``directory`` in place.

  Indented ``mount`` lines that mention ``/cache`` are commented out with a
  leading ``#``; all other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'MSM8960_lpm.rc')

  f = c.open_file(directory, 'MSM8960_lpm.rc', c.WRITE)
  try:
    for line in lines:
      if re.search(r"^\s+mount.*/cache.*$", line):
        c.write(f, re.sub(r"^", "#", line))

      else:
        c.write(f, line)
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 13
0
def convert_single(doc, spans, out_dir, options):
    """Write one document as a ``.txt`` / ``.ann`` file pair in ``out_dir``.

    Span types are normalised in place with ``normalize_type`` and the
    spans are deduplicated. ``<doc.id>.txt`` receives the document text
    with TABs replaced by newlines. ``<doc.id>.ann`` receives one ``T`` line
    per span and one ``N`` reference line per span serial that is not
    ``DUMMY_SERIAL``.

    Args:
        doc: Document providing ``id`` and ``text``.
        spans: Spans of the document; their ``type`` is modified in place.
        out_dir: Directory in which both files are created.
        options: Passed to ``deduplicate_spans`` and ``open_file``.
    """
    for item in spans:
        item.type = normalize_type(item.type)
    spans = deduplicate_spans(spans, options)

    text_path = os.path.join(out_dir, f'{doc.id}.txt')
    with open_file(text_path, 'w', options) as text_f:
        print(doc.text.replace('\t', '\n'), file=text_f)

    offset_map = make_offset_map(doc.text)
    ann_path = os.path.join(out_dir, f'{doc.id}.ann')
    with open_file(ann_path, 'w', options) as ann_f:
        note_id = 1  # N-line numbering runs across all spans of the file
        for span_id, span in enumerate(spans, start=1):
            # Span ends are inclusive; + 1 makes the end exclusive before
            # both offsets are translated through the offset map.
            begin = offset_map[span.start]
            end = offset_map[span.end + 1]
            if len(span.sources) == 2:  # assume two sources
                label = f'{span.type}'
            else:
                label = f'{span.type}-{span.source}'
            print(f'T{span_id}\t{label} {begin} {end}\t{span.text}',
                  file=ann_f)
            real_serials = (x for x in span.serials if x != DUMMY_SERIAL)
            for serial in real_serials:
                print(f'N{note_id}\tReference T{span_id} string:{serial}',
                      file=ann_f)
                note_id += 1
Exemplo n.º 14
0
def modify_init_qcom_rc(directory):
  """Rewrite ``init.qcom.rc`` in ``directory`` in place.

  On lines where ``/data/media`` is followed by whitespace or the end of
  the line, every ``/data/media`` is replaced with ``/raw-data/media``. All
  other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'init.qcom.rc')

  f = c.open_file(directory, 'init.qcom.rc', c.WRITE)
  try:
    for line in lines:
      # Change /data/media to /raw-data/media
      if re.search(r"/data/media(\s|$)", line):
        c.write(f, re.sub('/data/media', '/raw-data/media', line))

      else:
        c.write(f, line)
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 15
0
 def xconf_iter(self, fname):  # {{{1
     # type: (Text) -> Iterable[Tuple[int, Text, Text]]
     """Yield ``(index, section, line)`` for every line of ``fname``.

     The section parser state is cleared first. For each line,
     ``self.section_parser`` is consulted: a non-negative result yields the
     line together with ``self.cur_section``; a negative result yields it
     with an empty section name. ``index`` is the zero-based line index.
     """
     self.section_parser_clear()
     syn_flags = XSectionSynaptics()
     with common.open_file(fname, "r") as conf:
         for lineno, text in enumerate(conf):
             if self.section_parser(text) < 0:
                 # Outside a section: start over with fresh flags
                 syn_flags = XSectionSynaptics()
                 yield (lineno, "", text)
                 continue
             syn_flags.parse_line(text)
             yield (lineno, self.cur_section, text)
Exemplo n.º 16
0
def modify_init_qcom_rc(directory):
  """Rewrite ``init.qcom.rc`` in ``directory`` in place.

  After the ``export EMULATED_STORAGE_TARGET`` line an
  ``export EXTERNAL_SD /storage/sdcard1`` line is added with the same
  leading whitespace. On lines where ``/data/media`` is followed by
  whitespace or the end of the line, every ``/data/media`` is replaced with
  ``/raw-data/media``. All other lines are copied unchanged.
  """
  lines = c.get_lines_from_file(directory, 'init.qcom.rc')

  f = c.open_file(directory, 'init.qcom.rc', c.WRITE)
  try:
    for line in lines:
      if 'export EMULATED_STORAGE_TARGET' in line:
        c.write(f, line)
        c.write(f, c.whitespace(line) + "export EXTERNAL_SD /storage/sdcard1\n")

      # Change /data/media to /raw-data/media
      elif re.search(r"/data/media(\s|$)", line):
        c.write(f, re.sub('/data/media', '/raw-data/media', line))

      else:
        c.write(f, line)
  finally:
    # Close the file even if a write fails part-way through
    f.close()
Exemplo n.º 17
0
def main():
    """Process the URLs in ``list.txt`` if it exists, else run ``start_main``.

    When ``list.txt`` exists in the current directory, its content is split
    into lines; lines starting with ``#`` are skipped, and every other line
    is resolved with ``url_to_module``. For each resolved module the info is
    fetched and passed to ``test_download``. The process then exits with
    status 0. Without ``list.txt`` control is handed to ``start_main``.
    """
    import sys
    from os import path
    from common import open_file
    if not path.exists('list.txt'):
        start_main()
        return

    download_list = open_file('list.txt').split('\n')
    from common import url_to_module, start_download, test_download
    for i in download_list:
        # Comment line; startswith also covers a bare '#' and empty strings
        if i.startswith('#'):
            continue
        m, url = url_to_module(i)
        if m is not None:
            info = m.get_info(url)
            # start_download(m, info)
            test_download(m, info)
            # book_name = m.get_m80txt_info(url, True)
            # if len(book_name) > 1:
            #     test_download(m, book_name, info)
    # sys.exit instead of the site-provided exit(), which is not always
    # available (e.g. when running with -S)
    sys.exit(0)
Exemplo n.º 18
0
    def process(self, start=0, stop=None):
        """Parse the records of ``self.path`` and store them in ``self.data``.

        The file is read in binary mode and every line is stripped. A record
        starts at a line matching ``self.START_PATTERN`` and runs up to and
        including the next line matching ``self.END_PATTERN`` (or the end of
        the file). Each record is passed to ``self.parse``; falsy results
        are skipped. A parsed object is kept when its ``os_timestamp`` is
        greater than ``start`` and, if ``stop`` is truthy, less than
        ``stop``.

        Scanning resumes after the end of each record, so lines inside a
        record are not examined again as possible record starts.

        Args:
            start: Exclusive lower bound for ``os_timestamp``.
            stop: Exclusive upper bound for ``os_timestamp``; a falsy value
                disables the upper bound.

        Raises:
            AssertionError: If ``self.path`` does not exist.
        """
        assert os.path.exists(self.path), \
            "'%s' does not exist!" % (self.path,)

        with open_file(self.path, "rb") as f:
            lines = [line.strip() for line in f]

        data = []
        idx = 0
        # A while loop is required: the record scan below advances idx, which
        # a `for ... in range()` loop would silently overwrite.
        while idx < len(lines):
            if self.START_PATTERN.match(lines[idx]):
                obj_data = []
                while idx < len(lines):
                    line = lines[idx]
                    obj_data.append(line)
                    if self.END_PATTERN.match(line):
                        break
                    idx += 1

                obj = self.parse(obj_data)
                # Records that could not be parsed are skipped silently
                if obj and obj.os_timestamp > start:
                    if not stop or obj.os_timestamp < stop:
                        data.append(obj)
            idx += 1

        self.data = data
Exemplo n.º 19
0
    def cmdreport(self):  # {{{2
        # type: () -> None
        """Write a diagnostic report file and show its name in a dialog.

        The report is written to ``report-<timestamp>.txt`` and contains the
        output of ``uname -a``, ``python3 -m platform``, Python version and
        build information, ``xinput list``, ``xinput list-props`` for
        ``xi.dev``, the current settings from ``xi.dumps()`` and the
        commands stored in ``cmdorg``.
        """
        import sys
        import platform
        from datetime import datetime

        fname = datetime.now().strftime("report-%Y%m%d-%H%M%S.txt")
        fp = open_file(fname, "a")
        try:
            msg = common.check_output(["uname", "-a"])
            fp.write(msg + "\n")
            msg = common.check_output(["python3", "-m", "platform"])
            fp.write(msg + "\n")
            fp.write("Python: {}\n".format(str(sys.version_info)))
            # Both branches do the same at runtime; they differ only in the
            # type-checker "ignore" markers for Python 2.
            if sys.version_info[0] == 2:
                sbld = platform.python_build()  # type: ignore
                scmp = platform.python_compiler()  # type: ignore
            else:
                sbld = platform.python_build()
                scmp = platform.python_compiler()
            fp.write("Python: {} {}\n".format(sbld, scmp))
            msg = common.check_output(["xinput", "list"])
            fp.write(msg + u"\n")
            msg = common.check_output(["xinput", "list-props", Text(xi.dev)])
            fp.write(msg + u"\n")
            fp.write(u"\n\n--- current settings (in app)---\n")
            fp.write(xi.dumps())
            fp.write(u"\n\n--- initial settings (at app startup)---")
            cmds = u""
            for i in cmdorg:
                cmds += u"\n" + u" ".join(i)
            fp.write(cmds + "\n")
        finally:
            # Close the report even if a command or a write fails
            fp.close()

        msg = u"Report: {} was made,\n" \
              u"use this file to report a issue.".format(fname)
        messagebox.showinfo(u"Make a Report", msg)
Exemplo n.º 20
0
def save_package(package, path):
    """Serialise ``package`` as JSON (2-space indent) to ``path``.

    The file obtained from ``open_file`` is closed explicitly so the data
    is flushed before returning, also when serialisation fails.
    """
    fp = open_file(path, 'w')
    try:
        json.dump(package, fp, indent=2)
    finally:
        fp.close()
Exemplo n.º 21
0
    def save(self, fname, fnameIn, db):  # {{{1
        # type: (Text, Text, NPropDb) -> bool
        '''Write ``fnameIn`` to ``fname``, merging property values from ``db``.

        Lines are obtained from ``self.xconf_iter(fnameIn)``. Lines outside
        a section are copied; when a section has just ended,
        ``self.save_remains`` is called for it first. Inside a section, a
        line is copied unless it parses to a property that ``db`` holds
        with a different value; in that case the ``db`` entry is updated
        from the line and the line itself is not copied (it is left to
        ``save_remains``).

        Always returns ``False``.

        sample output {{{3
            # Example xorg.conf.d snippet that assigns the touchpad driver
            # ...
            Section "InputClass"
                    Identifier "touchpad catchall"
                    Driver "synaptics"
                    MatchIsTouchpad "on"
                    Option "TapButton3" "2"
                    Option "FingerLow" "50"
                    Option "FingerHigh" "100"
                    Option "VertTwoFingerScroll" "on"
                    Option "HorizTwoFingerScroll" "on"
            # This option is recommend on all Linux systems using evdev,
            # ...
                  MatchDevicePath "/dev/input/event*"
            EndSection

            # This option enables the bottom right corner to be a right button
            # ...
            Section "InputClass"
                    Identifier "Default clickpad buttons"
                    MatchDriver "synaptics"
                    Option "SoftButtonAreas" "50% 0 82% 0 0 0 0 0"
                    Option "SecondarySoftButtonAreas"
                        "58% 0 0 15% 42% 58% 0 15%"
            EndSection  # }}}
        '''
        fp = common.open_file(fname, "w")
        try:
            prv_sec = ""
            done = []  # type: List[Text]
            for _, sec, line in self.xconf_iter(fnameIn):
                if len(sec) < 1:
                    # Outside any section: flush what remains of the
                    # section that just ended, then copy the line.
                    if len(prv_sec) > 0:
                        self.save_remains(fp, db, prv_sec, done)
                    prv_sec, done = "", []
                    fp.write(line)
                    continue
                prv_sec = sec

                tup = NProp.parse_xconfline(line)
                if tup is None:
                    fp.write(line)  # not a property line: write through
                    continue
                prop = tup[1]
                cur = db.get(sec, prop, NProp("", None, ""))
                if cur.key == "" or prop.same_prop(cur):
                    # Not found in db, or same as the db entry: write through
                    done.append(tup[0])
                    fp.write(line)
                    continue
                # just update props, write at save_remains().
                cur.update_by_prop_passive(prop)
        finally:
            # Close the output even if parsing or writing fails
            fp.close()

        # NOTE(review): if the input ends while still inside a section,
        # save_remains() is not called for that section.
        return False
Exemplo n.º 22
0
def save(file_obj, dependencies, path):
    """Insert ``dependencies`` into ``file_obj`` and write it as JSON.

    The result of ``insert_dependencies`` is serialised with a 2-space
    indent to ``path``. The file obtained from ``open_file`` is closed
    explicitly so the data is flushed, also when serialisation fails.
    """
    file_obj = insert_dependencies(file_obj, dependencies)

    fp = open_file(path, 'w')
    try:
        json.dump(file_obj, fp, indent=2)
    finally:
        fp.close()
Exemplo n.º 23
0
 def button_clicked(self, widget):
     """Pass the current entry text to ``common.open_file``.

     ``widget`` is accepted but not used. The text of ``self.entry`` is
     handed over together with ``self._window``.
     """
     entered_text = self.entry.get_text()
     common.open_file(entered_text, self._window)
Exemplo n.º 24
0
def get_file(package_path):
    """Load and return the JSON content of ``package_path``.

    Raises:
        Exception: Any error from opening or parsing the file; it is
            logged (in red) and re-raised unchanged.
    """
    try:
        fp = open_file(package_path)
        try:
            return json.load(fp)
        finally:
            # Close the file whether or not parsing succeeded
            fp.close()
    except Exception as ex:
        logging.error(Fore.RED + 'ERR: ' + str(ex))
        raise
Exemplo n.º 25
0
 def button_clicked(self, widget):
     """Pass the current entry text to ``common.open_file``.

     ``widget`` is accepted but not used. The text of ``self.entry`` is
     handed over together with ``self._window``.
     """
     entered_text = self.entry.get_text()
     common.open_file(entered_text, self._window)
Exemplo n.º 26
0
# Command-line entry point: parse a saved page from a file for offline
# checking, or require login credentials.
if __name__ == '__main__':
    import sys
    from optparse import OptionParser
    from pprint import pprint
    from common import to_date
    pars = OptionParser()
    pars.add_option('-u', '--username')
    pars.add_option('-p', '--password')
    pars.add_option('-d', '--date', help='dd-mm-yyyy')
    pars.add_option('', '--mutasi-file')
    pars.add_option('', '--saldo-file')
    pars.add_option('', '--output-file')
    option, remain = pars.parse_args(sys.argv[1:])

    # With --mutasi-file: feed the file content to MutasiParser, print the
    # cleaned data and stop.
    if option.mutasi_file:
        content = open_file(option.mutasi_file)
        parser = MutasiParser()
        parser.feed(content)
        pprint(parser.get_clean_data())
        sys.exit()

    # With --saldo-file: same flow using SaldoParser.
    if option.saldo_file:
        content = open_file(option.saldo_file)
        parser = SaldoParser()
        parser.feed(content)
        pprint(parser.get_clean_data())
        sys.exit()

    # Without a file option both credentials are mandatory. The message is
    # Indonesian for "--username and --password must be filled in".
    if not option.username or not option.password:
        print('--username dan --password harus diisi')
        sys.exit()