Example #1
    def run(self):
        print_header("Setting up GPG")

        add_zsh_hook(Hook.POST, "10-gpg", SCRIPT_DIR.joinpath("setuphook.zsh"))
        add_fish_hook(Hook.POST, "10-gpg",
                      SCRIPT_DIR.joinpath("setuphook.fish"))

        if file_exists(INSTALLED_CANARY) and "force" not in self.args:
            print_line("GPG already set up")
            return

        # Use the macOS binary location for consistency
        if not file_exists("/usr/local/bin/gpg2"):
            link_file("/usr/bin/gpg2", "/usr/local/bin/gpg2", sudo=True)

        self.install_pkgs()

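        # Make sure the GnuPG home directory exists before linking config files into it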
        os.makedirs(GNUPG_DIR, exist_ok=True)
        link_file(SCRIPT_DIR.joinpath("gpg.conf"),
                  GNUPG_DIR.joinpath("gpg.conf"))

        if platform.is_mac:
            link_file(
                SCRIPT_DIR.joinpath("gpg-agent.conf"),
                GNUPG_DIR.joinpath("gpg-agent.conf"),
            )
        else:
            remove(GNUPG_DIR.joinpath("gpg-agent.conf"))

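        # GnuPG warns about unsafe permissions unless the homedir is accessible only by its owner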
        run_command_no_out(f"chmod -R og-rwx {str(GNUPG_DIR)}")

        gpg_key = settings["gpg"]["key"]
        print_line(f"GPG Key: {gpg_key}")

        # Import my public key and trust it ultimately
        if "nokey" not in self.args:
            run_command(f"gpg2 --recv-keys {gpg_key}")
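            # Ownertrust level 6 marks the key as ultimately trusted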
            trust_key = f"{gpg_key}:6:"
            run_command(f"echo '{trust_key}' | gpg2 --import-ownertrust",
                        shell=True)

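        # gnome-keyring's SSH agent can shadow gpg-agent's SSH support, so park its autostart entry on Fedora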
        if platform.is_fedora and file_exists(
                "/etc/xdg/autostart/gnome-keyring-ssh.desktop"):
            run_command(
                "sudo mv -f /etc/xdg/autostart/gnome-keyring-ssh.desktop /etc/xdg/autostart/gnome-keyring-ssh.desktop.inactive"
            )

        # Idempotency: remove any stale install canaries (glob left unquoted so the shell expands it)
        run_command('rm -f "$HOME"/.gnupg/.dotfile-installed.*', shell=True)
        run_command(f"touch {str(INSTALLED_CANARY)}")
Example #2
    def run(self):
        if not platform.is_arch:
            return

        print_header("Setting up VNC")

        install_pkg("tigervnc", "lxde-gtk3")

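        # Adopt the .pacnew version of the PAM config if pacman installed one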
        if file_exists("/etc/pam.d/tigervnc.pacnew"):
            move("/etc/pam.d/tigervnc.pacnew", "/etc/pam.d/tigervnc")

        link_files([
            [
                SCRIPT_DIR.joinpath("vncserver.users"),
                Path("/etc/tigervnc/vncserver.users"),
                True,
            ],
            [
                SCRIPT_DIR.joinpath("config"),
                Path.home().joinpath(".vnc", "config"),
                True,
            ],
        ])

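        # Enable and start the VNC server unit for display :1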
        run_command("sudo systemctl enable vncserver@:1")
        run_command("sudo systemctl start vncserver@:1")
Example #3
    def setup_project_list(self):
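        # Ensure ~/.project.list exists and expose it as ~/.warprc (presumably for the zsh wd/warp plugin)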
        project_list = home_dir.joinpath(".project.list")
        if not file_exists(project_list):
            with open(project_list, "w") as f:
                f.write("")

        link_file(project_list, home_dir.joinpath(".warprc"))

        home_dir.joinpath("code").mkdir(parents=True, exist_ok=True)
Example #4
def parse_test(data_path, w_path, doc_w_path=None, doc_token_span_w_path=None):
    if doc_token_span_w_path and not file_exists(doc_token_span_w_path):
        print('{} not found, computing doc-level-span information dictionary'.
              format(doc_token_span_w_path))
        documents_spans = get_real_token_span(data_path)
        # keep a copy of the token spans to avoid re-computing them during training, etc.
        write_pickle(documents_spans, doc_token_span_w_path)
        print('{} created'.format(doc_token_span_w_path))
    elif doc_token_span_w_path:
        documents_spans = read_pickle(doc_token_span_w_path)
    else:
        # No span-cache path was given: compute the spans without persisting them
        documents_spans = get_real_token_span(data_path)
    txt_files = get_files(data_path, ext='txt')
    documents_tokens = []
    documents_pos = []
    documents_ortho = []
    documents_fname = []
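    # Parallel nested lists: one entry per document, each a list of sentences, each a list of per-token values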
    for txt_path in txt_files:
        document_tokens = []
        document_pos = []
        document_ortho = []
        document_fname = []
        f_name = get_filename(txt_path)
        sentences = documents_spans[f_name]
        for sentence in sentences:
            sentence_tokens = []
            sentence_pos = []
            sentence_ortho = []
            sentence_fname = []
            for word_dictio in sentence:
                sentence_tokens.append(word_dictio['word'])
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_fname.append(f_name)
            document_tokens.append(sentence_tokens)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_fname.append(sentence_fname)
        documents_tokens.append(document_tokens)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_fname.append(document_fname)
    write_bio_test(w_path,
                   documents_tokens,
                   documents_pos,
                   documents_ortho,
                   documents_fname,
                   sentence_level=True)
    if doc_w_path:
        write_bio_test(doc_w_path,
                       documents_tokens,
                       documents_pos,
                       documents_ortho,
                       documents_fname,
                       sentence_level=False)
Example #5
    def setup_oh_my_zsh(self):
        # A directory without oh-my-zsh.sh is a broken or partial checkout; remove it so it can be reinstalled
        if dir_exists(omz_dir) and not file_exists(
                omz_dir.joinpath("oh-my-zsh.sh")):
            # shutil.rmtree also handles a non-empty directory (os.rmdir would fail on one)
            shutil.rmtree(omz_dir)

        # oh-my-zsh
        if not dir_exists(omz_dir):
            print_line("Installing oh-my-zsh")
            run_command("bash " +
                        str(script_dir.joinpath("install-oh-my-zsh.sh")))

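            # Point the macOS login shell at the zsh in /usr/local/bin (e.g. a Homebrew install)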
            if platform.is_mac:
                run_command(
                    'sudo dscl . -create "/Users/${USER}" UserShell /usr/local/bin/zsh',
                    shell=True,
                )

        # Update Oh My ZSH
        run_command("git pull --rebase --stat origin master", cwd=omz_dir)
Example #6
    def setup_hooks(self):
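        # Create the local zsh hook directories (pre, post, paths)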
        os.makedirs(home_dir.joinpath(".local.zsh.d", "pre"), exist_ok=True)
        os.makedirs(home_dir.joinpath(".local.zsh.d", "post"), exist_ok=True)
        os.makedirs(home_dir.joinpath(".local.zsh.d", "paths"), exist_ok=True)

        # Convert old custom into new hooks system
        # Shouldn't be needed anymore, but just in case
        if file_exists(home_dir.joinpath(".local.zsh")):
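            # mv -n: never overwrite an existing converted hook file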
            run_command(
                'mv -n "$HOME/.local.zsh" "$HOME/.local.zsh.d/post/00-old-local.zsh"',
                shell=True,
            )

        add_to_path("zsh", home_dir.joinpath("bin"))
        add_to_path("zsh", home_dir.joinpath(".scripts"))
        add_to_path("zsh", home_dir.joinpath(".local/scripts"))

        add_zsh_hook(Hook.POST, "10-vars", script_dir.joinpath("vars.zsh"))
        add_zsh_hook(Hook.POST, "10-nnn-setup",
                     script_dir.joinpath("nnn-setup.zsh"))
Example #7
def fish_hook_exists(hook: Hook, name: str) -> bool:
    return file_exists(FISH_HOOKS_DIR.joinpath(hook.value, name + ".fish"))
Example #8
def extract_tagger_predictions(model_path,
                               span_path,
                               output_path=None,
                               f_eval=None,
                               parameters=None,
                               return_raw_predictions=False):
    assert file_exists(span_path)
    documents = read_pickle(span_path)
    if not f_eval:
        model = Model(model_path=model_path)
        parameters = model.parameters
        if 'language_model' not in parameters:
            parameters['language_model'] = False
        # Load reverse mappings
        word_to_id, char_to_id, tag_to_id = [{
            v: k
            for k, v in x.items()
        } for x in [model.id_to_word, model.id_to_char, model.id_to_tag]]
        pos_to_id, ortho_to_id, segment_to_id = [{
            v: k
            for k, v in x.items()
        } for x in [model.id_to_pos, model.id_to_ortho, model.id_to_segment]]
        word_to_id_1 = {v: k for k, v in model.id_to_word_1.items()}
        # Load the model
        _, f_eval = model.build(training=False, **parameters)
        model.reload()
        id_to_tag = model.id_to_tag
    else:
        # load mappings
        mappings = read_pickle(join_path(model_path, 'mappings.pkl'))
        id_to_word = mappings['id_to_word']
        id_to_char = mappings['id_to_char']
        id_to_tag = mappings['id_to_tag']
        id_to_pos = mappings['id_to_pos']
        id_to_ortho = mappings['id_to_ortho']
        id_to_segment = mappings['id_to_segment']
        id_to_word_1 = mappings['id_to_word_1']
        # reverse mappings
        word_to_id, char_to_id, tag_to_id = [{
            v: k
            for k, v in x.items()
        } for x in [id_to_word, id_to_char, id_to_tag]]
        pos_to_id, ortho_to_id, segment_to_id = [{
            v: k
            for k, v in x.items()
        } for x in [id_to_pos, id_to_ortho, id_to_segment]]
        word_to_id_1 = {v: k for k, v in id_to_word_1.items()}
    predictions = {}
    docs_count = 0
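    # Tag every sentence of every document, collecting predictions per document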
    for doc_name, sentences in documents.items():
        for sentence in sentences:
            words = [span['word'] for span in sentence]
            start = [span['start'] for span in sentence]
            end = [span['end'] for span in sentence]
            pos = [span['pos'] for span in sentence]
            ortho = [get_ortho_feature(w) for w in words]
            doc_names = [doc_name] * len(words)
            input_dict = {
                'words': words,
                'pos': pos,
                'ortho': ortho,
                'doc_names': doc_names
            }
            sentence_cl = ' '.join(words)
            if parameters['lower']:
                sentence_cl = sentence_cl.lower()
            # Replace all digits with zeros
            if parameters['zeros']:
                sentence_cl = zero_digits(sentence_cl)
            words = sentence_cl.split(' ')
            assert len(words) == len(start) == len(end)
            # Prepare input
            sentence = prepare_sentence(input_dict,
                                        word_to_id,
                                        char_to_id,
                                        pos_to_id,
                                        ortho_to_id,
                                        segment_to_id,
                                        word_to_id_1,
                                        lower=parameters['lower'])
            model_input = create_input(sentence, parameters, add_label=False)
            # Decoding
            if parameters['crf']:
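                # The CRF's Viterbi path includes padded start/end symbols; drop them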
                y_preds = np.array(f_eval(*model_input))[1:-1]
            else:
                y_preds = f_eval(*model_input).argmax(axis=1)
            y_preds = [id_to_tag[y_pred] for y_pred in y_preds]
            # Output tags in the IOB2 format
            if parameters['tag_scheme'] == 'iobes':
                y_preds = iobes_iob(y_preds)
            if not return_raw_predictions:
                y_preds = resolve_inconsistencies(y_preds)
                entities = extract_entities(words, y_preds, start, end)
                if doc_name not in predictions:
                    predictions[doc_name] = []
                if len(entities) > 0:
                    predictions[doc_name] += entities
            else:
                if doc_name not in predictions:
                    predictions[doc_name] = {}
                    predictions[doc_name]['words'] = []
                    predictions[doc_name]['tags'] = []
                    predictions[doc_name]['start'] = []
                    predictions[doc_name]['end'] = []
                predictions[doc_name]['words'].append(words)
                predictions[doc_name]['tags'].append(y_preds)
                predictions[doc_name]['start'].append(start)
                predictions[doc_name]['end'].append(end)
        docs_count += 1
        if docs_count % 100 == 0:
            print('{} documents processed'.format(docs_count))

    if return_raw_predictions:
        return predictions
    else:
        write_predictions(output_path, predictions)
Example #9
def parse_from_list(txt_files,
                    w_path,
                    doc_token_span_w_path,
                    train_data_path,
                    dev_data_path,
                    ann_file_ext='ann',
                    append_i_tag=True):
    assert doc_token_span_w_path is not None
    documents_spans = read_pickle(doc_token_span_w_path)
    documents_tokens = []
    documents_tags = []
    documents_pos = []
    documents_ortho = []
    documents_segment = []
    documents_fname = []
    # 'txt_path' is a misnomer: it is just a file name without the extension, not a path
    for txt_path in txt_files:
        document_tokens = []
        document_tags = []
        document_pos = []
        document_ortho = []
        document_segment = []
        document_fname = []
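        # Look for the annotation file in the train directory first, then fall back to dev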
        att_path = join_path(train_data_path,
                             '{}.{}'.format(txt_path, ann_file_ext))
        if not file_exists(att_path):
            att_path = join_path(dev_data_path,
                                 '{}.{}'.format(txt_path, ann_file_ext))
        entities_dict = parse_annotation_file(att_path)
        f_name = txt_path
        sentences = documents_spans[f_name]
        for sentence in sentences:
            sentence_tokens = []
            sentence_tags = []
            sentence_pos = []
            sentence_ortho = []
            sentence_segment = []
            sentence_fname = []
            for word_dictio in sentence:
                _, tag = is_token_an_entity(word_dictio, entities_dict)
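                # With append_i_tag, non-O tags get an 'I-' prefix (IO-style tagging)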
                if append_i_tag:
                    if tag != 'O':
                        tag = 'I-{}'.format(tag)
                segment = 'O' if tag == 'O' else 'I-SEGMENT'
                sentence_tokens.append(word_dictio['word'])
                sentence_tags.append(tag)
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_segment.append(segment)
                sentence_fname.append(f_name)
            document_tokens.append(sentence_tokens)
            document_tags.append(sentence_tags)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_segment.append(sentence_segment)
            document_fname.append(sentence_fname)
        documents_tokens.append(document_tokens)
        documents_tags.append(document_tags)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_segment.append(document_segment)
        documents_fname.append(document_fname)
    write_bio(w_path, documents_tokens, documents_tags, documents_pos,
              documents_ortho, documents_segment, documents_fname)
Example #10
def parse(data_path,
          w_path,
          doc_token_span_w_path=None,
          ann_file_ext='ann',
          append_i_tag=True):
    create_directory(get_parent_directory(w_path))
    if not file_exists(doc_token_span_w_path):
        print('{} not found, computing doc-level-span information dictionary'.
              format(doc_token_span_w_path))
        documents_spans = get_real_token_span(data_path)
        # keep a copy of the token spans to avoid re-computing them during training, etc.
        write_pickle(documents_spans, doc_token_span_w_path)
        print('{} created'.format(doc_token_span_w_path))
    else:
        documents_spans = read_pickle(doc_token_span_w_path)
    txt_files = get_files(data_path, ext='txt')
    documents_tokens = []
    documents_tags = []
    documents_pos = []
    documents_ortho = []
    documents_segment = []
    documents_fname = []
    for txt_path in txt_files:
        document_tokens = []
        document_tags = []
        document_pos = []
        document_ortho = []
        document_segment = []
        document_fname = []
        att_path = join_path(
            data_path, '{}.{}'.format(get_filename(txt_path), ann_file_ext))
        entities_dict = parse_annotation_file(att_path)
        f_name = get_filename(txt_path)
        sentences = documents_spans[f_name]
        for sentence in sentences:
            sentence_tokens = []
            sentence_tags = []
            sentence_pos = []
            sentence_ortho = []
            sentence_segment = []
            sentence_fname = []
            for word_dictio in sentence:
                _, tag = is_token_an_entity(word_dictio, entities_dict)
                if append_i_tag:
                    if tag != 'O':
                        tag = 'I-{}'.format(tag)
                segment = 'O' if tag == 'O' else 'I-SEGMENT'
                sentence_tokens.append(word_dictio['word'])
                sentence_tags.append(tag)
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_segment.append(segment)
                sentence_fname.append(f_name)
            document_tokens.append(sentence_tokens)
            document_tags.append(sentence_tags)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_segment.append(sentence_segment)
            document_fname.append(sentence_fname)
        documents_tokens.append(document_tokens)
        documents_tags.append(document_tags)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_segment.append(document_segment)
        documents_fname.append(document_fname)
    write_bio(w_path, documents_tokens, documents_tags, documents_pos,
              documents_ortho, documents_segment, documents_fname)
Example #11
def install_font(url, dest):
    if not file_exists(dest):
        run_command(f"wget -q --show-progress -O '{dest}' '{url}'")
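        # Returning True (only possible on Linux) presumably tells the caller to refresh the font cache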
        return platform.is_linux
    return False