Пример #1
0
def _write_aspect_lex(parsed_data: Union[str, PathLike],
                      generated_aspect_lex: dict, out_dir: PathLike):
    """Collect example sentences for each generated aspect term and write
    the aspect lexicon to a CSV table.

    :param parsed_data: directory containing parsed documents
    :param generated_aspect_lex: mapping of aspect term -> lemma ('' if none)
    :param out_dir: output directory; created if it does not exist
    """
    parsed_docs = _load_parsed_docs_from_dir(parsed_data)
    aspect_dict = {}
    max_examples = 20
    label = 'AS'
    for doc in parsed_docs.values():
        for sent_text, _ in doc.sent_iter():
            for term, lemma in generated_aspect_lex.items():
                # match on the surface term and, when present, on the lemma
                if term in sent_text.lower():
                    _find_aspect_in_sentence(term, lemma, sent_text,
                                             aspect_dict, label, max_examples,
                                             False)
                if lemma != '' and lemma in sent_text.lower():
                    _find_aspect_in_sentence(term, lemma, sent_text,
                                             aspect_dict, label, max_examples,
                                             True)

    # write aspect lex to file
    header_row = ["Term", "Alias1", "Alias2", "Alias3"]
    for k in range(1, max_examples + 1):
        header_row.append("Example" + str(k))
    aspect_table = [header_row]

    for (term, lemma), sentences in aspect_dict.items():
        term_row = [term, lemma, '', '']
        term_row.extend(sentences)
        aspect_table.append(term_row)

    # BUG FIX: out_dir is annotated PathLike (e.g. may be a str) but was used
    # with Path-only operations (mkdir, the '/' operator); coerce it first.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file_path = out_dir / 'generated_aspect_lex.csv'
    _write_table(aspect_table, out_file_path)
    print('Aspect lexicon written to {}'.format(out_file_path))
Пример #2
0
def extract_download(
    url: str,
    directory: PathLike,
    filenames: list = None,
    known_hash: str = None,
    overwrite: bool = False,
):
    """Download a tar.gz archive and extract it into ``directory``.

    :param url: URL of the archive to download
    :param directory: destination directory (created if missing)
    :param filenames: archive members to extract; all members when empty/None
    :param known_hash: expected hash, forwarded to ``pooch.retrieve``
    :param overwrite: when True, replace existing extracted files
    """
    if not isinstance(directory, Path):
        directory = Path(directory)

    if filenames is None:
        filenames = []

    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)

    temporary_filename = directory / 'temp.tar.gz'
    logging.debug(f'downloading {url} -> {temporary_filename}')
    temporary_filename = pooch.retrieve(url,
                                        known_hash=known_hash,
                                        fname=temporary_filename)
    logging.debug(f'extracting {temporary_filename} -> {directory}')
    with tarfile.open(temporary_filename) as local_file:
        if len(filenames) > 0:
            # hoist the member listing out of the loop: getnames() walks the
            # whole archive, and a set makes each membership test O(1)
            members = set(local_file.getnames())
            for filename in filenames:
                if filename in members:
                    path = directory / filename
                    if not path.exists() or overwrite:
                        if path.exists():
                            os.remove(path)
                        local_file.extract(filename, directory)
        else:
            local_file.extractall(directory)
Пример #3
0
def init_dir(adir: PathLike, exist_ok=False, parents=False, rmtree=False):
    """Create (and optionally reset) a directory, returning it as a Path.

    :param adir: directory to initialize
    :param exist_ok: forwarded to ``Path.mkdir``
    :param parents: forwarded to ``Path.mkdir``
    :param rmtree: when True, delete an existing directory tree first
    :returns: the initialized directory as a ``Path``
    """
    target = Path(adir)
    # wipe any pre-existing directory before recreating it
    if rmtree and target.is_dir():
        shutil.rmtree(target)
    target.mkdir(exist_ok=exist_ok, parents=parents)
    return target
Пример #4
0
def plot_dataset(
        dataset: xr.Dataset,
        num_chains: int = 8,
        therm_frac: float = 0.,
        title: str = None,
        outdir: os.PathLike = None,
        subplots_kwargs: dict[str, Any] = None,
        plot_kwargs: dict[str, Any] = None,
        ext: str = 'png',
):
    """Plot every data variable of ``dataset`` and save each figure to disk.

    Each variable is rendered via ``plot_metric``; when that call returns
    subfigures, the variable is additionally drawn (as ``val.plot``) into the
    first subfigure with a colorbar.

    :param dataset: dataset whose ``data_vars`` are plotted one by one
    :param num_chains: forwarded to ``plot_metric``
    :param therm_frac: forwarded to ``plot_metric`` (presumably a
        thermalization fraction -- confirm against ``plot_metric``)
    :param title: figure title, forwarded to ``plot_metric``
    :param outdir: output directory; defaults to ``./plots/plots-<timestamp>``
    :param subplots_kwargs: keyword args forwarded to subplot creation
    :param plot_kwargs: keyword args for plotting; ``'color'`` is overwritten
        per variable
    :param ext: image file extension for saved figures
    :returns: the input ``dataset``, unchanged
    """
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    subplots_kwargs = {} if subplots_kwargs is None else subplots_kwargs

    if outdir is None:
        import os
        tstamp = get_timestamp('%Y-%m-%d-%H%M%S')
        outdir = Path(os.getcwd()).joinpath('plots', f'plots-{tstamp}')
        outdir.mkdir(exist_ok=True, parents=True)

    for idx, (key, val) in enumerate(dataset.data_vars.items()):
        # cycle through matplotlib's default color sequence C0..C8
        color = f'C{idx%9}'
        plot_kwargs['color'] = color

        fig, subfigs, ax = plot_metric(
            val=val.values,
            key=str(key),
            title=title,
            outdir=None,
            therm_frac=therm_frac,
            num_chains=num_chains,
            plot_kwargs=plot_kwargs,
            subplots_kwargs=subplots_kwargs,
        )
        # NOTE(review): outdir is always non-None here (the None case was
        # handled above), so this guard always runs and `outfile` is always
        # defined before its use in either branch below.
        if outdir is not None:
            outfile = Path(outdir).joinpath(f'{key}.{ext}')
            Path(outfile.parent).mkdir(exist_ok=True, parents=True)
            outfile = outfile.as_posix()

        if subfigs is not None:
            # edgecolor = plt.rcParams['axes.edgecolor']
            # hide axes edges by matching them to the face color
            plt.rcParams['axes.edgecolor'] = plt.rcParams['axes.facecolor']
            ax = subfigs[0].subplots(1, 1)
            # ax = fig[1].subplots(constrained_layout=True)
            cbar_kwargs = {
                # 'location': 'top',
                # 'orientation': 'horizontal',
            }
            im = val.plot(ax=ax, cbar_kwargs=cbar_kwargs)
            # ax.set_ylim(0, )
            im.colorbar.set_label(f'{key}')  # , labelpad=1.25)
            sns.despine(subfigs[0], top=True, right=True,
                        left=True, bottom=True)
            if outdir is not None:
                print(f'Saving figure to: {outfile}')
                plt.savefig(outfile, dpi=400, bbox_inches='tight')
        else:
            fig.savefig(outfile, dpi=400, bbox_inches='tight')

    return dataset
Пример #5
0
def write_csvs(
    dirname: PathLike, adata: AnnData, skip_data: bool = True, sep: str = ','
):
    """See :meth:`~anndata.AnnData.write_csvs`.

    Writes obs/var/obsm/varm (and optionally X) plus ``uns`` entries as
    '.csv' files under ``dirname``; ``uns`` entries go into an 'uns' subdir.
    """
    dirname = Path(dirname)
    if dirname.suffix == '.csv':
        dirname = dirname.with_suffix('')
    logger.info("writing '.csv' files to %s", dirname)
    if not dirname.is_dir():
        dirname.mkdir(parents=True, exist_ok=True)
    dir_uns = dirname / 'uns'
    if not dir_uns.is_dir():
        dir_uns.mkdir(parents=True, exist_ok=True)
    d = dict(
        obs=adata._obs,
        var=adata._var,
        obsm=adata._obsm.to_df(),
        varm=adata._varm.to_df(),
    )
    if not skip_data:
        d['X'] = pd.DataFrame(
            adata._X.toarray() if issparse(adata._X) else adata._X
        )
    d_write = {**d, **adata._uns}
    not_yet_raised_sparse_warning = True
    for key, value in d_write.items():
        if issparse(value):
            # warn once, then silently skip any further sparse entries
            if not_yet_raised_sparse_warning:
                warnings.warn(
                    'Omitting to write sparse annotation.', WriteWarning
                )
                not_yet_raised_sparse_warning = False
            continue
        filename = dirname
        if key not in {'X', 'var', 'obs', 'obsm', 'varm'}:
            filename = dir_uns
        filename /= f'{key}.csv'
        df = value
        if not isinstance(value, pd.DataFrame):
            value = np.array(value)
            if np.ndim(value) == 0:
                value = value[None]
            try:
                df = pd.DataFrame(value)
            except Exception:
                # BUG FIX: report the type of the value that failed to
                # convert, not the type of the exception (was type(e)).
                warnings.warn(
                    f'Omitting to write {key!r} of type {type(value)}.',
                    WriteWarning,
                )
                continue
        df.to_csv(
            filename,
            sep=sep,
            header=key in {'obs', 'var', 'obsm', 'varm'},
            index=key in {'obs', 'var'},
        )
Пример #6
0
def write_csvs(dirname: PathLike,
               adata: AnnData,
               skip_data: bool = True,
               sep: str = ","):
    """See :meth:`~anndata.AnnData.write_csvs`.

    Writes obs/var/obsm/varm (and optionally X) plus ``uns`` entries as
    .csv files under ``dirname``; ``uns`` entries go into a "uns" subdir.
    """
    dirname = Path(dirname)
    if dirname.suffix == ".csv":
        dirname = dirname.with_suffix("")
    logger.info(f"writing .csv files to {dirname}")
    if not dirname.is_dir():
        dirname.mkdir(parents=True, exist_ok=True)
    dir_uns = dirname / "uns"
    if not dir_uns.is_dir():
        dir_uns.mkdir(parents=True, exist_ok=True)
    d = dict(
        obs=adata._obs,
        var=adata._var,
        obsm=adata._obsm.to_df(),
        varm=adata._varm.to_df(),
    )
    if not skip_data:
        d["X"] = pd.DataFrame(
            adata._X.toarray() if issparse(adata._X) else adata._X)
    d_write = {**d, **adata._uns}
    not_yet_raised_sparse_warning = True
    for key, value in d_write.items():
        if issparse(value):
            # warn once, then silently skip any further sparse entries
            if not_yet_raised_sparse_warning:
                warnings.warn("Omitting to write sparse annotation.",
                              WriteWarning)
                not_yet_raised_sparse_warning = False
            continue
        filename = dirname
        if key not in {"X", "var", "obs", "obsm", "varm"}:
            filename = dir_uns
        filename /= f"{key}.csv"
        df = value
        if not isinstance(value, pd.DataFrame):
            value = np.array(value)
            if np.ndim(value) == 0:
                value = value[None]
            try:
                df = pd.DataFrame(value)
            except Exception:
                # BUG FIX: report the type of the value that failed to
                # convert, not the type of the exception (was type(e)).
                warnings.warn(
                    f"Omitting to write {key!r} of type {type(value)}.",
                    WriteWarning,
                )
                continue
        df.to_csv(
            filename,
            sep=sep,
            header=key in {"obs", "var", "obsm", "varm"},
            index=key in {"obs", "var"},
        )
Пример #7
0
def download_mesh(url: str, directory: PathLike, overwrite: bool = False):
    """Download mesh files ('fort.13'/'fort.14') into ``directory``.

    The download is skipped when 'fort.14' already exists, unless
    ``overwrite`` is True.

    :param url: archive URL containing the mesh files
    :param directory: destination directory (created if missing)
    :param overwrite: re-download even if the mesh already exists
    :returns: the mesh directory as a ``Path``
    """
    mesh_dir = directory if isinstance(directory, Path) else Path(directory)
    if not mesh_dir.exists():
        mesh_dir.mkdir(parents=True, exist_ok=True)

    needs_download = overwrite or not (mesh_dir / 'fort.14').exists()
    if needs_download:
        logging.info(f'downloading mesh files to {mesh_dir}')
        extract_download(url, mesh_dir, ['fort.13', 'fort.14'])

    return mesh_dir
Пример #8
0
    def train(
            self, episodes: int,
            validation: int = None,
            save_path: PathLike = None,
            gamma: float = 1.0, epsilon: float = 0.2,
    ) -> None:
        """Train the model by self-play Q-learning for ``episodes`` games.

        :param episodes: number of self-play games to run
        :param validation: run validation (and save the model) every this
            many episodes; disabled when None
        :param save_path: path to save models and logs.
        :param gamma: γ - discount factor. Is used to balance immediate and future reward.
        :param epsilon: ε - chance to get random move in ε-greedy policy
        """
        if save_path:
            # exist_ok=False / parents=False: refuse to reuse or silently
            # create a previous run's directory tree
            save_path = pathlib.Path(save_path) / f'SIGMOID_24_negative_reward_gamma_{gamma}_epsilon_{epsilon}_q_learning'
            save_path.mkdir(exist_ok=False, parents=False)

        for episode in tqdm(range(episodes)):
            # periodic validation checkpoint (every `validation` episodes)
            if validation is not None and not (episode + 1) % validation:
                self.validate(path=save_path)
                if save_path:
                    self.save(save_path, episode + 1)

            self.model.train()

            # self-play: both players share the same network
            players = (self.agent_cls(self.model), self.agent_cls(self.model))
            game = bg.Game(players=players)

            with self.e_greedy_get_action(episode, epsilon=epsilon):
                for agent, new_board, prev_board, move, available_moves in game.play_step_by_step():
                    agent: agents.NNAgent
                    pred_q = agent.estimate(board=prev_board)
                    if new_board.status:
                        # terminal position: the game outcome is the reward
                        reward = new_board.status
                        self.update(pred_q, torch.Tensor([reward]))
                        # also train the mirrored position on the negated reward
                        with prev_board.reverse() as reversed_board:
                            self.update(agent.estimate(board=reversed_board), torch.Tensor([-reward]))
                        break
                    else:
                        estimated_moves = list(agent.estimate_moves(available_moves=available_moves, board=prev_board))
                        # NOTE(review): these two values are never used below;
                        # confirm to_schema() has no needed side effect
                        agent_checkers, opp_checkers = prev_board.to_schema()

                        if estimated_moves:
                            # bootstrap target: discounted best reachable Q-value
                            max_q = np.max(estimated_moves)
                            new_q = gamma * max_q
                        else:
                            # it is too bad, if we could not make any step.
                            new_q = torch.Tensor([-1])

                        self.update(pred_q, new_q)
Пример #9
0
def ensure_directory(directory: PathLike) -> Path:
    """
    ensure that a directory exists, creating it (and its parents) if needed

    :param directory: directory path to ensure; a file path resolves to its
        parent directory
    :returns: path to ensured directory
    """

    path = directory if isinstance(directory, Path) else Path(directory)
    path = path.expanduser()
    if path.is_file():
        # a file was given; ensure its containing directory instead
        path = path.parent
    if not path.exists():
        path.mkdir(parents=True, exist_ok=True)
    return path
Пример #10
0
def _download_file(
    fname: Union[AVAILABLE_MODELS, AVAILABLE_DATA],
    branch: str,
    save_dir: PathLike,
    type: Literal["model", "data"],
) -> Path:
    """Download a file to disk if it does not already exist.

    Args:
        fname: The file name.
        branch: Which branch of the unlockNN repository to download from.
        save_dir: The directory to check for already-downloaded models and
            in which to save newly downloaded models.
        type: The type of file.

    Returns:
        The path to the downloaded file/folder.

    """
    save_dir = Path(save_dir)
    # parents/exist_ok for robustness against missing parent directories and
    # concurrent creation (consistent with the other mkdir calls in this file)
    save_dir.mkdir(parents=True, exist_ok=True)

    specific_dir = save_dir / (
        f"{fname}-{branch}" + (".parquet" if type == "data" else "")
    )
    # Add .parquet extension only if we're downloading data
    url = MODELS_URL if type == "model" else DATA_URL
    download_url = url.format(branch=branch, fname=fname)

    if not specific_dir.exists():
        r = requests.get(download_url)
        # fail loudly instead of caching an HTTP error page as the file
        r.raise_for_status()
        if type == "model":
            # NOTE(review): extractall trusts member paths in the downloaded
            # archive; consider validating members to prevent path traversal.
            with tarfile.open(fileobj=BytesIO(r.content)) as tar_f:
                tar_f.extractall(specific_dir)
        else:
            specific_dir.write_bytes(r.content)

    return specific_dir
def download_file(
    url: PathLike,
    filename: PathLike = None,
    directory: PathLike = None,
    show_progress: bool = True,
    silent: bool = False,
    timeout: int = 5,
) -> PathLike:
    """
    Download a file from a url and save it to the local filesystem. The file is saved to the
    current directory by default, or to `directory` if specified. If a filename is not given,
    the filename of the URL will be used.

    :param url: URL that points to the file to download
    :param filename: Name of the local file to save. Should point to the name of the file only,
                     not the full path. If None the filename from the url will be used
    :param directory: Directory to save the file to. Will be created if it doesn't exist
                      If None the file will be saved to the current working directory
    :param show_progress: If True, show an TQDM ProgressBar
    :param silent: If True, do not print a message if the file already exists
    :param timeout: Number of seconds to wait for the connection before failing
    :return: path to downloaded file
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = [("User-agent", "Mozilla/5.0")]
        urllib.request.install_opener(opener)
        urlobject = urllib.request.urlopen(url, timeout=timeout)
        if filename is None:
            filename = urlobject.info().get_filename() or Path(urllib.parse.urlparse(url).path).name
    # BUG FIX: HTTPError subclasses URLError, so it must be caught FIRST --
    # previously the URLError handler swallowed HTTP errors (and any
    # non-timeout URLError) without raising, leaving `urlobject` undefined
    # and causing a NameError below.
    except urllib.error.HTTPError as e:
        raise Exception(f"File downloading failed with error: {e.code} {e.msg}") from None
    except urllib.error.URLError as error:
        if isinstance(error.reason, socket.timeout):
            raise Exception(
                "Connection timed out. If you access the internet through a proxy server, please "
                "make sure the proxy is set in the shell from where you launched Jupyter. If your "
                "internet connection is slow, you can call `download_file(url, timeout=30)` to "
                "wait for 30 seconds before raising this error."
            ) from None
        raise
    filename = Path(filename)
    if len(filename.parts) > 1:
        raise ValueError(
            "`filename` should refer to the name of the file, excluding the directory. "
            "Use the `directory` parameter to specify a target directory for the downloaded file."
        )

    # create the directory if it does not exist, and add the directory to the filename
    if directory is not None:
        directory = Path(directory)
        directory.mkdir(parents=True, exist_ok=True)
        filename = directory / Path(filename)

    # download the file if it does not exist, or if it exists with an incorrect file size
    urlobject_size = int(urlobject.info().get("Content-Length", 0))
    if not filename.exists() or (os.stat(filename).st_size != urlobject_size):
        progress_callback = DownloadProgressBar(
            total=urlobject_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=str(filename),
            disable=not show_progress,
        )
        urllib.request.urlretrieve(url, filename, reporthook=progress_callback.update_to)
        # snap the progress bar to 100% once the file is fully written
        if os.stat(filename).st_size >= urlobject_size:
            progress_callback.update(urlobject_size - progress_callback.n)
            progress_callback.refresh()
    else:
        if not silent:
            # BUG FIX: the message printed a hard-coded placeholder instead
            # of the actual file name.
            print(f"'{filename}' already exists.")
    # NOTE: returns a resolved Path (annotation corrected from `str`, which
    # did not match the actual return value).
    return filename.resolve()
Пример #12
0
def make_path(p: PathLike) -> Path:
    """Coerce ``p`` to a Path and create the directory (with parents) if missing.

    :param p: directory path to create
    :returns: the created directory as a ``Path``
    """
    path = Path(p)
    path.mkdir(exist_ok=True, parents=True)
    return path