Exemplo n.º 1
0
def train_tars(mode="prod"):
    """Instantiate ``ChemicalUnderstandingTARS`` inside a ``FileSystemStage``.

    The ``TRANSFORMERS_CACHE`` environment variable is pointed at the
    ``transformers`` directory under the ``external`` path before the flair
    actor module is imported.

    Args:
        mode: Accepted for interface compatibility; this function does not
            read it.
    """
    # Interdict the Transformers cache
    # TODO: Make this cleaner somehow
    import os
    from keter.stage import FileSystemStage, get_path

    with FileSystemStage():
        cache_dir = get_path("external") / "transformers"
        os.environ["TRANSFORMERS_CACHE"] = str(cache_dir)

        # Imported only after the environment variable has been set.
        from keter.actors.flair import ChemicalUnderstandingTARS as TarsActor

        # The instance is discarded; only the constructor call matters here.
        TarsActor()
Exemplo n.º 2
0
def drug_discovery_on_moses(mode="prod"):
    """Analyze the MOSES SMILES strings block by block and publish the result.

    Each block of SMILES strings is passed to ``Analyzer.analyze``. The
    per-block results are concatenated with ``pandas.concat``, written to
    ``moses_drugs.parquet`` under the ``output`` path, and finally
    ``create_jamstack`` is called.

    Args:
        mode: ``"prod"`` builds ``Analyzer()``; ``"doc2vec"`` and ``"lda"``
            pass that name to ``Analyzer`` as its single argument.

    Raises:
        ValueError: If ``mode`` is not one of the values listed above.
    """
    from tqdm.auto import tqdm
    import pandas as pd

    from keter.stage import FileSystemStage, get_path
    from keter.actors.sklearn import Analyzer
    from keter.datasets.raw import Moses
    from keter.interfaces.chemistry import create_jamstack

    with FileSystemStage():
        if mode == "prod":
            analyzer = Analyzer()
        elif mode in ("doc2vec", "lda"):
            analyzer = Analyzer(mode)
        else:
            raise ValueError(f"Invalid mode: {mode}")
        moses = Moses().to_df()["SMILES"].tolist()

        block_size = 107609

        output_dir = get_path("output")
        output_dir.mkdir(parents=True, exist_ok=True)

        # No explicit ``total``: tqdm takes it from len(range(...)), which
        # also counts the final partial block. The previous floor division
        # under-reported the total whenever len(moses) was not a multiple
        # of block_size.
        block_starts = range(0, len(moses), block_size)
        blocks = [
            analyzer.analyze(moses[start:start + block_size])
            for start in tqdm(block_starts, unit="block")
        ]
        pd.concat(blocks).reset_index(drop=True).to_parquet(
            output_dir / "moses_drugs.parquet")

        create_jamstack()
Exemplo n.º 3
0
    def train(self):
        """Build a TARS toxicity classifier and train it on the Tox21 corpus.

        The classifier is stored on ``self.model``. The trainer receives
        ``get_path("model") / self.filename`` as its ``base_path``.
        """
        corpus = FlairTox21().to_corpus()
        label_dictionary = corpus.make_label_dictionary()

        self.model = TARSClassifier(
            task_name="Toxicity",
            label_dictionary=label_dictionary,
            document_embeddings="distilbert-base-uncased",
        )

        trainer = ModelTrainer(self.model, corpus)

        # Collected in one mapping so the training settings are visible at a glance.
        training_options = {
            "base_path": get_path("model") / self.filename,
            "learning_rate": 0.02,
            "mini_batch_size": 1,
            "max_epochs": 10,
        }
        trainer.train(**training_options)
Exemplo n.º 4
0
    def to_csv(self) -> Sequence[str]:
        """Yield infection records line by line, caching them as xz-compressed CSV.

        This is a generator. If the cache file (``self.filename`` with a
        ``.csv.xz`` suffix under the ``constructed`` path) already exists, its
        lines are yielded with trailing whitespace stripped and nothing is
        rebuilt.

        Otherwise the records are derived from ``CoronaDeathsUSA().to_df()``:
        date-like columns (those whose name contains ``/``) are renamed to
        integer timestamps, replaced by their column-to-column difference, and
        each (timestamp, value) cell is expanded through
        ``construct_infection_records``. Every record is both written to the
        cache and yielded.

        The cache is first written to a sibling ``.partial`` file and renamed
        into place only once every record has been produced. A consumer that
        stops iterating early (or an exception mid-way) therefore never leaves
        behind a truncated cache that a later call would replay as complete.

        NOTE(review): assumes ``get_path`` returns a ``pathlib.Path``-like
        object (it is already used with ``/``, ``with_suffix`` and ``exists``).
        """
        constructed_data_root = get_path("constructed")
        csv_file = (constructed_data_root /
                    self.filename).with_suffix(".csv.xz")

        if csv_file.exists():
            with lzma.open(csv_file, "rt") as fd:
                for line in fd:
                    yield line.rstrip()
            return

        corona_deaths = CoronaDeathsUSA().to_df()
        corona_deaths = corona_deaths.rename(
            columns={
                column: int(parse(column).timestamp())
                for column in corona_deaths.columns if "/" in column
            })
        timestamp_columns = [
            column for column in corona_deaths.columns
            if isinstance(column, int)
        ]
        # Turn the per-date columns into differences between adjacent dates.
        corona_deaths[timestamp_columns] = corona_deaths[
            timestamp_columns].diff(axis=1)
        # diff() leaves NaN in the first timestamp column; drop such columns.
        corona_deaths = corona_deaths.dropna(axis=1)

        constructed_data_root.mkdir(parents=True, exist_ok=True)
        partial_file = csv_file.with_name(csv_file.name + ".partial")
        try:
            # The context manager closes the file even when the generator is
            # closed early or an exception escapes the loop.
            with lzma.open(partial_file, "wt") as fd:
                for _, series in corona_deaths.iterrows():
                    for column, val in series.items():
                        if not isinstance(column, int):
                            continue
                        for record in construct_infection_records(
                                column, val, series.Lat, series.Long_):
                            fd.write(record + "\n")
                            yield record
            # Only a fully written file becomes the cache.
            partial_file.replace(csv_file)
        finally:
            # Still present only if generation did not finish.
            if partial_file.exists():
                partial_file.unlink()
Exemplo n.º 5
0
 def download(self, assay: str) -> bytes:
     """Download the zip archive of one Tox21 assay and return its raw bytes.

     Args:
         assay: Assay name; must be a member of ``self.tox21_assays``.

     Returns:
         The body of the HTTP response for the assay's ``.zip`` URL.

     Raises:
         ValueError: If ``assay`` is not in ``self.tox21_assays``.
     """
     if assay not in self.tox21_assays:
         raise ValueError(f"Not a valid Tox21 assay: {assay}")
     raw_url = f"https://tripod.nih.gov/tox21/assays/download/{assay}.zip"
     # Close the response deterministically instead of leaving the
     # connection open until garbage collection.
     with urlopen(raw_url) as response:
         return response.read()
Exemplo n.º 6
0
def create_jamstack():
    """Configure ``app`` for freezing and run ``Freezer(app).freeze()``.

    Mimetype warnings are disabled and the freezer destination is set to
    ``static_html`` under the ``output`` path.
    """
    settings = app.config
    settings["FREEZER_IGNORE_MIMETYPE_WARNINGS"] = True
    static_dir = get_path("output") / "static_html"
    settings["FREEZER_DESTINATION"] = static_dir
    Freezer(app).freeze()
Exemplo n.º 7
0
def make_drug_db():
    """Call ``db.make_drug_db`` with the ``output`` path from ``get_path``."""
    build = db.make_drug_db
    output_dir = get_path("output")
    build(output_dir)