示例#1
0
def setup_ripple(num_top):
    """Define and compile the "tide" pipeline (param-medic followed by Tide).

    The steps are declared in this order: ``top`` on the "tic" identifier,
    ``parammedic``, a ``map`` that runs ``tide`` for each bucket key,
    ``confidence``, and a final ``match`` on "qvalue". The pipeline is then
    compiled with the target ``json/medic-tide.json`` and ``dry_run=False``.

    Args:
        num_top: Value passed as ``number`` to the ``top`` step.
    """
    fasta_bucket = "maccoss-fasta"
    config = {
        "region": "us-west-2",
        "role": "service-role/lambdaFullAccessRole",
        "memory_size": 3008,
    }
    pipeline = ripple.Pipeline(name="tide",
                               table="s3://maccoss-tide",
                               log="s3://maccoss-log",
                               timeout=600,
                               config=config)
    params = {"database_bucket": fasta_bucket}

    # Named `source` rather than `input` to avoid shadowing the builtin.
    source = pipeline.input(format="mzML")
    # Filter for the most intense spectra
    step = source.top(identifier="tic", number=num_top)
    # Run param-medic on top spectra. Chain from the filtered step (not the
    # raw input) so the result of `top` is actually consumed.
    step = step.run("parammedic", params=params, output_format="mzML")

    # For each matching FASTA, run Tide
    def tide(input_key, bucket_key):
        """Run Tide on ``input_key`` using ``bucket_key`` as the species."""
        return input_key.run(
            "tide",
            params={
                "database_bucket": fasta_bucket,
                "num_threads": 0,
                "output_format": "tsv",
                "species": bucket_key,
            })

    step = step.map(func=tide, params={"directories": True})
    # Calculate the confidence score of the results
    step = step.run("confidence", params=params, output_format="confidence")
    # Find the result with the top confidence score
    step.match("qvalue")

    pipeline.compile("json/medic-tide.json", dry_run=False)
示例#2
0
def setup(table, log, json_name, split_size, mem_size, region):
    """Define and compile a split -> tide -> combine -> percolator pipeline.

    Args:
        table: Bucket name; ``"s3://"`` is prepended to form the table URI.
        log: Bucket name; ``"s3://"`` is prepended to form the log URI.
        json_name: Target passed to ``pipeline.compile``.
        split_size: Value of ``split_size`` for the split step.
        mem_size: ``memory_size`` override for the "tide" step.
        region: Value of ``region`` in the pipeline configuration.

    Returns:
        ``len(pipeline.pipeline)`` after the pipeline has been compiled.
    """
    config = {
        "region": region,
        "role": "service-role/lambdaFullAccessRole",
        "memory_size": 3008,
    }

    pipeline = ripple.Pipeline(name="tide",
                               table="s3://" + table,
                               log="s3://" + log,
                               timeout=600,
                               config=config)
    # Named `source` rather than `input` to avoid shadowing the builtin.
    source = pipeline.input(format="mzML")
    step = source.split({"split_size": split_size}, {"memory_size": 128})

    tide_params = {
        "database_bucket": "maccoss-fasta",
        "num_threads": 0,
        "species": "normalHuman",
    }
    # Chain from the split step (not the raw input) so the result of
    # `split` is actually consumed.
    step = step.run("tide",
                    params=tide_params,
                    output_format="tsv",
                    config={"memory_size": mem_size})
    step = step.combine(params={"sort": False}, config={"memory_size": 256})

    percolator_params = {
        "database_bucket": "maccoss-fasta",
        "max_train": 10 * 1000,
        "output": "peptides",
    }
    step.run("percolator", params=percolator_params)

    pipeline.compile(json_name, dry_run=False)
    return len(pipeline.pipeline)
示例#3
0
import ripple

# Pipeline-wide configuration handed to ripple.Pipeline.
config = dict(
    region="us-west-2",
    role="serverless-role",
    memory_size=3008,
)

pipeline = ripple.Pipeline(
    name="spacenet",
    table="s3://spacenet",
    log="s3://log",
    timeout=600,
    config=config,
)

# The pipeline consumes "tif" input.
input = pipeline.input(format="tif")

step = input.run(
    "convert_to_pixels",
    params={"pixels_per_bin": 1000},
    output_format="pixel",
)
step = step.run("pair", params={"split_size": 10 * 1000 * 1000})
step = step.run("run_knn", {"k": 100}, output_format="knn")

# Two combine steps in a row: the first with sort=True, the second with
# sort=False.
step = step.combine(params={"k": 100, "sort": True})
step = step.combine(params={"k": 100, "sort": False})

# The original pipeline input is handed to draw_borders as "image".
step = step.run("draw_borders", {"image": input}, output_format="tif")

pipeline.compile("json/spacenet-classification.json")
示例#4
0
文件: simple.py 项目: saj9191/ripple
import ripple

# Pipeline-wide configuration handed to ripple.Pipeline.
config = dict(
    region="us-west-2",
    role="service-role/lambdaFullAccessRole",
    memory_size=1024,
)

pipeline = ripple.Pipeline(
    name="tide",
    table="s3://maccoss-tide",
    log="s3://maccoss-log",
    timeout=600,
    config=config,
)

# Single-step pipeline: "new_line" input fed to the "echo" function.
input = pipeline.input(format="new_line")
step = input.run("echo")

pipeline.compile("json/simple.json", dry_run=False)
示例#5
0
import ripple

# Pipeline-wide configuration handed to ripple.Pipeline.
config = dict(
    region="us-east-1",
    role="serverless-role",
    memory_size=3008,
)

pipeline = ripple.Pipeline(
    name="compression",
    table="s3://compression",
    log="s3://log",
    timeout=600,
    config=config,
)

# "bed" input is sorted on "start_position" before compression.
input = pipeline.input(format="bed")
step = input.sort(
    identifier="start_position",
    params={"split_size": 500 * 1000 * 1000},
    config={"memory_size": 3008},
)
step = step.run("compress_methyl", params={"program_bucket": "program"})

pipeline.compile("json/compression.json")
示例#6
0
import ripple

# Pipeline-wide configuration handed to ripple.Pipeline.
config = dict(
    region="us-west-2",
    role="service-role/lambdaFullAccessRole",
    memory_size=3008,
)

pipeline = ripple.Pipeline(
    name="compression",
    table="s3://maccoss-tide-west-2",
    log="s3://maccoss-log-west-2",
    timeout=600,
    config=config,
)

# "bed" input is sorted on "start_position" (with num_bins=35) before
# compression.
input = pipeline.input(format="bed")
step = input.sort(
    identifier="start_position",
    params={"split_size": 500 * 1000 * 1000, "num_bins": 35},
    config={"memory_size": 3008},
)
step = step.run(
    "compress_methyl",
    params={"program_bucket": "maccoss-methyl-data"},
)

pipeline.compile("json/compression.json")
示例#7
0
import ripple

# Pipeline-wide configuration handed to ripple.Pipeline.
config = dict(
    region="us-east-1",
    role="service-role/lambdaFullAccessRole",
    memory_size=3008,
)

pipeline = ripple.Pipeline(
    name="document",
    table="s3://maccoss-tide-east-1",
    log="s3://maccoss-log-east-1",
    timeout=600,
    config=config,
)

input = pipeline.input(format="new_line")

# Map over the "train-data" table; the mapped function runs
# "compress_fastore" with the bucket key supplied as "train_data".
input.map(
    table="train-data",
    func=lambda input_key, bucket_key: input_key.run(
        "compress_fastore",
        params={"train_data": bucket_key},
    ),
)

# Compiled with dry_run=True, unlike the other examples in this file.
pipeline.compile("json/document.json", dry_run=True)