def setup_ripple(num_top):
    """Build and compile the param-medic + Tide Ripple pipeline.

    Args:
        num_top: Number of the most intense spectra to keep in the
            initial "top" filtering stage.
    """
    config = {
        "region": "us-west-2",
        "role": "service-role/lambdaFullAccessRole",
        "memory_size": 3008,
    }
    pipeline = ripple.Pipeline(
        name="tide",
        table="s3://maccoss-tide",
        log="s3://maccoss-log",
        timeout=600,
        config=config,
    )
    params = {"database_bucket": "maccoss-fasta"}
    source = pipeline.input(format="mzML")  # renamed: avoid shadowing builtin `input`

    # Filter for the most intense spectra.
    step = source.top(identifier="tic", number=num_top)

    # Run param-medic on the top spectra.
    # BUGFIX: this previously chained off the raw input (`source.run(...)`),
    # silently discarding the `top` filtering stage built just above.
    step = step.run("parammedic", params=params, output_format="mzML")

    # For each matching FASTA, run Tide.
    tide = lambda input_key, bucket_key: input_key.run(
        "tide",
        params={
            "database_bucket": "maccoss-fasta",
            "num_threads": 0,
            "output_format": "tsv",
            "species": bucket_key,
        })
    step = step.map(func=tide, params={"directories": True})

    # Calculate the confidence score of the results.
    step = step.run("confidence", params=params, output_format="confidence")

    # Find the result with the top confidence score.
    step = step.match("qvalue")

    pipeline.compile("json/medic-tide.json", dry_run=False)
def setup(table, log, json_name, split_size, mem_size, region):
    """Compile a split / Tide / Percolator Ripple pipeline.

    Args:
        table: S3 bucket name (without scheme) holding the input data.
        log: S3 bucket name (without scheme) for pipeline logs.
        json_name: Path of the compiled pipeline JSON to write.
        split_size: Chunk size passed to the split stage.
        mem_size: Lambda memory size (MB) for the Tide stage.
        region: AWS region for every stage.

    Returns:
        The number of stages in the compiled pipeline.
    """
    config = {
        "region": region,
        "role": "service-role/lambdaFullAccessRole",
        "memory_size": 3008,
    }
    pipeline = ripple.Pipeline(
        name="tide",
        table="s3://" + table,
        log="s3://" + log,
        timeout=600,
        config=config,
    )
    source = pipeline.input(format="mzML")

    # NOTE(review): the split result is not chained into the Tide stage —
    # tide is run on the raw input below. Presumably the split stage still
    # registers itself with the pipeline; confirm against ripple's API.
    step = source.split({"split_size": split_size}, {"memory_size": 128})

    tide_params = {
        "database_bucket": "maccoss-fasta",
        "num_threads": 0,
        "species": "normalHuman",
    }
    step = source.run("tide", params=tide_params, output_format="tsv",
                      config={"memory_size": mem_size})
    step = step.combine(params={"sort": False}, config={"memory_size": 256})

    percolator_params = {
        "database_bucket": "maccoss-fasta",
        "max_train": 10 * 1000,
        "output": "peptides",
    }
    step = step.run("percolator", params=percolator_params)

    pipeline.compile(json_name, dry_run=False)
    return len(pipeline.pipeline)
import ripple

# Lambda settings shared by every stage of the SpaceNet pipeline.
config = {
    "region": "us-west-2",
    "role": "serverless-role",
    "memory_size": 3008,
}

pipeline = ripple.Pipeline(
    name="spacenet",
    table="s3://spacenet",
    log="s3://log",
    timeout=600,
    config=config,
)

# Ingest TIFF imagery and convert it to binned pixel records.
input = pipeline.input(format="tif")
step = input.run("convert_to_pixels", params={"pixels_per_bin": 1000},
                 output_format="pixel")
step = step.run("pair", params={"split_size": 10 * 1000 * 1000})
step = step.run("run_knn", {"k": 100}, output_format="knn")
# Two combine passes over the KNN output: sorted first, then unsorted.
step = step.combine(params={"k": 100, "sort": True})
step = step.combine(params={"k": 100, "sort": False})
# Render the classification borders back onto the original image.
step = step.run("draw_borders", {"image": input}, output_format="tif")

pipeline.compile("json/spacenet-classification.json")
import ripple

config = {
    "region": "us-west-2",
    "role": "service-role/lambdaFullAccessRole",
    "memory_size": 1024,
}

pipeline = ripple.Pipeline(
    name="tide",
    table="s3://maccoss-tide",
    log="s3://maccoss-log",
    timeout=600,
    config=config,
)

# Minimal single-stage pipeline: run "echo" over newline-delimited input.
input = pipeline.input(format="new_line")
step = input.run("echo")

pipeline.compile("json/simple.json", dry_run=False)
import ripple

config = {
    "region": "us-east-1",
    "role": "serverless-role",
    "memory_size": 3008,
}

pipeline = ripple.Pipeline(
    name="compression",
    table="s3://compression",
    log="s3://log",
    timeout=600,
    config=config,
)

# Sort BED records by start position, then compress the sorted output.
input = pipeline.input(format="bed")
step = input.sort(identifier="start_position",
                  params={"split_size": 500 * 1000 * 1000},
                  config={"memory_size": 3008})
step = step.run("compress_methyl", params={"program_bucket": "program"})

pipeline.compile("json/compression.json")
import ripple

config = {
    "region": "us-west-2",
    "role": "service-role/lambdaFullAccessRole",
    "memory_size": 3008,
}

pipeline = ripple.Pipeline(
    name="compression",
    table="s3://maccoss-tide-west-2",
    log="s3://maccoss-log-west-2",
    timeout=600,
    config=config,
)

# Sort BED records by start position into 35 bins, then compress.
input = pipeline.input(format="bed")
step = input.sort(identifier="start_position",
                  params={"split_size": 500 * 1000 * 1000, "num_bins": 35},
                  config={"memory_size": 3008})
step = step.run("compress_methyl",
                params={"program_bucket": "maccoss-methyl-data"})

pipeline.compile("json/compression.json")
import ripple

config = {
    "region": "us-east-1",
    "role": "service-role/lambdaFullAccessRole",
    "memory_size": 3008,
}

pipeline = ripple.Pipeline(
    name="document",
    table="s3://maccoss-tide-east-1",
    log="s3://maccoss-log-east-1",
    timeout=600,
    config=config,
)

input = pipeline.input(format="new_line")


def _compress(input_key, bucket_key):
    # Run FaStore compression against each training-data object.
    return input_key.run("compress_fastore", params={"train_data": bucket_key})


# Map the compression stage over every object in the train-data table.
input.map(table="train-data", func=_compress)

pipeline.compile("json/document.json", dry_run=True)