Example #1
def run(database, file, params, input_format, output_format, offsets):
    work_dir = "/tmp/{0:f}-{1:d}".format(input_format["timestamp"],
                                         input_format["nonce"])
    os.mkdir(work_dir)
    program_path = "{0:s}/output".format(work_dir)
    input_name = work_dir + "/input"
    os.rename(file, input_name)

    database.download(params["program_bucket"], "output", program_path)
    subprocess.call("chmod 755 " + program_path, shell=True)

    arguments = [program_path, "compress", input_name, work_dir]

    command = " ".join(arguments)
    util.check_output(command)

    output_files = []
    result_dir = "{0:s}/compressed_input".format(work_dir)
    for subdir, dirs, files in os.walk(result_dir):
        for f in files:
            output_format["suffix"] = f.split("_")[-1].split("-")[0]

            output_file = "/tmp/{0:s}".format(util.file_name(output_format))
            os.rename("{0:s}/compressed_input/{1:s}".format(work_dir, f),
                      output_file)
            output_files.append(output_file)

    return output_files
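
Most of these examples make a downloaded binary executable by shelling out to chmod 755 and then build the command line by joining strings. A minimal shell-free sketch of the same two steps, standard library only (the helper name is hypothetical):

import os
import stat
import subprocess

def make_executable_and_run(program_path, args):
    # Equivalent of subprocess.call("chmod 755 " + path, shell=True).
    os.chmod(program_path, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
             | stat.S_IROTH | stat.S_IXOTH)
    # An argument list avoids shell quoting and injection issues.
    return subprocess.check_output([program_path] + list(args),
                                   stderr=subprocess.STDOUT)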
Example #2
def find_match(bucket_name, key, input_format, output_format, offsets, params):
    util.print_read(input_format, key, params)
    [combine, keys, last] = util.combine_instance(bucket_name, key, params)
    if combine:
        print("Finding match")
        best_match = None
        match_score = 0
        format_lib = importlib.import_module(params["format"])
        iterator = getattr(format_lib, "Iterator")
        s3 = boto3.resource("s3")

        keys.sort()
        with open(util.LOG_NAME, "a+") as f:
            for key in keys:
                obj = s3.Object(bucket_name, key)
                it = iterator(obj, params["chunk_size"])
                if params["find"] == "max sum":
                    score = it.sum(params["identifier"])
                else:
                    raise Exception("Not implemented", params["find"])

                print("key {0:s} score {1:d}".format(key, score))
                f.write("key {0:s} score {1:d}\n".format(key, score))
                if score > match_score:
                    best_match = key
                    match_score = score

        if best_match is None:
            best_match = keys[0]

        output_format["ext"] = "match"
        file_name = util.file_name(output_format)
        util.write(bucket_name, file_name, str.encode(best_match), {}, params)
Example #3
def run(file, params, input_format, output_format, offsets):
  util.print_read(input_format, file, params)

  s3 = boto3.resource('s3')
  database_bucket = s3.Bucket(params["database_bucket"])

  with open("/tmp/crux", "wb") as f:
    database_bucket.download_fileobj("crux", f)

  subprocess.call("chmod 755 /tmp/crux", shell=True)
  output_dir = "/tmp/confidence-crux-output-{0:f}-{1:d}".format(input_format["timestamp"], input_format["nonce"])

  arguments = [
    "--output-dir", output_dir,
  ]

  command = "cd /tmp; ./crux assign-confidence {0:s} {1:s}".format(file, " ".join(arguments))
  subprocess.check_output(command, shell=True)

  output_files = []
  input_file = "{0:s}/assign-confidence.target.txt".format(output_dir)
  output_format["ext"] = "confidence"
  output_file = "/tmp/{0:s}".format(util.file_name(output_format))
  os.rename(input_file, output_file)
  output_files.append(output_file)

  return output_files
Example #4
def find_top(d: Database, table: str, key: str, input_format: Dict[str, Any],
             output_format: Dict[str, Any], offsets: List[int],
             params: Dict[str, Any]):
    entry = d.get_entry(table, key)
    format_lib = importlib.import_module(params["format"])
    iterator = getattr(format_lib, "Iterator")
    it = iterator(entry, offsets)

    top = []
    more = True
    while more:
        [items, _, more] = it.next()

        for item in items:
            score: float = it.get_identifier_value(item, params["identifier"])
            heapq.heappush(top, Element(score, item))
            if len(top) > params["number"]:
                heapq.heappop(top)

    file_name = util.file_name(output_format)
    temp_name = "/tmp/{0:s}".format(file_name)
    items = list(map(lambda t: t.value, top))
    with open(temp_name, "wb+") as f:
        [content, metadata] = iterator.from_array(items, f, it.get_extra())

    with open(temp_name, "rb") as f:
        d.put(table, file_name, f, metadata)
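
Element is not shown in this excerpt. A minimal sketch of a wrapper that makes the bounded min-heap above work, assuming elements order by score only (the field name follows the usage t.value above):

import heapq
from dataclasses import dataclass, field
from typing import Any

@dataclass(order=True)
class Element:
    score: float
    value: Any = field(compare=False)  # compare scores, never payloads

def top_n(scored_items, n):
    # Push everything; whenever the heap exceeds n, evict the minimum.
    # What remains is the n highest-scoring elements.
    heap = []
    for score, item in scored_items:
        heapq.heappush(heap, Element(score, item))
        if len(heap) > n:
            heapq.heappop(heap)
    return [e.value for e in heap]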
Example #5
def make_file(output_format):
    output_format["bin"] += 1
    util.make_folder(output_format)
    name = util.file_name(output_format)
    output_file = "/tmp/" + name
    f = open(output_file, "wb+")
    return [f, name]
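
make_file hands back an open handle, so every caller is responsible for closing it. A minimal context-manager wrapper around the function above (the wrapper name is hypothetical):

from contextlib import contextmanager

@contextmanager
def binned_file(output_format):
    # Same behavior as make_file, but the handle is closed even on error.
    f, name = make_file(output_format)
    try:
        yield f, name
    finally:
        f.close()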
Example #6
def run(database: Database, file: str, params, input_format, output_format):
    database.download(params["database_bucket"], "crux", "/tmp/crux")
    subprocess.call("chmod 755 /tmp/crux", shell=True)
    output_dir = "/tmp/percolator-crux-output-{0:f}-{1:d}".format(
        input_format["timestamp"], input_format["nonce"])

    arguments = [
        "--subset-max-train",
        str(params["max_train"]),
        "--quick-validation",
        "T",
        "--output-dir",
        output_dir,
    ]

    command = "cd /tmp; ./crux percolator {0:s} {1:s}".format(
        file, " ".join(arguments))
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as exc:
        print("Status : FAIL", exc.returncode, exc.output)
        raise exc

    output_files = []
    for item in ["target.{0:s}".format(params["output"])]:
        input_file = "{0:s}/percolator.{1:s}.txt".format(output_dir, item)
        output_format["ext"] = "percolator"
        output_file = "/tmp/{0:s}".format(util.file_name(output_format))
        os.rename(input_file, output_file)
        output_files.append(output_file)
    shutil.rmtree(output_dir)

    return output_files
Example #7
File: match.py Project: wkclalala/ripple
def find_match(database, bucket_name: str, key: str,
               input_format: Dict[str, Any], output_format: Dict[str, Any],
               offsets: List[int], params: Dict[str, Any]):
    [combine, last, keys] = util.combine_instance(bucket_name, key, params)
    if combine:
        print("Finding match")
        best_match = None
        match_score = 0
        format_lib = importlib.import_module("formats." +
                                             params["input_format"])
        iterator_class = getattr(format_lib, "Iterator")

        keys.sort()
        with open(util.LOG_NAME, "a+") as f:
            for key in keys:
                entry = database.get_entry(bucket_name, key)
                it = iterator_class(entry, None)
                score: float = it.sum(
                    format_lib.Identifiers[params["identifier"]])

                print("key {0:s} score {1:d}".format(key, score))
                f.write("key {0:s} score {1:d}\n".format(key, score))
                if score > match_score:
                    best_match = key
                    match_score = score

        if best_match is None:
            best_match = keys[0]

        output_format["ext"] = "match"
        file_name = util.file_name(output_format)
        database.write(bucket_name, file_name, str.encode(best_match), {},
                       True)
Example #8
  def __get_objects__(self):
    if self.prefixes:
      file_name = self.prefixes + "/"
    else:
      identifier = self.find_queue.get()
      parts = identifier[0].split("-")
      m = {
        "prefix": identifier[1],
        "timestamp": float(parts[0]),
        "nonce": int(parts[1]),
        "bin": identifier[2],
        "num_bins": identifier[3],
        "file_id": identifier[4],
        "execute": 0,
        "suffix": "0",
        "num_files": identifier[5],
        "ext": "log",
      }
      file_name = util.file_name(m)

    objects = self.__fetch_objects__(file_name)

    count = 0
    found = False
    for obj in objects:
      key = obj["Key"]
      found |= (key == file_name)
      if key not in self.processed_logs:
        count += 1
        self.key_queue.put(key)

    if not found and not self.prefixes:
      self.find_queue.put(identifier)
Example #9
def write_binned_input(database: Database, binned_input: List[Any],
                       bin_ranges: List[Dict[str, int]], extra: Dict[str, Any],
                       output_format, iterator_class, params):
    for i in range(len(binned_input)):
        [content,
         metadata] = iterator_class.from_array(binned_input[i], None, extra)
        output_format["bin"] = bin_ranges[i]["bin"]
        output_format["num_bins"] = len(bin_ranges)
        bin_key = util.file_name(output_format)
        database.write(params["bucket"], bin_key, content, metadata, True)
Example #10
 def test_file_name_parser(self):
     m = {
         "prefix": 0,
         "timestamp": 123.4,
         "nonce": 42,
         "bin": 12,
         "num_bins": 13,
         "file_id": 3,
         "execute": False,
         "num_files": 4,
         "suffix": "hello",
         "ext": "txt"
     }
     self.assertDictEqual(m, util.parse_file_name(util.file_name(m)))
     self.assertEqual(
         "0/123.400000-13/1-4/1-0.000000-0-suffix.txt",
         util.file_name(
             util.parse_file_name(
                 "0/123.400000-13/1-4/1-0.000000-0-suffix.txt")))
Example #11
def run_application(database, bucket_name: str, key: str,
                    input_format: Dict[str, Any],
                    output_format: Dict[str, Any],
                    offsets: List[int], params: Dict[str, Any]):
    temp_file = "/tmp/{0:s}".format(key)
    util.make_folder(util.parse_file_name(key))

    if len(offsets) == 0:
        database.download(bucket_name, key, temp_file)
    else:
        obj = database.get_entry(bucket_name, key)
        format_lib = importlib.import_module("formats." +
                                             params["input_format"])
        iterator_class = getattr(format_lib, "Iterator")
        iterator = iterator_class(obj, OffsetBounds(offsets[0], offsets[1]))
        items = iterator.get(iterator.get_start_index(),
                             iterator.get_end_index())
        with open(temp_file, "wb+") as f:
            items = list(items)
            iterator_class.from_array(items, f, iterator.get_extra())

    application_lib = importlib.import_module("applications." +
                                              params["application"])
    application_method = getattr(application_lib, "run")
    output_files = application_method(database, temp_file, params,
                                      input_format, output_format)

    found = False
    for output_file in output_files:
        p = util.parse_file_name(output_file.replace("/tmp/", ""))
        if p is None:
            index = output_file.rfind(".")
            ext = output_file[index + 1:]
            output_format["ext"] = ext
            new_key = util.file_name(output_format)
        else:
            new_key = util.file_name(p)

        with open(output_file, "rb") as f:
            database.put(params["bucket"], new_key, f, {})
    return True
Example #12
def wait_for_execution_to_finish(db, table, key, num_steps):
    entries = []
    m = util.parse_file_name(key)
    m["prefix"] = num_steps
    prefix = util.key_prefix(util.file_name(m))
    start_time = time.time()
    while len(entries) == 0:
        entries = db.get_entries(table, prefix)
        if time.time() - start_time > num_steps * 60 * 2:
            print("Tiemout", num_steps * 60 * 2)
            raise TimeoutError
        time.sleep(10)
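
The poll-with-deadline pattern above recurs whenever one pipeline stage waits on another. A minimal reusable sketch, standard library only (names are hypothetical):

import time

def poll_until(predicate, timeout_s, interval_s=10.0):
    # Call predicate() until it returns a truthy value or the deadline passes.
    deadline = time.time() + timeout_s
    while True:
        result = predicate()
        if result:
            return result
        if time.time() > deadline:
            raise TimeoutError("no result within {0:f} seconds".format(timeout_s))
        time.sleep(interval_s)

# Usage equivalent to the loop above:
#   entries = poll_until(lambda: db.get_entries(table, prefix), num_steps * 60 * 2)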
Example #13
File: tide.py Project: wkclalala/ripple
def run(database: Database, file: str, params, input_format, output_format):
    if "species" in params:
        species = params["species"]
    else:
        raise Exception("Tide needs species parameter specified")

    database.download(params["database_bucket"], "{0:s}/fasta".format(species),
                      "/tmp/fasta")
    database.download(params["database_bucket"], "crux", "/tmp/crux")

    subprocess.call("chmod 755 /tmp/crux", shell=True)
    index_files = ["auxlocs", "pepix", "protix"]
    if not os.path.isdir("/tmp/fasta.index"):
        os.mkdir("/tmp/fasta.index")

    for index_file in index_files:
        name = "{0:s}/{1:s}".format(species, index_file)
        database.download(params["database_bucket"], name,
                          "/tmp/fasta.index/{0:s}".format(index_file))

    output_dir = "/tmp/crux-output-{0:f}-{1:d}".format(
        input_format["timestamp"], input_format["nonce"])

    arguments = [
        "--num-threads",
        str(params["num_threads"]),
        "--txt-output",
        "T",
        "--concat",
        "T",
        "--output-dir",
        output_dir,
        "--overwrite",
        "T",
    ]

    command = "cd /tmp; ./crux tide-search {0:s} fasta.index {1:s}".format(
        file, " ".join(arguments))
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as exc:
        print("Status : FAIL", exc.returncode, exc.output)
        raise exc
    input_file = "{0:s}/tide-search.txt".format(output_dir)
    output_format["suffix"] = species
    output_format["ext"] = "txt"
    output_file = "/tmp/{0:s}".format(util.file_name(output_format))
    os.rename(input_file, output_file)
    shutil.rmtree(output_dir)

    return [output_file]
Example #14
def run(database: Database, file: str, params, input_format, output_format,
        offsets: List[int]):
    print("compress", file)
    dir_path = "/tmp/fastore_test/"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    bin_path = dir_path  #+ "bin/"
    if not os.path.exists(bin_path):
        os.makedirs(bin_path)
    script_path = dir_path  #+ "script/"
    if not os.path.exists(script_path):
        os.makedirs(script_path)

    script_files = ["fastore_compress.sh"]
    bin_files = ["fastore_bin", "fastore_pack", "fastore_rebin"]

    for s in script_files:
        download(database, params["program_bucket"], script_path, s)

    for b in bin_files:
        download(database, params["program_bucket"], bin_path, b)

    with open(script_path + "reference.fq", "wb+") as f:
        database.get_entries(params["bucket"],
                             params["input_prefix"])[0].download(f)

    input_file = file
    output_format["ext"] = "compress"

    arguments = [
        "in " + input_file,
        "pair reference.fq",
        "threads 2",
    ]

    command = "cd {0:s}; ./fastore_compress.sh --lossless --{1:s}".format(
        script_path, " --".join(arguments))
    print(command)
    subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    print("after")

    output_files = ["OUT.cdata", "OUT.cmeta"]
    output_list = []
    for f in output_files:
        output_format["ext"] = f.split(".")[-1]
        output_file = "/tmp/{0:s}".format(util.file_name(output_format))
        shutil.move(script_path + f, output_file)
        output_list.append(output_file)

    return output_list
Example #15
def run(database: Database, test_key: str, params, input_format, output_format,
        offsets: List[int]):
    train_obj = database.get_entry("spacenet", params["train_key"])
    train_it = classification.Iterator(
        train_obj,
        OffsetBounds(params["train_offsets"][0], params["train_offsets"][1]))
    train_x = []
    train_y = []
    more = True
    while more:
        [items, _, more] = train_it.next()
        for [features, c] in items:
            train_x.append(features)
            train_y.append(c)

    neigh = NearestNeighbors(n_neighbors=params["k"], algorithm="brute")
    neigh.fit(train_x)

    pixels = []
    rgb = []
    with open(test_key, "rb") as f:
        lines = filter(lambda l: len(l.strip()) > 0, f.read().split(b"\n\n"))
        for line in lines:
            parts = line.split(b' ')
            x = int(parts[0])
            y = int(parts[1])
            pixels.append([x, y])
            rgb.append(np.frombuffer(b' '.join(parts[2:]), dtype=int))

    [distances, indices] = neigh.kneighbors(rgb)

    items = []
    for i in range(len(distances)):
        [x, y] = pixels[i]
        neighbors = []
        for j in range(len(distances[i])):
            distance = distances[i][j]
            clz = train_y[indices[i][j]]
            neighbors.append((distance, clz))
        items.append((str.encode("{x} {y}".format(x=x, y=y)), neighbors))

    output_format["ext"] = "knn"
    output_file = "/tmp/{0:s}".format(util.file_name(output_format))
    with open(output_file, "wb+") as f:
        knn.Iterator.from_array(items, f, {})

    return [output_file]
Example #16
def create_s3_key_name(key, execute=None):
    now = time.time()
    nonce = random.randint(1, 1000)
    _, ext = os.path.splitext(key)

    m = {
        "prefix": "0",
        "timestamp": now,
        "nonce": nonce,
        "num_bins": 1,
        "bin": 1,
        "file_id": 1,
        "suffix": "tide",
        "num_files": 1,
        "ext": ext[1:],  # Remove period
    }

    if execute:
        m["execute"] = execute
    return util.file_name(m)
Example #17
def run(database: Database, key: str, params, input_format, output_format,
        offsets: List[int]):
    train_key = "train.classification.w1-h1"
    obj = database.get_entry("spacenet", train_key)
    content_length: int = obj.content_length()
    split_size = params["split_size"]
    num_files = int((content_length + split_size - 1) / split_size)
    file_id = 1

    threads = []
    token = "{0:f}-{1:d}".format(output_format["timestamp"],
                                 output_format["nonce"])
    while file_id <= num_files:
        offsets = [(file_id - 1) * split_size,
                   min(content_length, (file_id) * split_size) - 1]
        extra_params = {
            **output_format,
            **{
                "file_id": file_id,
                "num_files": num_files,
                "train_key": train_key,
                "train_offsets": offsets,
            }
        }
        payload = database.create_payload(params["bucket"],
                                          util.file_name(input_format),
                                          extra_params)
        payload["log"] = [
            token, output_format["prefix"], output_format["bin"],
            output_format["num_bins"], file_id, num_files
        ]

        threads.append(
            threading.Thread(target=database.invoke,
                             args=(params["output_function"], payload)))
        threads[-1].start()
        file_id += 1

    for thread in threads:
        thread.join()
    return []
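
The byte-range arithmetic above is easiest to check on a tiny example: a 10-byte object with split_size 4 yields three inclusive ranges that cover every byte exactly once.

content_length, split_size = 10, 4
num_files = (content_length + split_size - 1) // split_size  # ceiling division
ranges = [((fid - 1) * split_size,
           min(content_length, fid * split_size) - 1)
          for fid in range(1, num_files + 1)]
assert num_files == 3
assert ranges == [(0, 3), (4, 7), (8, 9)]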
Example #18
File: ssw.py Project: delimitrou/Ripple
def run(database: Database, target_key: str, params, input_format,
        output_format, offsets: List[int]):
    with open("/tmp/ssw_test", "wb+") as f:
        database.download(params["program_bucket"], "ssw_test", f)

    subprocess.call("chmod 755 /tmp/ssw_test", shell=True)

    query_obj = database.get_entries(params["bucket"],
                                     params["input_prefix"])[0]
    query_key = query_obj.key
    with open("/tmp/{0:s}".format(query_key), "wb+") as f:
        query_obj.download(f)

    output_format["ext"] = "blast"
    output_file = "/tmp/{0:s}".format(util.file_name(output_format))

    command = "cd /tmp; ./ssw_test -p {0:s} {1:s} > {2:s}".format(
        target_key, query_key, output_file)

    subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    return [output_file]
Example #19
def run(database: Database, file: str, params, input_format, output_format):
  database.download(params["database_bucket"], "crux", "/tmp/crux")

  subprocess.call("chmod 755 /tmp/crux", shell=True)
  output_dir = "/tmp/confidence-crux-output-{0:f}-{1:d}".format(input_format["timestamp"], input_format["nonce"])

  arguments = [
    "--output-dir", output_dir,
  ]

  command = "cd /tmp; ./crux assign-confidence {0:s} {1:s}".format(file, " ".join(arguments))
  subprocess.check_output(command, shell=True)

  output_files = []
  input_file = "{0:s}/assign-confidence.target.txt".format(output_dir)
  output_format["ext"] = "confidence"
  output_file = "/tmp/{0:s}".format(util.file_name(output_format))
  os.rename(input_file, output_file)
  output_files.append(output_file)

  return output_files
Example #20
def combine(database: Database, table_name, key, input_format, output_format,
            offsets, params):
    output_format["file_id"] = input_format["bin"]
    output_format["bin"] = 1
    output_format["num_bins"] = 1
    output_format["num_files"] = input_format["num_bins"]
    file_name = util.file_name(output_format)
    util.make_folder(output_format)
    [combine, last_file, keys] = util.combine_instance(table_name, key, params)
    if combine:
        msg = "Combining TIMESTAMP {0:f} NONCE {1:d} BIN {2:d} FILE {3:d}"
        msg = msg.format(input_format["timestamp"], input_format["nonce"],
                         input_format["bin"], input_format["file_id"])
        print(msg)

        format_lib = importlib.import_module("formats." +
                                             params["output_format"])
        iterator_class = getattr(format_lib, "Iterator")
        temp_name = "/tmp/{0:s}".format(file_name)
        # Make this deterministic and combine in the same order
        keys.sort()
        entries: List[Entry] = list(
            map(lambda key: database.get_entry(table_name, key), keys))
        metadata: Dict[str, str] = {}
        if database.contains(table_name, file_name):
            return True

        with open(temp_name, "wb+") as f:
            metadata = iterator_class.combine(entries, f, params)

        found = database.contains(table_name, file_name)
        if not found:
            with open(temp_name, "rb") as f:
                database.put(params["bucket"], file_name, f, metadata, True)
        os.remove(temp_name)
        return True
    else:
        return database.contains(table_name, file_name) or key != last_file
Example #21
def handle_pivots(database: Database, bucket_name, key, input_format,
                  output_format, offsets, params):
    entry: Entry = database.get_entry(bucket_name, key)

    format_lib = importlib.import_module("formats." + params["input_format"])
    iterator_class = getattr(format_lib, "Iterator")
    if len(offsets) > 0:
        it = iterator_class(entry, OffsetBounds(offsets[0], offsets[1]))
    else:
        it = iterator_class(entry, None)

    items = it.get(it.get_start_index(), it.get_end_index())
    pivots: List[float] = create_pivots(database, format_lib, iterator_class,
                                        list(items), params)

    output_format["ext"] = "pivot"
    pivot_key = util.file_name(output_format)

    spivots = "\t".join(list(map(lambda p: str(p), pivots)))
    content = str.encode("{0:s}\n{1:s}\n{2:s}".format(bucket_name, key,
                                                      spivots))
    database.write(params["bucket"], pivot_key, content, {}, True)
    return True
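
The pivot file written above is three newline-separated fields, the last a tab-separated list of floats. A minimal sketch of the matching reader (a hypothetical helper, not part of this project excerpt):

from typing import List, Tuple

def parse_pivot_file(content: bytes) -> Tuple[str, str, List[float]]:
    # Inverse of the writer above: recover (bucket_name, key, pivots).
    bucket_name, key, spivots = content.decode().split("\n")
    return bucket_name, key, [float(p) for p in spivots.split("\t")]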
Example #22
def run(database: Database, file: str, params, input_format, output_format):
    database.download(params["database_bucket"], "crux", "/tmp/crux")
    subprocess.call("chmod 755 /tmp/crux", shell=True)

    command = "cd /tmp; ./crux param-medic {0:s}".format(file)
    output = subprocess.check_output(command,
                                     shell=True,
                                     stderr=subprocess.STDOUT).decode("utf-8")
    print(output)

    phos = PHOSPHORYLATION.search(output)
    itraq = ITRAQ.search(output)
    silac = SILAC.search(output)
    tmt6 = TMT6.search(output)
    tmt10 = TMT10.search(output)

    map_bucket = None
    if tmt6:
        if phos:
            map_bucket = "maccoss-tmt6-phosphorylation-fasta"
        else:
            map_bucket = "maccoss-tmt6-fasta"
    elif tmt10:
        if phos:
            map_bucket = "maccoss-tmt10-phosphorylation-fasta"
        else:
            map_bucket = "maccoss-tmt10-fasta"
    elif itraq:
        if phos:
            map_bucket = "maccoss-itraq-phosphorylation-fasta"
        else:
            map_bucket = "maccoss-itraq-fasta"
    elif phos:
        map_bucket = "maccoss-phosphorylation-fasta"
    elif silac:
        map_bucket = "maccoss-silac-fasta"
    else:
        map_bucket = "maccoss-normal-fasta"

    payload = {
        "Records": [{
            "s3": {
                "bucket": {
                    "name": params["bucket"],
                },
                "object": {
                    "key": util.file_name(input_format),
                },
                "extra_params": {
                    "map_bucket": map_bucket,
                    "prefix": output_format["prefix"],
                }
            }
        }]
    }

    output_file = util.file_name(output_format)
    database.write(params["bucket"], output_file, output, {}, False)
    database.invoke(params["output_function"], payload)

    return []
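
The hand-built payload above mimics an S3 event notification, plus a project-specific extra_params field. A minimal builder sketch (the function name is hypothetical):

from typing import Any, Dict

def s3_event_payload(bucket, key, extra):
    # Shape copied from the literal above: one record carrying the bucket,
    # the object key, and the extra parameters the next stage reads.
    return {
        "Records": [{
            "s3": {
                "bucket": {"name": bucket},
                "object": {"key": key},
                "extra_params": extra,
            }
        }]
    }

# s3_event_payload(params["bucket"], util.file_name(input_format),
#                  {"map_bucket": map_bucket, "prefix": output_format["prefix"]})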
Example #23
def run(database: Database, key: str, params, input_format, output_format,
        offsets: List[int]):
    prefix = "{0:d}/{1:f}-{2:d}/".format(params["image"],
                                         input_format["timestamp"],
                                         input_format["nonce"])
    entries = database.get_entries(params["bucket"], prefix)
    assert (len(entries) == 1)
    entry = entries[0]

    output_format["ext"] = entry.key.split(".")[-1]
    output_file = "/tmp/{0:s}".format(util.file_name(output_format))
    with open(output_file, "wb+") as f:
        entry.download(f)

    im = Image.open(output_file)
    width, height = im.size

    classifications = np.empty([height, width], dtype=int)
    it = knn.Iterator(
        database.get_entry(params["bucket"], key.replace("/tmp/", "")))
    more = True
    while more:
        [items, _, more] = it.next()
        for [point, neighbors] in items:
            [x, y] = list(map(lambda p: int(p), point.split(b' ')))
            scores = [0, 0, 0]
            neighbors = sorted(neighbors, key=lambda n: n[0])
            d1 = neighbors[0][0]
            dk = neighbors[-1][0]
            denom = dk - d1 if dk != d1 else 0.0001
            for i in range(len(neighbors)):
                [d, c] = neighbors[i]
                w = 1 if i == 0 else (dk - d) / denom
                scores[c] += w
            m = max(scores)
            top = [i for i, j in enumerate(scores) if j == m]
            if len(top) == 1:
                classifications[y][x] = top[0]
            else:
                classifications[y][x] = Indices.outside.value

    for y in range(height):
        for x in range(width):
            if classifications[y][x] == Indices.border.value:
                counts = [0, 0, 0]
                for dy in range(-1, 2):
                    for dx in range(-1, 2):
                        iy = y + dy
                        ix = x + dx
                        if 0 <= iy < height and 0 <= ix < width:
                            counts[classifications[iy][ix]] += 1
                if counts[Indices.border.value] <= 1:
                    classifications[y][x] = Indices.outside.value

#  visited = {}
#  for y in range(height):
#    for x in range(width):
#      if x in visited and y in visited[x]:
#        continue
#
#      if classifications[y][x] == Indices.outside.value:
#        counts = [0, 0, 0]
#        total = 0
#        for dy in range(-1, 2):
#          for dx in range(-1, 2):
#            iy = y + dy
#            ix = x + dx
#            if (0 <= iy and iy < height) and (0 <= ix and ix < width):
#              counts[classifications[iy][ix]] += 1
#            total += 1
#        if counts[Indices.outside.value] == total:
#          flood(classifications, x, y, visited, width, height, Indices.outside.value)

    for y in range(height):
        for x in range(width):
            if classifications[y][x] == Indices.border.value:
                im.putpixel((x, y), (255, 0, 0))
            # elif classifications[y][x] == Indices.inside.value:
            #     im.putpixel((x, y), (255, 255, 0))

    im.save(output_file)

    return [output_file]
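
The voting loop above weights each neighbor linearly by distance: the nearest neighbor gets weight 1 and the farthest weight 0, which matches Dudani-style distance-weighted k-NN. A minimal sketch of just the weight computation, assuming distances are sorted ascending:

def neighbor_weights(distances):
    d1, dk = distances[0], distances[-1]
    denom = dk - d1 if dk != d1 else 0.0001  # same tie guard as the loop above
    return [1 if i == 0 else (dk - d) / denom
            for i, d in enumerate(distances)]

# neighbor_weights([1.0, 2.0, 3.0]) -> [1, 0.5, 0.0]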
Example #24
def run(file, params, input_format, output_format):
    util.print_read(input_format, file, params)

    s3 = boto3.resource('s3')
    print("Download from database ", params["database_bucket"])
    database_bucket = s3.Bucket(params["database_bucket"])

    if "species" in params:
        species = params["species"]
        print("species", params["species"])
    else:
        raise Exception("Tide needs species parameter specified")

    with open("/tmp/fasta", "wb") as f:
        print("downloading")
        database_bucket.download_fileobj("{0:s}/fasta".format(species), f)
        print("downloaded")

    with open("/tmp/crux", "wb") as f:
        database_bucket.download_fileobj("crux", f)

    subprocess.call("chmod 755 /tmp/crux", shell=True)
    index_files = ["auxlocs", "pepix", "protix"]
    if not os.path.isdir("/tmp/fasta.index"):
        os.mkdir("/tmp/fasta.index")

    for index_file in index_files:
        name = "{0:s}/{1:s}".format(species, index_file)
        with open("/tmp/fasta.index/{0:s}".format(index_file), "wb") as f:
            database_bucket.download_fileobj(name, f)

    output_dir = "/tmp/crux-output-{0:f}-{1:d}".format(
        input_format["timestamp"], input_format["nonce"])

    arguments = [
        "--num-threads",
        str(params["num_threads"]),
        "--txt-output",
        "T",
        "--concat",
        "T",
        "--output-dir",
        output_dir,
        "--overwrite",
        "T",
    ]

    ppm_arguments = arguments + [
        "--precursor-window-type",
        "ppm",
        "--auto-precursor-window",
        "fail",
    ]

    command = "cd /tmp; ./crux tide-search {0:s} fasta.index {1:s}".format(
        file, " ".join(ppm_arguments))
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as exc:
        if exc.returncode == 1:
            command = "cd /tmp; ./crux tide-search {0:s} fasta.index {1:s}".format(
                file, " ".join(arguments))
            try:
                subprocess.check_output(command,
                                        stderr=subprocess.STDOUT,
                                        shell=True)
            except subprocess.CalledProcessError as exc:
                print("Status : FAIL", exc.returncode, exc.output)
                raise exc
        else:
            print("Status : FAIL", exc.returncode, exc.output)
            raise exc

    input_file = "{0:s}/tide-search.txt".format(output_dir)
    output_format["suffix"] = species
    output_format["ext"] = "txt"
    output_file = "/tmp/{0:s}".format(util.file_name(output_format))
    os.rename(input_file, output_file)
    return [output_file]
Example #25
    maxpointline = 0
    for i in range(len(lpoint)):
        if lpoint[i][3] > maxpoint:
            maxpoint = lpoint[i][3]
            maxpointline = i

    retpoint = [
        int(lpoint[maxpointline][0]),
        int(lpoint[maxpointline][1]),
        int(lpoint[maxpointline][2])
    ]
    return retpoint


data = []
aa = util.file_name("hongye10/", ".jpg")
pinnum = len(aa)
for i in range(pinnum):
    lena = Image.open('hongye10/' + aa[i] + '.jpg')
    X_im1 = np.asarray(lena).copy()
    data = np.append(data, X_im1)

data = data.reshape(pinnum, 100 * 320, 3)
newpic = []
# For every pixel position across all loaded images, pick the most frequently
# occurring color to build the new image.
for j in range(100 * 320):
    point = []
    for i in range(pinnum):
        point = np.append(point, data[i][j])

    point = point.reshape(pinnum, 3)
Example #26
def run(database: Database, file: str, params, input_format, output_format):
    output_file = "/tmp/" + util.file_name(output_format)
    print("Echo!")
    shutil.move(file, output_file)
    return [output_file]