def add_to_barcode(read, barcode):
    """Append ``read`` to the buffered message kept for ``barcode``.

    ``barcode_bufs`` maps each barcode to a ``(message, file_index)`` pair.
    If the message already holds ``MAX_BUF_SIZE`` or more reads, it is first
    serialized to ``protobufs/<barcode>_<file_index>.protobuf`` and replaced
    by an empty message with the file index incremented by one.

    Args:
        read: Object providing ``uuid`` and ``mod_base_probs`` attributes.
        barcode: Key into ``barcode_bufs``.

    Raises:
        KeyError: If ``barcode`` has no entry in ``barcode_bufs``.
    """
    message, file_index = barcode_bufs[barcode]

    if len(message.reads) >= MAX_BUF_SIZE:
        # Buffer is full: write it out before accepting the new read.
        print("Writing file %d for %s" % (file_index, barcode))
        out_path = "protobufs/%s_%i.protobuf" % (barcode, file_index)
        with open(out_path, 'wb') as output:
            output.write(message.SerializeToString())

        # Start a fresh buffer that will go to the next numbered file.
        message = reads_pb2.FakReads()
        file_index = file_index + 1
        barcode_bufs[barcode] = (message, file_index)

    entry = message.reads.add()
    entry.uuid = read.uuid
    entry.mod_base_probs = read.mod_base_probs
# Example #2
# 0
# Collect the protobuf files to load, then index every read they contain.
#   protobuf_files: paths of the selected files inside args.protobufs
#   read_ids:       uuid of every read, in the order the reads were loaded
protobuf_files = []
read_ids = []

print("Looking for protobufs")
for filename in os.listdir(args.protobufs):
    # Unless args.nonbarcoded is set, only files whose name contains
    # args.barcode are selected.
    if not args.nonbarcoded and args.barcode not in filename:
        continue
    if ".protobuf" not in filename:
        continue
    # os.path.join (rather than string concatenation) also yields a valid
    # path when args.protobufs has no trailing path separator.
    protobuf_files.append(os.path.join(args.protobufs, filename))

print("Reading protobufs")
for protobuf_name in protobuf_files:
    protobuf_reads = reads_pb2.FakReads()
    # The context manager closes the file even if parsing raises.
    with open(protobuf_name, 'rb') as protobuf:
        protobuf_reads.ParseFromString(protobuf.read())
    # Record where each read lives: (file path, position within that file).
    for index, read in enumerate(protobuf_reads.reads):
        protobuf_index[read.uuid] = (protobuf_name, index)
        read_ids.append(read.uuid)

# Import sacCer3 reference sequence as a dictionary
print("Importing {} reference sequence".format(genome_name))
genome_data = collections.OrderedDict()
with open(args.genome, "r") as ref_seq:
    header = True
    for line in ref_seq:
        line = line.strip("\n")
        if line.startswith(">"):
            if not header:
    if ".fast5" in entry:
        filenames.append(entry)

# Report which files were collected and how many.
print(filenames)
print(len(filenames))

# Check if any fast5 files were found and only continue if some were

if not filenames:
    print("Error: Filenames length is zero")
    # Exit with a non-zero status so the calling shell or pipeline sees the
    # failure; a bare sys.exit() would report success (status 0).
    sys.exit(1)

# Iterate through a list of filenames of the fast5 files currently in the directory
# For each file, a fresh FakReads message is created and one entry per read
# in the fast5 file is added to it.
# NOTE(review): this excerpt appears to stop partway through the inner loop;
# mod_base_table and count_proto are not used in the lines visible here.

for file in filenames:
    reads = reads_pb2.FakReads()
    # Keep only the text before the first '.'; ".fast5" is re-appended below.
    # NOTE(review): a filename with more than one '.' would be shortened
    # further than just dropping its extension - confirm inputs never do.
    file = file.split('.')[0]
    print(file)
    count_fast5 = 0
    count_proto = 0

    with get_fast5_file(file + ".fast5") as f5:
        # Number of reads the fast5 file reports.
        count_fast5 = len(f5.get_read_ids())

        for read_id in f5.get_read_ids():
            read = f5.get_read(read_id)
            # Look up the most recent "Basecall_1D" analysis for this read and
            # fetch its "BaseCalled_template/ModBaseProbs" dataset.
            latest_basecall = read.get_latest_analysis("Basecall_1D")
            mod_base_table = read.get_analysis_dataset(
                latest_basecall, "BaseCalled_template/ModBaseProbs")
            # From here on `read` is rebound: it no longer refers to the fast5
            # read but to the new entry appended to the FakReads message.
            read = reads.reads.add()
            read.uuid = read_id
import sys
import reads_pb2
import os

# Read-count threshold for a per-barcode buffer: add_to_barcode writes the
# buffer to disk before adding a read once it already holds this many.
MAX_BUF_SIZE = 1000

barcode_bufs = {
    "unclassified": (reads_pb2.FakReads(), 0),
    "barcode01": (reads_pb2.FakReads(), 0),
    "barcode02": (reads_pb2.FakReads(), 0),
    "barcode03": (reads_pb2.FakReads(), 0),
    "barcode04": (reads_pb2.FakReads(), 0),
    "barcode05": (reads_pb2.FakReads(), 0),
    "barcode06": (reads_pb2.FakReads(), 0),
    "barcode07": (reads_pb2.FakReads(), 0),
    "barcode08": (reads_pb2.FakReads(), 0),
    "barcode09": (reads_pb2.FakReads(), 0),
    "barcode10": (reads_pb2.FakReads(), 0),
    "barcode11": (reads_pb2.FakReads(), 0),
    "barcode12": (reads_pb2.FakReads(), 0),
    "barcode13": (reads_pb2.FakReads(), 0),
    "barcode14": (reads_pb2.FakReads(), 0),
    "barcode15": (reads_pb2.FakReads(), 0),
    "barcode16": (reads_pb2.FakReads(), 0),
    "barcode17": (reads_pb2.FakReads(), 0),
    "barcode18": (reads_pb2.FakReads(), 0),
    "barcode19": (reads_pb2.FakReads(), 0),
    "barcode20": (reads_pb2.FakReads(), 0),
    "barcode21": (reads_pb2.FakReads(), 0),
    "barcode22": (reads_pb2.FakReads(), 0),
    "barcode23": (reads_pb2.FakReads(), 0),