Exemplo n.º 1
0
def ec2emase(file_in, file_out, target_file=None):
    """
    Build an APM alignment matrix from a parsed equivalence-class file.

    :param file_in: Input file, handed to ``ec_file.parse``.
    :param file_out: Output file name.  NOTE(review): the code visible here
                     never finalizes or saves the APM and never reads this
                     argument -- the function may be truncated; confirm
                     against the original module.
    :param target_file: Not used by the visible code.
    :return: None
    :raises Exception: any error raised while filling the matrix is reported
                       through ``_show_error`` and then re-raised unchanged.
    """
    ec = ec_file.parse(file_in)
    num_haplotypes = len(ec._haplotypes_list)
    new_shape = (len(ec._targets_list), num_haplotypes, len(ec._ec_list))

    LOG.info('Creating APM...')
    LOG.debug('Shape={}'.format(new_shape))

    apm = APM(shape=new_shape, haplotype_names=ec._haplotypes_list, locus_names=ec._targets_list, read_names=ec._ec_list)

    LOG.debug('ec._haplotypes_list={}'.format(str(ec._haplotypes_list)))
    LOG.debug('ec._targets_list[0:10]={}'.format(str(ec._targets_list[0:10])))
    LOG.debug('ec._ec_list[0:10]={}'.format(str(ec._ec_list[0:10])))

    # counts -> the number of times this equivalence class has appeared
    apm.count = ec._ec_counts_list

    try:
        for alignment in ec._alignments:
            # each alignment is indexed as (ec index, target index, haplotype bits)
            ec_index = alignment[0]
            target_index = alignment[1]
            temp_bits = alignment[2]

            # no haplotype bit set -> nothing to record for this pair
            if temp_bits == 0:
                continue

            for hap_index, bit in enumerate(int_to_list(temp_bits, num_haplotypes)):
                if bit:
                    # lid, hid, rid, value
                    apm.set_value(target_index, hap_index, ec_index, 1)
    except Exception:
        _show_error()
        # bare raise keeps the original traceback ("raise e" would replace it
        # on Python 2)
        raise
Exemplo n.º 2
0
def _convert_open_alignment_file(file_in):
    """Open ``file_in`` with pysam as BAM, falling back to SAM if that fails."""
    try:
        sam_file = pysam.Samfile(file_in, 'rb')
        if len(sam_file.header) == 0:
            raise Exception("BAM File has no header information")
    except Exception:
        sam_file = pysam.Samfile(file_in, 'r')
        if len(sam_file.header) == 0:
            raise Exception("SAM File has no header information")
    return sam_file


def _convert_record_ec(ec, ec_idx, target_ids):
    """Count one occurrence of the equivalence class made of ``target_ids``."""
    ec_key = ','.join(sorted(target_ids))
    try:
        ec[ec_key] += 1
    except KeyError:
        ec[ec_key] = 1
        ec_idx[ec_key] = len(ec_idx)


def _convert_split_ec_key(ec_key, target_idx_to_main_target):
    """Return (set of tids in ``ec_key``, set of main targets those tids map to)."""
    tids = ec_key.split(",")
    return set(tids), set(target_idx_to_main_target[tid] for tid in tids)


def _convert_haplotype_bits(sam_file, main_target, haplotypes, tids):
    """Return one 0/1 flag per haplotype: 1 when '<main_target>_<hap>' is in ``tids``."""
    bits = []
    for hap in haplotypes:
        read_transcript = '{}_{}'.format(main_target, hap)  # e.g. 'ENMUST..001_A'
        # get the numerical tid corresponding to read_transcript
        read_transcript_idx = str(sam_file.gettid(read_transcript))
        bits.append(1 if read_transcript_idx in tids else 0)
    return bits


def _convert_write_emase(file_out, sam_file, ec, ec_idx, main_targets, haplotypes, target_idx_to_main_target):
    """Build an APM from the collected equivalence classes and save it to ``file_out``."""
    LOG.info('Creating APM...')
    if LOG.isEnabledFor(VERBOSE_LEVELV_NUM):
        LOG.verbose("HAPLOTYPES")
        for h in haplotypes:
            LOG.verbose(h)
        LOG.verbose("MAIN TARGETS")
        for m in main_targets:
            LOG.verbose(m)

    new_shape = (len(main_targets), len(haplotypes), len(ec))
    ec_ids = list(xrange(len(ec)))

    LOG.debug('Shape={}'.format(new_shape))

    apm = APM(shape=new_shape, haplotype_names=haplotypes, locus_names=list(main_targets), read_names=ec_ids)

    # ec.values -> the number of times this equivalence class has appeared
    apm.count = list(ec.values())

    for ec_key in ec:
        tids, ec_main_targets = _convert_split_ec_key(ec_key, target_idx_to_main_target)

        for main_target in ec_main_targets:
            # main_target is not an index, but a value like 'ENMUST..001'
            bits = _convert_haplotype_bits(sam_file, main_target, haplotypes, tids)
            for i, bit in enumerate(bits):
                if bit:
                    LOG.debug("{}\t{}\t{}".format(ec_idx[ec_key], main_targets[main_target], i))

                    # main_targets[main_target] = idx of main target
                    # i = the haplotype
                    # ec_idx[ec_key] = index of ec
                    apm.set_value(main_targets[main_target], i, ec_idx[ec_key], 1)

    LOG.info("Finalizing...")
    apm.finalize()
    apm.save(file_out, title='bam2ec')


def _convert_write_bin(file_out, sam_file, ec, ec_idx, main_targets, haplotypes, target_idx_to_main_target):
    """Write the collected equivalence classes to ``file_out`` in the version-1 binary layout."""
    LOG.info("Generating BIN file...")

    # "with" guarantees the file is closed even when a write fails part-way
    with open(file_out, "wb") as f:
        # version
        f.write(pack('<i', 1))
        LOG.verbose("1\t# VERSION")

        # targets
        LOG.verbose("{:,}\t# NUMBER OF TARGETS".format(len(main_targets)))
        f.write(pack('<i', len(main_targets)))
        for main_target in main_targets:
            LOG.verbose("{:,}\t{}\t# {:,}".format(len(main_target), main_target, main_targets[main_target]))
            f.write(pack('<i', len(main_target)))
            f.write(pack('<{}s'.format(len(main_target)), main_target))

        # haplotypes
        LOG.verbose("{:,}\t# NUMBER OF HAPLOTYPES".format(len(haplotypes)))
        f.write(pack('<i', len(haplotypes)))
        for idx, hap in enumerate(haplotypes):
            LOG.verbose("{:,}\t{}\t# {:,}".format(len(hap), hap, idx))
            f.write(pack('<i', len(hap)))
            f.write(pack('<{}s'.format(len(hap)), hap))

        # equivalence classes
        LOG.verbose("{:,}\t# NUMBER OF EQUIVALANCE CLASSES".format(len(ec)))
        f.write(pack('<i', len(ec)))
        for idx, ec_key in enumerate(ec):
            # ec[ec_key] is the count
            LOG.verbose("{:,}\t# {}\t{:,}".format(ec[ec_key], ec_key, idx))
            f.write(pack('<i', ec[ec_key]))

        LOG.info("Determining mappings...")

        # equivalence class mappings: the total has to be written before the
        # records themselves, hence the separate counting pass
        counter = 0
        for ec_key in ec:
            counter += len(_convert_split_ec_key(ec_key, target_idx_to_main_target)[1])

        LOG.verbose("{:,}\t# NUMBER OF EQUIVALANCE CLASS MAPPINGS".format(counter))
        f.write(pack('<i', counter))

        for ec_key in ec:
            tids, ec_main_targets = _convert_split_ec_key(ec_key, target_idx_to_main_target)

            for main_target in ec_main_targets:
                # main_target is not an index, but a value like 'ENMUST..001'
                bits = _convert_haplotype_bits(sam_file, main_target, haplotypes, tids)
                bits_value = list_to_int(bits)

                LOG.verbose("{}\t{}\t{}\t# {}\t{}".format(ec_idx[ec_key], main_targets[main_target], bits_value, main_target, bits))
                f.write(pack('<i', ec_idx[ec_key]))
                f.write(pack('<i', main_targets[main_target]))
                f.write(pack('<i', bits_value))


def convert(file_in, file_out, target_file=None, emase=False):
    """
    Collapse the alignments of a BAM/SAM file into equivalence classes and
    write them out, either as an EMASE APM file or as a version-1 binary file.

    Consecutive alignments sharing a query name are treated as one read; the
    sorted set of target ids the read hit is its equivalence class.  Reference
    names are expected to look like ``<main target>_<haplotype>``.

    :param file_in: Input BAM/SAM file.
    :param file_out: Output file name.  An existing file is removed first.
    :param target_file: The target file is a list of main targets that will be used as main targets,
                        not to limit the main targets.  Useful for comparison purposes between BAM files.
    :param emase: Emase output or normal.
    :return: None.  Returns early (after logging) when a reference name has no
             haplotype part; errors while writing the output are reported
             through ``_show_error`` and not re-raised.
    :raises SystemExit: when the target file cannot be parsed or the input
                        contains a target that is not in the target file.
    """
    LOG.info('Input File: {}'.format(file_in))
    LOG.info('Output File: {}'.format(file_out))

    if target_file:
        LOG.info('Target File: {}'.format(target_file))

    if emase:
        LOG.info('Emase format requested')

    main_targets = OrderedDict()

    if target_file:
        main_targets = parse_target_file(target_file)
        if len(main_targets) == 0:
            LOG.error("Unable to parse target file")
            sys.exit(-1)

    # ec = equivalence class
    #      the KEY is a comma separated string of tids
    #      the VALUE is the number of times this equivalence class has appeared
    ec = OrderedDict()

    # ec_idx = lookup to ec
    #          the KEY is a comma separated string of tids
    #          the VALUE is a number specifying the insertion order of the KEY value in ec
    ec_idx = {}

    # all the haplotypes
    haplotypes = set()

    # a lookup of tids to main_targets (Ensembl IDs)
    target_idx_to_main_target = {}

    # unique number of tids encountered and the count
    unique_tids = {}

    # unique reads
    unique_reads = {}

    same_read_target_counter = 0

    sam_file = _convert_open_alignment_file(file_in)

    try:
        line_no = 0
        read_id = None
        target_ids = []

        for alignment in sam_file:
            line_no += 1

            # reference_sequence_name = Column 3 from file, the Reference NAME (EnsemblID_Haplotype)
            # tid = the target id, which is 0 or a positive integer mapping to entries
            #       within the sequence dictionary in the header section of a BAM file
            # main_target = the Ensembl id of the transcript

            # 0x4 is the SAM "segment unmapped" bit; test the bit rather than
            # the whole flag so unmapped reads carrying other bits (e.g.
            # paired-end reads) are skipped too
            if alignment.flag & 0x4:
                continue

            reference_sequence_name = sam_file.getrname(alignment.tid)
            tid = str(alignment.tid)
            name_parts = reference_sequence_name.split('_')
            main_target = name_parts[0]

            unique_tids[tid] = unique_tids.get(tid, 0) + 1

            if main_target not in main_targets:
                if target_file:
                    LOG.error("Unexpected target found in BAM file: {}".format(main_target))
                    sys.exit(-1)
                main_targets[main_target] = len(main_targets)

            target_idx_to_main_target[tid] = main_target

            try:
                haplotypes.add(name_parts[1])
            except IndexError:
                LOG.info('Unable to parse Haplotype from {}'.format(reference_sequence_name))
                return

            # qname = Column 1 from file, the Query template NAME.  Count the
            # current alignment's read, not the previous one, so reads with a
            # single alignment are included in the unique-read total.
            qname = alignment.qname
            unique_reads[qname] = unique_reads.get(qname, 0) + 1

            if read_id is None:
                read_id = qname

            if qname != read_id:
                # a new read starts: close the equivalence class of the previous one
                _convert_record_ec(ec, ec_idx, target_ids)
                read_id = qname
                target_ids = [tid]
            elif tid not in target_ids:
                target_ids.append(tid)
            else:
                same_read_target_counter += 1

            if line_no % 1000000 == 0:
                LOG.info("{0:,} alignments processed, with {1:,} equivalence classes".format(line_no, len(ec)))

        # close the equivalence class of the final read; nothing to record
        # when the input held no mapped alignments at all
        if target_ids:
            _convert_record_ec(ec, ec_idx, target_ids)

        LOG.info("{0:,} alignments processed, with {1:,} equivalence classes".format(line_no, len(ec)))

        haplotypes = sorted(haplotypes)

        LOG.info("# Unique Reads: {:,}".format(len(unique_reads)))
        LOG.info("# Reads/Target Duplications: {:,}".format(same_read_target_counter))
        LOG.info("# Main Targets: {:,}".format(len(main_targets)))
        LOG.info("# Haplotypes: {:,}".format(len(haplotypes)))
        LOG.info("# Unique Targets: {:,}".format(len(unique_tids)))
        LOG.info("# Equivalence Classes: {:,}".format(len(ec)))

        try:
            os.remove(file_out)
        except OSError:
            pass

        try:
            if emase:
                _convert_write_emase(file_out, sam_file, ec, ec_idx, main_targets, haplotypes, target_idx_to_main_target)
            else:
                _convert_write_bin(file_out, sam_file, ec, ec_idx, main_targets, haplotypes, target_idx_to_main_target)
        except Exception:
            # best effort, as before: report the failure and fall through
            _show_error()
    finally:
        sam_file.close()

    LOG.info("Done with converting BAM file!")
Exemplo n.º 3
0
def emasify(binary_file_name, emase_file_name):
    """
    Convert a binary alignment file into an EMASE alignment (APM) file.

    The binary file is read as native-endian 32-bit integers: a version,
    the target names, the haplotype names, then either the read names
    (version 0) or the equivalence-class counts (version 1), followed by the
    alignment triples ``(read/ec index, target index, haplotype bits)``.

    :param binary_file_name: path of the binary file to read
    :param emase_file_name: path the APM is saved to
    :return: None
    :raises ValueError: if ``binary_file_name`` is empty or the file declares
                        a version other than 0 or 1
    """

    if not binary_file_name:
        raise ValueError("empty file name, cannot load")

    print("Binary File: {0}".format(binary_file_name))

    int_type = np.dtype('i')

    # read everything up front inside "with" so the file is always closed
    with open(binary_file_name, 'rb') as f:

        def read_int():
            # int() turns the numpy scalar into a Python int, so arithmetic
            # such as num_alignments * 3 cannot wrap around at 32 bits
            return int(np.fromfile(f, dtype=int_type, count=1)[0])

        def read_names(how_many):
            # each name is stored as <length><bytes>
            names = []
            for _ in xrange(0, how_many):
                str_len = read_int()
                names.append(np.fromfile(f, dtype=np.dtype('a' + str(str_len)), count=1)[0])
            return names

        file_version = read_int()

        if file_version == 0:
            print("Version: 0, Reads")
        elif file_version == 1:
            print("Version: 1, Equivalence Class")
        else:
            # stop here instead of parsing an unknown layout as version 1
            print("Unknown version, exiting")
            raise ValueError("unknown binary file version: {0}".format(file_version))

        # TARGETS

        num_targets = read_int()
        print("Target Count: {0}".format(num_targets))
        target_ids = read_names(num_targets)

        # HAPLOTYPES

        num_haplotypes = read_int()
        print("Haplotype Count: {0}".format(num_haplotypes))
        haplotype_ids = read_names(num_haplotypes)

        counts = None

        if file_version == 0:

            # READS

            num_reads = read_int()
            print("Read Count: {0}".format(num_reads))
            row_names = read_names(num_reads)
            num_rows = len(row_names)
        else:

            # EQUIVALENCE CLASSES

            num_ec = read_int()
            print("Equivalance Class Count: {0}".format(num_ec))
            row_names = [x for x in xrange(0, num_ec)]
            counts = np.fromfile(f, dtype=int_type, count=num_ec)
            num_rows = len(counts)

        # ALIGNMENTS

        num_alignments = read_int()
        print("Alignment Count: {0}".format(num_alignments))

        alignments = np.fromfile(f, dtype=int_type, count=num_alignments * 3)

    print('Creating APM...')
    new_shape = (len(target_ids), len(haplotype_ids), num_rows)
    aln_mat_kallisto = APM(shape=new_shape,
                           haplotype_names=haplotype_ids,
                           locus_names=target_ids,
                           read_names=row_names)

    if counts is not None:
        # number of times each equivalence class was seen
        aln_mat_kallisto.count = counts

    print('Parsing alignments...')
    widgets = [Bar('>'), ' ', ETA(), ' ', Percentage()]
    pbar = ProgressBar(widgets=widgets, maxval=num_alignments * 3).start()

    for i in xrange(0, num_alignments * 3, 3):
        rid = alignments[i]
        lid = alignments[i + 1]
        temp_bits = alignments[i + 2]

        pbar.update(i)
        if temp_bits == 0:
            continue

        for hid, b in enumerate(simple_from_one(temp_bits, num_haplotypes)):
            if b:
                aln_mat_kallisto.set_value(lid, hid, rid, 1)

    pbar.finish()

    print("Finalizing...")
    aln_mat_kallisto.finalize()
    aln_mat_kallisto.save(emase_file_name, title='KALLISTOALIGN')

    print("DONE")
Exemplo n.º 4
0
def bin2emase(binary_file_name, emase_file_name):
    """
    Read a version-1 (equivalence class) binary file and save it as an APM.

    The file is read as native 32-bit integers: version, targets
    (count, then length + bytes per name), haplotypes (same layout),
    equivalence-class counts, and finally the alignment triples.

    NOTE(review): the outer ``try`` has no ``except``/``finally`` in the code
    visible here, so this definition looks truncated -- confirm against the
    original module.

    :param binary_file_name: path of the binary file to read
    :param emase_file_name: path the APM is saved to
    :return: None
    :raises SystemExit: for a version-0 file or an unknown version
    """
    try:
        if not binary_file_name:
            raise ValueError("empty file name, cannot load")

        LOG.info("Binary File: {0}".format(binary_file_name))

        # NOTE(review): f is not closed anywhere in the visible code
        f = open(binary_file_name, 'rb')

        file_version = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]

        # only version 1 is handled; everything else terminates the process
        if file_version == 0:
            LOG.info("Version: 0, Reads, exiting")
            sys.exit(-1)
        elif file_version == 1:
            LOG.info("Version: 1, Equivalence Class")
        else:
            LOG.info("Unknown version, exiting")
            sys.exit(-1)

        # TARGETS

        target_ids = []
        targets = OrderedDict()

        num_targets = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
        LOG.info("Target Count: {0:,}".format(num_targets))

        # each name is stored as <length><bytes>
        for i in xrange(0, num_targets):
            str_len = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
            target = np.fromfile(f, dtype=np.dtype('a' + str(str_len)), count=1)[0]
            targets[target] = i
            target_ids.append(target)

            LOG.verbose("{}\t{}".format(i, target))

        # HAPLOTYPES

        haplotype_ids = []
        haplotypes = OrderedDict()

        num_haplotypes = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
        LOG.info("Haplotype Count: {0:,}".format(num_haplotypes))

        for i in xrange(0, num_haplotypes):
            str_len = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
            haplotype = np.fromfile(f, dtype=np.dtype('a' + str(str_len)), count=1)[0]
            haplotypes[haplotype] = i
            haplotype_ids.append(haplotype)

            LOG.verbose("{}\t{}".format(i, haplotype))

        # EQUIVALENCE CLASSES

        num_ec = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
        LOG.info("Equivalance Class Count: {0:,}".format(num_ec))

        # equivalence classes have no names; their indices are used instead
        ec_ids = [x for x in xrange(0, num_ec)]
        counts = np.fromfile(f, dtype=np.dtype('i'), count=num_ec)

        # ALIGNMENTS

        num_alignments = np.fromfile(f, dtype=np.dtype('i'), count=1)[0]
        LOG.info("Alignment Count: {0:,}".format(num_alignments))


        new_shape = (num_targets, num_haplotypes, num_ec)
        LOG.info('Creating APM...')

        # same list as built above; recomputed here
        ec_ids = [x for x in xrange(0, num_ec)]

        LOG.debug('Shape={}'.format(new_shape))

        apm = APM(shape=new_shape, haplotype_names=haplotype_ids, locus_names=target_ids, read_names=ec_ids)

        # counts -> the number of times this equivalence class has appeared
        apm.count = counts

        counter = 0
        # alignments are stored flat, three integers per record
        alignments = np.fromfile(f, dtype=np.dtype('i'), count=num_alignments*3)

        for i in xrange(0, num_alignments*3, 3):
            rid = alignments[i]
            lid = alignments[i+1]
            temp_bits = alignments[i+2]

            counter += 1
            # no haplotype bit set -> nothing to record
            if temp_bits == 0:
                continue

            try:
                bits = int_to_list(temp_bits, num_haplotypes)
                # the inner loop reuses the name i; the outer for loop
                # reassigns it on its next iteration
                for i, bit in enumerate(bits):
                    if bit:
                        # NOTE(review): other callers in this file pass
                        # (lid, hid, rid, value) and the APM shape is
                        # (targets, haplotypes, ecs); rid and lid look
                        # swapped here -- confirm before relying on it
                        apm.set_value(rid, i, lid, 1)
            except Exception, e:
                _show_error()
                raise e

        LOG.info("Finalizing...")
        apm.finalize()
        apm.save(emase_file_name, title='bam2ec')
Exemplo n.º 5
0
def emasify(binary_file_name, emase_file_name):
    """
    Convert a binary alignment file into an EMASE alignment (APM) file.

    The binary file is read as native-endian 32-bit integers: a version,
    the target names, the haplotype names, then either the read names
    (version 0) or the equivalence-class counts (version 1), followed by the
    alignment triples ``(read/ec index, target index, haplotype bits)``.

    :param binary_file_name: path of the binary file to read
    :param emase_file_name: path the APM is saved to
    :return: None
    :raises ValueError: if ``binary_file_name`` is empty or the file declares
                        a version other than 0 or 1
    """

    if not binary_file_name:
        raise ValueError("empty file name, cannot load")

    print("Binary File: {0}".format(binary_file_name))

    int_type = np.dtype('i')

    # read everything up front inside "with" so the file is always closed
    with open(binary_file_name, 'rb') as f:

        def read_int():
            # int() turns the numpy scalar into a Python int, so arithmetic
            # such as num_alignments * 3 cannot wrap around at 32 bits
            return int(np.fromfile(f, dtype=int_type, count=1)[0])

        def read_names(how_many):
            # each name is stored as <length><bytes>
            names = []
            for _ in xrange(0, how_many):
                str_len = read_int()
                names.append(np.fromfile(f, dtype=np.dtype('a' + str(str_len)), count=1)[0])
            return names

        file_version = read_int()

        if file_version == 0:
            print("Version: 0, Reads")
        elif file_version == 1:
            print("Version: 1, Equivalence Class")
        else:
            # stop here instead of parsing an unknown layout as version 1
            print("Unknown version, exiting")
            raise ValueError("unknown binary file version: {0}".format(file_version))

        # TARGETS

        num_targets = read_int()
        print("Target Count: {0}".format(num_targets))
        target_ids = read_names(num_targets)

        # HAPLOTYPES

        num_haplotypes = read_int()
        print("Haplotype Count: {0}".format(num_haplotypes))
        haplotype_ids = read_names(num_haplotypes)

        counts = None

        if file_version == 0:

            # READS

            num_reads = read_int()
            print("Read Count: {0}".format(num_reads))
            row_names = read_names(num_reads)
            num_rows = len(row_names)
        else:

            # EQUIVALENCE CLASSES

            num_ec = read_int()
            print("Equivalance Class Count: {0}".format(num_ec))
            row_names = [x for x in xrange(0, num_ec)]
            counts = np.fromfile(f, dtype=int_type, count=num_ec)
            num_rows = len(counts)

        # ALIGNMENTS

        num_alignments = read_int()
        print("Alignment Count: {0}".format(num_alignments))

        alignments = np.fromfile(f, dtype=int_type, count=num_alignments * 3)

    print('Creating APM...')
    new_shape = (len(target_ids), len(haplotype_ids), num_rows)
    aln_mat_kallisto = APM(shape=new_shape, haplotype_names=haplotype_ids, locus_names=target_ids, read_names=row_names)

    if counts is not None:
        # number of times each equivalence class was seen
        aln_mat_kallisto.count = counts

    print('Parsing alignments...')
    widgets = [Bar('>'), ' ', ETA(), ' ', Percentage()]
    pbar = ProgressBar(widgets=widgets, maxval=num_alignments * 3).start()

    for i in xrange(0, num_alignments * 3, 3):
        rid = alignments[i]
        lid = alignments[i + 1]
        temp_bits = alignments[i + 2]

        pbar.update(i)
        if temp_bits == 0:
            continue

        for hid, b in enumerate(simple_from_one(temp_bits, num_haplotypes)):
            if b:
                aln_mat_kallisto.set_value(lid, hid, rid, 1)

    pbar.finish()

    print("Finalizing...")
    aln_mat_kallisto.finalize()
    aln_mat_kallisto.save(emase_file_name, title='KALLISTOALIGN')

    print("DONE")