Пример #1
0
# Report the run configuration before any work starts
logger.info("Existing binning output file: {}".format(contig_bins_file))
logger.info("Final binning output file: {}".format(output_path))
logger.info("Depth: {}".format(depth))
logger.info("Threshold: {}".format(threshold))
logger.info("Number of threads: {}".format(nthreads))

logger.info("GraphBin2 started")

# Timestamp taken at start-up (presumably used later to report elapsed time)
start_time = time.time()

# Get names and lengths of contigs
#--------------------------------------------------------

# Contig index -> contig length (second whitespace-separated column)
contig_lengths = {}

# Contig index <-> contig name (first whitespace-separated column)
contig_names = BidirectionalMap()

# Running index assigned to contigs in the order they appear in the file
contig_num = 0

with open(contig_paths, "r") as file:

    # Iterate lazily instead of loading the whole file with readlines()
    for line in file:

        # Lines starting with "#" are skipped
        if line.startswith("#"):
            continue

        strings = line.strip().split()

        # A blank line yields no fields; skip it instead of raising
        # an IndexError on strings[0]
        if not strings:
            continue

        contig_names[contig_num] = strings[0]
        contig_lengths[contig_num] = int(strings[1])
        contig_num += 1

# Reverse view of the map: contig name -> contig index
contig_names_rev = contig_names.inverse
Пример #2
0
                str(n_bins))
except:
    logger.error(
        "Please make sure that the correct path to the binning result file is provided and it is having the correct format."
    )
    logger.info("Exiting GraphBin... Bye...!")
    sys.exit(1)

logger.info("Constructing the assembly graph")

# Set up the empty containers used while reading the .asqg file
#---------------------------------------------------------------

# Counter of graph nodes, starts at zero
node_count = 0

# Two-way lookup table, filled during parsing
my_map = BidirectionalMap()

# Links collected from the .asqg file
links = []

try:
    # Get contig connections from .asqg file
    with open(assembly_graph_file) as file:
        line = file.readline()

        while line != "":

            # Count the number of contigs
            if "VT" in line:
                start = 'contig-'
                end = ''
                contig_num = int(
    # Read one tab-separated record per line: the first column holds the
    # edge name, the second its coverage.
    # NOTE(review): `line` is presumably primed by a readline() call just
    # before this loop - confirm against the enclosing `with` block.
    start_n = 'edge_'
    end_n = ''

    # Loop-invariant pattern, compiled once instead of on every iteration
    edge_pattern = re.compile('%s(.*)%s' % (start_n, end_n))

    while line != "":

        # Skip whitespace-only lines (e.g. a trailing blank line); the
        # pattern would not match them and .group() would raise an
        # AttributeError on None.
        if not line.strip():
            line = my_file.readline()
            continue

        strings = line.split("\t")

        # The number following "edge_" minus one is used as the contig index
        contig_num = int(edge_pattern.search(strings[0]).group(1)) - 1

        coverages[contig_num] = int(strings[1])
        line = my_file.readline()

# Set up the empty containers used while reading the .gfa file
#--------------------------------------------------------------

# Links collected from the .gfa file
links = []

# Counter of graph nodes, starts at zero
node_count = 0

# Two-way lookup tables, filled during parsing
my_map, my_names_map = BidirectionalMap(), BidirectionalMap()

# try:
# Get contig connections from .gfa file
with open(assembly_graph_file) as file:
    line = file.readline()

    while line != "":

        # Count the number of contigs