def prepareConverter(self, fileIn):
    """Load a genome-to-taxon mapping file into ``self.Converter``.

    Each line of *fileIn* is split on whitespace; the first field is read
    as the genome identifier and the second as the taxon identifier (both
    converted with ``int``). The pair is stored as
    ``self.Converter[genome] = taxon``; a later line with the same genome
    overwrites the earlier entry.

    Progress is reported on stderr every 100000 lines.

    Raises:
        IndexError: if a line has fewer than two whitespace-separated
            fields (this includes blank lines).
        ValueError: if one of the first two fields is not an integer.
    """
    # NOTE(review): FileUtility is defined outside this block; isValid is
    # presumably a readability/existence check -- confirm its failure mode.
    FileUtility.isValid(fileIn)
    sys.stderr.write("counting lines to prepare converter\n")
    numOfLines = FileUtility.countLines(fileIn)
    sys.stderr.write(str(numOfLines) + " to read\n")

    readed = 0
    # The context manager closes the file even if a malformed line raises.
    with open(fileIn) as handle:
        for line in handle:
            fields = line.split()
            genome = int(fields[0])
            taxon = int(fields[1])
            self.Converter[genome] = taxon
            readed += 1
            if readed % 100000 == 0:
                sys.stderr.write(
                    str(readed) + " lines readed out of " + str(numOfLines) + "\n")
def populate(self, file_name1, taxon_name):
    """Create and fill the GENOME_TO_TAXON and TAXON_NAMES tables.

    *file_name1* is read line by line; the first two whitespace-separated
    fields of each line are inserted as ``(GENOMES, TAXON)`` integers.

    *taxon_name* is read line by line; each line is split on tabs and the
    first three fields are inserted as ``(TAXON, NAME, RANK)``, with the
    trailing newline removed from the rank.

    Statements are executed through ``self.cursor`` and committed through
    ``self.converter`` every 10000 rows, plus once after each file so the
    final partial batch is not left uncommitted.

    Raises:
        IndexError: if a line has fewer fields than required.
        ValueError: if an identifier field is not an integer.
        Any database error raised by ``execute`` (for example when a table
        already exists or a primary key is duplicated) propagates as is.
    """
    # Create table...
    curs = self.cursor
    curs.execute("create table GENOME_TO_TAXON (GENOMES integer, TAXON integer, PRIMARY KEY (GENOMES))")
    self.converter.commit()

    # Check that the file is ok
    # NOTE(review): FileUtility lives outside this block -- confirm how
    # isValid signals an invalid file.
    FileUtility.isValid(file_name1)
    sys.stderr.write("counting lines to prepare converter\n")
    n_lines = FileUtility.countLines(file_name1)
    sys.stderr.write(str(n_lines) + " to read\n")

    # Populate the genome -> taxon table
    readed = 0
    with open(file_name1) as handle:
        for line in handle:
            fields = line.split()
            genome = int(fields[0])
            taxon = int(fields[1])
            curs.execute('insert into GENOME_TO_TAXON (GENOMES, TAXON) values (?, ?)', (genome, taxon))
            readed += 1
            if readed % 10000 == 0:
                self.converter.commit()
                sys.stderr.write(str(readed) + " lines readed out of " + str(n_lines) + "\n")
    # Flush the rows inserted since the last multiple of 10000.
    self.converter.commit()

    FileUtility.isValid(taxon_name)
    sys.stderr.write("counting lines to prepare converter\n")
    n_lines = FileUtility.countLines(taxon_name)
    sys.stderr.write(str(n_lines) + " to read\n")

    # Populate the taxon names table
    readed = 0
    curs.execute("create table TAXON_NAMES (TAXON integer, NAME VARCHAR(100), RANK VARCHAR(30), "
                 "PRIMARY KEY (TAXON))")
    with open(taxon_name) as handle:
        for line in handle:
            fields = line.split("\t")
            taxon = int(fields[0])
            name = fields[1]
            # Only the newline is stripped; other trailing whitespace is kept.
            rank = fields[2].rstrip("\n")
            curs.execute('insert into TAXON_NAMES (TAXON, NAME, RANK) values (?, ?, ?)', (taxon, name, rank))
            readed += 1
            if readed % 10000 == 0:
                self.converter.commit()
                sys.stderr.write(str(readed) + " lines readed out of " + str(n_lines) + "\n")
    # Flush the table creation and the final partial batch.
    self.converter.commit()
def prepareConverter(self, fileIn):
    """Fill ``self.Converter`` with the genome -> taxon pairs in *fileIn*.

    Every line is split on whitespace once; field 0 is the genome id and
    field 1 the taxon id, both parsed with ``int``. Each pair is recorded
    as ``self.Converter[genome] = taxon``, so a repeated genome id keeps
    the value from its last occurrence.

    A progress line is written to stderr after every 100000 lines.

    Raises:
        IndexError: if a line (including a blank one) has fewer than two
            whitespace-separated fields.
        ValueError: if either of the first two fields is not an integer.
    """
    # NOTE(review): FileUtility is not defined in this block; isValid is
    # assumed to reject unusable paths -- verify how it reports failure.
    FileUtility.isValid(fileIn)
    sys.stderr.write("counting lines to prepare converter\n")
    numOfLines = FileUtility.countLines(fileIn)
    sys.stderr.write(str(numOfLines) + " to read\n")

    readed = 0
    # Use a context manager so the handle is released on success and on error.
    with open(fileIn) as handle:
        for line in handle:
            fields = line.split()
            genome = int(fields[0])
            taxon = int(fields[1])
            self.Converter[genome] = taxon
            readed += 1
            if readed % 100000 == 0:
                sys.stderr.write(
                    str(readed) + " lines readed out of " + str(numOfLines) + "\n")
def read_tree_of_life(self, file_in):
    """Build the parent/kid links of the tree from *file_in*.

    Each line is split on whitespace; the first field is the parent id and
    the second the kid id (both parsed with ``int``). For every pair the
    missing ``Taxon`` nodes are created and registered with
    ``create_node``, then the kid is added to the parent and the parent is
    set on the kid.

    A line whose parent equals its kid is skipped with a warning on stderr
    that reports the 1-based line number in the file. Progress is written
    to stderr every 100000 lines read, skipped lines included.

    Raises:
        IndexError: if a line has fewer than two whitespace-separated
            fields (this includes blank lines).
        ValueError: if one of the first two fields is not an integer.
    """
    # NOTE(review): FileUtility and Taxon are defined outside this block.
    FileUtility.isValid(file_in)
    to_read = FileUtility.countLines(file_in)
    sys.stderr.write(str(to_read) + " lines to read to construct tree\n")

    # The context manager closes the file even if a malformed line raises.
    with open(file_in) as handle:
        # enumerate gives the real line number, so the warning below points
        # at the offending line even after earlier lines were skipped.
        for num_line, line in enumerate(handle, 1):
            fields = line.split()
            parent = int(fields[0])
            kid = int(fields[1])

            if kid == parent:
                sys.stderr.write("Warning: I can't create a link from %s to %s (line %s)\n"
                                 % (parent, kid, num_line))
            else:
                parent_known = self.node_exist(parent)
                kid_known = self.node_exist(kid)
                if not parent_known and not kid_known:
                    # Both ends are new: link the fresh objects first, then
                    # register them (kid before parent).
                    new_kid = Taxon(kid)
                    new_parent = Taxon(parent)
                    new_kid.set_parent(parent)
                    new_parent.add_kid(kid)
                    self.create_node(new_kid)
                    self.create_node(new_parent)
                else:
                    # At most one end is missing: register it, then link
                    # through the stored nodes.
                    if not parent_known:
                        self.create_node(Taxon(parent))
                    elif not kid_known:
                        self.create_node(Taxon(kid))
                    self.get_node(parent).add_kid(kid)
                    self.get_node(kid).set_parent(parent)

            if num_line % 100000 == 0:
                sys.stderr.write(str(num_line) + " lines read\n")

    sys.stderr.write("Tree base constructed\n")