# Map each sequence name to whatever get_seqs() returns for it.
novelseq = {sequence: get_seqs(sequence) for sequence in seqnames}

# Get homolog data for each sequence
# For every key in `novelseq`, run get_BLAST() once per entry of `taxalist`
# and collect the per-taxon results in a fresh `taxainfo` dict.
# NOTE(review): this fragment is incomplete in the visible source -- the outer
# `try:` below has no visible `except`/`finally`, and the final `try:` has no
# body. Confirm against the full file before editing the logic.
for record in novelseq:
        # Despite the name, `qseqid` holds the record's `.seq` attribute (the
        # query passed to get_BLAST), not an identifier.
        qseqid =novelseq[record].seq
        # Reset for each record; nothing is appended to it in the visible lines.
        taxarecords = []
        print('Getting homolog data for qseqid ' + record)
        for taxa in taxalist:
            # Fields gathered for this (record, taxon) pair.
            taxainfo = {}
            taxainfo['novel'] = record
            taxainfo['taxa'] = taxa
            taxainfo['taxid'] = ncbi.get_taxid(taxa)
            taxainfo['blast_record'] = get_BLAST(taxainfo['taxid'], qseqid)
            try:
                # First 50 characters of the subject string from the first HSP
                # of the first alignment; raises if either list is empty.
                taxainfo['sbjctseq'] = taxainfo['blast_record'].alignments[0].hsps[0].sbjct[:50]
                print(taxainfo['sbjctseq'])
                try:
                    # The second '|'-separated field of the alignment title is
                    # parsed as an integer and stored as the GI.
                    taxainfo['GI'] = int(taxainfo['blast_record'].alignments[0].title.split('|')[1])
                    print("GI is: " + str(taxainfo['GI']))
                # NOTE(review): bare except also traps KeyboardInterrupt and
                # SystemExit. The handler repeats split('|')[1], so a title
                # without '|' raises again here and escapes to the outer try.
                except:
                    print("GI not valid for " + taxa + " : " + taxainfo['blast_record'].alignments[0].title.split('|')[1])
                try:
                    # If the GI parse above failed, taxainfo['GI'] is unset and
                    # this lookup raises KeyError, which the bare except below
                    # reports as a failed definition lookup.
                    taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition']
                    print('GBSeq_definition is ' + taxainfo['seqdef'])
                # NOTE(review): bare except hides the real cause of the failure.
                except:
                    print("Unable to get GBSeq_definition")
                # NOTE(review): the body of this try is missing from the visible source.
                try:
# Parse the novel-transcript FASTA file into a dict of records built by
# SeqIO.to_dict().
# Plain "r" replaces the old "rU" mode: "U" is rejected with ValueError on
# Python 3.11+, and text mode already applies universal newlines. The `with`
# block closes the file even when parsing raises.
with open(noveltrans, "r") as handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

# Both novel records get the same "<genus>_<species>" label as id and name;
# the original record key is kept in the description.
sid = genus + "_" + species
name = sid
for novel_key in (novelseq1, novelseq2):
    record_dict[novel_key].name = name
    record_dict[novel_key].id = sid
    record_dict[novel_key].description = novel_key

# Look up the best BLAST hit for the novel sequence in each of the first four
# taxa of `alltaxa`.
# NOTE(review): `homologs` starts as an empty tuple and is not modified in the
# visible lines.
homologs = ()
for ataxa in list(alltaxa)[:4]:
    print("lookup " + ataxa)
    taxid = ncbi.get_taxid(ataxa)
    print(ataxa + " tax id is " + str(taxid))
    # NOTE(review): the query named qseq1 is built from novelseq2 -- confirm
    # this is the intended record.
    qseq1 = str(record_dict[novelseq2].seq)
    blast_record = get_BLAST(taxid, qseq1)
    try:
        # First 50 characters of the subject string from the top HSP of the
        # top alignment, plus the second '|'-separated field of its title.
        sbjctseq1 = blast_record.alignments[0].hsps[0].sbjct[:50]
        sgi1 = blast_record.alignments[0].title.split('|')[1]
        print("1", sbjctseq1)
        seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition']
        lineage1 = ncbi.get_lineage(taxid)
        sequence1 = ncbi.get_gene_seq(sgi1)
    # `except Exception` instead of a bare `except`, so that Ctrl-C and
    # SystemExit during the slow remote lookups stop the script instead of
    # being reported as "No match".
    except Exception:
        print("No match")
        # Only sgi1 is reset; sbjctseq1, seqdef1, lineage1 and sequence1 keep
        # whatever value (if any) an earlier iteration left behind.
        sgi1 = 0