###############################################
# Start from the file names exactly as given on the command line and,
# when a prefix directory was supplied, prepend it to both of them.
raw_fetch_fname, gb_fname = args.raw_fetch, args.genbank
if args.prefix is not None:
    raw_fetch_fname = os.path.join(args.prefix, raw_fetch_fname)
    gb_fname = os.path.join(args.prefix, gb_fname)
# Directory shared by both inputs, kept for later use.
# commonprefix() compares character by character, so its result may end in
# the middle of a path component; dirname() trims it back to a directory.
common_path = os.path.dirname(os.path.commonprefix([raw_fetch_fname, gb_fname]))
#
# # don't forget to provide your email
# Entrez.email = args.email if args.email else "your_email@mail_server.com"
# Threshold taken from the command-line arguments.
crit_threshold = args.threshold
# Read the GenBank file through the project helper and, in the same step,
# store the result on the `ms` module as `ms.gbrecs`
# (presumably so that functions inside `ms` can use it -- TODO confirm).
gbrecs = ms.gbrecs = ms.genebank_fix_n_read(gb_fname)


############################
# Load the table that associates GeneName (and/or locus) with FetchID.
# The parenthesised form prints the same text as the bare print statement.
print("Reading %s with the updated spectrum that includes fetchid column ..." % raw_fetch_fname)
raw_fetch = pd.read_csv(raw_fetch_fname)


# here is the NEW plan!:
# first, we try to assign a single protein to each peptide
# we collect peptide-protein pairs that failed to match, declare them BAD and send them to manual processing ...
#####################################################################################################
from StringIO import StringIO
import warnings
from Bio import BiopythonWarning, BiopythonParserWarning
import subprocess as sub



# Directory holding the pulled files.
# NOTE: this is a relative path, so it is resolved against the current
# working directory of the process, not against this script's location.
dest = "../PULLED_PROTEINS_TOTAL"

# Collect the paths of all pulled files in the destination directory.
# os.listdir() replaces the former `ls` subprocess call:
#   * an empty directory now yields an empty list (splitting the empty `ls`
#     output produced [''] and therefore one bogus path, the directory itself);
#   * file names containing newlines are no longer split into pieces;
#   * no external binary is required.
# Hidden entries are skipped and names are sorted to mirror what plain `ls`
# lists (sorting here is by code point, while `ls` sorts by locale collation).
# A missing directory now raises OSError instead of CalledProcessError.
pulled_files = [os.path.join(dest, fname)
                for fname in sorted(os.listdir(dest))
                if not fname.startswith('.')]

# # # Reading genbank mindfully next ...
# Read every pulled file twice through the project helper: one complete pass
# with the 'id' option, then one complete pass with the 'gi' option
# (presumably the option selects which identifier keys the records -- TODO confirm).
# Both lists follow the order of `pulled_files`, so entries at the same
# index come from the same file.
gbrecs_id = []
for fn in pulled_files:
    gbrecs_id.append(ms.genebank_fix_n_read(fn, 'id'))
gbrecs_gi = []
for fn in pulled_files:
    gbrecs_gi.append(ms.genebank_fix_n_read(fn, 'gi'))
# # # ######################################
# # # # assign some module internal stuff ...
# # # ms.gbrecs = gbrecs


# perform some simple and stupid tests ...
# Gather the keys of every per-file record collection as numpy arrays,
# first for the 'id' reads and then for the 'gi' reads.
id_keys = []
for gg in gbrecs_id:
    id_keys.append(np.asarray(gg.keys()))
gi_keys = []
for gg in gbrecs_gi:
    gi_keys.append(np.asarray(gg.keys()))
#####################################################################################
# A group has no duplicate keys when dropping duplicates leaves its size
# unchanged; answ1 is True only if that holds for every 'id' group.
print("Proteins in each of id groups are unique, True or False:")
answ1 = np.all([f.size == np.unique(f).size for f in id_keys])
print(answ1)
#####################################################################################
print("Proteins in each of gi groups are unique, True or False:")