print("# negative .fa sequences: %i" % (c_neg_fa))

# Check additional files: the optional --opt-pos / --opt-neg FASTA files
# are only accepted as a pair, so each one requires the other.
if args.opt_pos_fa:
    assert args.opt_neg_fa, "--opt-pos but no --opt-neg given"
if args.opt_neg_fa:
    assert args.opt_pos_fa, "--opt-neg but no --opt-pos given"

# Message reported when a FASTA file fails the sequence format check
# below (lowercase only sequences cause GP to crash).
error_mess = (
    "input sequences encountered containing "
    "only lowercase characters or lowercase characters in between "
    "uppercase characters. Please provide either all uppercase "
    "sequences or sequences containing uppercase regions surrounded "
    "by lowercase context regions for structure calculation (see "
    "viewpoint concept in original GraphProt publication "
    "for more details)")

# Files to validate, in order: the two mandatory sets first, then
# whichever optional sets were actually supplied.
fa_files_to_check = [args.in_pos_fa, args.in_neg_fa]
fa_files_to_check += [
    fa for fa in (args.opt_pos_fa, args.opt_neg_fa) if fa]

for fa_file in fa_files_to_check:
    seqs_dic = gplib.read_fasta_into_dic(fa_file)
    # presumably the IDs of sequences violating the expected case
    # layout; any truthy result aborts -- confirm against gplib.
    bad_ids = gplib.check_seqs_dic_format(seqs_dic)
    assert not bad_ids, "%s" % (error_mess)

# If parop .fa files given.
# Input sanity checks, all enforced with assert (first failure aborts):
#   1. the platform must be Linux and GraphProt.pl must be found in PATH;
#   2. the input .fa, .model and .params files must exist;
#   3. the input .fa must contain at least one FASTA header (the count is
#      printed);
#   4. the sequences are read into seqs_dic; when args.ws_pred is set they
#      are passed through gplib.check_seqs_dic_format, and the number of
#      uppercase nucleotides must be non-zero.
# NOTE(review): whether the uppercase-nucleotide count is also gated by
# args.ws_pred cannot be told from this line alone -- confirm.
assert "linux" in sys.platform, "please use Linux" # Check tool availability. assert gplib.is_tool("GraphProt.pl"), "GraphProt.pl not in PATH" # Check file inputs. assert os.path.exists( args.in_fa), 'input .fa file "%s" not found' % (args.in_fa) assert os.path.exists( args.in_model), 'input .model file "%s" not found' % (args.in_model) assert os.path.exists( args.in_params), 'input .params file "%s" not found' % (args.in_params) # Count .fa entries. c_in_fa = gplib.count_fasta_headers(args.in_fa) assert c_in_fa, 'input .fa file "%s" no headers found' % (args.in_fa) print("# input .fa sequences: %i" % (c_in_fa)) # Read in FASTA sequences to check for uppercase sequences. seqs_dic = gplib.read_fasta_into_dic(args.in_fa) # Check for lowercase only sequences, which cause GP to crash. error_mess = ( "input sequences encountered containing " "only lowercase characters or lowercase characters in between " "uppercase characters. Please provide either all uppercase " "sequences or sequences containing uppercase regions surrounded " "by lowercase context regions for structure calculation (see " "viewpoint concept in original GraphProt publication " "for more details)") if args.ws_pred: bad_ids = gplib.check_seqs_dic_format(seqs_dic) assert not bad_ids, "%s" % (error_mess) c_uc_nt = gplib.seqs_dic_count_uc_nts(seqs_dic) assert c_uc_nt, ("no uppercase nucleotides in input .fa sequences. "