def check_arguments(args):
    """Validate the search-related command-line arguments.

    Runs the shared input/output/CPU checks and the file checks for every
    ``*_path`` option, then rejects the name, molecule-type and taxid
    options when the input format is not ``'genbank'``. When
    ``args.comparison`` is None it is replaced by ``COMPARISON``.

    If any error was collected, each one is reported through
    ``print_stderr`` with the ``ERROR`` prefix and the program exits with
    status 1.

    Args:
        args: parsed arguments namespace; ``args.comparison`` may be
            modified in place.
    """
    errors = []
    # The check_* helpers are defined elsewhere; presumably each returns a
    # list of error messages (possibly empty).
    errors += check_inputs(args, stdin_allowed=True)
    errors += check_outputs(args, stdout_allowed=True)
    errors += check_cpu_count(args)
    errors += check_file_arg(args.seqids_path, none_allowed=True,
                             prefix='-S/--seqids_path')
    errors += check_file_arg(args.descriptions_path, none_allowed=True,
                             prefix='-D/--descriptions_path')
    errors += check_file_arg(args.names_path, none_allowed=True,
                             prefix='-N/--names_path')
    errors += check_file_arg(args.mol_types_path, none_allowed=True,
                             prefix='-M/--mol_types_path')
    errors += check_file_arg(args.taxids_path, none_allowed=True,
                             prefix='-T/--taxids_path')
    if args.input_format != 'genbank':
        values = [args.names, args.mol_types, args.taxids]
        paths = [args.names_path, args.mol_types_path, args.taxids_path]
        prefixes = ['-n/-N', '-m/-M', '-t/-T']
        for value, path, prefix in zip(values, paths, prefixes):
            if value or path is not None:
                # append(), not +=: adding a str to a list extends it one
                # character at a time, which would print the message as a
                # series of single-character errors.
                errors.append('{}: These options require input file(s) '
                              'in GenBank format.'.format(prefix))
    if args.comparison is None:
        args.comparison = COMPARISON
    if errors:
        for error in errors:
            print_stderr(error, prefix='ERROR')
        sys.exit(1)
def check_arguments(args):
    """Template argument validator.

    Collects error messages in a list, reports each one through
    ``print_stderr`` with the ``ERROR`` prefix and exits with status 1.
    No verification is implemented yet, so the list stays empty and the
    function simply returns.
    """
    errors = []
    # Insert argument verifications here
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def print_missing_seqids(seqids_found):
    """Report the identifiers whose entry in *seqids_found* is falsy.

    Args:
        seqids_found: mapping from identifier to a truthy/falsy "found"
            flag. Identifiers are joined with ``', '``, so they are
            expected to be strings.

    Nothing is printed when every identifier has a truthy value.
    """
    missing = [seqid for seqid in seqids_found if not seqids_found[seqid]]
    if not missing:
        return
    summary = '{:d} identifier(s) not found: {}'.format(
        len(missing), ', '.join(missing))
    print_stderr(summary)
def import_annotations(annot_paths, types, levels, obsolete, main_ids,
                       add_ancestors, ancestors,
                       min_level=MIN_LEVEL, max_level=MAX_LEVEL):
    """Load GO and EC annotations from tab-separated annotation files.

    Each row is read as ``identifier<TAB>annotation``; only the first two
    fields are used. ``EC:`` annotations are stored as-is. ``GO:``
    annotations are filtered (unknown, obsolete, or outside the level
    range are dropped), replaced by their entry in *main_ids* when one
    exists, and dispatched by namespace.

    Args:
        annot_paths: iterable of annotation file paths.
        types: mapping GO id -> namespace (``'biological_process'``,
            ``'molecular_function'`` or ``'cellular_component'``).
        levels: mapping GO id -> level, compared to *min_level* and
            *max_level*.
        obsolete: mapping GO id -> truthy when the term is obsolete.
        main_ids: mapping GO id -> replacement id; ids without an entry
            are kept unchanged.
        add_ancestors: when truthy, per-namespace ancestor dicts are
            created and passed to ``add_annotation``.
        ancestors: passed through to ``add_annotation``.
        min_level: annotations with a lower level are skipped.
        max_level: annotations with a higher level are skipped; None
            disables the upper bound.

    Returns:
        Tuple ``(bp, mf, cc, ec, bp_anc, mf_anc, cc_anc)``. The first four
        are dicts filled by ``add_annotation`` / ``updict_add_to_set``;
        the last three are dicts when *add_ancestors* is truthy, else None.
    """
    bp, mf, cc, ec = {}, {}, {}, {}
    bp_anc, mf_anc, cc_anc = None, None, None
    if add_ancestors:
        bp_anc, mf_anc, cc_anc = {}, {}, {}
    for annot_path in annot_paths:
        with open_file(annot_path) as annot_file:
            for row in csv.reader(annot_file, dialect='excel-tab'):
                if not row:
                    # csv.reader yields an empty list for a blank line
                    # (e.g. a trailing newline); nothing to import.
                    continue
                if len(row) < 2:
                    # Report and skip rows without an annotation column
                    # instead of crashing with IndexError.
                    print_stderr(
                        'ERROR: Malformed annotation row: {}.'.format(
                            '\t'.join(row)))
                    continue
                seqid = row[0]
                go_id = row[1]
                if not go_id.startswith(('GO:', 'EC:')):
                    print_stderr(
                        'ERROR: Unknown annotation type: {}.'.format(go_id))
                    continue
                if go_id.startswith('EC:'):
                    updict_add_to_set(ec, seqid, go_id)
                    continue
                try:
                    go_type = types[go_id]
                except KeyError:
                    print_stderr(
                        'ERROR: Annotation not found: {}.'.format(go_id))
                    continue
                if obsolete[go_id]:
                    continue
                # Level filtering uses the id as written in the file,
                # before it is swapped for its main id below.
                if levels[go_id] < min_level:
                    continue
                if max_level is not None and levels[go_id] > max_level:
                    continue
                try:
                    go_id = main_ids[go_id]
                except KeyError:
                    pass
                if go_type == 'biological_process':
                    add_annotation(bp, seqid, go_id, bp_anc, ancestors)
                elif go_type == 'molecular_function':
                    add_annotation(mf, seqid, go_id, mf_anc, ancestors)
                elif go_type == 'cellular_component':
                    add_annotation(cc, seqid, go_id, cc_anc, ancestors)
    return bp, mf, cc, ec, bp_anc, mf_anc, cc_anc
def check_arguments(args):
    """Validate the input and output path arguments.

    Both paths are checked with ``check_file_arg`` (standard streams
    allowed; the output path is checked in ``'w'`` mode). Any collected
    error is reported through ``print_stderr`` with the ``ERROR`` prefix,
    after which the program exits with status 1.
    """
    errors = []
    errors.extend(check_file_arg(args.input_path, stdio_allowed=True))
    errors.extend(check_file_arg(
        args.output_path, mode='w', stdio_allowed=True,
        prefix='-o/--output_path'))
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def export_removed_list(bad_records, removed_path=REMOVED_PATH):
    """Report the bad records and optionally write them to a file.

    Args:
        bad_records: collection of record identifiers (strings). When it
            is empty, only a "No bad record found." notice is printed.
        removed_path: file that receives the sorted identifiers, one per
            line and without a trailing newline; None disables the export.
            The file is only written when there is at least one bad record.
    """
    if not bad_records:
        print_stderr('No bad record found.', time=False)
        return
    count = len(bad_records)
    listing = '\n'.join(sorted(bad_records))
    if count == 1:
        report = '1 bad record found: {}'.format(listing)
    else:
        report = '{:d} bad records found:\n{}'.format(count, listing)
    print_stderr(report, time=False)
    if removed_path is None:
        return
    with open(removed_path, 'w') as removed_file:
        removed_file.write(listing)
def check_arguments(args):
    """Validate the inputs and the optional count output paths.

    Runs ``check_sequtils_inputs`` (stdin allowed) and checks the optional
    ``seq_counts_path`` and ``base_counts_path`` as writable files. Any
    collected error is reported through ``print_stderr`` with the
    ``ERROR`` prefix, after which the program exits with status 1.
    """
    errors = []
    errors.extend(check_sequtils_inputs(args, stdin_allowed=True))
    errors.extend(check_file_arg(
        args.seq_counts_path, mode='w', none_allowed=True,
        prefix='-S/--seq_counts_path'))
    errors.extend(check_file_arg(
        args.base_counts_path, mode='w', none_allowed=True,
        prefix='-C/--base_counts_path'))
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def check_arguments(args):
    """Validate inputs, outputs, CPU count and the removed-records path.

    After the checks, ``args.output_paths`` is reset to None when
    ``args.no_output`` is set (the outputs are still checked beforehand).
    Any collected error is reported through ``print_stderr`` with the
    ``ERROR`` prefix, after which the program exits with status 1.
    """
    errors = []
    errors.extend(check_inputs(args))
    errors.extend(check_outputs(args, stdout_allowed=True))
    errors.extend(check_cpu_count(args))
    errors.extend(check_file_arg(
        args.removed_path, mode='w', none_allowed=True,
        prefix='-r/--removed'))
    if args.no_output:
        args.output_paths = None
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def check_arguments(args):
    """Validate the annotation, tree, seqids and output path arguments.

    Every annotation path is checked (standard streams allowed), followed
    by the tree path, the optional seqids path and the output path in
    ``'w'`` mode. Any collected error is reported through ``print_stderr``
    with the ``ERROR`` prefix, after which the program exits with status 1.
    """
    errors = [
        message
        for annot_path in args.annot_paths
        for message in check_file_arg(annot_path, stdio_allowed=True)
    ]
    errors.extend(check_file_arg(args.tree_path, prefix='-t/--tree_path'))
    errors.extend(check_file_arg(
        args.seqids_path, none_allowed=True, prefix='-s/--seqids_path'))
    errors.extend(check_file_arg(
        args.output_path, mode='w', stdio_allowed=True,
        prefix='-o/--output_path'))
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def check_arguments(args):
    """Validate the inputs and settle the index output path.

    When ``args.output_path`` is None it defaults to ``'index.idx'`` for
    several input files, or to the base name of the single input file
    followed by ``'.idx'``. The resulting path is then checked as a
    writable file. Any collected error is reported through
    ``print_stderr`` with the ``ERROR`` prefix, after which the program
    exits with status 1.
    """
    errors = []
    errors.extend(check_inputs(args, zip_ext=BGZIP_EXT))
    if args.output_path is None:
        several_inputs = len(args.input_paths) > 1
        args.output_path = (
            'index.idx' if several_inputs
            else os.path.basename(args.input_paths[0]) + '.idx')
    errors.extend(check_file_arg(
        args.output_path, mode='w', prefix='-o/--output_path'))
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def get_search_settings(args):
    """Build the search criteria and comparison settings from *args*.

    Each criterion is obtained with ``get_values`` from its inline option
    and its ``*_path`` option. When every criterion is empty, an error is
    printed and the program exits with status 1.

    Args:
        args: parsed arguments namespace providing the criteria options,
            ``inverse`` and ``comparison``.

    Returns:
        Tuple ``(seqids, descriptions, names, mol_types, taxids,
        comparison_func, simple_case)``, where ``comparison_func`` comes
        from ``set_comparison_func`` and ``simple_case`` from
        ``check_if_simple_case``.
    """
    seqids = get_values(args.seqids, args.seqids_path)
    descriptions = get_values(args.descriptions, args.descriptions_path)
    names = get_values(args.names, args.names_path)
    mol_types = get_values(args.mol_types, args.mol_types_path)
    taxids = get_values(args.taxids, args.taxids_path)
    if not (seqids or descriptions or names or mol_types or taxids):
        print_stderr('ERROR: No search criterion specified.')
        # This is an error path: exit with a non-zero status so callers
        # and shell pipelines can detect the failure (was sys.exit(0)).
        sys.exit(1)
    simple_case = check_if_simple_case(
        seqids, descriptions, names, mol_types, taxids,
        args.inverse, args.comparison)
    comparison_func = set_comparison_func(args.comparison)
    return (seqids, descriptions, names, mol_types, taxids,
            comparison_func, simple_case)
def extract_records_by_seqid(seqids, input_paths=INPUT_PATHS,
                             input_format=SEQFILE_FORMAT,
                             index_path=INDEX_PATH, output_path=OUTPUT_PATH,
                             output_format=SEQFILE_FORMAT):
    """Write the indexed records matching *seqids* to the output file.

    The records list is built with ``make_indexed_records_list``. Each
    identifier is looked up with ``get_indexed_record``; a ``KeyError``
    marks it as not found, otherwise the record is written with
    ``write_records``. Identifiers that were not found are reported
    through ``print_stderr`` once all of them have been processed.
    """
    indexed_records = make_indexed_records_list(
        input_paths, index_path, input_format)
    with open_file(output_path, 'w') as output_file:
        missing = []
        for seqid in seqids:
            try:
                found = get_indexed_record(seqid, indexed_records)
            except KeyError:
                missing.append(seqid)
                continue
            # Kept outside the try so a KeyError raised while writing is
            # not mistaken for a missing identifier.
            write_records(found, output_file, output_format)
        if missing:
            summary = '{:d} identifier(s) not found: {}'.format(
                len(missing), ', '.join(missing))
            print_stderr(summary)
def check_arguments(args):
    """Validate outputs, CPU count and the sequence length/count options.

    ``args.max_length`` defaults to ``args.min_length`` when None;
    otherwise it is checked to be an int of at least ``args.min_length``.
    ``args.seq_type`` defaults to ``SEQ_TYPE`` when None. Any collected
    error is reported through ``print_stderr`` with the ``ERROR`` prefix,
    after which the program exits with status 1.
    """
    errors = []
    errors.extend(check_outputs(args, stdout_allowed=True))
    errors.extend(check_cpu_count(args))
    errors.extend(check_num_arg(
        args.min_length, number_type=int, mini=1,
        prefix='-l/--min_length'))
    if args.max_length is not None:
        errors.extend(check_num_arg(
            args.max_length, number_type=int, mini=args.min_length,
            prefix='-L/--max_length'))
    else:
        args.max_length = args.min_length
    errors.extend(check_num_arg(
        args.nb_sequences, number_type=int, mini=1,
        prefix='-n/--nb_sequences'))
    if args.seq_type is None:
        args.seq_type = SEQ_TYPE
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def check_arguments(args):
    """Validate inputs, outputs and the length/count/proportion filters.

    The numeric options are checked in a fixed order with
    ``check_num_arg``; each maximum uses the matching minimum as its lower
    bound. ``args.counted_char``, when given, must be exactly one
    character. Any collected error is reported through ``print_stderr``
    with the ``ERROR`` prefix, after which the program exits with status 1.
    """
    errors = []
    errors.extend(check_sequtils_inputs(args, stdin_allowed=True))
    errors.extend(check_sequtils_outputs(args, stdout_allowed=True))
    # (value, keyword arguments) pairs, checked in this order so that the
    # error messages come out in the same sequence as the options.
    numeric_checks = (
        (args.min_length,
         dict(number_type=int, mini=1, prefix='-l/--min_length')),
        (args.max_length,
         dict(number_type=int, mini=args.min_length, none_allowed=True,
              prefix='-L/--max_length')),
        (args.min_count,
         dict(number_type=int, mini=0, prefix='-n/--min_count')),
        (args.max_count,
         dict(number_type=int, mini=args.min_count, none_allowed=True,
              prefix='-N/--max_count')),
        (args.min_prop,
         dict(number_type=float, mini=0.0, maxi=100.0,
              prefix='-p/--min_prop')),
        (args.max_prop,
         dict(number_type=float, mini=args.min_prop, maxi=100.0,
              prefix='-P/--max_prop')),
    )
    for value, options in numeric_checks:
        errors.extend(check_num_arg(value, **options))
    counted = args.counted_char
    if counted is not None and len(counted) != 1:
        errors.append('-c/--counted_char: Must be one character.')
    if not errors:
        return
    for message in errors:
        print_stderr(message, prefix='ERROR')
    sys.exit(1)
def check_arguments(args):
    """Validate the paths and thresholds, then fill in option defaults.

    Checks the annotation, sample, tree, reference and info files, the
    output directory, the lualatex binary and the numeric thresholds, plus
    two cross-option constraints (level range, number of sample names).
    Any collected error is reported through ``print_stderr`` with the
    ``ERROR`` prefix, after which the program exits with status 1.

    When validation succeeds, defaults are applied in place:
    ``output_types`` falls back to ``OUTPUT_TYPES``; if none of
    process/function/component is selected, all three are enabled; if none
    of up_reg/down_reg/not_reg is selected, up_reg and down_reg are
    enabled and not_reg is disabled.
    """
    errors = []
    for annot_path in args.annot_paths:
        errors.extend(check_file_arg(annot_path, stdio_allowed=True))
    for sample_path in args.sample_paths:
        errors.extend(check_file_arg(sample_path, prefix='-s/--sample_path'))
    errors.extend(check_file_arg(args.tree_path, prefix='-t/--tree_path'))
    errors.extend(check_file_arg(args.ref_path, prefix='-r/--ref_path'))
    errors.extend(check_file_arg(
        args.info_path, none_allowed=True, prefix='-i/--info_path'))
    errors.extend(check_dir_arg(
        args.output_dir_path, mode='w', create=True,
        prefix='-o/--output_dir_path'))
    errors.extend(check_bin_arg(
        args.lualatex_path, prefix='--lualatex_path'))
    errors.extend(check_num_arg(
        args.min_level, number_type=int, mini=1, prefix='-l/--min_level'))
    errors.extend(check_num_arg(
        args.max_level, number_type=int, mini=1, none_allowed=True,
        prefix='-L/--max_level'))
    errors.extend(check_num_arg(
        args.min_fold_change, number_type=float, mini=1.0,
        prefix='-f/--min_fold_change'))
    errors.extend(check_num_arg(
        args.max_pvalue, number_type=float, mini=0.0, maxi=1.0,
        prefix='-p/--max_pvalue'))
    errors.extend(check_num_arg(
        args.min_seq_count, number_type=int, mini=0,
        prefix='-n/--min_seq_count'))
    errors.extend(check_num_arg(
        args.min_seq_prop, number_type=float, mini=0.0, maxi=100.0,
        prefix='-m/--min_seq_prop'))

    # Cross-option constraints.
    if args.max_level is not None and args.min_level > args.max_level:
        errors.append('-l/-L: Max level must be greater than min level.')
    if args.sample_names is not None:
        if len(args.sample_names) != len(args.sample_paths):
            errors.append(
                '-s/-S: The number of sample names does not match '
                'the number of sample paths.')

    if errors:
        for message in errors:
            print_stderr(message, prefix='ERROR')
        sys.exit(1)

    # Defaults are only applied once validation has succeeded.
    if args.output_types is None:
        args.output_types = OUTPUT_TYPES
    if not any((args.process, args.function, args.component)):
        args.process = True
        args.function = True
        args.component = True
    if not any((args.up_reg, args.down_reg, args.not_reg)):
        args.up_reg = True
        args.down_reg = True
        args.not_reg = False