Exemplo n.º 1
0
def parse_arguments(debug=False):
    """Parse and validate command-line arguments for the FASTQ merger.

    Arguments are read from ``sys.argv`` through ``argparse``. After
    parsing, ``args.fastqs`` is normalized into a matrix (a list of rows):

    * if the first positional value ends with ``.gz``, ``.fastq`` or
      ``.fq``, every positional value becomes its own one-element row;
    * otherwise the first positional value is treated as a TSV path and is
      passed to ``read_tsv`` (any further positional values are ignored).

    As a side effect the module-level ``log`` gets its level set from
    ``--log-level`` and ``sys.argv`` is logged at INFO level.

    Args:
        debug: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The ``argparse.Namespace`` produced by the parser, with ``fastqs``
        replaced by the matrix described above.

    Raises:
        argparse.ArgumentTypeError: If any row does not hold exactly two
            FASTQs in paired-end mode, or exactly one FASTQ otherwise.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC fastq merger.',
                                     description='')
    parser.add_argument('fastqs',
                        nargs='+',
                        type=str,
                        help='TSV file path or list of FASTQs. '
                        'FASTQs must be compressed with gzip (with .gz). '
                        'Use TSV for multiple fastqs to be merged later '
                        '(row=merge_id, col=end_id).')
    parser.add_argument('--paired-end',
                        action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        # Each level listed once (the original list repeated
                        # 'CRITICAL'); the accepted values are unchanged.
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # Decide between "list of FASTQs" and "TSV path" by looking at the
    # suffix of the first positional value only.
    if args.fastqs[0].endswith(('.gz', '.fastq', '.fq')):
        # One FASTQ per row, so the result is always a matrix.
        args.fastqs = [[fastq] for fastq in args.fastqs]
    else:
        # presumably read_tsv returns a list of rows (list of lists);
        # verify against the helper's definition.
        args.fastqs = read_tsv(args.fastqs[0])

    # Every row must have the width that matches the sequencing mode.
    for fastqs in args.fastqs:
        if args.paired_end and len(fastqs) != 2:
            raise argparse.ArgumentTypeError(
                'Need 2 fastqs per replicate for paired end.')
        if not args.paired_end and len(fastqs) != 1:
            raise argparse.ArgumentTypeError(
                'Need 1 fastq per replicate for single end.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args
Exemplo n.º 2
0
def parse_arguments(debug=False):
    """Parse and validate command-line arguments for the adapter trimmer.

    Arguments are read from ``sys.argv`` through ``argparse``. After
    parsing, both ``args.fastqs`` and ``args.adapters`` are matrices (lists
    of rows) with identical dimensions:

    * ``fastqs``: if the first positional value ends with ``.gz``,
      ``.fastq`` or ``.fq``, every positional value becomes its own
      one-element row; otherwise the first value is treated as a TSV path
      and is passed to ``read_tsv``.
    * ``adapters``: if ``--adapters`` is given and its first value is an
      existing path, that path is passed to ``read_tsv``; otherwise every
      value becomes its own one-element row.
    * If ``--adapter`` is given, or ``--adapters`` is missing, the adapter
      matrix is rebuilt in the shape of ``fastqs`` and filled with the
      ``--adapter`` value (or ``''`` when no adapter was given at all).

    As a side effect the module-level ``log`` gets its level set from
    ``--log-level`` and ``sys.argv`` is logged at INFO level.

    Args:
        debug: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The ``argparse.Namespace`` produced by the parser, with ``fastqs``
        and ``adapters`` replaced by the matrices described above.

    Raises:
        argparse.ArgumentTypeError: If the FASTQ and adapter matrices differ
            in row count or row width, or if any FASTQ row does not hold
            exactly two entries in paired-end mode / one entry otherwise.
    """
    parser = argparse.ArgumentParser(prog='ENCODE DCC adapter trimmer.',
                                     description='')
    parser.add_argument('fastqs',
                        nargs='+',
                        type=str,
                        help='TSV file path or list of FASTQs. '
                        'FASTQs must be compressed with gzip (with .gz). '
                        'Use TSV for multiple fastqs to be merged later '
                        '(row=merge_id, col=end_id).')
    parser.add_argument('--auto-detect-adapter',
                        action='store_true',
                        help='Automatically detect/trim adapters '
                        '(supported system: Illumina, Nextera and smallRNA).')
    parser.add_argument(
        '--cutadapt-param',
        type=str,
        default='-e 0.1 -m 5',
        help='Parameters for cutadapt '
        '(default: -e 0.1 -m 5; err_rate=0.1, min_trim_len=5).')
    parser.add_argument(
        '--adapter',
        type=str,
        help='One adapter to use for all fastqs. '
        'This will override individual adapters defined in --adapters.')
    parser.add_argument('--adapters',
                        nargs='+',
                        type=str,
                        help='TSV file path or list of adapter strings. '
                        'Use TSV for multiple fastqs to be merged later '
                        '(row=merge_id, col=end_id).')
    parser.add_argument('--paired-end',
                        action="store_true",
                        help='Paired-end FASTQs.')
    parser.add_argument('--nth',
                        type=int,
                        default=1,
                        help='Number of threads to parallelize.')
    parser.add_argument('--out-dir',
                        default='',
                        type=str,
                        help='Output directory.')
    parser.add_argument('--log-level',
                        default='INFO',
                        # Each level listed once (the original list repeated
                        # 'CRITICAL'); the accepted values are unchanged.
                        choices=[
                            'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR',
                            'CRITICAL'
                        ],
                        help='Log level')
    args = parser.parse_args()

    # Decide between "list of FASTQs" and "TSV path" by looking at the
    # suffix of the first positional value only.
    if args.fastqs[0].endswith(('.gz', '.fastq', '.fq')):
        # One FASTQ per row, so the result is always a matrix.
        args.fastqs = [[fastq] for fastq in args.fastqs]
    else:
        # presumably read_tsv returns a list of rows (list of lists);
        # verify against the helper's definition.
        args.fastqs = read_tsv(args.fastqs[0])

    # --adapters is a TSV when its first value names an existing path,
    # otherwise it is a plain list of adapter sequences.
    if args.adapters:
        if os.path.exists(args.adapters[0]):
            args.adapters = read_tsv(args.adapters[0])
        else:
            args.adapters = [[adapter] for adapter in args.adapters]

    # A single --adapter overrides everything; with no adapters at all the
    # matrix is filled with empty strings. Either way it mirrors the shape
    # of the FASTQ matrix.
    if args.adapter or not args.adapters:
        fill = args.adapter if args.adapter else ''
        args.adapters = [[fill for _ in fastqs] for fastqs in args.fastqs]

    # Check that fastqs and adapters have the same, mode-correct dimensions.
    if len(args.adapters) != len(args.fastqs):
        raise argparse.ArgumentTypeError(
            'fastqs and adapters dimension mismatch.')
    # Row counts are equal here, so zip pairs every FASTQ row with its
    # adapter row.
    for fastqs, adapters in zip(args.fastqs, args.adapters):
        if args.paired_end and len(fastqs) != 2:
            raise argparse.ArgumentTypeError(
                'Need 2 fastqs per replicate for paired end.')
        if not args.paired_end and len(fastqs) != 1:
            raise argparse.ArgumentTypeError(
                'Need 1 fastq per replicate for single end.')
        if len(fastqs) != len(adapters):
            raise argparse.ArgumentTypeError(
                'fastqs and adapters dimension mismatch.')

    log.setLevel(args.log_level)
    log.info(sys.argv)
    return args