# Output parameters parser.add_argument('-d', dest='database_filepath', required=True, help='Path and filename of database to writ reads to.') parser.add_argument('-f', dest='force_overwrite', action='store_true', default=False, help='Overwrite previous tables with the same name.') args = parser.parse_args() toc = time.time() if args.input == '-': args.input = sys.stdin # Connect/make database db = Reads_db(db_file=args.database_filepath, recbyname=True) if ('seqs' not in db.tables) or (args.force_overwrite == True): db.create_seqs_table(overwrite=args.force_overwrite, read_header=args.header_format) if ('samples' not in db.tables) or (args.force_overwrite == True): db.create_samples_table(overwrite=args.force_overwrite) db.load_seqs(data_files=args.input, barcode_files=args.barcodes, buffer_max=args.buffer_max, read_header=args.header_format) total_t = time.time() - toc print >> sys.stderr, 'Loaded processed reads file in {0}'.format( time.strftime('%H:%M:%S', time.gmtime(total_t))) if __name__ == '__main__':
# Cluster-size ceiling and sort-skipping options, followed by the actual load
# of the cluster file into an existing reads database.
# `parser`, `Reads_db`, `toc` and the remaining options (args.input,
# args.output, args.tableprefix, args.overwrite, args.fmin, args.buffer) are
# defined outside this fragment.
parser.add_argument(
    "--max",
    dest="fmax",
    default=0,
    type=int,
    help="Maximum size of clusters to load. Default = 0 no max limit",
)
parser.add_argument(
    "--skipsort",
    dest="skipsort",
    action="store_true",
    help="Skip sorting the cluster file. Useful if file has already been previously sorted.",
)

args = parser.parse_args()

# The database must already exist on disk; this script never creates one.
if not os.path.exists(args.output):
    raise Exception("Database file not found.")
db = Reads_db(args.output)

# Load cluster file
db.load_cluster_file(
    cluster_file_handle=args.input,
    table_prefix=args.tableprefix,
    overwrite=args.overwrite,
    fmin=args.fmin,
    fmax=args.fmax,
    skipsort=args.skipsort,
    buffer_max=args.buffer,
)

# Elapsed seconds since `toc` was recorded (assigned before this fragment).
total_t = time.time() - toc
help='''SQL expression to filter the query which selects the sequences in the database. Default is to export all sequences in database. Basic query is: SELECT seqid, seq, phred FROM seqs INNER JOIN samples ON seqs.sampleId=samples.sampleId WHERE <filter_expression> ''') parser.add_argument('-s', dest='startidx', default=0, help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.') parser.add_argument('-f', dest='format', default='fasta', help='Format of file written to output.') parser.add_argument('-b', dest='rowbuffer', default=100000, help='Read write buffer. Number of records to read before writing to file.') parser.add_argument('-F', dest='overwrite', default=False, help='Overwrite any file with same name as output.') args = parser.parse_args() # Write records to output db = Reads_db(args.input, recbyname=True) fastafile_handle = db.write_reads(args.output, output_format=args.format, filter_expression=args.filter_expression, startidx=args.startidx, rowbuffer=args.rowbuffer, overwrite=args.overwrite)
Basic query is: SELECT * FROM {clusters} WHERE <filter_expression> ''') parser.add_argument('-s', dest='startidx', default=0, help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.') parser.add_argument('-f', dest='format', default='fasta', help='Format of file written to output.') print sys.argv args = parser.parse_args() # Write records to output db = Reads_db(args.input, recbyname=True) clusters_list = db.get_cluster_by_size() fastafile_handle = db.write_reads(args.output, format=args.format, filter_expression=args.filter_expression, startidx=args.startidx) if args.query_expression: fastafile_handle = db.write_reads(args.pattern, args.output, use_type_column=args.typeflag, format='fasta')