def run(self):
    """Rename the input file, move it into ``final_dir`` and optionally store it.

    Parameters read through the runnable's param API:
        filename, type   -- used to build the File objects (required).
        layout_dict      -- mapping from layout key to name component (required).
        newlayout        -- comma-separated keys of ``layout_dict``; the selected
                            values are joined with "." to form the new name (required).
        extension        -- passed to ``File.rename`` (required).
        final_dir        -- directory the file is moved into (required).
        compression      -- optional; passed to ``File.rename`` when truthy.
        add_date         -- optional; any truthy value enables ``add_date``.
        store            -- optional; when defined, the moved file is stored in
                            the database described by hostname/username/db/port/pwd.

    Sets the ``stored_file`` param to the path of the moved file.
    """
    self.warning('Storing file: %s' % self.param_required('filename'))

    # Validate the destination up front: a missing 'final_dir' used to surface
    # only after the rename, as a TypeError from None + "/".
    final_dir = self.param_required('final_dir')

    # First, rename the file
    fileO = File(path=self.param_required('filename'),
                 type=self.param_required('type'))

    layout_dict = self.param_required('layout_dict')
    newlayout = self.param_required('newlayout').split(",")
    newname = ".".join([layout_dict[i] for i in newlayout])

    # Falsy values (undefined, empty string, 0) mean "no compression" / "no date".
    compression = self.param('compression') or None
    add_date = bool(self.param('add_date'))

    fileO.rename(newname=newname,
                 extension=self.param_required('extension'),
                 add_date=add_date,
                 compression=compression)

    # NOTE(review): new_f is built from the original 'filename' param after
    # fileO.rename() has run -- confirm rename() leaves that path valid.
    new_f = File(path=self.param_required('filename'),
                 type=self.param_required('type'))
    new_f.move(final_dir + "/" + fileO.name)

    if self.param_is_defined('store'):
        hostname = self.param_required('hostname')
        username = self.param_required('username')
        db = self.param_required('db')
        port = self.param_required('port')
        pwd = self.param_required('pwd')

        reseqdb = ReseqTrackDB(host=hostname,
                               user=username,
                               port=port,
                               pwd=pwd,
                               db=db)
        new_f.store(reseqdb, do_md5=True, dry=False)

    # Record the final path as the 'stored_file' param.
    self.param('stored_file', new_f.path)
def fetch_input(self):
    """Resolve the ``filename`` param to a file object and save it as ``file_object``.

    When both ``hostname`` and ``username`` are defined, the file is fetched
    from a ReseqTrackDB built from the hostname/username/db/port/pwd params.
    Otherwise a File with type 'PHASED_VCF' is created directly from the path.
    """
    filename = self.param_required('filename')

    use_database = (self.param_is_defined('hostname')
                    and self.param_is_defined('username'))

    if not use_database:
        # No connection details: wrap the path without touching a database.
        file_obj = File(path=filename, type='PHASED_VCF')
    else:
        # Connection settings are read in a fixed order: host, user, db, port, pwd.
        host, user, database, port_number, password = (
            self.param(key)
            for key in ('hostname', 'username', 'db', 'port', 'pwd')
        )
        connection = ReseqTrackDB(host=host,
                                  user=user,
                                  port=port_number,
                                  pwd=password,
                                  db=database)
        file_obj = connection.fetch_file_by_filename(filename)

    self.param('file_object', file_obj)
# Load a tab-separated list of file records and store each one in the database.
args = parser.parse_args()

reseqdb = ReseqTrackDB(host=args.hostname,
                       user=args.username,
                       port=args.port,
                       pwd=args.pwd,
                       db=args.db)

# args.dry is compared as text: only the literal strings "True" and "False"
# are accepted; anything else aborts the script.
dry = None
if args.dry == "True":
    dry = True
elif args.dry == "False":
    dry = False
else:
    raise Exception("I do not recognize this args.dry option:{0}".format(
        args.dry))

# Columns used per line: 0=path, 1=md5, 2=type, 3=size, 4=host_id,
# 5=withdrawn, 6=created.
# The handle gets its own name so the File objects created below no longer
# rebind the variable that refers to the open file.
with open(args.f) as list_fh:
    for line in list_fh:
        line = line.rstrip('\n')
        if not line:
            # An empty line (e.g. a trailing newline at end of file) has no
            # columns and would otherwise fail with IndexError on bits[2].
            continue
        bits = line.split('\t')
        f = File(path=bits[0],
                 type=bits[2],
                 size=bits[3],
                 md5=bits[1],
                 host_id=bits[4],
                 withdrawn=bits[5],
                 created=bits[6])
        f.store(reseqdb, dry=dry)
# --- logging: one INFO-level logger writing to a per-run log file ---
log_filename = "subset_vcf_%s.log" % outprefix

# Build the formatter and the file handler first, then wire them to the logger.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh = logging.FileHandler(log_filename)
fh.setFormatter(formatter)

logger = logging.getLogger("subset_vcf")
logger.setLevel(logging.INFO)
logger.addHandler(fh)

logger.info("Program started")

# --- subset the VCF with the given BED file; an index is requested too ---
vcfQC = VcfQC(
    vcf=file.path,
    bgzip_folder=args.bgzip_folder,
    vcftools_folder=args.vcftools_folder,
)
vcffile = vcfQC.subset_vcf(
    bed=args.bed,
    outprefix=outprefix,
    outdir=args.outdir,
    create_index=True,
)

# --- store the value returned by subset_vcf() as a new file record ---
f = File(path=vcffile, type=args.type, host_id=1, withdrawn=0)
f.store(reseqdb, do_md5=True)

logger.info("Done!.")
# Compute statistics for a VCF tracked in the database and store both the
# individual numbers (as attributes of the file) and the stats file itself.
hostname = args.hostname
username = args.username
db = args.db
port = args.port
pwd = args.pwd

reseqdb = ReseqTrackDB(host=hostname, user=username, port=port, pwd=pwd, db=db)

file = reseqdb.fetch_file_by_filename(args.filename)

# Output prefix = basename split on "." with the 'vcf' and 'gz' components
# dropped. Each component is removed only when present, so a name lacking
# one of them (e.g. an uncompressed .vcf) no longer raises ValueError.
l = os.path.basename(file.path).split('.')
for suffix in ('vcf', 'gz'):
    if suffix in l:
        l.remove(suffix)
outprefix = '.'.join(l)

vcfQC = VcfQC(vcf=file.path, bcftools_folder=args.bcftools_folder)
stats = vcfQC.stats(outprefix=outprefix, outdir=args.outdir)

# store attributes: every summary number, then the three named statistics,
# each saved as "STATS_<name>" against the file's dbID
for attr, value in stats.summary_numbers.items():
    Attribute(table_name="file", other_id=file.dbID,
              name="STATS_" + attr, value=value).store(reseqdb)

for stat_name in ('ts_tv', 'ts_tv_1stalt', 'no_singleton_snps'):
    Attribute(table_name="file", other_id=file.dbID,
              name="STATS_" + stat_name,
              value=getattr(stats, stat_name)).store(reseqdb)

# store file: the stats output is registered with type "<original type>_STATS"
stats_f = File(path=stats.filename, type=file.type + "_STATS",
               host_id=1, withdrawn=0)
stats_f.store(reseqdb, do_md5=True)

logger.info("Done!.")