Exemplo n.º 1
0
    def run(self):
        """Rename a file according to a new layout, move it and optionally store it.

        Parameters read from the runnable:
            filename (required): path of the file to process.
            type (required): type passed to the File objects.
            layout_dict (required): mapping from layout key to the text used
                for that part of the new name.
            newlayout (required): comma-separated keys of layout_dict, in the
                order in which they are joined with '.' to form the new name.
            extension (required): forwarded to File.rename().
            final_dir (required): directory the file is moved to.
            compression (optional): forwarded to File.rename(); any falsy
                value is passed as None.
            add_date (optional): forwarded to File.rename() as a boolean.
            store (optional): when this parameter is defined, the moved file
                is stored in the database described by hostname, username,
                db, port and pwd (all required in that case).

        On completion the 'stored_file' parameter is set to new_f.path.
        """
        filename = self.param_required('filename')
        self.warning('Storing file: %s' % filename)

        file_type = self.param_required('type')
        # Fetched with param_required (and before any work is done) so that a
        # missing destination is reported by name instead of surfacing later
        # as a TypeError on `None + "/"`.
        final_dir = self.param_required('final_dir')

        # First, rename the file
        fileO = File(path=filename, type=file_type)
        layout_dict = self.param_required('layout_dict')
        newlayout = self.param_required('newlayout').split(",")

        # Translate every layout key and glue the pieces together with dots
        newname = ".".join([layout_dict[key] for key in newlayout])

        # Optional flags: a falsy/undefined value means "off"
        compression = self.param('compression') or None
        add_date = bool(self.param('add_date'))

        fileO.rename(newname=newname,
                     extension=self.param_required('extension'),
                     add_date=add_date,
                     compression=compression)

        # NOTE(review): new_f is built from the original path while the target
        # name comes from fileO, so rename() presumably only updates fileO's
        # name and does not touch the file on disk -- confirm.
        new_f = File(path=filename, type=file_type)
        new_f.move(final_dir + "/" + fileO.name)

        # Storing is triggered by the mere presence of the 'store' parameter;
        # its value is not inspected.
        if self.param_is_defined('store'):
            reseqdb = ReseqTrackDB(host=self.param_required('hostname'),
                                   user=self.param_required('username'),
                                   port=self.param_required('port'),
                                   pwd=self.param_required('pwd'),
                                   db=self.param_required('db'))

            new_f.store(reseqdb, do_md5=True, dry=False)

        self.param('stored_file', new_f.path)
Exemplo n.º 2
0
    def fetch_input(self):
        """Build the file object for 'filename' and save it as 'file_object'.

        When both the 'hostname' and 'username' parameters are defined, the
        file is fetched by filename from a ReseqTrackDB opened with the
        hostname, username, db, port and pwd parameters. Otherwise a File of
        type 'PHASED_VCF' is created directly from the path.
        """
        filename = self.param_required('filename')

        has_db_access = (self.param_is_defined('hostname')
                         and self.param_is_defined('username'))

        if not has_db_access:
            # No database credentials: wrap the path in a File object
            self.param('file_object', File(path=filename, type='PHASED_VCF'))
            return

        # Collect the connection settings from the runnable's parameters
        settings = {
            key: self.param(key)
            for key in ('hostname', 'username', 'db', 'port', 'pwd')
        }

        database = ReseqTrackDB(host=settings['hostname'],
                                user=settings['username'],
                                port=settings['port'],
                                pwd=settings['pwd'],
                                db=settings['db'])

        self.param('file_object', database.fetch_file_by_filename(filename))
Exemplo n.º 3
0
# Parse the command line; `parser` is expected to be defined earlier in the script.
args = parser.parse_args()

reseqdb = ReseqTrackDB(host=args.hostname,
                       user=args.username,
                       port=args.port,
                       pwd=args.pwd,
                       db=args.db)

# The dry option arrives as text: only the exact strings "True" and "False"
# are accepted, anything else aborts the script.
dry = None
if args.dry == "True":
    dry = True
elif args.dry == "False":
    dry = False
else:
    raise Exception("I do not recognize this args.dry option:{0}".format(
        args.dry))

# Each line of the input file describes one file to store, as tab-separated
# columns: path, md5, type, size, host_id, withdrawn, created.
# The handle gets its own name so that the File object created inside the
# loop does not rebind the variable that holds the open file.
with open(args.f) as listing:
    for line in listing:
        line = line.rstrip('\n')
        if not line.strip():
            # Blank lines (e.g. a trailing empty line) carry no record
            continue
        bits = line.split('\t')
        f = File(path=bits[0],
                 type=bits[2],
                 size=bits[3],
                 md5=bits[1],
                 host_id=bits[4],
                 withdrawn=bits[5],
                 created=bits[6])
        f.store(reseqdb, dry=dry)
Exemplo n.º 4
0
    # NOTE(review): outprefix, file, reseqdb and args are used below but are
    # not defined in this fragment -- presumably set up earlier in the
    # enclosing scope; confirm.

    # The log file name is derived from the output prefix
    log_filename = "subset_vcf_%s.log" % outprefix

    logger = logging.getLogger("subset_vcf")
    logger.setLevel(logging.INFO)

    # create the logging file handler
    fh = logging.FileHandler(log_filename)

    # Each record is written as: timestamp - logger name - level - message
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)

    # add handler to logger object
    logger.addHandler(fh)

    logger.info("Program started")

    # Subset the VCF using the BED file given on the command line and ask
    # for an index to be created (create_index=True)
    vcfQC = VcfQC(vcf=file.path,
                  bgzip_folder=args.bgzip_folder,
                  vcftools_folder=args.vcftools_folder)
    vcffile = vcfQC.subset_vcf(bed=args.bed,
                               outprefix=outprefix,
                               outdir=args.outdir,
                               create_index=True)

    # Store the value returned by subset_vcf() as a new File in the database
    f = File(path=vcffile, type=args.type, host_id=1, withdrawn=0)
    f.store(reseqdb, do_md5=True)

    logger.info("Done!.")
    # Database connection settings taken from the command-line arguments
    hostname=args.hostname
    username=args.username
    db=args.db
    port=args.port
    pwd=args.pwd

    reseqdb = ReseqTrackDB(host=hostname,user=username,port=port,pwd=pwd,db=db)

    # Build the output prefix from the file's base name with the 'vcf' and
    # 'gz' components removed. list.remove() raises ValueError if either
    # component is absent, so the name must contain both.
    file=reseqdb.fetch_file_by_filename(args.filename)
    l=os.path.basename(file.path).split('.')
    l.remove('vcf')
    l.remove('gz')
    outprefix='.'.join(l)
    
    vcfQC = VcfQC(vcf=file.path,bcftools_folder=args.bcftools_folder)
    stats=vcfQC.stats(outprefix=outprefix,outdir=args.outdir)

    # store attributes: one "STATS_<name>" attribute per summary number,
    # attached to the "file" table row identified by file.dbID
    for attr,value in stats.summary_numbers.items():
        Attribute(table_name="file",other_id=file.dbID,name="STATS_"+attr,value=value).store(reseqdb)
    
    # These three values are read from dedicated attributes of the stats object
    Attribute(table_name="file",other_id=file.dbID,name="STATS_ts_tv",value=stats.ts_tv).store(reseqdb)
    Attribute(table_name="file",other_id=file.dbID,name="STATS_ts_tv_1stalt",value=stats.ts_tv_1stalt).store(reseqdb)
    Attribute(table_name="file",other_id=file.dbID,name="STATS_no_singleton_snps",value=stats.no_singleton_snps).store(reseqdb)

    # store file: the stats output is registered with the source file's type
    # plus a "_STATS" suffix
    stats_f=File(path=stats.filename,type=file.type+"_STATS",host_id=1,withdrawn=0)
    stats_f.store(reseqdb,do_md5=True)
            
    # NOTE(review): logger is not defined in this fragment -- presumably
    # configured earlier in the enclosing scope; confirm.
    logger.info("Done!.")