Example #1
Score: 0
File: aln.py — Project: Shenglai/apipe
def get_s3_objects(uuid,bucket,name,destination,s3cfg_dir,engine,logger):
    """Sync an S3 object (by its extension-less stem) into *destination*.

    Skips the download entirely when a 'have' marker for *name* already
    exists; otherwise runs ``s3cmd sync``, stores the command's timing
    stats in the ``time_mem_s3_sync`` table, and writes the marker.
    """
    if pipe_util.already_have(destination, name, logger):
        logger.info('already have object(s) %s in %s' % (name, destination))
        return

    logger.info('downloading object(s) %s to %s' % (name, destination))
    # Sync on the extension-less stem so sibling files sharing the same
    # prefix (e.g. indexes) are pulled down together with the object.
    stem = os.path.splitext(name)[0]
    source_path = os.path.join('s3://', bucket, stem)
    # s3cmd is expected in the user's ~/.local/bin, configured by the
    # .s3cfg file living under s3cfg_dir.
    s3cmd = os.path.join(os.path.expanduser('~'), '.local', 'bin', 's3cmd')
    command = [s3cmd, '-c', os.path.join(s3cfg_dir, '.s3cfg'),
               'sync', source_path, destination]
    output = pipe_util.do_command(command, logger)

    # Record per-command time/memory stats, keyed uniquely on (bucket, name).
    stats = time_util.store_time(uuid, command, output, logger)
    stats['bucket'] = bucket
    stats['name'] = name
    df_util.save_df_to_sqlalchemy(stats, {'bucket': bucket, 'name': name},
                                  'time_mem_s3_sync', engine, logger)

    pipe_util.create_have(destination, name, logger)
    logger.info('finished downloading object(s) %s to %s' % (name, destination))
    return
Example #2
Score: 0
File: vcf.py — Project: Shenglai/apipe
def get_s3_objects(uuid,bucket,name,destination,logger):
    #sync_name=name.split('.')[0]#temp hack to get reference.dict needed by GATK UG/HC
    if pipe_util.already_have(destination,name,logger):
        logger.info('already have object(s) %s in %s' % (name,destination))
    else: