示例#1
0
def async_run_enrich(specie, genefile):
    """Run the enrichment script on a gene list and parse its result table.

    A working directory named by ``randSeq(k=5)`` is created under
    ``UPLOAD_FOLDER/enrich``. The gene list is written there (when given as a
    list) or moved there (when given as the name of a file located in
    ``UPLOAD_FOLDER/enrich``). The enrichment script is then run, and the
    ``enrich.txt`` file in the working directory is read back.

    Args:
        specie: Value passed to the script's ``-s`` option.
        genefile: Either a list of gene identifier strings, or the name of a
            gene list file located in ``UPLOAD_FOLDER/enrich``.

    Returns:
        dict: ``{'task': 'enrich', 'result': {'header', 'body', 'href'}}``.
        Column 8 (zero-based) of each data row is collected into ``href``;
        columns 7 and 8 are dropped from ``header`` and from every ``body``
        row. All three lists are empty when ``enrich.txt`` cannot be read.

    Raises:
        IndexError: If a non-blank data row has fewer than nine columns.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    tmp_dir = randSeq(k=5)
    enrich_dir = os.path.join(UPLOAD_FOLDER, 'enrich', tmp_dir)
    # Every interpolated value is shell-quoted: the commands are executed
    # through a shell and the file name / species come from the caller.
    processor.Run("mkdir {tmpDir}".format(tmpDir=shlex.quote(enrich_dir)))
    if isinstance(genefile, list):
        filepath = os.path.join(enrich_dir, 'tmp.gene.list')
        # ``with`` closes the handle even if one of the writes fails.
        with open(filepath, 'w') as gene_handle:
            for each in genefile:
                gene_handle.write(each + '\n')
    else:
        filepath = os.path.join(enrich_dir, genefile)
        processor.Run("mv {gene_file} {tmpDir}".format(
            gene_file=shlex.quote(
                os.path.join(UPLOAD_FOLDER, 'enrich', genefile)),
            tmpDir=shlex.quote(enrich_dir)))
    cmd = "{pypath} {script}\
     -s {specie} \
     -g {genelist} \
     -o {dir}".format(pypath=KOBAS_PATH,
                      script=ENRICH_SCRIPT,
                      specie=shlex.quote(str(specie)),
                      genelist=shlex.quote(filepath),
                      dir=shlex.quote(enrich_dir))
    processor.shRun(cmd)
    href = []
    body = []
    try:
        with open(os.path.join(enrich_dir, 'enrich.txt'), 'r') as table:
            head = table.readline().strip().split('\t')
            head = head[:7] + head[9:]
            for row in table:
                stripped = row.strip()
                if not stripped:
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                row_list = stripped.split('\t')
                href.append(row_list[8])
                body.append(row_list[:7] + row_list[9:])
    except IOError as e:
        # A missing or unreadable result file is reported as an empty table.
        print(e)
        return {
            'task': 'enrich',
            'result': {
                'header': [],
                'body': [],
                'href': []
            }
        }
    # NOTE: the working directory is not removed afterwards.
    return {
        'task': 'enrich',
        'result': {
            'header': head,
            'body': body,
            'href': href
        }
    }
示例#2
0
def run_annotation(vcf_file, annotation_database):
    """Run the VCF annotation script and zip the files it produces.

    The output prefix is ``<unix timestamp>-<vcf_file up to '.vcf.gz'>``
    inside ``UPLOAD_FOLDER/vcf_ann``. After the annotation script has run,
    every file matching ``<prefix>.ann.vcf.*`` is added to ``<prefix>.zip``.

    Args:
        vcf_file: Name of the VCF file located in ``UPLOAD_FOLDER/vcf_ann``.
        annotation_database: Value passed to the annotation script as its
            second argument.

    Returns:
        str: File name (without directory) of the zip archive.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    ann_dir = os.path.join(UPLOAD_FOLDER, 'vcf_ann')
    annotation_prefix = '-'.join([str(int(time.time())), vcf_file.split('.vcf.gz')[0]])
    prefix_path = os.path.join(ann_dir, annotation_prefix)
    # Paths derived from the uploaded file name are shell-quoted so spaces or
    # shell metacharacters in the name cannot alter the command.
    # NOTE(review): annotation_database is still interpolated verbatim because
    # it may legitimately hold several shell tokens - confirm, then quote it.
    cmd = "{script}  {vcf_file} {annotation_database} {prefix}".format(
        script=vcf_ann_script,
        vcf_file=shlex.quote(os.path.join(ann_dir, vcf_file)),
        annotation_database=annotation_database,
        prefix=shlex.quote(prefix_path)
    )
    processor.shRun(cmd)
    # The glob suffix stays outside the quotes so the shell still expands it.
    processor.Run("zip {zipfile} {files}".format(
            zipfile=shlex.quote(prefix_path + '.zip'),
            files=shlex.quote(prefix_path) + '.ann.vcf.*'))
    return annotation_prefix + '.zip'
示例#3
0
def fetch_vcf_samples(vcf, vcf_type="WES"):
    """List the samples of a VCF, map them with ``tc_map`` and split the VCF.

    Runs the sample-extraction script, reads the resulting
    ``<vcf>.sample_name`` file from ``Config.VCF_FILE_PATH`` (one sample name
    per line), passes the names to ``tc_map``, and finally runs the split
    script with ``<vcf>.idmap`` as its map file.

    Args:
        vcf: VCF file name handed to both shell scripts.
        vcf_type: Type label forwarded to ``tc_map``. Defaults to ``"WES"``.

    Returns:
        Whatever ``tc_map(vcf, vcf_type, samples)`` returns.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    # The file name is shell-quoted because the commands run through a shell.
    quoted_vcf = shlex.quote(vcf)

    # List the sample names contained in the VCF.
    fetch_sample_cmd = "sh {script} {vcf}".format(
        script=extract_vcf_sample_script, vcf=quoted_vcf)
    processor.shRun(fetch_sample_cmd)
    sample_name_file = os.path.join(Config.VCF_FILE_PATH, vcf + '.sample_name')
    # ``with`` closes the handle even if reading fails.
    with open(sample_name_file, 'r') as handle:
        samples = [line.strip() for line in handle]
    tc_series = tc_map(vcf, vcf_type, samples)

    # Split the VCF into one file per sample.
    # NOTE(review): the .idmap file is presumably written by tc_map - confirm.
    split_vcf_cmd = "sh {script} {vcf} {map_file}".format(
        script=split_vcf_sample_script,
        vcf=quoted_vcf,
        map_file=shlex.quote(vcf + '.idmap'))
    processor.shRun(split_vcf_cmd)
    return tc_series
示例#4
0
def async_fetch_vcf_samples(vcf, username, vcf_type="WES"):
    """Register the samples of an uploaded VCF and split it per sample.

    Runs the sample-extraction script, reads the resulting
    ``<vcf>.sample_name`` file from ``Config.VCF_FILE_PATH`` (one sample name
    per line), passes the names to ``tc_map``, saves one ``Data`` row per
    returned entry, and finally runs the split script with ``<vcf>.idmap`` as
    its map file.

    Args:
        vcf: VCF file name handed to both shell scripts.
        username: Stored as ``provider`` on every saved ``Data`` row.
        vcf_type: Type label forwarded to ``tc_map`` and stored as ``type``.
            Defaults to ``"WES"``.

    Returns:
        dict: ``{'task': 'vcf_upload', 'result': '<vcf> upload success...'}``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    # The file name is shell-quoted because the commands run through a shell.
    quoted_vcf = shlex.quote(vcf)

    # List the sample names contained in the VCF.
    fetch_sample_cmd = "sh {script} {vcf}".format(
        script=extract_vcf_sample_script, vcf=quoted_vcf)
    processor.shRun(fetch_sample_cmd)
    sample_name_file = os.path.join(Config.VCF_FILE_PATH, vcf + '.sample_name')
    # ``with`` closes the handle even if reading fails.
    with open(sample_name_file, 'r') as handle:
        samples = [line.strip() for line in handle]
    tc_series = tc_map(vcf, vcf_type, samples)

    # Create one Data record per sample; each entry is indexed as
    # (tc_id, sample_name).
    for each in tc_series:
        row = Data(tc_id=each[0],
                   provider=username,
                   sample_name=each[1],
                   type=vcf_type)
        row.save()

    # Split the VCF into one file per sample.
    # NOTE(review): the .idmap file is presumably written by tc_map - confirm.
    split_vcf_cmd = "sh {script} {vcf} {map_file}".format(
        script=split_vcf_sample_script,
        vcf=quoted_vcf,
        map_file=shlex.quote(vcf + '.idmap'))
    processor.shRun(split_vcf_cmd)
    return {
        'task': 'vcf_upload',
        'result': '{0} upload success...'.format(vcf)
    }
示例#5
0
def snp_info(info):
    """Query ``snpInf`` for one group, by gene list or by genomic region.

    Args:
        info: Mapping describing the query. ``info['group']`` is always
            required. When ``info['gene_list']`` is present and truthy it is
            parsed with ``parseInput`` and used as the gene filter; otherwise
            ``info['chr']``, ``info['pos_start']`` and ``info['pos_end']``
            are required and define the region.

    Returns:
        dict: ``{'task': 'snp_info', 'result': {'header', 'body'}}``. The first
        line of the command output is split on tabs into ``header`` and the
        remaining lines into ``body``. Both lists are empty when the parsed
        gene list is empty or the command yields no output.

    Raises:
        KeyError: If a required key is missing from ``info``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    tmp_param = {
        'not_a_group_id': info['group'],
        'group_names': ["not_a_group_id"]
    }
    genes = info.get('gene_list')
    if genes:
        gene_list = parseInput(genes)
        if not gene_list:
            return {'task': 'snp_info', 'result': {'header': [], 'body': []}}
        tmp_param.update({'gene_id': gene_list})
    else:
        tmp_param.update({
            'chrom': info['chr'],
            'chrom_start': info['pos_start'],
            'chrom_end': info['pos_end']
        })
    # shlex.quote replaces the former hand-written '...' wrapper: the output
    # is identical for ordinary input, and a single quote inside the JSON can
    # no longer terminate the shell argument.
    cmd = "snpInf \
            --gene_bed {gene_bed} \
            --vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl \
            --vcf_dir {vcf_table} \
            --outdir {outdir} \
            --parameters {param}".format(
        gene_bed=gene_bed_file,
        vcf_table=Config.VCF_TABLE_PATH,
        param=shlex.quote(json.dumps(tmp_param)),
        outdir=ANN_PATH,
    )
    print(cmd)
    result = processor.shRun(cmd)
    if result:
        head_data = result[0].split('\t')
        body_data = [row.split('\t') for row in result[1:]]
        return {
            'task': 'snp_info',
            'result': {
                'header': head_data,
                'body': body_data
            }
        }
    return {'task': 'snp_info', 'result': {'header': [], 'body': []}}
示例#6
0
def compare_info(info):
    """Run ``snpInf`` with caller-supplied parameters and return its table.

    Args:
        info: Mapping that is JSON-serialised and passed to ``snpInf`` as
            ``--parameters``. When ``info['gene_id']`` is present and truthy
            it is parsed with ``parseInput`` and the parsed list is written
            back into ``info`` (the caller's mapping is modified in place).

    Returns:
        dict: ``{'task': 'compare_info', 'result': {'header', 'body'}}``. The
        first line of the command output is split on tabs into ``header`` and
        the remaining lines into ``body``. Both lists are empty when the
        parsed gene list is empty or the command yields no output.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    genes = info.get('gene_id')
    if genes:
        gene_list = parseInput(genes)
        if not gene_list:
            return {
                'task': 'compare_info',
                'result': {
                    'header': [],
                    'body': []
                }
            }
        info['gene_id'] = gene_list

    # shlex.quote replaces the former hand-written '...' wrapper: the output
    # is identical for ordinary input, and a single quote inside the JSON can
    # no longer terminate the shell argument.
    cmd = "snpInf \
            --gene_bed {gene_bed} \
            --vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl \
            --vcf_dir {vcf_table} \
            --outdir {outdir} \
            --parameters {param}".format(
        gene_bed=gene_bed_file,
        vcf_table=Config.VCF_TABLE_PATH,
        param=shlex.quote(json.dumps(info)),
        outdir=ANN_PATH,
    )
    result = processor.shRun(cmd)
    if result:
        head_data = result[0].split('\t')
        body_data = [row.split('\t') for row in result[1:]]
        return {
            'task': 'compare_info',
            'result': {
                'header': head_data,
                'body': body_data
            }
        }
    return {'task': 'compare_info', 'result': {'header': [], 'body': []}}
示例#7
0
def run_bsa(info):
    """Run ``snpScore``, zip its ``results`` directory and list the plots.

    The first line of the command output is taken as the base output
    directory. Its ``results`` sub-directory is zipped into ``results.zip``
    next to it, and the files ending in ``snp.freq.plot.jpg`` or
    ``var.score.plot.jpg`` are collected.

    Args:
        info: Parameter string passed to ``snpScore -p``.

    Returns:
        dict: ``{'task': 'bsa', 'result': {'path', 'files'}}``. ``files``
        lists the frequency plots followed by the score plots and ``path``
        points at ``results.zip``. All paths have everything up to and
        including ``/home/app/vcfweb/wheatdb/app`` removed.

    Raises:
        IndexError, TypeError: If the command produced no output.
        OSError: If the ``results`` directory cannot be listed.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    freq_pattern = 'snp.freq.plot.jpg'
    score_pattern = 'var.score.plot.jpg'
    print(info)
    # shlex.quote replaces the former hand-written '...' wrapper so a single
    # quote inside ``info`` can no longer terminate the shell argument.
    cmd = "snpScore -p {info} -d {vcf_dir} -o {out_dir} --vcf_ann_file /data/wheatdb/data/vcf_private/snp.ann.table.pkl".format(
        info=shlex.quote(str(info)),
        vcf_dir=Config.VCF_TABLE_PATH,
        out_dir=MAPPING_PATH)

    # test
    print(cmd)

    result = processor.shRun(cmd)
    result_base = result[0]
    result_path = os.path.join(result_base, 'results')
    print(result_path)
    # Build the zip command once; it is both executed and echoed.
    zip_cmd = "cd {dir} && zip -r {zip_file} results".format(
        zip_file=os.path.join(result_base, 'results.zip'), dir=result_base)
    processor.Run(cmd=zip_cmd)
    print(zip_cmd)
    all_files = os.listdir(result_path)
    # NOTE(review): the stripped prefix is presumably the web application
    # root, making the returned paths app-relative - confirm.
    path = result_path.split('/home/app/vcfweb/wheatdb/app')[-1]
    freq_files = [
        os.path.join(path, name) for name in all_files
        if name.endswith(freq_pattern)
    ]
    score_files = [
        os.path.join(path, name) for name in all_files
        if name.endswith(score_pattern)
    ]
    return {
        'task': 'bsa',
        'result': {
            'path': os.path.join(path, '../results.zip'),
            'files': freq_files + score_files
        }
    }