Пример #1
0
def run_and_return(command):
    """Run a ChunkyLauncher command and return its stderr lines.

    On Windows the call is delegated to ``run_and_return_winfix``. On any
    other platform the launcher jar is run through Sultan, the captured
    stdout is passed to ``log`` and the stderr lines are returned with every
    tab character removed.

    Args:
        command: Launcher option name without the leading dash; it is
            appended to ``-jar ChunkyLauncher.jar -``.

    Returns:
        On non-Windows platforms, a list of stderr lines without tab
        characters; on Windows, whatever ``run_and_return_winfix`` returns.
    """
    if system_platform() == 'Windows':
        return run_and_return_winfix(command)

    with Sultan.load() as shell:
        outcome = shell.java('-jar ChunkyLauncher.jar -' + command).run()
        log(str(outcome.stdout))
        # Strip the tab characters from each stderr line.
        return [line.replace('\t', '') for line in outcome.stderr]
Пример #2
0
                    print('ERROR! Choice shoule be `0` or `1`!')

        temp_time = time.time()  # 计个时看看
        name = aao_login(stuID, stuPwd, retry_cnt)
        print('\nMeow~下面开始获取{}课表啦!\n'.format({0: '个人', 1: '班级'}.get(choice)))
        courseTable = getCourseTable(choice=choice)
        list_lessonObj = parseCourseTable(courseTable)
        print('课表获取完成,下面开始生成iCal日历文件啦!')
        cal = create_ics(list_lessonObj, semester_start_date)
        print('日历生成完成,下面开始导出啦!\n')
        export_ics(cal, semester_year, semester, stuID)  # Export `.ics` file
        if not args.notxt:  # 若命令行参数含`--notxt`则不导出
            exportCourseTable(list_lessonObj, semester_year, semester,
                              stuID)  # Export `.txt` file
        if not args.noxlsx:  # 若命令行参数含`--noxlsx`则不导出
            print('\n开始生成xlsx表格文件! ')
            xlsx = create_xls(list_lessonObj, semester_year, semester, stuID)
            print('xlsx文件生成完成,开始导出!')
            export_xls(xlsx, semester_year, semester,
                       stuID)  # Export `.xlsx` file
        print('\n导出完成,累计用时:', time.time() - temp_time, 's')
        print("Thanks for your use! 欢迎来GitHub上点个Star呢!")
    except Exception as e:
        print("ERROR! 欢迎在GitHub上提出issue & Pull Request!")
        print(e)
    finally:
        session.cookies.clear()  # 清一下cookie
        if system_platform(
        ) == 'Windows':  # Fix Linux `sh: 1: pause: not found` bug
            os.system('pause')
Пример #3
0
def mr_test(expdf, outcdf, opts, logger):
    """Run the requested MR analyses for one exposure/outcome pair.

    The exposure and outcome tables are written to ``exposure.txt`` and
    ``outcome.txt`` in the current working directory, ``pleiotropy`` is run
    on them, and then (depending on ``opts.mr_method``) GCTA-GSMR and/or the
    ``mr_test.R`` script are executed through the shell. Their result files
    are read back, merged and removed again.

    Note that the ``RS`` column of both input frames is lower-cased in place.

    Args:
        expdf: Exposure table; must have an ``RS`` column of strings.
        outcdf: Outcome table; must have an ``RS`` column of strings.
        opts: Options object providing ``exp_name``, ``out_name``,
            ``heterogeneity`` and ``mr_method``.
        logger: Object whose ``logger`` attribute offers ``info``.

    Returns:
        A tuple ``(mr_result, mr_heter, mr_pleio, mr_data, mr_pleiosnp)`` of
        DataFrames; each one is empty when the corresponding output was not
        produced.
    """
    result_columns = ['outcome', 'exposure', 'method', 'nsnp', 'b', 'se',
                      'pval']
    gsmr_columns = ['exposure', 'outcome', 'b', 'se', 'pval', 'nsnp']
    # {0}: gcta binary, {1}: output prefix, {2}: HEIDI threshold.
    gsmr_command = (
        '{0} --mbfile gsmr_ref_data.txt --gsmr-file gsmr_exposure.txt '
        'gsmr_outcome.txt --gsmr-direction 0 --out {1} --heidi-thresh {2}  '
        '--effect-plot --diff-freq 1 --clump-r2 1 --gwas-thresh 1 '
        '--gsmr-snp-min 5')
    twosample_methods = {
        "mr_wald_ratio", "mr_two_sample_ml", "mr_egger_regression",
        "mr_egger_regression_bootstrap", "mr_simple_median",
        "mr_weighted_median", "mr_penalised_weighted_median",
        "Penalised weighted median", "mr_ivw", "mr_ivw_radial",
        "mr_ivw_mre", "mr_ivw_fe", "mr_simple_mode", "mr_weighted_mode",
        "mr_weighted_mode_nome", "mr_simple_mode_nome", "mr_raps",
        "mr_sign", "mr_uwr"
    }

    logger.logger.info(
        '>>>>>>>>>>MR analyses for {0} and {1} start<<<<<<<<<<'.format(
            opts.exp_name, opts.out_name))
    expdf['RS'] = expdf['RS'].str.lower()
    outcdf['RS'] = outcdf['RS'].str.lower()
    for frame, path in ((expdf, 'exposure.txt'), (outcdf, 'outcome.txt')):
        frame.to_csv(path,
                     sep='\t',
                     na_rep='NA',
                     float_format='%g',
                     encoding='utf-8',
                     index=False)
    pleiotropy(opts, logger)

    mr_result = pd.DataFrame({column: [] for column in result_columns})
    mr_heter = pd.DataFrame()
    mr_pleio = pd.DataFrame()
    mr_data = pd.DataFrame()
    mr_pleiosnp = pd.DataFrame()

    def remove_quietly(paths):
        # Remove each file independently so that one missing file does not
        # stop the remaining ones from being cleaned up.
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                pass

    def load_and_discard(path, collected):
        # Append the table stored in `path` (if it exists) to `collected`
        # and delete the file afterwards.
        if not os.path.exists(path):
            return collected
        loaded = pd.read_csv(path, sep=r'\s+', header=0, dtype=str)
        merged = pd.concat([collected, loaded], axis=0)
        os.remove(path)
        return merged

    def read_gsmr(path, method):
        # Load a GCTA `.gsmr` table and label every row with `method`.
        table = pd.read_csv(path,
                            sep=r'\s+',
                            header=0,
                            dtype=str,
                            names=gsmr_columns)
        # A scalar is broadcast to every row; multiplying the string by the
        # row count would concatenate the label with itself instead.
        table['method'] = method
        return table[result_columns].copy()

    if opts.heterogeneity != 'none' and os.path.exists('mr.pleiosnp'):
        mr_pleiosnp = pd.read_csv('mr.pleiosnp',
                                  sep=r'\s+',
                                  header=0,
                                  dtype=str)
        remove_quietly(['mr.pleiosnp'])

    # The temporary file collects stderr of the external tools; the context
    # manager guarantees it is closed even if an unexpected error propagates.
    with tempfile.TemporaryFile(mode='w+', encoding='utf-8') as r_errf:

        def reset_stderr_capture():
            r_errf.seek(0)
            r_errf.truncate()

        def log_failure(summary):
            r_errf.flush()
            r_errf.seek(0)
            logger.logger.info('Error info:\n{0}'.format(r_errf.read()))
            logger.logger.info(summary)
            reset_stderr_capture()

        if 'mr_gsmr' in opts.mr_method:
            if expdf.shape[0] < 5:
                logger.logger.info('No enough SNPs for GSMR analysis.')
            else:
                logger.logger.info(
                    'Notes: make sure that SNP frequency are accurate for GSMR analysis'
                )
                refdir = os.path.dirname(os.path.realpath(__file__))
                sysstr = system_platform()
                gctascript = os.path.join(refdir, 'refsource',
                                          'gcta_{0}'.format(sysstr), 'gcta64')
                with open('gsmr_ref_data.txt', 'w') as writer:
                    writer.write('\n'.join([
                        os.path.join(refdir, 'refsource',
                                     '1000G_EUR_Phase3_plink',
                                     '1000G.EUR.QC.{0}'.format(str(i)))
                        for i in range(1, 23)
                    ]))
                with open('gsmr_exposure.txt', 'w') as writer:
                    writer.write(
                        '\t'.join([opts.exp_name, 'exposure.txt']) + '\n')
                with open('gsmr_outcome.txt', 'w') as writer:
                    writer.write(
                        '\t'.join([opts.out_name, 'outcome.txt']) + '\n')
                try:
                    # stderr goes to r_errf so that the failure handler
                    # below actually has something to report.
                    subprocess.check_output(gsmr_command.format(
                        gctascript, 'gsmr_result_pleio', '0'),
                                            stderr=r_errf,
                                            shell=True,
                                            bufsize=0)
                    mr_result = pd.concat([
                        mr_result,
                        read_gsmr('gsmr_result_pleio.gsmr', 'gsmr')
                    ],
                                          axis=0)
                    if opts.heterogeneity == 'none':
                        # Second pass with HEIDI outlier filtering enabled.
                        reset_stderr_capture()
                        subprocess.check_output(gsmr_command.format(
                            gctascript, 'gsmr_result_nonpleio', '0.01'),
                                                stderr=r_errf,
                                                shell=True,
                                                bufsize=0)
                        mr_result = pd.concat([
                            mr_result,
                            read_gsmr('gsmr_result_nonpleio.gsmr',
                                      'gsmr-outlier-correction')
                        ],
                                              axis=0)
                except subprocess.CalledProcessError:
                    log_failure("GSMR analysis Failed\nSkiping...")
            remove_quietly([
                'gsmr_ref_data.txt',
                'gsmr_exposure.txt',
                'gsmr_outcome.txt',
                'gsmr_result_nonpleio.gsmr',
                'gsmr_result_pleio.gsmr',
                'gsmr_result_nonpleio.log',
                'gsmr_result_pleio.log',
                'gsmr_result_nonpleio.pleiosnps',
                # Name under which pleiotropy() reads the outlier list
                # written for the same `--out gsmr_result_nonpleio` prefix.
                'gsmr_result_nonpleio.pleio_snps',
            ])

        if twosample_methods & set(opts.mr_method):
            # Drop anything a previous tool left in the capture file so a
            # failure report only shows the R script's own stderr.
            reset_stderr_capture()
            try:
                # TODO: warn that some MR methods cannot be run when there
                # are only a few SNPs.
                subprocess.check_output(
                    'Rscript mr_test.R {0} {1} {2} {3} {4} {5}'.format(
                        'exposure.txt', 'outcome.txt', opts.exp_name,
                        opts.out_name, ','.join(set(opts.mr_method)),
                        'no' if opts.heterogeneity
                        in ['mr_presso', 'mr_gsmr'] else 'yes'),
                    stderr=r_errf,
                    shell=True,
                    bufsize=0)
                mr_result = load_and_discard('mr.result', mr_result)
                mr_heter = load_and_discard('mr.heter', mr_heter)
                mr_pleio = load_and_discard('mr.pleio', mr_pleio)
                mr_data = load_and_discard('mr.data', mr_data)
            except subprocess.CalledProcessError:
                log_failure("MR analysis Failed.\nSkiping...")
            except FileNotFoundError:
                pass

            remove_quietly(['exposure.txt', 'outcome.txt'])

    if mr_result.empty:
        logger.logger.info('\n=====No MR results=====\n')
    else:
        logger.logger.info('\n=====Main MR results=====\n')
        logger.logger.info(str(mr_result))
    if mr_heter.empty:
        logger.logger.info('\n=====No heterogeneity test results=====\n')
    else:
        logger.logger.info(
            '\n=====Main heterogeneity test for MR results=====\n')
        logger.logger.info(str(mr_heter))
    if mr_pleio.empty:
        logger.logger.info('\n=====No Egger pleiotropy test result=====\n\n')
    else:
        logger.logger.info('\n--Egger pleiotropy: Intercetpt--\n\n')
        logger.logger.info(str(mr_pleio))
    logger.logger.info(
        '\n\n>>>>>>>>>>MR analysis for {0} and {1} finished!<<<<<<<<<<\n'.
        format(opts.exp_name, opts.out_name))
    return mr_result, mr_heter, mr_pleio, mr_data, mr_pleiosnp
Пример #4
0
def pleiotropy(opts, logger):
    """Run the pleiotropy test selected by ``opts.heterogeneity``.

    Works on ``exposure.txt`` and ``outcome.txt`` in the current working
    directory. Any stale ``mr.pleiosnp`` file is removed first.

    * ``'none'``: nothing is run.
    * ``'mr_presso'``: ``pleiotropy_test.R`` is executed and the
      ``mr.pleiosnp`` file it leaves behind is read and logged.
    * ``'mr_gsmr'``: GCTA-GSMR is run with HEIDI outlier filtering. When it
      reports pleiotropic SNPs they are written to ``mr.pleiosnp`` and
      removed from ``exposure.txt`` / ``outcome.txt``. Skipped when the
      exposure file has fewer than five rows.
    * anything else: only a log message is emitted.

    Args:
        opts: Options object providing ``heterogeneity``, ``exp_name`` and
            ``out_name``.
        logger: Object whose ``logger`` attribute offers ``info``.

    Returns:
        None.
    """
    try:
        os.remove('mr.pleiosnp')
    except OSError:
        pass

    # The temporary file captures stderr of the external tools. Using it as
    # a context manager closes it on every path, including the early return.
    with tempfile.TemporaryFile(mode='w+', encoding='utf-8') as r_errf:

        def log_failure():
            r_errf.flush()
            r_errf.seek(0)
            logger.logger.info('Error info:\n{0}'.format(r_errf.read()))
            logger.logger.info("Pleiotropy test Failed.\nSkiping...")
            r_errf.seek(0)
            r_errf.truncate()

        if opts.heterogeneity == 'none':
            logger.logger.info("Notes: no pleiotropy test is adopted!")
        elif opts.heterogeneity == 'mr_presso':
            logger.logger.info('Pleiotropy test for {0} using {1}'.format(
                opts.exp_name, opts.heterogeneity))
            try:
                subprocess.check_output(
                    'Rscript pleiotropy_test.R {0} {1} {2} {3} {4}'.format(
                        'exposure.txt', 'outcome.txt', opts.exp_name,
                        opts.out_name, opts.heterogeneity),
                    stderr=r_errf,
                    shell=True,
                    bufsize=0)
                presso_pleiosnp = pd.read_csv('mr.pleiosnp',
                                              sep=r'\s+',
                                              header=0,
                                              dtype=str)
                if not presso_pleiosnp.empty:
                    logger.logger.info(
                        'Pleiotropic SNPs for {0} and {1} using MR-PRESSO global test!'
                        .format(opts.exp_name, opts.out_name))
                    logger.logger.info(str(presso_pleiosnp))
                else:
                    logger.logger.info(
                        'No pleiotropic SNPs for {0} and {1} identified by MR-PRESSO global test.'
                        .format(opts.exp_name, opts.out_name))
            except subprocess.CalledProcessError:
                log_failure()
            except FileNotFoundError:
                # The R script produced no `mr.pleiosnp`; nothing to report.
                pass
        elif opts.heterogeneity == 'mr_gsmr':
            logger.logger.info('Pleiotropy test for {0} using {1}'.format(
                opts.exp_name, opts.heterogeneity))
            exposure_rows = pd.read_csv('exposure.txt',
                                        sep=r'\s+',
                                        header=0,
                                        dtype=str).shape[0]
            if exposure_rows < 5:
                logger.logger.info(
                    'No enough SNPs to perform GSMR HEIDI outlier test')
                return
            refdir = os.path.dirname(os.path.realpath(__file__))
            sysstr = system_platform()
            gctascript = os.path.join(refdir, 'refsource',
                                      'gcta_{0}'.format(sysstr), 'gcta64')
            try:
                with open('gsmr_ref_data.txt', 'w') as writer:
                    writer.write('\n'.join([
                        os.path.join(refdir, 'refsource',
                                     '1000G_EUR_Phase3_plink',
                                     '1000G.EUR.QC.{0}'.format(str(i)))
                        for i in range(1, 23)
                    ]))
                with open('gsmr_exposure.txt', 'w') as writer:
                    writer.write(
                        '\t'.join([opts.exp_name, 'exposure.txt']) + '\n')
                with open('gsmr_outcome.txt', 'w') as writer:
                    writer.write(
                        '\t'.join([opts.out_name, 'outcome.txt']) + '\n')
                command = '{0} --mbfile gsmr_ref_data.txt --gsmr-file gsmr_exposure.txt gsmr_outcome.txt --gsmr-direction 0 --out gsmr_result_nonpleio --heidi-thresh 0.01  --diff-freq 1 --clump-r2 1 --gwas-thresh 1 --gsmr-snp-min 5'.format(
                    gctascript)
                subprocess.check_output(command,
                                        stderr=r_errf,
                                        shell=True,
                                        bufsize=0)
                if os.path.exists('gsmr_result_nonpleio.pleio_snps'):
                    gsmr_pleiosnp = pd.read_csv(
                        'gsmr_result_nonpleio.pleio_snps',
                        sep=r'\s+',
                        header=None,
                        names=['exposure', 'outcome', 'RS'],
                        dtype=str)
                    # A scalar is broadcast to every row; multiplying the
                    # string by the row count would glue the label together
                    # once per pleiotropic SNP.
                    gsmr_pleiosnp['method'] = 'gsmr-outlier-correction'
                    logger.logger.info(
                        'Pleiotropic SNPs for {0} and {1} using GSMR HEIDI outlier test!'
                        .format(opts.exp_name, opts.out_name))
                    gsmr_pleiosnp = gsmr_pleiosnp[[
                        'exposure', 'outcome', 'method', 'RS'
                    ]]
                    logger.logger.info(str(gsmr_pleiosnp))
                    gsmr_pleiosnp.to_csv('mr.pleiosnp',
                                         header=True,
                                         index=False,
                                         sep='\t',
                                         na_rep='NA',
                                         float_format='%g',
                                         encoding='utf-8')
                    # Drop the pleiotropic SNPs from both input tables so the
                    # downstream analysis runs without them.
                    for path in ['exposure.txt', 'outcome.txt']:
                        df = pd.read_csv(path,
                                         sep=r'\s+',
                                         header=0,
                                         dtype=str)
                        df = df[~df['RS'].isin(gsmr_pleiosnp['RS'])]
                        df.to_csv(path,
                                  header=True,
                                  sep='\t',
                                  na_rep='NA',
                                  float_format='%g',
                                  encoding='utf-8',
                                  index=False)
                else:
                    logger.logger.info(
                        'No pleiotropic SNPs for {0} and {1} identified by GSMR HEIDI outlier test'
                        .format(opts.exp_name, opts.out_name))
            except subprocess.CalledProcessError:
                log_failure()
            # Remove every intermediate file on its own: a missing file must
            # not prevent the remaining ones from being cleaned up.
            for leftover in ('gsmr_ref_data.txt', 'gsmr_exposure.txt',
                             'gsmr_outcome.txt',
                             'gsmr_result_nonpleio.pleio_snps',
                             'gsmr_result_nonpleio.gsmr',
                             'gsmr_result_nonpleio.log',
                             'gsmr_result_nonpleio.badsnps'):
                try:
                    os.remove(leftover)
                except OSError:
                    pass
        else:
            logger.logger.info('No pleiotropy test for {0} using {1}'.format(
                opts.exp_name, opts.heterogeneity))
Пример #5
0
def clumping(opts, expdf, refdir, logger):
    """Select independent exposure SNPs, optionally by PLINK LD clumping.

    The ``RS`` / ``P`` columns of ``expdf`` are always written to the file
    ``plinkreport`` (with ``RS`` lower-cased and the header ``SNP``/``P``).
    With ``opts.clump`` set and ``opts.origin == 'offline'`` PLINK is run
    once per autosome (1-22) against the bundled 1000G reference panel and
    the SNP column of each resulting ``plink.clumped`` file is collected.
    Without clumping every SNP of ``expdf`` is kept. The selected SNPs are
    written, one per line, to the file ``include``.

    Args:
        opts: Options object providing ``clump`` and ``origin`` and, for
            offline clumping, ``clump_p1``, ``r2`` and ``windowsize``.
        expdf: Exposure table with ``RS`` and ``P`` columns.
        refdir: Directory that contains the ``refsource`` folder.
        logger: Object whose ``logger`` attribute offers ``info`` and
            ``warning``.

    Returns:
        List of the selected SNP identifiers.

    Raises:
        SystemExit: With code 1 when online clumping is requested, which is
            not implemented.
    """
    # Work on an explicit copy so the caller's frame is left untouched.
    plinkreport = expdf.loc[:, ['RS', 'P']].copy()
    plinkreport['RS'] = plinkreport['RS'].str.lower()
    plinkreport.to_csv('plinkreport',
                       sep='\t',
                       header=['SNP', 'P'],
                       index=False)
    independent = []
    if opts.clump and opts.origin == 'offline':
        # These do not depend on the chromosome, so compute them once.
        sysstr = system_platform()
        plinkscript = os.path.join(refdir, 'refsource',
                                   'plink_{0}'.format(sysstr), 'plink')
        for i in range(1, 23):
            geno = os.path.join(refdir, 'refsource',
                                '1000G_EUR_Phase3_plink',
                                '1000G.EUR.QC.{0}'.format(str(i)))
            command = (
                '{0} --bfile {1} --clump {2} --clump-p1 {3} --clump-r2 {4} --clump-kb {5} '
                .format(plinkscript, geno, 'plinkreport', opts.clump_p1,
                        opts.r2, opts.windowsize))
            # The context manager closes the capture file on every path,
            # including the `continue` taken after a PLINK failure.
            with tempfile.TemporaryFile() as errf:
                try:
                    subprocess.check_output(command,
                                            shell=True,
                                            stderr=errf,
                                            bufsize=0)
                except subprocess.CalledProcessError:
                    logger.logger.warning(
                        'Warning: plink clumping algorithm for chromosome {0} failed. Please check .log file'
                        .format(str(i)))
                    errf.flush()
                    errf.seek(0)
                    logger.logger.warning(errf.read().decode('utf-8'))
                    continue
                errf.flush()
                errf.seek(0)
                plink_stderr = errf.read().decode('utf-8')
            if 'Warning: No significant --clump results' in plink_stderr:
                # PLINK wrote no new `plink.clumped` for this chromosome.
                logger.logger.info(
                    'No significant SNPs in chromosome {0}'.format(str(i)))
                continue
            with open('plink.clumped', 'r') as reader:
                next(reader, None)  # skip the header line
                # The third whitespace-separated field holds the SNP id.
                snps = [
                    line.strip().split()[2] for line in reader
                    if not line.isspace()
                ]
            independent.extend(snps)
        logger.logger.info("{0} independent SNPs after clumping".format(
            len(independent)))
        # Remove every intermediate file on its own: a missing file must not
        # prevent the remaining ones from being cleaned up.
        for leftover in ('plink.clumped', 'plinkreport', 'plink.log',
                         'plink.nosex'):
            try:
                os.remove(leftover)
            except OSError:
                pass
    elif opts.clump and opts.origin == 'online':
        # TODO: implement online clumping.
        logger.logger.info('Not developed now.\nExiting...')
        # Same effect as exit(1) without depending on the `site` builtin.
        raise SystemExit(1)
    else:
        independent = list(expdf['RS'])
    pd.Series(independent).to_csv('include',
                                  header=False,
                                  sep='\t',
                                  na_rep='NA',
                                  float_format='%g',
                                  encoding='utf-8',
                                  index=False)
    return independent