Пример #1
0
def loadK31(reg, filepath, fromHIV=False):
    '''
    Load allele frequency data for the 31 additional patients.

    Input arguments:
    reg: name of genetic region (gag or pol)
    filepath: prefix prepended verbatim to every file name that is read or
        written; when it names a directory it must therefore end with a path
        separator
    fromHIV: download raw data and store them, if True; use stored data, if False

    Returns:
    dict mapping '<patient code>_<sample code>' to a tuple (TI, af), where TI
    is the number of days between the patient's "infect date best" and the
    sample date, and af is the (masked) allele frequency array of that sample.

    Files used (all prefixed with filepath):
    K31_info_<reg>.txt: one tab-separated line "<patient> <sample> <TI>" per
        stored sample
    <patient>_<sample>_<reg>_data.npy / ..._mask.npy: values and mask of af
    '''
    data = {}
    info_path = filepath + 'K31_info_{}.txt'.format(reg)
    if fromHIV:
        # The hivwholeseq package is only reachable through this hard-coded
        # location; avoid growing sys.path on repeated calls.
        hiv_path = "/scicore/home/neher/neher/HIV/hivwholeseq"
        if hiv_path not in sys.path:
            sys.path.append(hiv_path)
        from hivwholeseq.patients.patients import load_patients, Patient
        pats = load_patients(csv=True)
        fmt = "%d/%m/%Y"
        # The context manager guarantees the index file is flushed and closed
        # even if iteration is interrupted.
        with open(info_path, 'w') as fhandle:
            for pcode, pat in pats.iterrows():
                try:
                    EDI = datetime.strptime(pat["infect date best"], fmt)
                    P = Patient(pat)
                    aft = P.get_allele_frequency_trajectories(
                        reg, cov_min=500)[0]
                    for si, (scode, sample) in enumerate(P.samples.iterrows()):
                        try:
                            date = datetime.strptime(sample["date"], fmt)
                            af = aft[si]
                            TI = date.toordinal() - EDI.toordinal()
                            sample_prefix = filepath + '{}_{}_{}'.format(
                                pcode, scode, reg)
                            np.save(sample_prefix + '_data.npy', af.data)
                            np.save(sample_prefix + '_mask.npy', af.mask)
                            # Index the sample only after both arrays are on
                            # disk, so the info file never references files
                            # that were not written.
                            fhandle.write(
                                '{}\t{}\t{}\n'.format(pcode, scode, TI))
                            data['{}_{}'.format(pcode, scode)] = (TI, af)
                            print(pcode, scode, "WORKED!!!")
                        # Best effort per sample, but let KeyboardInterrupt
                        # and SystemExit propagate.
                        except Exception:
                            print(scode, "didn't work")

                # Best effort per patient, same reasoning as above.
                except Exception:
                    print("skipping patient ", pcode)
    else:
        with open(info_path, 'r') as fhandle:
            for line in fhandle:
                words = line.split()
                if not words:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                pat_name = '_'.join(words[:2])
                af_data = np.load(filepath +
                                  '{}_{}_data.npy'.format(pat_name, reg))
                af_mask = np.load(filepath +
                                  '{}_{}_mask.npy'.format(pat_name, reg))
                af = np.ma.masked_array(af_data, mask=af_mask)
                data[pat_name] = (int(words[2]), af)
    return data
                        help='Save alignment to file')

    args = parser.parse_args()
    pnames = args.patients
    width = args.width
    gap = args.gap
    start = args.start
    end = args.end
    VERBOSE = args.verbose
    freqmin = args.freqmin
    countmin = args.countmin
    submit = args.submit
    use_plot = args.plot
    use_save = args.save

    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]

    data = []
    for pname, patient in patients.iterrows():
        if VERBOSE >= 1:
            print patient.code, start, end

        if submit:
            fork_self(patient.code, width, gap, start, end, VERBOSE=VERBOSE,
                      freqmin=freqmin, countmin=countmin)
            continue

        patient = Patient(patient)
        ref = patient.get_reference('genomewide')
Пример #3
0
    parser.add_argument('--patients', nargs='+',
                        help='Patients to analyze')
    parser.add_argument('--regions', nargs='+', required=True,
                        help='Regions to analyze (e.g. F1 V3)')
    parser.add_argument('--verbose', type=int, default=0,
                        help='Verbosity level [0-4]')
    parser.add_argument('--plot', nargs='?', default=None, const='2D',
                        help='Plot the allele frequency trajectories')

    args = parser.parse_args()
    pnames = args.patients
    regions = args.regions
    VERBOSE = args.verbose
    plot = args.plot

    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]
    pnames = patients.index.tolist()

    data = []

    for pname, patient in patients.iterrows():
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        for ifr, region in enumerate(regions):
            if VERBOSE >= 1:
                print pname, region

            try: