Exemplo n.º 1
0
def get_template_numbers(patients, VERBOSE=0):
    '''Collect template numbers from all patient samples'''
    data = []
    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        if VERBOSE:
            print pname, patient.code

        samples = patient.samples
        n_approx = samples['templates approx']
        dils = [get_dilution(x) for x in samples['dilutions']]
        n_dils = [2 * estimate_ntemplates_Poisson(x) for x in dils]

        # Attach sample date info
        age = np.array((datetime.datetime.now() - samples.date)) / 86400e9

        data.append({
            'n_approx': n_approx,
            'n_dil': n_dils,
            'age': age,
            'pname': patient.code
        })

    return data
Exemplo n.º 2
0
def loadK31(reg, filepath, fromHIV=False):
    '''
    Loading data for 31 additional patients

    Input arguments:
    reg: name of genetic region (gag or pol)
    filepath: path to directory where the frequency data are to be
        stored/downloaded. It is concatenated directly with the file names,
        so a directory must end with a path separator.
    fromHIV: download raw data and store them, if True; use stored data, if False

    Returns:
    dict mapping '<patient code>_<sample code>' to a tuple (TI, af), where TI
    is the number of days between the "infect date best" of the patient and
    the sample date, and af is the allele frequency array of that sample
    (a masked array when read back from disk).

    Files used (all names are prefixed with filepath):
    K31_info_<reg>.txt: one tab-separated line "pcode scode TI" per sample
    <pcode>_<scode>_<reg>_data.npy / _mask.npy: data and mask of af

    Patients and samples that fail while being processed are reported on
    stdout and skipped (best effort).
    '''
    info_path = filepath + 'K31_info_{}.txt'.format(reg)
    data = {}
    if fromHIV:
        hiv_path = "/scicore/home/neher/neher/HIV/hivwholeseq"
        # Do not grow sys.path by one duplicate entry on every call
        if hiv_path not in sys.path:
            sys.path.append(hiv_path)
        from hivwholeseq.patients.patients import load_patients, Patient
        pats = load_patients(csv=True)
        fmt = "%d/%m/%Y"
        # The context manager closes the info file even if an exception that
        # is not handled below (e.g. KeyboardInterrupt) propagates.
        with open(info_path, 'w') as fhandle:
            for pcode, pat in pats.iterrows():
                try:
                    EDI = datetime.strptime(pat["infect date best"], fmt)
                    P = Patient(pat)
                    aft = P.get_allele_frequency_trajectories(
                        reg, cov_min=500)[0]
                    for si, (scode, sample) in enumerate(P.samples.iterrows()):
                        try:
                            date = datetime.strptime(sample["date"], fmt)
                            af = aft[si]
                            TI = date.toordinal() - EDI.toordinal()
                            stem = filepath + '{}_{}_{}'.format(
                                pcode, scode, reg)
                            # Save the arrays BEFORE listing the sample in
                            # the info file: every listed sample must have
                            # its .npy files, otherwise a later load with
                            # fromHIV=False fails on the missing file.
                            np.save(stem + '_data.npy', af.data)
                            np.save(stem + '_mask.npy', af.mask)
                            fhandle.write(
                                '{}\t{}\t{}\n'.format(pcode, scode, TI))
                            data['{}_{}'.format(pcode, scode)] = (TI, af)
                            print(pcode, scode, "WORKED!!!")
                        except Exception as err:
                            # Best effort: report the reason, try next sample
                            print(scode, "didn't work", repr(err))

                except Exception as err:
                    # Best effort: report the reason, try next patient
                    print("skipping patient ", pcode, repr(err))
    else:
        with open(info_path, 'r') as fhandle:
            for line in fhandle:
                words = line.split()
                if not words:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    continue
                pat_name = '_'.join(words[:2])
                stem = filepath + '{}_{}'.format(pat_name, reg)
                af_data = np.load(stem + '_data.npy')
                af_mask = np.load(stem + '_mask.npy')
                af = np.ma.masked_array(af_data, mask=af_mask)
                data[pat_name] = (int(words[2]), af)
    return data
Exemplo n.º 3
0
    args = parser.parse_args()
    pnames = args.patients
    regions = args.regions
    VERBOSE = args.verbose
    plot = args.plot

    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]
    pnames = patients.index.tolist()

    data = []

    for pname, patient in patients.iterrows():
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        for ifr, region in enumerate(regions):
            if VERBOSE >= 1:
                print pname, region

            try:
                dg, ind = patient.get_divergence(region, cov_min=10)
            except ValueError:
                continue
            times = patient.times[ind]

            data.append({'pname': pname, 'region': region, 'dg': dg, 't': times})

    if VERBOSE >= 1: