Example #1
import pandas as pd

# h is assumed to be the project's helper module providing the read_*/write_*
# functions used below; wp0 and mf are assumed to be defined at module level
# (reference wave paths and the merge folder name, respectively).


def make_nan(wavespaths, chunk, i):
    # Build "all missing" placeholder files for wave i of this chunk:
    # real sample IDs, but every genotype and info field set to "NA".
    chunkfam = h.read_fam(wp0.filepaths[i])
    chunkmap = h.read_map(wavespaths.filepaths[0])
    chunkngt = h.read_ngt(wavespaths.filepaths[0])
    chunkinfo = h.read_info(wavespaths.infopaths[0])
    chunkinfo['info'] = "NA"
    chunkinfo['freq'] = "NA"
    chunkinfo['a1'] = "NA"
    chunkinfo['a2'] = "NA"

    # Keep only the SNP/allele columns of the first wave's dosage file and
    # add one "NA" column per individual in the fam file.
    dosage = pd.read_csv(wavespaths.filepaths[0] + '.gz', sep=' ',
                         compression='gzip', usecols=['SNP', 'A1', 'A2'])
    for j in range(len(chunkfam.famID)):
        dosage[chunkfam.famID[j]] = pd.Series("NA", index=dosage.index)
        dosage[chunkfam.indID[j]] = pd.Series("NA", index=dosage.index)

    # Write the placeholder files under temp/<mf>/<chunk>_<wave index>.
    out = 'temp/' + mf + '/' + chunk + '_' + str(i)
    h.write_fam(out, chunkfam)
    h.write_map(out, chunkmap)
    h.write_ngt(out, chunkngt)
    h.write_info(out, chunkinfo)
    dosage.to_csv(out + '.gz', sep=' ', index=False, compression='gzip')
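
A hedged usage sketch (the driver loop and the existence check are assumptions, not part of the original snippet): make_nan would typically be called for every wave whose gzipped dosage file for the current chunk is missing, so the downstream merge sees a complete set of files.

import os

# Hypothetical driver: create NA placeholder files for any wave whose
# gzipped dosage file for this chunk does not exist.
for i in range(len(wavespaths.filepaths)):
    if not os.path.exists(wavespaths.filepaths[i] + '.gz'):
        make_nan(wavespaths, chunk, i)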
Example #2
import os
import subprocess

import pandas as pd

# h and s are assumed to be the project's helper module and settings object;
# wave_maps, wave_infos and wave_ngts are assumed to have been read from the
# per-wave files earlier in the script.

# Merge the per-wave map and info files for this chunk.
merged_map = merge_maps(wave_maps, s.chunk)
merged_info = merge_infos(wave_infos, merged_map)

# Concatenate the per-wave fam files into a single merged fam.
wave_fams = h.read_wave_fams(s.wavepaths)
merged_fam = pd.concat(wave_fams)

# Optionally restrict the merged fam to the individuals listed in s.indlist.
include_inds = h.read_include_inds(s.indlist)
if include_inds:
    merged_fam = merged_fam[merged_fam["indID"].isin(include_inds)]

# Build the SNP dictionary for the merged map and write the merged dosage file.
d = make_dictionary(merged_map, s)

write_dosage(d, s, include_inds, wave_fams)

# Write the merged fam/map/info/ngt files.
h.write_fam(s.mergepath, merged_fam)
h.write_map(s.mergepath, merged_map)

merged_ngts = merge_ngts(wave_ngts, merged_map)

h.write_info(s.mergepath, merged_info)
h.write_ngt(s.mergepath, merged_ngts)
h.write_ngt_matrix(s.mergepath, merged_ngts)

# Compress the merged dosage file in place, removing any stale .gz first
# so gzip does not prompt or fail.
if os.path.exists(s.mergepath + ".gz"):
    os.remove(s.mergepath + ".gz")

subprocess.call(["gzip", s.mergepath])
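
The final step shells out to the system gzip binary; on platforms without it, the same compression can be done in pure Python. A minimal sketch using only the standard library (same file names as above):

import gzip
import os
import shutil

# Compress s.mergepath to s.mergepath + '.gz' and drop the plain-text
# original, mirroring what `gzip s.mergepath` does.
with open(s.mergepath, 'rb') as src, gzip.open(s.mergepath + '.gz', 'wb') as dst:
    shutil.copyfileobj(src, dst)
os.remove(s.mergepath)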