Exemplo n.º 1
0
def get_gse_gsm_dict(fn, gsm_raw_dict, gsm_dict):
    """Return a mapping from each series id to the samples that list it.

    The result is cached on disk: if ``{fn}.pkl`` exists it is loaded with
    ``load_table_dict_pkl`` and returned unchanged; otherwise the mapping is
    built from the arguments and stored with ``save_table_dict_pkl``.

    Args:
        fn: Cache path without the ``.pkl`` extension.
        gsm_raw_dict: Table dict. ``gsm_raw_dict['gsm']`` is the sequence of
            sample ids and ``gsm_raw_dict['series_id']`` is a sequence of
            comma-separated series-id strings.
        gsm_dict: Per-column lookup where ``gsm_dict['series_id'][gsm]`` is
            the comma-separated series-id string of sample ``gsm``.

    Returns:
        Dict mapping every series id to the list of sample ids that mention
        it, in the order the samples appear in ``gsm_raw_dict['gsm']``.
    """
    # Read before the cache check so a missing 'gsm' column fails either way.
    samples = gsm_raw_dict['gsm']
    cache_path = f'{fn}.pkl'
    if os.path.isfile(cache_path):
        return load_table_dict_pkl(cache_path)

    # Every series id mentioned anywhere in the raw table becomes a key.
    series_ids = set()
    for joined_ids in gsm_raw_dict["series_id"]:
        series_ids.update(joined_ids.split(','))
    series_to_samples = {series: [] for series in series_ids}

    # A sample that belongs to several series is appended to each of them.
    for sample in samples:
        for series in gsm_dict["series_id"][sample].split(','):
            series_to_samples[series].append(sample)

    save_table_dict_pkl(cache_path, series_to_samples)
    return series_to_samples
Exemplo n.º 2
0
def get_raw_dict(fn):
    """Load the raw table dict for ``fn``, preferring the pickle cache.

    Args:
        fn: Path without extension; ``{fn}.pkl`` is the cache and
            ``{fn}.xlsx`` is the source spreadsheet.

    Returns:
        The table dict returned by ``load_table_dict_pkl`` when the cache
        file exists, otherwise the one returned by ``load_table_dict_xlsx``
        (which is also written to the cache before returning).
    """
    pickle_path = f'{fn}.pkl'
    if os.path.isfile(pickle_path):
        return load_table_dict_pkl(pickle_path)

    # Cache miss: read the spreadsheet once and store it for the next call.
    table = load_table_dict_xlsx(f'{fn}.xlsx')
    save_table_dict_pkl(pickle_path, table)
    return table
Exemplo n.º 3
0
def get_gsm_dict(fn, gsm_raw_dict):
    """Re-index every column of the raw table by sample id.

    The result is cached on disk: if ``{fn}.pkl`` exists it is loaded with
    ``load_table_dict_pkl`` and returned unchanged; otherwise it is built
    and stored with ``save_table_dict_pkl``.

    Args:
        fn: Cache path without the ``.pkl`` extension.
        gsm_raw_dict: Table dict whose ``'gsm'`` entry is the sequence of
            sample ids; every entry is indexed by position in that sequence.

    Returns:
        Dict of dicts such that ``result[key][gsm]`` equals
        ``gsm_raw_dict[key][i]`` where ``i`` is the position of ``gsm`` in
        ``gsm_raw_dict['gsm']``.
    """
    # Read before the cache check so a missing 'gsm' column fails either way.
    sample_ids = gsm_raw_dict['gsm']
    cache_path = f'{fn}.pkl'
    if os.path.isfile(cache_path):
        return load_table_dict_pkl(cache_path)

    by_column = {}
    for column in tqdm(gsm_raw_dict, desc='gsm_dict processing'):
        values = gsm_raw_dict[column]
        # Positional indexing (not zip) so a short column still raises.
        by_column[column] = {
            sample: values[pos] for pos, sample in enumerate(sample_ids)
        }

    save_table_dict_pkl(cache_path, by_column)
    return by_column
Exemplo n.º 4
0
            passed_words.append(word)
    return passed_words


# Identifiers that select the data directory and the table file name below.
GPL = '21145'
suffix = '22_09_20'

# Keys into the GSM table dict (only gsm_key is used in this section).
gsm_key = 'gsm'
gse_key = 'series_id'
source_key = 'source_name_ch1'
characteristics_key = 'characteristics_ch1'

# Load the GSM table, preferring the pickle cache over the spreadsheet.
fn_xlsx = f'{get_data_path()}/GPL{GPL}/GPL{GPL}_gsm_table_{suffix}.xlsx'
fn_pkl = f'{get_data_path()}/GPL{GPL}/GPL{GPL}_gsm_table_{suffix}.pkl'
if os.path.isfile(fn_pkl):
    gsm_raw_dict = load_table_dict_pkl(fn_pkl)
else:
    gsm_raw_dict = load_table_dict_xlsx(fn_xlsx)
    # Cache the parsed spreadsheet so the next run takes the branch above.
    save_table_dict_pkl(fn_pkl, gsm_raw_dict)

gsms = gsm_raw_dict[gsm_key]

# One entry per line; a set drops duplicate lines.
# `with` closes the file even if read() raises.
fn = f'{get_data_path()}/GPL{GPL}/bad_words.txt'
with open(fn) as f:
    bad_words = set(f.read().splitlines())

fn = f'{get_data_path()}/GPL{GPL}/target_chs.txt'
with open(fn) as f:
    target_chs = set(f.read().splitlines())