Example #1
    def test_system_german_2(self):
        test_accs = list()
        no_gen_test_accs = list()
        size = 60
        for seed in range(25):
            fname = f'../data/german/quant/train{size}_{seed}.txt'
            test_path = fname.replace(f'train{size}', 'test')
            pairs, feature_space = load_pairs(fname, 'german')
            tp = ATP(apply_phonology=False, feature_space=feature_space)
            tp.train(pairs)

            # compute accuracies
            train_acc = self._evaluate('german', fname, tp, no_feats=False)
            assert (train_acc == 1.0)
            test_acc = self._evaluate('german', test_path, tp, no_feats=False)
            test_accs.append(test_acc)
            no_gen_test_acc = self._evaluate('german',
                                             test_path,
                                             tp,
                                             no_feats=True)
            no_gen_test_accs.append(no_gen_test_acc)
        if not 0.54 < np.mean(test_accs) < 0.57:
            print(0.54, np.mean(test_accs), 0.57)
        assert (0.54 < np.mean(test_accs) < 0.57)
        assert (0.54 < np.mean(no_gen_test_accs) < 0.57)
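
The `_evaluate` helper called above is not part of this snippet. The sketch below shows one plausible shape for it, reusing `load_pairs`; the attribute names (`pair.lemma`, `pair.form`, `pair.features`) and the `tp.predict` method are assumptions made for illustration, not the project's confirmed API.

    # Hypothetical sketch of the _evaluate helper used in these tests; the pair
    # attributes and tp.predict are assumed names, not the project's confirmed API.
    def _evaluate(self, language, path, tp, no_feats=False):
        pairs, _ = load_pairs(path, language)
        correct = 0
        for pair in pairs:
            features = None if no_feats else pair.features
            if tp.predict(pair.lemma, features) == pair.form:  # assumed method
                correct += 1
        return correct / len(pairs)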
Example #2
    def test_system_german_3(self):
        num_seeds = 100
        train_sizes = [50, 100, 200, 300, 400]

        en = [0] * len(train_sizes)
        n = [0] * len(train_sizes)
        e = [0] * len(train_sizes)
        null = [0] * len(train_sizes)
        er = [0] * len(train_sizes)
        s = [0] * len(train_sizes)

        # sweep num_seeds seeds for every training size
        for i, size in enumerate(train_sizes):
            for seed in range(num_seeds):
                fname = f'../data/german/growth/train{size}_{seed}.txt'
                pairs, feature_space = load_pairs(fname, 'german')
                tp = ATP(apply_phonology=False, feature_space=feature_space)
                tp.train(pairs)

                suffixes = set()
                for leaf in tp.get_leaves():
                    if leaf.switch_statement.productive:
                        suffix = leaf.switch_statement.default_case.name.split(
                            'lemma')[-1].replace(' + ', '')

                        suffixes.add(suffix)
                for suffix in suffixes:
                    if suffix == 'en':
                        en[i] += 1
                    if suffix == 'e':
                        e[i] += 1
                    if suffix == 'n':
                        n[i] += 1
                    if suffix == '':
                        null[i] += 1
                    if suffix == 'er':
                        er[i] += 1
                    if suffix == 's':
                        s[i] += 1

        assert (n == [100, 100, 100, 100, 100])
        assert (en == [0, 0, 100, 100, 100])
        assert (e[1:] == [100, 100, 100, 100])
        assert (25 <= e[0] <= 27)
        assert (null[1:] == [100, 100, 100, 100])
        assert (74 <= null[0] <= 76)
        assert (s[:-2] == [0, 0, 0])
        assert (0 <= s[-2] <= 2)
        assert (14 <= s[-1] <= 16)
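
The six parallel count lists above could equally be kept as one `collections.Counter` per training size; the sketch below shows that bookkeeping variant (same counting logic, with the assertions reading a single key per suffix).

# Alternative bookkeeping sketch: one Counter per training size, updated with
# the per-run suffix set collected in the loop above.
from collections import Counter

train_sizes = [50, 100, 200, 300, 400]
counts = [Counter() for _ in train_sizes]

def tally(i, suffixes):
    """Call inside the seed loop once `suffixes` is built for size index i."""
    counts[i].update(suffixes)

# e.g. the first assertion becomes:
#     assert [c['n'] for c in counts] == [100] * len(train_sizes)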
Example #3
    def test_system_english_1(self):
        test_accs = list()
        size = 1000
        for seed in range(10):
            fname = f'../data/english/quant/unimorph_celex0_train{size}_{seed}.txt'
            test_path = fname.replace(fname.split('_')[-2], 'test')
            pairs, feature_space = load_pairs(fname, 'english-quant')
            tp = ATP(apply_phonology=False, feature_space=feature_space)
            tp.train(pairs)

            # compute accuracies
            train_acc = self._evaluate('english', fname, tp, no_feats=False)
            assert (0.999 <= train_acc <= 1.0)
            test_acc = self._evaluate('english', test_path, tp, no_feats=False)
            test_accs.append(test_acc)

        assert (0.9 < np.mean(test_accs) < 0.92)
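
When a band assertion like the one above fails, the spread of the per-seed accuracies is as informative as the mean. A small optional reporting sketch, using only `numpy` and mirroring the print-on-failure pattern of Example #1:

# Optional diagnostic before a band assertion: report mean and spread of the
# per-seed test accuracies so an out-of-band failure is easier to interpret.
import numpy as np

def report_band(test_accs, lo=0.9, hi=0.92):
    mean, std = np.mean(test_accs), np.std(test_accs)
    if not lo < mean < hi:
        print(f'mean test accuracy {mean:.4f} (std {std:.4f}) outside ({lo}, {hi})')
    return mean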
Example #4
def run_feature_processor():
    processes = list()
    histone_df_dict = utils.load_histons(
        transcript_root_path='../data/IMR90/transcription factor/',
        histone_modification_root_path=None)
    chrom_vec = utils.load_chrom()
    histone_name_vec = list(histone_df_dict.keys())
    #histone_name_vec = histone_name_vec[:35]
    #print(histone_name_vec)
    samples = utils.load_pairs(
        hic_ep_file='../EP-interaction/data/IMR90/midfile/hic_ep.csv')
    samples = samples[samples['label'] == 0]
    samples['id'] = samples.index
    print(samples)
    feature_save_path = '../EP-interaction/data/IMR90/muti-midfile/'

    avg_process_histone = len(histone_name_vec) // CPU_CORE_NUMS

    init_process_histone_idx = 0
    for i in range(CPU_CORE_NUMS):
        # The last worker takes all remaining histones, so nothing is dropped
        # when len(histone_name_vec) is not divisible by CPU_CORE_NUMS.
        if i == CPU_CORE_NUMS - 1:
            end_idx = len(histone_name_vec)
        else:
            end_idx = init_process_histone_idx + avg_process_histone
        print('Process will start, histone_df slice [{}:{}]'.format(
            init_process_histone_idx, end_idx))
        p = Process(target=feature_processor,
                    args=(
                        chrom_vec,
                        samples,
                        histone_df_dict,
                        histone_name_vec[init_process_histone_idx:end_idx],
                        feature_save_path,
                    ))
        init_process_histone_idx = end_idx
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
    print('Process end.')
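
Because `run_feature_processor` launches `multiprocessing.Process` workers, it should be called from under a `__main__` guard: with the spawn start method (the default on Windows and recent macOS) the child re-imports the module, and an unguarded call would recurse into process creation. A minimal entry point:

# Guarded entry point: required for multiprocessing under the spawn start method,
# harmless elsewhere.
if __name__ == '__main__':
    run_feature_processor()
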
Example #5
if not os.path.exists(snapshot_dis_dir):
    os.makedirs(snapshot_dis_dir)
    
if not os.path.exists(snapshot_det_dir):
    os.makedirs(snapshot_det_dir)
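
# Note (editorial sketch): on Python 3.2+ the two check-then-create blocks above
# can be collapsed into single, race-free calls:
#     os.makedirs(snapshot_dis_dir, exist_ok=True)
#     os.makedirs(snapshot_det_dir, exist_ok=True)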

cudnn.enabled = False

class ArgsLocal:
    pass
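# ArgsLocal is a bare namespace; the args_ fields below carry this run's
# configuration (data pair list, batch size, lambda weights, snapshot paths).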

args_ = ArgsLocal()

args_.epoch = 1
args_.pair_list = utils.load_pairs(data_path, subpath_list)
args_.epoch_len = len(args_.pair_list)
args_.batch_size = 24
args_.gpu = 0
args_.input_scale = 256

args_.lambda_loc = 1
args_.lambda_det = 0.01
args_.lambda_dis = 0.01

args_.loc_update_stride = 1
args_.snapshot_prefix_loc = snapshot_loc_dir + 'DMAC_loc_'
args_.snapshot_prefix_dis = snapshot_dis_dir + 'DMAC_dis_'
args_.snapshot_prefix_det = snapshot_det_dir + 'DMAC_det_'
args_.snapshot_stride = 1000
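
Only the configuration object is assembled in this excerpt. As a rough illustration of how these fields fit together, a hypothetical loop skeleton is sketched below; the batching, the training step, and the snapshot call are placeholders, not part of the original code.

# Hypothetical skeleton driven by the args_ fields above; the model step and the
# snapshot call are placeholders for illustration only.
iters_per_epoch = args_.epoch_len // args_.batch_size
for epoch in range(args_.epoch):
    for it in range(iters_per_epoch):
        batch = args_.pair_list[it * args_.batch_size:(it + 1) * args_.batch_size]
        # ... forward/backward pass and optimizer updates on `batch` go here ...
        step = epoch * iters_per_epoch + it
        if step > 0 and step % args_.snapshot_stride == 0:
            # e.g. save weights to f'{args_.snapshot_prefix_loc}{step}.pth'
            pass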