Example #1
def gather_fit_data(fit_count, maxfev):
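    """Return the table of fit results for the given fit_count and maxfev.

    The table is read from a cached file when available; otherwise the fits
    are computed in a 4-process pool (one gather_fit_data_inner call per
    (label, interaction, num_filled, freq, method) group), the number of
    failed ("fixedab") fits is reported, and the result is cached to disk.
    """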
    fn_format = "fits.fit_count={fit_count}_maxfev={maxfev}.txt"
    try:
        return utils.load_table(fn_format.format(**locals()))
    except OSError:
        pass

    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    results = []
    with multiprocessing.Pool(4) as p:
        results = p.map(
            functools.partial(gather_fit_data_inner,
                              fit_count=fit_count,
                              maxfev=maxfev),
            tuple(
                d.groupby(
                    ["label", "interaction", "num_filled", "freq", "method"])))
    d = pd.DataFrame.from_records(itertools.chain(*results))
    print("{} fits failed, out of {}".format(
        (d["fit_method"] == "fixedab").sum(), len(d)))
    # fit_count=5:
    #  maxfev=default: 198 fits failed, out of 2247
    #  maxfev=10k: 40 fits failed, out of 2248
    #  maxfev=100k: 0 fits failed

    cols = """
    interaction
    label
    freq
    num_filled
    method
    best_chisq
    best_coefficient
    best_coefficient_err
    best_constant
    best_constant_err
    best_fixedab_constant_err
    best_exponent
    best_exponent_err
    best_fit_method
    best_fit_stop
    chisq
    coefficient
    coefficient_err
    constant
    constant_err
    fixedab_constant_err
    exponent
    exponent_err
    fit_method
    fit_stop
    rel_discrep
    rel_discrep_err
    rel_dist
    rel_dist_err
    """.split()
    assert len(d.columns) == len(cols)
    utils.save_table(fn_format.format(**locals()), d[cols])
    return d
Example #2
def plot_table(path_dict, save_path):
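    """Build one train table and one test table per activation.

    For each run directory in path_dict[act], read results.json, skip
    combinators excluded by plot_it, collect train/test row values (plus an
    extra hr=0.0 row when 'test_acc_hr_0.0' is present), and save the tables
    via utils.create_table / utils.save_table.
    """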
    pd.set_option('display.precision', 4)
    pd.set_option('display.width', 40)
    pd.set_option('display.float_format', '{:,.3f}'.format)

    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        for path in path_dict[act]:
            with open(path + '/results.json', 'r') as f:
                results = json.load(f)
                if (plot_it is not None) and (results['combinator']
                                              not in plot_it):
                    continue
                if i == 0:
                    col_labels = utils.fill_col_labels(results)
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act)
                values_train.append(temp_train)
                values_test.append(temp_test)
                if 'test_acc_hr_0.0' in results:
                    temp_train, temp_test = utils.fill_row_values(results,
                                                                  path,
                                                                  act,
                                                                  hr=0.0)
                    values_train.append(temp_train)
                    values_test.append(temp_test)

        # create table
        table_train = utils.create_table(values_train, col_labels, act,
                                         'train')
        table_test = utils.create_table(values_test, col_labels, act, 'test')
        # save table
        utils.save_table(table_train, table_test, save_path, act)
Example #3
def gather_fit_data(fit_count, maxfev):
    fn_format = "fits.fit_count={fit_count}_maxfev={maxfev}.txt"
    try:
        return utils.load_table(fn_format.format(**locals()))
    except OSError:
        pass

    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    results = []
    with multiprocessing.Pool(4) as p:
        results = p.map(
            functools.partial(gather_fit_data_inner,
                              fit_count=fit_count,
                              maxfev=maxfev),
            tuple(d.groupby(["label", "interaction", "num_filled",
                             "freq", "method"])))
    d = pd.DataFrame.from_records(itertools.chain(*results))
    print("{} fits failed, out of {}"
          .format((d["fit_method"] == "fixedab").sum(), len(d)))
    # fit_count=5:
    #  maxfev=default: 198 fits failed, out of 2247
    #  maxfev=10k: 40 fits failed, out of 2248
    #  maxfev=100k: 0 fits failed

    cols = """
    interaction
    label
    freq
    num_filled
    method
    best_chisq
    best_coefficient
    best_coefficient_err
    best_constant
    best_constant_err
    best_fixedab_constant_err
    best_exponent
    best_exponent_err
    best_fit_method
    best_fit_stop
    chisq
    coefficient
    coefficient_err
    constant
    constant_err
    fixedab_constant_err
    exponent
    exponent_err
    fit_method
    fit_stop
    rel_discrep
    rel_discrep_err
    rel_dist
    rel_dist_err
    """.split()
    assert len(d.columns) == len(cols)
    utils.save_table(fn_format.format(**locals()), d[cols])
    return d
Example #4
def plot_table_attention(path_dict, save_path):
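    """Build attention-only train/test tables per activation.

    Only activations in COMBINED_ACT are considered, and only runs whose
    combinator is in both ATT_LIST and plot_it; extra hr=0.0 rows are added
    when available, and the tables are saved under save_path + 'ATT_'.
    """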
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        if act not in COMBINED_ACT:
            continue
        for path in path_dict[act]:
            # print(act)
            with open(f'{path}/results.json', 'r') as f:
                results = json.load(f)
            if (results['combinator'] not in ATT_LIST
                    or results['combinator'] not in plot_it):
                continue
            if i == 0:
                col_labels = utils.fill_col_labels(results, att=1)
            temp_train, temp_test = utils.fill_row_values(results,
                                                          path,
                                                          act,
                                                          att=1)
            values_train.append(temp_train)
            values_test.append(temp_test)
            if 'test_acc_hr_0.0' in results:
                temp_train, temp_test = utils.fill_row_values(results,
                                                              path,
                                                              act,
                                                              att=1,
                                                              hr=0.0)
                values_train.append(temp_train)
                values_test.append(temp_test)

        # create table
        table_train = utils.create_table(values_train,
                                         col_labels,
                                         act,
                                         'train',
                                         att=1)
        table_test = utils.create_table(values_test,
                                        col_labels,
                                        act,
                                        'test',
                                        att=1)
        # save table
        utils.save_table(table_train, table_test, save_path + 'ATT_', act)
Example #5
def load_full_fit_data(fit_count=DEFAULT_FIT_COUNT, maxfev=DEFAULT_MAXFEV):
    '''Load fit data from file if available.  Otherwise calculate the fits.'''
    fn = "fit_data.fit_count={fit_count}_maxfev={maxfev}.txt".format(
        **locals())
    try:
        return utils.load_table(fn)
    except OSError:
        pass

    sys.stderr.write("Fit data has not yet been calculated.  "
                     "This may take a few minutes...\n")
    sys.stderr.flush()
    d = utils.filter_preferred_ml(utils.load_all())
    d = d[~d["method"].isin(["imsrg[f]+eom[n]"])]
    with multiprocessing.Pool(4) as p:
        results_s, missing_num_shells = zip(*p.map(
            functools.partial(
                gather_fit_data, fit_count=fit_count, maxfev=maxfev),
            tuple(
                d.groupby([
                    "label", "interaction", "num_filled", "freq", "method"
                ]))))
    results = itertools.chain(*results_s)

    missing_fn = ("fits_missing_points."
                  "fit_count={fit_count}_maxfev={maxfev}.log".format(
                      **locals()))
    utils.save_table(missing_fn,
                     pd.DataFrame.from_records(missing_num_shells))
    sys.stderr.write("Missing data points logged to: {}\n".format(missing_fn))
    sys.stderr.flush()

    d = pd.DataFrame.from_records(results)
    num_failed = (d["fit_method"] == "fixedab").sum()
    if num_failed:
        sys.stderr.write("{} out of {} fits failed\n".format(
            num_failed, len(d)))
        sys.stderr.flush()

    utils.save_table(fn, d)
    return d
Example #6
def plot_table_max(path_dict, save_path, limit):
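    """Build a single pair of 'best' train/test tables across all activations.

    Rows are collected from both results.json and results_hr.json (when
    readable) and kept only if the test values at index 8 reach `limit`.
    """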
    res_json = ['results.json', 'results_hr.json']
    row_labels, values_train, values_test = [], [], []
    for i, act in enumerate(path_dict.keys()):
        for path in path_dict[act]:
            for res in res_json:
                try:
                    with open(f'{path}/{res}', 'r') as f:
                        results = json.load(f)
                        # att = 2 if res == 'results_hr.json' else 0
                except Exception:  # results file missing or unreadable; skip this run
                    continue
                if i == 0:
                    col_labels = utils.fill_col_labels(results,
                                                       max_=True,
                                                       att=2)
                temp_train, temp_test = utils.fill_row_values(results,
                                                              path,
                                                              act,
                                                              max_=True,
                                                              att=2)
                if not np.any(temp_test[8] >= limit):
                    continue
                values_train.append(temp_train)
                values_test.append(temp_test)

    # create table
    table_train = utils.create_table(values_train,
                                     col_labels,
                                     '',
                                     'train',
                                     max_=True)
    table_test = utils.create_table(values_test,
                                    col_labels,
                                    '',
                                    'test',
                                    max_=True)

    # save table
    utils.save_table(table_train, table_test, save_path, 'best')
Example #7
def plot_table_attention(path_dict, save_path):
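    """Build attention-only train/test tables per activation in COMBINED_ACT.

    Rows are collected from both results.json and results_hr.json (when
    readable), restricted to combinators in ATT_LIST, and the tables are
    saved under save_path + 'ATT_'.
    """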
    res_json = ['results.json', 'results_hr.json']
    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        if act not in COMBINED_ACT:
            continue
        for path in path_dict[act]:
            # print(act)
            for res in res_json:
                try:
                    with open(f'{path}/{res}', 'r') as f:
                        results = json.load(f)
                        # att = 1 if res == 'results_hr.json' else 0
                except Exception:  # results file missing or unreadable; skip this run
                    continue
                if results['combinator'] not in ATT_LIST:
                    continue
                if i == 0:
                    col_labels = utils.fill_col_labels(results, att=1)
                temp_train, temp_test = utils.fill_row_values(results,
                                                              path,
                                                              act,
                                                              att=1)
                values_train.append(temp_train)
                values_test.append(temp_test)

        # create table
        table_train = utils.create_table(values_train,
                                         col_labels,
                                         act,
                                         'train',
                                         att=1)
        table_test = utils.create_table(values_test,
                                        col_labels,
                                        act,
                                        'test',
                                        att=1)
        # save table
        utils.save_table(table_train, table_test, save_path + 'ATT_', act)
Example #8
def plot_table(path_dict, save_path):
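    """Build one train table and one test table per activation from each
    run's results.json, with no combinator filtering and no extra hr rows."""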
    pd.set_option('display.precision', 4)
    pd.set_option('display.width', 40)

    for i, act in enumerate(path_dict.keys()):
        row_labels, values_train, values_test = [], [], []
        for path in path_dict[act]:
            with open(path + '/results.json', 'r') as f:
                results = json.load(f)
                if i == 0:
                    col_labels = utils.fill_col_labels(results)
                temp_train, temp_test = utils.fill_row_values(
                    results, path, act)
                values_train.append(temp_train)
                values_test.append(temp_test)

        # create table
        table_train = utils.create_table(values_train, col_labels, act,
                                         'train')
        table_test = utils.create_table(values_test, col_labels, act, 'test')
        # save table
        utils.save_table(table_train, table_test, save_path, act)
Example #9
def plot_table_max(path_dict, save_path, limit):
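    """Build a single pair of 'best' train/test tables across all activations.

    Rows come from results.json only, combinators are filtered by plot_it,
    and a row is kept only if the test values at index 12 reach `limit`.
    """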
    row_labels, values_train, values_test = [], [], []
    for i, act in enumerate(path_dict.keys()):
        for path in path_dict[act]:
            with open(f'{path}/results.json', 'r') as f:
                results = json.load(f)
            if (plot_it is not None) and (results['combinator']
                                          not in plot_it):
                continue
            if i == 0:
                col_labels = utils.fill_col_labels(results, max_=True, att=2)
            temp_train, temp_test = utils.fill_row_values(results,
                                                          path,
                                                          act,
                                                          max_=True,
                                                          att=2)
            # print(temp_test[9])
            if not np.any(temp_test[12] >= limit):
                continue
            values_train.append(temp_train)
            values_test.append(temp_test)

    # create table
    table_train = utils.create_table(values_train,
                                     col_labels,
                                     '',
                                     'train',
                                     max_=True)
    table_test = utils.create_table(values_test,
                                    col_labels,
                                    '',
                                    'test',
                                    max_=True)

    # save table
    utils.save_table(table_train, table_test, save_path, 'best')
Example #10
#!/usr/bin/env python3
import os, re, sys
sys.path.insert(1, os.path.join(os.path.dirname(__file__), ".."))
import utils

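# Deduplicate and sort the table that shares this script's base name,
# then rewrite it with a header documenting its functional dependency.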
fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt"
d = utils.load_table(fn)
d = utils.check_fun_dep(
    d, ["interaction", "num_shells", "num_filled", "freq", "method"],
    {"energy": 2e-5},
    combiner=utils.rightmost_combiner)
d = d.sort_values(
    ["interaction", "num_shells", "num_filled", "freq", "method", "energy"])
with open(fn, "w") as f:
    f.write("""
# Functional dependencies:
#
#   * (interaction, num_shells, num_filled, freq, method) -> energy
#
"""[1:])
    utils.save_table(f, d)
#!/usr/bin/env python3
import os, re, sys
sys.path.insert(1, os.path.join(os.path.dirname(__file__), ".."))
import utils

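# Same postprocessing for the QDPT correction table: canonicalize p,
# deduplicate, sort, and rewrite with a header.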
fn = re.match(r"(.*)-postprocess\.py", __file__).group(1) + ".txt"
d = utils.load_table(fn)
# canonicalization can introduce duplicates, in addition to whatever
# duplicates that already exist in the file
d["p"] = d["p"].map(utils.canonicalize_p)
d = utils.check_fun_dep(d,
                        ["interaction", "num_shells", "num_filled", "freq",
                         "method", "p", "term_id"],
                        {"correction": 1e-7},
                        combiner=utils.rightmost_combiner)
d = d.sort_values(["interaction", "num_shells", "num_filled", "freq",
                   "method", "p", "term_id", "correction"])
with open(fn, "w") as f:
    f.write("""
# term_ids 3 and 4: QDPT2
# term_ids 5 to 22: QDPT3
#
# Functional dependencies:
#
#   * (num_shells, num_filled, freq, method, p, term_id) -> correction
#
"""[1:])
    utils.save_table(f, d)
def calculate(start, end, name):
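    """Write frequency and co-occurrence tables for one time window.

    Selects sentences, verb-noun pairs, adjectives, and referred items
    between `start` and `end`, then saves individual frequencies, user
    link networks, pairwise frequencies, and user-vs-term frequencies
    as CSV files named after start.year and `name`.
    """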
    sentences, vnps, adjectives, items = select_data(start, end)
    
    # individual freq
    
    filename = 'items_%d_%s.csv' % (start.year, name)
    print(filename)
    items_freq = calculate_frequency(sentences, items, 'referred_items')
    save_table(items_freq, open(filename, 'w'))
    
    filename = 'vnps_%d_%s.csv' % (start.year, name)
    print(filename)
    vnps_freq = calculate_frequency(sentences, vnps, 'verb_noun_pair')
    save_table(vnps_freq, open(filename, 'w'))
    
    filename = 'adjectives_%d_%s.csv' % (start.year, name)
    print(filename)
    adjs_freq = calculate_frequency(sentences, adjectives, 'adjectives')
    save_table(adjs_freq, open(filename, 'w'))
    

    # user networks
    
    filename = 'vnp_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_links = count_user_links(sentences, vnps, 'verb_noun_pair')
    save_table(vnp_links, open(filename, 'w'))

    filename = 'adj_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_links = count_user_links(sentences, adjectives, 'adjectives')
    save_table(adj_links, open(filename, 'w'))

    filename = 'itm_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    itm_links = count_user_links(sentences, items, 'referred_items')
    save_table(itm_links, open(filename, 'w'))

    
    # multi-frequencies

    filename = 'item_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_vnp = calculate_dual_frequency(sentences, items, 'referred_items', vnps, 'verb_noun_pair', diary=True)
    save_table(item_vs_vnp, open(filename, 'w'))
    
    filename = 'item_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_adj = calculate_dual_frequency(sentences, items, 'referred_items', adjectives, 'adjectives', diary=True)
    save_table(item_vs_adj, open(filename, 'w'))
    
    filename = 'vnp_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_vs_vnp = calculate_dual_frequency(sentences, vnps, 'verb_noun_pair', vnps, 'verb_noun_pair', diary=True)
    save_table(vnp_vs_vnp, open(filename, 'w'))
    
    filename = 'adj_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_vs_adj = calculate_dual_frequency(sentences, adjectives, 'adjectives', adjectives, 'adjectives', diary=True)
    save_table(adj_vs_adj, open(filename, 'w'))
    
    filename = 'user_vs_item_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_item = calculate_frequency_user(sentences, items, 'referred_items')
    save_table(user_vs_item, open(filename, 'w'))
    
    filename = 'user_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_vnp = calculate_frequency_user(sentences, vnps, 'verb_noun_pair')
    save_table(user_vs_vnp, open(filename, 'w'))
    
    return