示例#1
0
def generate_summary_best(model_name):
    """Write ``summary_best.txt`` using each prompt's best validation epoch.

    For every prompt, the epoch with the highest validation QWK is read from
    ``qwk_<prompt>_val.csv``. The test QWK of that epoch is then read from
    ``qwk_<prompt>_test.csv`` and the robustness figures from
    ``robustness_<prompt>_<epoch>.csv``. All files are read from, and the
    summary is written to, the directory returned by
    ``utils.mkpath('pred/<model_name>')``.

    Args:
        model_name: name used to build the ``pred/<model_name>`` directory and
            written as the first line of the summary.

    Raises:
        Exception: if a prompt's best validation epoch has no row in that
            prompt's test QWK file.
    """
    prompts = [1, 2, 3, 4, 5, 6, 7, 8]
    # number of essay in test set, indexed by prompt (index 0 is a placeholder)
    length = [-1, 179, 180, 173, 177, 181, 180, 157, 73]
    path = utils.mkpath('pred/{}'.format(model_name))

    # Indexed by prompt number, hence one slot more than there are prompts.
    best_ep = [-1] * len(length)
    with open(os.path.join(path, 'summary_best.txt'), 'w+') as f:
        f.write('{}\n\n'.format(model_name))
        f.write('QWK\n')
        f.write('epoch\tprompt\tqwk\n')
        qwk_avg = 0
        for p in prompts:
            val_df = pd.read_csv(os.path.join(path, 'qwk_{}_val.csv'.format(
                p)), header=None, names=['epoch', 'qwk'])
            # idxmax() returns an index *label*, so look the row up by label.
            best_ep[p] = int(val_df.loc[val_df['qwk'].idxmax(), 'epoch'])

            test_df = pd.read_csv(os.path.join(path, 'qwk_{}_test.csv'.format(
                p)), header=None, names=['epoch', 'qwk'])
            rows = test_df[test_df['epoch'] == best_ep[p]].values
            if len(rows) == 0:
                raise Exception(
                    'Error: epoch {} of prompt {} not found in test'.format(best_ep[p], p))
            # in case of multiple runs of same epoch, pick one with the best QWK
            qwk = rows[:, -1].max()

            f.write('{}\t{}\t{}\n'.format(best_ep[p], p, qwk))
            qwk_avg += qwk

        f.write('\nRobustness per prompt\n')
        r_avg = 0
        r_aug_avg = 0
        for p in prompts:
            robustness_df = pd.read_csv(os.path.join(
                path, 'robustness_{}_{}.csv'.format(p, best_ep[p])))
            diff = (robustness_df['worse_resolved'] -
                    robustness_df['better_resolved']).values
            # The last row of the robustness file is used as the prompt figure.
            r = diff[-1]
            f.write('{}\t{}\n'.format(p, r))
            r_avg += r

            # Every row except the last two is treated as one augmentation
            # and normalised by the prompt's test-set size.
            r_aug = diff[:-2] / length[p]
            r_aug_avg += r_aug

        f.write('\nRobustness per augment\n')
        r_aug_avg /= len(prompts)
        # Augment names are taken from the last prompt's file.
        for a, r in zip(robustness_df['augment'][:-2], r_aug_avg):
            f.write('{}\t{}\n'.format(a, r))

        f.write('\n')
        f.write('QWK Average:\t{}\n'.format(qwk_avg / len(prompts)))
        # NOTE: both lines below share the same label in the output file; the
        # first is the per-prompt mean, the second the per-augment mean.
        f.write('Robustness Average:\t{}\n'.format(r_avg / len(prompts)))
        f.write('Robustness Average:\t{}\n'.format(r_aug_avg.mean()))
    print('summary generated!')
示例#2
0
 def parse_tweet(self, tweet):
     """Store *tweet* and its retweets as JSON files and update crawl state.

     Sets ``self.max_id`` to the tweet id and sets ``self.finished`` once a
     tweet older than ``self.last_month`` is seen (the tweet is still saved).
     The tweet is written to ``<tweet dir>/<id>.json`` and each retweet
     returned by ``self.api.GetRetweets`` to ``<tweet dir>/retweets/<id>.json``.
     """
     self.max_id = tweet.id
     created_at = self.to_datetime(tweet.created_at)

     if created_at < self.last_month:
         self.finished = True

     path = mkpath(self.tweet_path, mkdate(tweet), tweet.id)
     mkdir(path)

     # Context managers guarantee each file is flushed and closed even if
     # serialisation or a later API call raises.
     with open(mkpath(path, tweet.id) + '.json', 'w') as f:
         f.write(tweet.AsJsonString())

     retweet_path = mkpath(path, 'retweets')
     mkdir(retweet_path)
     for retweet in self.api.GetRetweets(tweet.id):
         with open(mkpath(retweet_path, retweet.id) + '.json', 'w') as rt_f:
             rt_f.write(retweet.AsJsonString())
示例#3
0
 def reset(self):
   """Re-read the archive list and draw a new random index order.

   Populates ``self.cache``, ``self.adir`` and ``self.archives`` (every
   ``*.tgz`` directly under the archive directory), stores a random
   permutation of the archive indices in ``self.randList`` and resets
   ``self.randListIndex`` to 0.
   """
   tmp_dir = Specs().s['tmpdir']
   self.cache = mkpath("%s/archiveCache" % tmp_dir)
   self.adir = Specs().s["archiveDir"]
   Debug().p("archive dir %s" % self.adir)
   self.archives = []
   self.archives.extend(glob.glob(self.adir + "/*.tgz"))
   archive_count = len(self.archives)
   # Sampling all indices without replacement yields a random permutation.
   self.randList = random.sample(range(archive_count), archive_count)
   self.randListIndex = 0
示例#4
0
def generate_score(prompt, model_name, epoch, y_true, y_pred, aug_pred, test_df):
    """Assemble a per-essay score table, save it as TSV and return it.

    The table carries ``essay_id`` and ``essay_set`` from *test_df*, the true
    scores as ``domain1_score``, the predictions as ``test`` and one
    ``test_<key>`` column per entry of *aug_pred*. It is written to
    ``score_<prompt>_<epoch>.tsv`` inside the directory returned by
    ``utils.mkpath('pred/<model_name>')``.

    Returns:
        The assembled ``pandas.DataFrame``.
    """
    out_dir = utils.mkpath('pred/{}'.format(model_name))

    scores = pd.DataFrame()
    for column in ('essay_id', 'essay_set'):
        scores[column] = test_df[column]
    scores['domain1_score'] = y_true
    scores['test'] = y_pred
    for aug_name, aug_scores in aug_pred.items():
        scores['test_' + aug_name] = aug_scores

    out_file = os.path.join(out_dir, 'score_{}_{}.tsv'.format(prompt, epoch))
    scores.to_csv(out_file, sep='\t', index=False)
    return scores
示例#5
0
def generate_summary(model_name, epoch):
    """Write ``summary_<epoch>.txt`` with QWK and robustness for one epoch.

    For every prompt, the test QWK logged for *epoch* is read from
    ``qwk_<prompt>_test.csv`` and the robustness figures from
    ``robustness_<prompt>_<epoch>.csv``. All files are read from, and the
    summary is written to, the directory returned by
    ``utils.mkpath('pred/<model_name>')``.

    Args:
        model_name: name used to build the ``pred/<model_name>`` directory and
            written in the summary header.
        epoch: epoch whose results are summarised.

    Raises:
        IndexError: if *epoch* has no row in a prompt's test QWK file.
    """
    prompts = [1, 2, 3, 4, 5, 6, 7, 8]
    # number of essay in test set, indexed by prompt (index 0 is a placeholder)
    length = [-1, 179, 180, 173, 177, 181, 180, 157, 73]
    path = utils.mkpath('pred/{}'.format(model_name))

    with open(os.path.join(path, 'summary_{}.txt'.format(epoch)), 'w+') as f:
        f.write('{} epoch {}\n\n'.format(model_name, epoch))
        f.write('QWK\n')
        qwk_avg = 0
        for p in prompts:
            qwk_df = pd.read_csv(os.path.join(path, 'qwk_{}_test.csv'.format(
                p)), header=None, names=['epoch', 'qwk'])
            matches = qwk_df[qwk_df['epoch'] == epoch].values
            if len(matches) == 0:
                # Same exception type the bare indexing would raise, but with
                # a message that names the missing epoch and prompt.
                raise IndexError(
                    'Error: epoch {} of prompt {} not found in test'.format(epoch, p))
            # If the epoch was logged more than once, the last row is used.
            qwk = matches[-1, -1]
            f.write('{}\t{}\n'.format(p, qwk))
            qwk_avg += qwk

        f.write('\nRobustness per prompt\n')
        r_avg = 0
        r_aug_avg = 0
        for p in prompts:
            robustness_df = pd.read_csv(os.path.join(
                path, 'robustness_{}_{}.csv'.format(p, epoch)))
            diff = (robustness_df['worse_resolved'] -
                    robustness_df['better_resolved']).values
            # The last row of the robustness file is used as the prompt figure.
            r = diff[-1]
            f.write('{}\t{}\n'.format(p, r))
            r_avg += r

            # Every row except the last two is treated as one augmentation
            # and normalised by the prompt's test-set size.
            r_aug = diff[:-2] / length[p]
            r_aug_avg += r_aug

        f.write('\nRobustness per augment\n')
        r_aug_avg /= len(prompts)
        # Augment names are taken from the last prompt's file.
        for a, r in zip(robustness_df['augment'][:-2], r_aug_avg):
            f.write('{}\t{}\n'.format(a, r))

        f.write('\n')
        f.write('QWK Average:\t{}\n'.format(qwk_avg / len(prompts)))
        # NOTE: both lines below share the same label in the output file; the
        # first is the per-prompt mean, the second the per-augment mean.
        f.write('Robustness Average:\t{}\n'.format(r_avg / len(prompts)))
        f.write('Robustness Average:\t{}\n'.format(r_aug_avg.mean()))
    print('summary generated!')
示例#6
0
def generate_robustness(prompt, model_name, epoch, y_true, y_pred, aug_pred):
    """Write ``robustness_<prompt>_<epoch>.csv`` for one prompt and epoch.

    For each key of *aug_pred*, the four values returned by ``robustness``
    (called with the raw and integer-rescaled predictions) are written as one
    row. A ``sum`` row and an ``avg`` row follow, where the average divides
    each total by ``len(y_pred) * len(aug_pred)``. The file is created in the
    directory returned by ``utils.mkpath('pred/<model_name>')``.

    *y_true* is accepted but not used by this function.
    """
    out_dir = utils.mkpath('pred/{}'.format(model_name))

    base_int = rescale_to_int(y_pred, prompt)
    totals = [0, 0, 0, 0]
    N = len(y_pred) * len(aug_pred)
    print('N :', N)

    out_file = os.path.join(out_dir, 'robustness_{}_{}.csv'.format(prompt, epoch))
    with open(out_file, 'w+') as out:
        out.write('augment,worse_raw,better_raw,worse_resolved,better_resolved\n')
        for name in aug_pred:
            aug_int = rescale_to_int(aug_pred[name], prompt)

            wr, br, w, b = robustness(y_pred, aug_pred[name], base_int, aug_int)
            counts = (wr, br, w, b)
            for slot, value in enumerate(counts):
                totals[slot] += value
            out.write('{},{},{},{},{}\n'.format(name, *counts))
        out.write('sum,{},{},{},{}\n'.format(*totals))
        out.write('avg,{},{},{},{}\n'.format(*[total / N for total in totals]))
示例#7
0
def main(no_debug=True):
    """Scan the yearly solution folders under ``ROOT_PATH`` and tidy them up.

    For every existing ``<ROOT_PATH>/<year>`` directory (years 2000-2999) and
    every ``Day NN`` folder inside it, this:

    * records whether ``part1.py``, ``part2.py`` and ``README.md`` exist;
    * prints a warning for each line longer than 120 characters in ``.py``
      files;
    * rewrites the day README with the spaces inside inline code spans
      replaced by U+2007;
    * appends a trailing newline to ``.txt``, ``.py`` and ``.md`` files that
      lack one.

    Args:
        no_debug: when true, additionally call ``replace_tabs`` on the
            solution files, run ``readme_exec`` on each day README, and
            regenerate the per-year and global tables and pages.
    """
    print("Starting...")
    solved = {}

    for year in range(2000, 3000):
        year_path = mkpath(ROOT_PATH, year)
        if not isdir(year_path):
            continue

        #                part1  part2  README
        solved[year] = [[False, False, False] for _ in range(25)]

        # Handle days
        for day in range(0, 25):
            day_path = mkpath(year_path, "Day {:02d}".format(day + 1))
            if not isdir(day_path):
                continue

            files = [
                filename for filename in os.listdir(day_path)
                if isfile(mkpath(day_path, filename))
            ]
            solved[year][day] = [
                "part1.py" in files, "part2.py" in files, "README.md" in files
            ]

            # ------------------------------------------- Long line warning --------------------------------------------
            for filename in files:
                if os.path.splitext(filename)[1] == ".py":
                    with open(mkpath(day_path, filename),
                              'r',
                              encoding="utf-8") as file:
                        for line in file:
                            if len(line.strip()) > 120:
                                print(
                                    "Warning: long line detected in {}".format(
                                        mkpath(day_path, filename)))

            # -------------------------------- Solution files: Replace tabs with spaces --------------------------------
            if no_debug:
                if solved[year][day][0]:
                    replace_tabs(mkpath(day_path, "part1.py"))
                if solved[year][day][1]:
                    replace_tabs(mkpath(day_path, "part2.py"))

            # ----------------------------------------------- Day README -----------------------------------------------
            if "README.md" in files:
                readme_path = mkpath(day_path, "README.md")

                with open(readme_path, 'r', encoding="utf-8") as file:
                    readme = file.read()

                # Replace spaces inside markdown `code` spans with U+2007:
                readme = re.sub(r"`[^`\n\t\b\r]+`",
                                lambda m: m.group(0).replace(' ', chr(0x2007)),
                                readme)

                # Handle "<!-- Execute code: "smth" -->" blocks
                if no_debug:
                    readme = readme_exec(readme, day_path)

                with open(readme_path, 'w', encoding="utf-8") as file:
                    file.write(readme)

            # ------------------------------ Text files (.txt, .py, .md): trailing newline -----------------------------
            for filename in files:
                if os.path.splitext(filename)[1] in (".txt", ".py", ".md"):
                    filepath = mkpath(day_path, filename)

                    # Probe the last byte in binary mode: text-mode offsets
                    # are opaque, and decoding from the middle of a
                    # multi-byte UTF-8 character raises UnicodeDecodeError.
                    with open(filepath, 'rb') as file:
                        file.seek(0, os.SEEK_END)
                        if file.tell() == 0:
                            ends_with_newline = True
                        else:
                            file.seek(-1, os.SEEK_END)
                            # A lone '\r' also counts, as it did under
                            # text-mode newline translation.
                            ends_with_newline = file.read(1) in (b'\n', b'\r')

                    if not ends_with_newline:
                        with open(filepath, 'a', encoding="utf-8") as file:
                            file.write('\n')

        # Handle year README and webpage
        if no_debug:
            gen_year_table(mkpath(year_path, "README.md"), solved[year], year)
            gen_year_page(mkpath(ROOT_PATH, "docs", year, "index.html"),
                          solved[year], year)

    # Handle global README and webpage
    if no_debug:
        gen_global_table(mkpath(ROOT_PATH, "README.md"), solved)
        gen_home_page(mkpath(ROOT_PATH, "docs", "index.html"), solved)
示例#8
0
# Run configuration taken from the parsed command-line arguments.
EPOCH = args.epoch
BATCH_SIZE = args.bs
MODEL_NAME = args.name

# Echo the configuration so it appears at the top of the run output.
print(args)
print('ALL PROMPTS :', prompts)
print('BATCH SIZE :', BATCH_SIZE)
print('MODEL_NAME :', MODEL_NAME)
print('EPOCH :', EPOCH)
print('-------')

# Process each prompt independently.
for p in prompts:
    print('PROMPT :', p)

    # Look up the weight for this model/prompt at the requested epoch; a
    # falsy result means none is available, so the prompt is skipped.
    weight_path = utils.mkpath('weight/{}/{}'.format(MODEL_NAME, p))
    weight = utils.get_weight_at_epoch(weight_path, EPOCH)
    if not weight:
        print('weight not found')
        continue

    test_df = data_utils.load_data(p, 'test')
    print(test_df.shape)

    # Clear the Keras session before building this prompt's model.
    from keras import backend as K
    K.clear_session()
    model = models.build_elmo_model_full(p,
                                         only_elmo=False,
                                         use_mask=True,
                                         summary=False)
示例#9
0
def makefolders():
    """Call ``mkpath`` for the datapack function and tag folders, in order."""
    folder_args = (
        ("out/datapack/data/milhaxcustommodel/functions/",),
        ("minecraft/tags/functions/", "out/datapack/data/"),
    )
    for args in folder_args:
        mkpath(*args)
示例#10
0
def readme_exec(text, path):
    """Return *text* with every ``REGEX["exec_code"]`` match substituted.

    Each match is replaced by the result of ``handle_match``, which receives
    the path built from *path* and the match's first group, plus the match.
    """
    def _substitute(match):
        target = mkpath(path, match.group(1))
        return handle_match(target, match)

    return re.sub(REGEX["exec_code"], _substitute, text)
示例#11
0
 def getImageCache():
     """Return ``mkpath`` applied to ``<tmpdir>/imageCache``.

     ``tmpdir`` is read from ``Specs().s``.
     """
     tmp_dir = Specs().s['tmpdir']
     return mkpath("%s/imageCache" % tmp_dir)
示例#12
0
 def getCacheDir(id):
     """Return ``mkpath`` applied to ``<image cache>/<id>``.

     *id* is formatted with ``%d``.
     """
     cache_root = ImageHandler.getImageCache()
     return mkpath(cache_root + "/%d" % id)
示例#13
0
 def getCacheDir(id):
     """Return ``mkpath`` applied to ``<image cache>/<id>``.

     *id* is formatted with ``%d``.
     """
     return mkpath("%s/%d" % (ImageHandler.getImageCache(), id))
示例#14
0
from sys import path as sys_path
import os

sys_path.append("../")

from readme_exec import count_time
from utils import mkpath


result = []
for year in range(2000, 3000):
    for path, folders, files in os.walk(mkpath("../../", year)):
        cur_path = os.getcwd()
        os.chdir(path)

        for filename in files:
            if os.path.splitext(filename)[1] != ".py":
                continue

            with open(filename, 'r', encoding="utf-8") as file:
                code = file.read().strip()

            time = round(count_time(code))

            if time >= 1000:
                result.append((mkpath(path, filename), time))

        os.chdir(cur_path)

        with open("exec_time_result.txt", 'w') as file:
            for path, time in result: