Exemplo n.º 1
0
def build_dob_cache(dob_cache, assts):
    """Fill ``dob_cache`` with a date of birth for every student in ``assts``.

    Each assignment row is decoded with ``ass_extract`` and the students of
    its group are looked up.  When at least one of them is missing from the
    cache, the group's age table is built once (``get_age_df``) and every
    student of the group is cached from their *own* row of that table.

    Args:
        dob_cache: dict mapping student id -> ``Timestamp``; mutated in place.
        assts: DataFrame of assignments, iterated with ``iterrows()``.

    Returns:
        The same ``dob_cache`` object that was passed in.

    Raises:
        AssertionError: if a looked-up date of birth is not a ``Timestamp``.
    """
    for ass in assts.iterrows():
        _, ts, _, gr_id = ass_extract(ass)
        students = list(get_student_list(gr_id)["user_id"])
        # Nothing to do when the whole group is already cached; this also
        # avoids querying user data for groups we will not touch.
        if all(psi in dob_cache for psi in students):
            continue

        group_df = get_user_data(students)
        age_df = get_age_df(ts, group_df)
        age_df["dob"] = pandas.to_datetime(age_df["dob"])
        for psi in students:
            # Look up each student's own row (the previous code reused the
            # first uncached student's dob for the entire group).
            # presumably age_df is indexed by user id — verify in get_age_df
            dob = age_df.loc[psi, "dob"]
            assert isinstance(dob, Timestamp)
            dob_cache[psi] = dob
    return dob_cache
Exemplo n.º 2
0
def filter_assignments(assignments, book_only):
    """Mark each assignment as usable or not in a boolean ``include`` column.

    An assignment stays included only when all of the following hold:

    * if ``book_only`` is truthy, every question on its gameboard has a page
      id (the text before ``"|"``) starting with ``"ch_"`` or ``"ch-i"``;
    * its group has at least one student;
    * at least one of those students has a non-empty attempts table.

    Args:
        assignments: DataFrame with ``group_id`` and ``gameboard_id``
            columns; an ``include`` column is added/overwritten in place.
        book_only: restrict to gameboards made only of book pages.

    Returns:
        The same ``assignments`` DataFrame (rows are flagged, not dropped).
    """
    assignments["include"] = True
    print(assignments.shape)
    gb_question_map = make_gb_question_map()
    # NOTE(review): the result of get_meta_data() was never used here; the
    # call is retained in case it has caching side effects — confirm, then drop.
    get_meta_data()

    # Iterate over the real index labels: using range(n) with .loc breaks
    # (KeyError, or silent row creation on assignment) whenever the frame
    # does not carry a default 0..n-1 index, e.g. after filtering.
    for ix in assignments.index:
        include = True
        gr_id = assignments.loc[ix, "group_id"]

        if book_only:
            gb_id = assignments.loc[ix, "gameboard_id"]
            include = all(
                hx.split("|")[0].startswith(("ch_", "ch-i"))
                for hx in gb_question_map[gb_id]
            )

        if include:
            students = get_student_list([gr_id])
            # Keep the assignment only if some student of the group has
            # recorded attempts; any() stops at the first one found.
            include = (not students.empty) and any(
                not get_attempts_from_db(psi).empty
                for psi in list(students["user_id"])
            )

        if not include:
            assignments.loc[ix, "include"] = False

    print(assignments.shape)
    return assignments
Exemplo n.º 3
0
    def __init__(self, assts, batch_size=512, FRESSSH=False, return_qhist=False):
        """Index the assignments and make sure every student profile is cached.

        The constructor
          1. normalises ``creation_date`` to datetimes,
          2. loads previously pickled profiles from ``prof_fname`` (unless
             ``FRESSSH`` is set),
          3. builds the date-of-birth cache,
          4. records, per assignment, its id / timestamp / gameboard / group /
             students, and per student the list of assignment timestamps,
          5. generates any missing ``"prof_<student>_<timestamp>"`` entries and
             re-pickles the profile dict after each assignment that added some.

        Args:
            assts: DataFrame of assignments; its ``creation_date`` column is
                converted in place.
            batch_size: samples per batch, or the string ``"assignment"`` to
                batch per assignment (stored internally as 0).
            FRESSSH: when true, ignore any profiles already on disc.
            return_qhist: stored on the instance for use during iteration.
        """
        self.assts: pandas.DataFrame = assts
        self.assts.loc[:, 'creation_date'] = pandas.to_datetime(assts['creation_date'])

        self.gb_qmap = make_gb_question_map()

        # 0 is the internal marker for "one batch per assignment".
        self.batch_size = batch_size if batch_size != "assignment" else 0
        self.return_qhist = return_qhist

        if not FRESSSH:
            print("APPEND mode")
            # Recycle the old profile cache; fall back to an empty one if it
            # is missing or unreadable (best effort, but no longer swallowing
            # KeyboardInterrupt/SystemExit or leaking the file handle).
            # NOTE: pickle.load must only be used on a trusted cache file.
            try:
                with open(prof_fname, 'rb') as f:
                    self.profiles = pickle.load(f)
                print("got this many profiles:", len(self.profiles))
            except Exception as exc:
                print("could not load cached profiles ({}); starting empty".format(exc))
                self.profiles = {}
        else:
            print("Baking FRESH, like cinnamon!")
            self.profiles = {}

        self.ts_cache = {}
        self.assid_list = []
        self.ts_master_list = []
        self.gb_id_list = []
        self.gr_id_list = []
        self.students_list = []

        print("building dob_cache")
        self.dob_cache = build_dob_cache({}, assts)
        print(len(self.dob_cache))
        print("done")

        for ass in self.assts.iterrows():
            asst_id, ts, gb_id, gr_id = ass_extract(ass)
            students = list(get_student_list(gr_id)["user_id"])
            self.assid_list.append(asst_id)
            self.ts_master_list.append(ts)
            self.gb_id_list.append(gb_id)
            self.gr_id_list.append(gr_id)
            self.students_list.append(students)
            for psi in students:
                # Every assignment timestamp a student is involved in.
                self.ts_cache.setdefault(psi, []).append(ts)

        # The student lists gathered above are reused instead of querying
        # each group a second time.
        for c, (ts, students) in enumerate(zip(self.ts_master_list, self.students_list)):
            has_changed = False
            for psi in students:  # set up the training arrays here
                fn = "prof_{}_{}".format(psi, ts)
                if fn in self.profiles:
                    print(".. {} f/cache".format(fn))
                    continue

                print("{}- - - -   profile for {} .. not found .. will create all ={}".format(c, psi, SAVE_TO_PROF_CACHE))
                has_changed = True
                # NOTE(review): this result was never used; the call is kept
                # in case get_user_data has caching side effects — confirm.
                get_user_data(students)
                ts_list = self.ts_cache[psi]
                print("ts_list", ts_list)
                print("s..")
                s_psi_list = gen_semi_static(psi, self.dob_cache, ts_list)
                print("done")
                print("x..")
                x_psi_list = gen_experience(psi, ts_list)
                print("done")
                print("u..")
                u_psi_list = gen_success(psi, ts_list)
                print("done")
                # presumably the gen_* helpers return one entry per timestamp
                # in ascending order — verify against their definitions.
                # A separate loop variable is used so the assignment's own
                # `ts` is not clobbered for the remaining students, and each
                # profile is stored under its own per-timestamp key (it used
                # to be written to `fn`, overwriting a single entry).
                for prof_ts, s_psi, x_psi, u_psi in zip(sorted(ts_list), s_psi_list, x_psi_list, u_psi_list):
                    key = "prof_{}_{}".format(psi, prof_ts)
                    self.profiles[key] = zlib.compress(pickle.dumps((s_psi, x_psi, u_psi)))
                    print("created profile for ", key, "xp=", numpy.sum(x_psi), "sxp=", numpy.sum(u_psi), "S=", s_psi)
            if has_changed:
                with open(prof_fname, 'wb') as f:
                    pickle.dump(self.profiles, f)
                print("*** *** *** SAVED")
Exemplo n.º 4
0
    def __iter__(self):
        """Generate batches of training samples from the cached profiles.

        Each yielded value is the 8-tuple of parallel lists
        ``(S, X, U, y, assids, awgt, psi_list, qhist_list)``.  One sample is
        produced per (student, page id) pair whose profile key
        ``"prof_<student>_<timestamp>"`` exists in ``self.profiles``.

        A batch is flushed after an assignment when ``batch_size`` is 0 and
        the assignment id differs from the last flushed one, or when
        ``batch_size`` is positive and at least that many samples are queued.
        The first time the flush condition holds nothing is yielded (the
        samples stay queued).  Whatever is left is yielded at the end, even
        if it is empty.
        """
        batch_no = 0  # batches yielded so far
        seen = 0      # assignments visited so far
        total_assts = len(self.assts)
        S, X, U = [], [], []
        y, awgt, assids = [], [], []
        psi_list, qhist_list = [], []

        last_i = None
        rows = zip(self.assid_list, self.ts_master_list, self.gb_id_list, self.gr_id_list)
        for i, ts, gb_id, gr_id in rows:
            seen += 1
            # Only the first question of the gameboard is used.
            hexagons = [self.gb_qmap[gb_id][0]]
            students = list(get_student_list(gr_id)["user_id"])

            print("...", ts, students, hexagons)
            for psi in students:  # set up the training arrays here
                # Reduce "page|part" ids to the page id.
                hexagons = [full_id.split("|")[0] for full_id in hexagons]

                fn = "prof_{}_{}".format(psi, ts)
                if fn not in self.profiles:
                    print(fn, "not in profiles, why??")
                    continue

                tripat = pickle.loads(zlib.decompress(self.profiles[fn]))
                if tripat is None:
                    print(fn, "gives none")
                    continue

                s_psi, x_psi, u_psi = tripat
                for hx in hexagons:
                    allowed = self.pid_override
                    if allowed is not None and hx not in allowed:
                        print("pid problem", hx)
                        continue

                    print(">>>", ts, psi, hx, s_psi, numpy.sum(x_psi), numpy.sum(u_psi))

                    S.append(s_psi)
                    X.append(x_psi)
                    U.append(u_psi)
                    y.append([hx])
                    assids.append(i)
                    awgt.append([len(hexagons)])
                    psi_list.append(psi)
                    qhist_list.append(gen_qhist(psi, ts) if self.return_qhist else None)

            print(len(X), "in the pipe...")
            bs = self.batch_size
            per_assignment_due = bs == 0 and i != last_i
            size_due = bs > 0 and len(X) >= bs
            if not (per_assignment_due or size_due):
                continue
            if last_i is None:
                # special first no-op case: remember the id, keep queueing
                last_i = i
                continue

            print("b={}, n samples = {} ({}/{}={:.1f}%)".format(batch_no, len(X), seen, total_assts, (100.0 * seen / total_assts)))
            batch_no += 1
            yield S, X, U, y, assids, awgt, psi_list, qhist_list
            last_i = i
            # Start fresh lists; the ones just yielded belong to the consumer.
            S, X, U = [], [], []
            y, assids, awgt = [], [], []
            psi_list, qhist_list = [], []
            gc.collect()

        print("out of assts")
        yield S, X, U, y, assids, awgt, psi_list, qhist_list
Exemplo n.º 5
0
def evaluate_phybook_loss(tt, sxua, model, sc):
    """Print per-sample and summary diagnostics for ``model`` on ``tt``.

    Features come from ``augment_data(tt, sxua)``; the S features are scaled
    with ``sc.transform`` and ``model.predict`` is called on ``[S, U]``.  For
    each candidate threshold every sample's true pages are compared with a
    shortlist made of the arg-max page plus every page whose predicted value
    exceeds the threshold, and Jaccard / inclusion / direct-hit rates are
    printed.  The threshold reported at the end is the first one whose
    direct-hit count beats the best seen so far.  Nothing is returned.
    """
    aid_list, s_list, x_list, u_list, a_list, y_list = augment_data(tt, sxua)

    # Second pass over the assignments: echo each group's students and
    # gameboard pages, and append one more assignment id per student.
    for _, asst in tt.iterrows():
        aid = asst["id"]
        gr_id = asst["group_id"]
        gb_id = asst["gameboard_id"]
        student_ids = list(get_student_list(gr_id)["user_id"])
        print(student_ids)
        get_user_data(student_ids)
        print(list(gb_qmap[gb_id]))
        aid_list.extend([aid] * len(student_ids))

    s_list = numpy.array(sc.transform(s_list))
    x_list = numpy.array(x_list)
    u_list = numpy.array(u_list)
    a_list = numpy.array(a_list)

    print(s_list.shape, x_list.shape, u_list.shape, a_list.shape)

    print("results")
    print(model.get_input_shape_at(0))
    predictions = model.predict([s_list, u_list])
    N = len(predictions)
    best_j = 0
    best_thresh = 0
    best_hits = 0
    for j_thresh in (0.01, 0.025, .05, 0.075, .1, .2, 0.3, 0.4, 0.5, 0.6, 0.7):
        j_sum = 0
        incl_sum = 0
        dir_hits = 0
        current_ai = None
        samples = zip(aid_list, predictions, s_list, x_list, a_list, y_list)
        for ai, p, s, x, a, y in samples:
            # Page ids flagged in the target vector.
            truth = [pid_override[yix] for yix, yval in enumerate(y) if yval == 1]
            if ai != current_ai:
                print("\n...new asst", ai)
                current_ai = ai

            print("pshape", p.shape)
            top_ix = numpy.argmax(p)
            print(top_ix, len(pid_override))
            max_guess = pid_override[top_ix]

            # Shortlist: the top guess plus everything above the threshold.
            shortlist = [max_guess]
            probs = [p[top_ix]]
            for ix, el in enumerate(p):
                if el > j_thresh and pid_override[ix] not in shortlist:
                    shortlist.append(pid_override[ix])
                    probs.append(p[ix])

            probs_desc = list(reversed(sorted(probs)))
            ranked = list(reversed([page for _, page in sorted(zip(probs, shortlist))]))
            print(truth, ranked)
            print(probs_desc)

            if max_guess not in truth:
                robot = "BAD ROBOT"
            elif max_guess == truth[0]:
                robot = "GREAT ROBOT"
                dir_hits += 1
            else:
                robot = "GOOD ROBOT"
            print("{} {}, XP={}".format(robot, sc.inverse_transform(s),
                                        numpy.sum(x)))

            truth_set = set(truth)
            guess_set = set(shortlist)
            overlap = truth_set.intersection(guess_set)
            if overlap:
                incl_sum += 1
            j_sum += len(overlap) / len(truth_set.union(guess_set))

        j_score = j_sum / N
        if dir_hits > best_hits:
            best_j = j_score
            best_thresh = j_thresh
            best_hits = dir_hits
        print("j_thresh =", j_thresh)
        print("Jaccard:", j_score)
        print("Incl:", incl_sum / N)
        print("D/H:", dir_hits / N)
        print("~ ~ ~ ~")
    print("max thresh/jacc:", best_thresh, best_j, best_hits / N)
    print("num examples", N)
Exemplo n.º 6
0
def create_student_scorecards(tt, sxua, model, sc):
    """Predict and save a class report card for each assignment in ``tt``.

    For every assignment row the group's students are looked up and their
    ``(S, X, U, A)`` feature tuples are unpacked from ``sxua``.  Students with
    ``S[0] < 10`` or ``S[1] == 0`` are left out.  If any students remain,
    ``model.predict([S, U])`` is run and the result is handed to
    ``save_class_report_card``.  Finally the ids of the assignments that
    produced a report card are written to ``a_ids.txt`` (a ``(count)`` header,
    then one id per line).

    Args:
        tt: DataFrame of assignments with ``id``, ``creation_date``,
            ``group_id`` and ``gameboard_id`` columns.
        sxua: ``sxua[student_id][timestamp]`` -> zlib-compressed pickle of
            ``(S, X, U, A)``.  NOTE: unpickled here, so it must be trusted.
        model: object exposing ``predict``.
        sc: not used by this function; kept for interface compatibility.

    Raises:
        ValueError: if a gameboard page id is not present in ``pid_override``.
        KeyError: if a student/timestamp pair is missing from ``sxua``.
    """
    names_df = get_q_names()
    names_df.index = names_df["question_id"]
    # Ids of assignments for which a report card was actually saved.  (The
    # previous code never filled this list and reported the length of a
    # per-row list instead, which also failed outright on an empty ``tt``.)
    # NOTE(review): confirm skipped assignments should not be listed.
    aids = []
    for row in tt.iterrows():
        print(row)
        asst = row[1]
        aid = asst["id"]
        ts = asst["creation_date"]
        gr_id = asst["group_id"]
        gb_id = asst["gameboard_id"]
        student_ids = list(get_student_list(gr_id)["user_id"])
        print(student_ids)
        # NOTE(review): this result was never used; the call is retained in
        # case get_user_data has caching side effects — confirm, then drop.
        get_user_data(student_ids)
        hexes = list(gb_qmap[gb_id])
        print(hexes)

        # n-hot binarise the y vector here
        y_true = numpy.zeros(len(pid_override), dtype=numpy.int8)
        for hx in hexes:
            y_true[pid_override.index(hx)] = 1

        s_list, x_list, u_list, a_list = [], [], [], []
        y_list = []
        incl_psis = []
        for psi in student_ids:
            S, X, U, A = pickle.loads(zlib.decompress(sxua[psi][ts]))
            if S[0] < 10:
                print("s0 under 10")
                continue
            if S[1] == 0:
                print("no time on plaform recorded")
                continue
            y_list.append(y_true)
            s_list.append(S)
            x_list.append(X)
            u_list.append(U)
            a_list.append(A)
            incl_psis.append(psi)
            print("student {} done".format(psi))

        if not s_list:
            continue

        predictions = model.predict([numpy.array(s_list), numpy.array(u_list)])

        save_class_report_card(ts, aid, gr_id, s_list, x_list, u_list, a_list,
                               y_list, predictions, incl_psis, names_df)
        aids.append(aid)

    with open("a_ids.txt", "w+") as f:
        f.write("({})\n".format(len(aids)))
        f.writelines(str(a) + "\n" for a in sorted(aids))
        f.write("\n")
Exemplo n.º 7
0
def augment_data(tr, sxua):
    """Build model input/target arrays from the assignments in ``tr``.

    Assignments are processed group by group.  Within a group, an assignment
    created on the same day as the previously kept one is skipped.  For every
    remaining (assignment, student) pair the student's ``(S, X, U, A)`` tuple
    is unpacked from ``sxua``; students with ``S[0] < 10`` or ``S[1] == 0``
    are dropped.  The target is a one-hot vector over ``pid_override`` marking
    the alphabetically first gameboard page for which the student's ``X``
    entry is 0; pairs with no such page are dropped.  One summary line per
    sample is written to ``tr_summ.csv``.

    Args:
        tr: DataFrame of assignments with ``id``, ``creation_date``,
            ``group_id`` and ``gameboard_id`` columns.
        sxua: ``sxua[student_id][timestamp]`` -> zlib-compressed pickle of
            ``(S, X, U, A)``.  NOTE: unpickled here, so it must be trusted.

    Returns:
        ``(aid_list, s_list, x_list, u_list, a_list, y_list)`` where
        ``aid_list`` is a Python list and the rest are numpy arrays
        (``x_list`` int16; ``u_list``, ``a_list`` and ``y_list`` int8).

    Raises:
        ValueError: if a gameboard page id is not present in ``pid_override``.
        KeyError: if a student/timestamp pair is missing from ``sxua``.
    """
    concept_list = list(set().union(*concept_map.values()))
    print(concept_list)

    group_ids = pandas.unique(tr["group_id"])

    aid_list = []
    s_list = []
    x_list = []
    u_list = []
    a_list = []
    y_list = []

    # The context manager guarantees the summary file is closed even if a
    # lookup below raises part-way through.
    with open("tr_summ.csv", "w") as fout:
        for gr_id in group_ids:
            gr_ass = tr[tr["group_id"] == gr_id]
            last_ts = None
            for _, asst in gr_ass.iterrows():
                aid = asst["id"]
                ts = asst["creation_date"]
                gb_id = asst["gameboard_id"]
                if last_ts is not None and ((ts - last_ts).days == 0):
                    print("skipping same-day assignment")
                    continue  # do not add same-day assignments
                last_ts = ts

                # Looked up only for assignments that are actually kept.
                student_ids = list(get_student_list(gr_id)["user_id"])
                hexes = list(gb_qmap[gb_id])

                for psi in student_ids:
                    S, X, U, A = pickle.loads(zlib.decompress(sxua[psi][ts]))
                    if S[0] < 10:  # i.e. if student has no valid age
                        continue
                    if S[1] == 0:  # no time in platform
                        continue

                    # Pages the student has no recorded experience of.
                    # NOTE(review): X is indexed by position in pid_override
                    # here — confirm X is laid out per page id.
                    hexes_to_try = [
                        hx for hx in hexes if X[pid_override.index(hx)] == 0
                    ]
                    if not hexes_to_try:
                        print("no hexes to try")
                        continue

                    # One-hot target on the alphabetically first such page.
                    y_true = numpy.zeros(len(pid_override))
                    y_true[pid_override.index(min(hexes_to_try))] = 1.0

                    print("hexes t try: {}".format(hexes_to_try))
                    print("hexes      : {}".format(hexes))
                    aid_list.append(aid)
                    s_list.append(S)
                    x_list.append(X)
                    u_list.append(U)
                    a_list.append(A)
                    y_list.append(y_true)

                    fout.write("{},{},{},{},{},{},{}\n".format(
                        ts, psi, ",".join(map(str, S)), X.sum(),
                        numpy.sum(X > 0), numpy.sum(U),
                        " ".join(hexes_to_try)))

    gc.collect()
    s_list = numpy.array(s_list)
    x_list = numpy.array(x_list, dtype=numpy.int16)
    u_list = numpy.array(u_list, dtype=numpy.int8)
    a_list = numpy.array(a_list, dtype=numpy.int8)
    y_list = numpy.array(y_list, dtype=numpy.int8)
    return aid_list, s_list, x_list, u_list, a_list, y_list
Exemplo n.º 8
0
        print(len(assignments))
        print(len(group_ids))
        print(group_ids[0:20])
        # exit()

        for gr_id in group_ids:
            gr_ass = assignments[assignments["group_id"] == gr_id]
            for row in gr_ass.iterrows():
                # for row in assignments.iterrows():
                aid = row[1]["id"]
                # print(row)
                ts = row[1]["creation_date"]
                # gr_id = row[1]["group_id"]
                gc.collect()
                gb_id = row[1]["gameboard_id"]
                student_ids = list(get_student_list(gr_id)["user_id"])
                # print(student_ids)
                student_data = get_user_data(student_ids)
                now_hexes = list(gb_qmap[gb_id])
                # print(now_hexes)
                # if 118651 not in student_ids:
                #     continue
                for psi in student_ids:
                    # if psi != 118651:
                    #     continue
                    # print(psi)
                    if psi not in SXUA:
                        S = numpy.zeros(6)
                        X = numpy.zeros(len(all_qids), dtype=numpy.int16)
                        U = numpy.zeros(len(all_qids), dtype=numpy.int8)
                        A = numpy.zeros(len(pid_override), dtype=numpy.int8)
Exemplo n.º 9
0
def make_data(ass_n, pickle_at, APPEND=True):
    """Profile the students of each assignment and pickle the results.

    Assignments are read with ``get_all_assignments``.  For each one whose
    gameboard is known and whose group has students, the students are
    profiled with ``profile_students`` and an entry
    ``(ts, gb_id, gr_id, questions, concepts, levels, students, profiles)``
    (profiles zlib-compressed) is stored under the assignment id.  The whole
    mapping is pickled to ``asst_fname`` every ``pickle_at`` processed
    assignments and when the target count is reached; a line per processed
    assignment is also written to ``tracking.dat``.

    Args:
        ass_n: total number of assignments wanted on disc.
        pickle_at: checkpoint interval, in processed assignments.
        APPEND: when true, load the existing pickle and continue from it;
            otherwise truncate it and start from an empty ``OrderedDict``.

    Raises:
        SystemExit: with status 1 when ``ass_n`` does not exceed the number
            of assignments already stored.
        AssertionError: if the number of profiles returned differs from the
            number of students, or is zero.
    """
    user_cache = {}

    ass_df = get_all_assignments()
    gb_qmap = make_gb_question_map()
    ass_ct = 0

    ass_df["creation_date"] = pandas.to_datetime(ass_df["creation_date"])

    # NOTE(review): this frame is replaced per assignment before it is ever
    # read; kept in case get_user_data("*") primes a cache — confirm, then drop.
    profile_df = get_user_data("*")
    profile_df["date_of_birth"] = pandas.to_datetime(
        profile_df["date_of_birth"])

    if APPEND:
        print("APPEND mode")
        # recycle old results
        # NOTE: pickle.load must only be used on a trusted file.
        with open(asst_fname, 'rb') as f:
            asses = pickle.load(f)
        tracking = open("tracking.dat", "w+")
        print("loaded {} existing assignments".format(len(asses)))
    else:
        # Opening in 'wb' truncates the old results file.
        with open(asst_fname, 'wb'):
            pass
        tracking = open("tracking.dat", "w")
        print("FRESH mode")
        # bake it fresh
        asses = OrderedDict()

    # try/finally so the tracking file is closed on every exit path (the old
    # close() sat after the final `return` and could never run).
    try:
        start_at = len(asses)
        number_to_do = ass_n - start_at
        # NOTE(review): the comment further down says ass_n == -1 should
        # disable trimming, but that value always exits here — confirm intent.
        if number_to_do <= 0:
            print("We already have {}>{} samples".format(start_at, ass_n))
            raise SystemExit(1)

        # if ass_n is -1 then this overrides the trimming of the assts
        ass_df = ass_df.iloc[start_at:, :] if (ass_n > 0) else ass_df

        for _, asst in ass_df.iterrows():
            asst_id = asst["id"]
            if asst_id in asses and not FORCE_OVERWRITE:
                continue  # already processed

            print("assct {} of {} ({} users cached)".format(
                ass_ct, ass_n, len(user_cache)))
            ts = asst['creation_date']
            gb_id = asst["gameboard_id"]
            if gb_id not in gb_qmap:
                print("gb id unknown")
                continue

            this_concepts = set()
            raw_qns = gb_qmap[gb_id]
            this_levels = []
            this_qns = raw_qns
            if isinstance(raw_qns, str):
                # SECURITY: eval executes arbitrary code; safe only while the
                # question map comes from a trusted source.
                # TODO make sure this works hitting the database as well
                this_qns = eval(raw_qns)
            for q in this_qns:
                if "|" in q:
                    q = q.split("|")[0]
                this_levels.append(lev_page_lookup[q])
                this_concepts.update(concept_extract(q))

            gr_id = asst["group_id"]
            students = get_student_list([gr_id])

            if students.empty:
                print(gr_id, "no students")
                continue
            print(gr_id, "students!")

            students = list(students["user_id"])
            profile_df = get_user_data(list(students))

            profiles = profile_students(students,
                                        profile_df,
                                        ts,
                                        concepts_all,
                                        hwdf,
                                        user_cache,
                                        attempts_df=None)
            print(len(profiles), len(students))
            assert len(profiles) == len(students)
            assert len(profiles) > 0
            print("compressing_profiles")
            c_profiles = zlib.compress(pickle.dumps(profiles))
            print("compressed")

            ass_entry = (ts, gb_id, gr_id, this_qns, this_concepts,
                         this_levels, students, c_profiles)
            # Everything except the compressed blob, plus the profile count.
            tracking.write(str(ass_entry[0:7] + (len(profiles), )))
            tracking.write("\n")
            asses[asst_id] = ass_entry
            ass_ct += 1

            print("...{} students".format(len(profiles)))
            print("ass_ct", ass_ct)
            print("pickle at", pickle_at)
            print("%", (ass_ct % pickle_at))
            if (ass_ct == number_to_do) or (ass_ct % pickle_at) == 0:
                # Checkpoint; the handle is closed straight away instead of
                # being reopened and leaked on every save.
                with open(asst_fname, 'wb') as f:
                    pickle.dump(asses, f)
                print("***SAVED (hallelujah)")

            if ass_ct == number_to_do:
                print("we have hit maximum ass limit")
                break

        print("We now have {} assignments on disc".format(len(asses)))
    finally:
        tracking.close()
Exemplo n.º 10
0
if __name__ == "__main__":
    teachers_df = pandas.DataFrame.from_csv("teachers.dat", header=0)
    teacher_ids = list(teachers_df.index)

    model = load_model(base + "hwg_model.hd5")
    (ylb, clb) = joblib.load(base + 'hwg_mlb.pkl')

    up_to_ts = pandas.datetime.now()
    fout = open("predictions.out", "w")
    for t in teacher_ids:
        class_list = get_group_list(t)["id"]
        print("groups:", class_list)
        for c in class_list:
            print("get student lsit for =>", c)
            students = get_student_list(c)
            students = list(students["user_id"])
            print("students:", students)
            if not students:
                continue
            # students = list(students)
            profile_df = get_user_data(students)
            # print("profiles:",profile_df)

            X = []
            for u in students:
                x_psi = gen_experience(u, up_to_ts)
                X.append(x_psi)
            X = numpy.array(X)
            predictions = model.predict(X)
            ymax = ylb.inverse_transform(predictions)