Exemplo n.º 1
0
def test(input_path, output_path, character_list_path, spell2character,
         datanum):
    """Build a small pinyin test set, run ``transfer`` on it and score the output.

    Up to the first 1000 dataset entries are written to two GBK-encoded files
    under ``input_path + '/testfile/'``: ``test_pinyin.txt`` receives, for
    every item that has a truthy entry in the table loaded from
    ``character_list_path``, that entry followed by a space; ``test.txt``
    receives the matching items themselves (items without an entry are
    dropped from both files).  ``transfer`` is then run on the pinyin file and
    its output file is compared with ``test.txt`` position by position.

    Args:
        input_path: Passed to ``load_dataset``; the generated files go into
            its ``testfile`` subdirectory, which must already exist.
        output_path: File that ``transfer`` is asked to write and that is
            read back here for scoring.
        character_list_path: Path handed to ``load_word2character``.
        spell2character: Forwarded unchanged to ``transfer``.
        datanum: Forwarded unchanged to ``transfer``.

    Returns:
        The fraction of matching positions as a float for the last
        ``unit_lambda`` evaluated, or 0.0 when nothing could be compared.
    """
    word2character = load_word2character(character_list_path)
    dataset = load_dataset(input_path)
    reference_path = input_path + '/testfile/test.txt'
    pinyin_path = input_path + '/testfile/test_pinyin.txt'

    # A former full dump of the dataset into test.txt was dropped: the file
    # was reopened in 'w' mode right afterwards, which discarded that dump.
    with open(pinyin_path, 'w') as frpinyin:
        with open(reference_path, 'w') as fr:
            # Cap at the dataset size so a short dataset cannot raise
            # IndexError.
            for i in range(min(1000, len(dataset))):
                pinyin = u''
                sent = u''
                for j in dataset[i]:
                    # .get() rather than .setdefault(): a lookup must not
                    # insert None entries into the table.
                    character = word2character.get(j)
                    if character:
                        pinyin = pinyin + character + ' '
                        sent = sent + j
                frpinyin.write(pinyin.encode('GBK') + '\n')
                fr.write(sent.encode('GBK') + '\n')

    accuracy = 0.0
    for unit_lambda in range(30, 31):
        start_time = time.time()
        transfer(input_path=pinyin_path,
                 output_path=output_path,
                 spell2character=spell2character,
                 datanum=datanum,
                 unit_lambda=unit_lambda)
        num_same = 0
        num_all = 0
        with open(reference_path, 'r') as fri:
            with open(output_path, 'r') as fro:
                while True:
                    line1 = fri.readline()
                    line2 = fro.readline()
                    if not line1 or not line2:
                        break
                    # strip() returns a new string; the result has to be kept,
                    # otherwise the trailing newline is compared and counted.
                    line1 = line1.decode('GBK').strip('\n')
                    line2 = line2.decode('GBK').strip('\n')
                    if len(line1) != len(line2):
                        # NOTE(review): on a length mismatch one extra
                        # reference line is skipped, as before.  This shifts
                        # the pairing of the two files; confirm it is intended.
                        fri.readline()
                    else:
                        num_all += len(line1)
                        num_same += sum(
                            1 for a, b in zip(line1, line2) if a == b)
        # Float division, guarded against an empty comparison.
        accuracy = float(num_same) / num_all if num_all else 0.0
        print('Unit_lambda:%02d' % unit_lambda)
        print('\tAccuracy rate: %02.3f' % accuracy)
        print("\tTook: %4.4fs" % (time.time() - start_time))
    return accuracy
    def build(self):
        """Compute and pickle the base enrollment feature string per enrollment.

        For every id in the enrollment table the following vectors are built,
        in this order: a one-hot vector for the enrollment's own course, two
        vectors flagging the courses of the ids returned by
        ``get_course_ids_before_after``, ``get_vector`` encodings of the
        before / after / total counts, and four raw counts.  Each value is
        passed through ``transfer`` and the result is stored as one
        comma-joined string per id, then written with ``writepickle`` to
        ``BaseEnrollmentFeature.feature_filename``.
        """
        print("start build BaseEnrollmentFeature...")
        features = {}
        enrollment = Enrollment("../data/merge/enrollment.csv")
        course_stats = CourseStatisticTimeInfo()
        course_stats.load()
        sequence_info = CourseTimeSequenceInfo()
        sequence_info.load()

        def flag_courses(enrollment_ids):
            # Set the course slot of every enrollment id in the given list.
            flags = [0] * COURSE_VEC_NUM
            for other_id in enrollment_ids:
                _owner, other_course = enrollment.enrollment_info.get(other_id)
                flags[course_stats.get_course_id(other_course)] = 1
            return flags

        for enrollment_id in enrollment.ids:
            username, course_id = enrollment.enrollment_info.get(enrollment_id)

            own_course = [0] * COURSE_VEC_NUM
            own_course[course_stats.get_course_id(course_id)] = 1

            n_before, n_after = sequence_info.get_course_num_before_after(
                username, enrollment_id)
            ids_before, ids_after = sequence_info.get_course_ids_before_after(
                username, enrollment_id)
            before_courses = flag_courses(ids_before)
            after_courses = flag_courses(ids_after)
            before_count_vec = get_vector(n_before, MAX_ENROLLMENT_VEC_NUM)
            after_count_vec = get_vector(n_after, MAX_ENROLLMENT_VEC_NUM)

            n_total = n_before + n_after
            total_count_vec = get_vector(n_total, MAX_ENROLLMENT_VEC_NUM)

            n_users = len(enrollment.course_info.get(course_id, []))
            raw_counts = [n_users, n_total, n_before, n_after]

            groups = [
                own_course, before_courses, after_courses,
                before_count_vec, after_count_vec, total_count_vec,
                raw_counts
            ]

            # One comma-joined chunk per vector, then all chunks joined again.
            chunks = [
                ",".join(["%s" % transfer(value) for value in group])
                for group in groups
            ]
            features[enrollment_id] = ",".join(
                ["%s" % chunk for chunk in chunks])
        writepickle(BaseEnrollmentFeature.feature_filename, features)
        print("build BaseEnrollmentFeature over")
Exemplo n.º 3
0
    def build(self):
        """Build the per-enrollment base feature strings and pickle them.

        Each enrollment id yields seven vectors (own course one-hot, courses
        flagged from the two id lists of ``get_course_ids_before_after``,
        three ``get_vector`` count encodings and a list of four raw counts).
        All values are run through ``transfer``, formatted with ``%s`` and
        joined with commas; the resulting dict is handed to ``writepickle``
        together with ``BaseEnrollmentFeature.feature_filename``.
        """
        print("start build BaseEnrollmentFeature...")
        result = {}
        enrollment = Enrollment("../data/merge/enrollment.csv")
        course_statistic_time = CourseStatisticTimeInfo()
        course_statistic_time.load()
        course_time_sequence_info = CourseTimeSequenceInfo()
        course_time_sequence_info.load()

        for eid in enrollment.ids:
            username, course_id = enrollment.enrollment_info.get(eid)

            own_vec = [0] * COURSE_VEC_NUM
            own_vec[course_statistic_time.get_course_id(course_id)] = 1

            earlier_vec = [0] * COURSE_VEC_NUM
            later_vec = [0] * COURSE_VEC_NUM

            earlier_num, later_num = \
                course_time_sequence_info.get_course_num_before_after(
                    username, eid)
            earlier_ids, later_ids = \
                course_time_sequence_info.get_course_ids_before_after(
                    username, eid)

            # Same marking step for both id lists, "before" first.
            for target_vec, related_ids in ((earlier_vec, earlier_ids),
                                            (later_vec, later_ids)):
                for related_id in related_ids:
                    _related_user, related_course = \
                        enrollment.enrollment_info.get(related_id)
                    slot = course_statistic_time.get_course_id(related_course)
                    target_vec[slot] = 1

            earlier_num_vec = get_vector(earlier_num, MAX_ENROLLMENT_VEC_NUM)
            later_num_vec = get_vector(later_num, MAX_ENROLLMENT_VEC_NUM)

            total_num = earlier_num + later_num
            total_num_vec = get_vector(total_num, MAX_ENROLLMENT_VEC_NUM)

            course_users = len(enrollment.course_info.get(course_id, []))
            plain_counts = [course_users, total_num, earlier_num, later_num]

            pieces = []
            for vec in (own_vec, earlier_vec, later_vec, earlier_num_vec,
                        later_num_vec, total_num_vec, plain_counts):
                formatted = ["%s" % transfer(item) for item in vec]
                pieces.append(",".join(formatted))

            result[eid] = ",".join(["%s" % piece for piece in pieces])
        writepickle(BaseEnrollmentFeature.feature_filename, result)
        print("build BaseEnrollmentFeature over")
Exemplo n.º 4
0
    return num_same / num_all


if __name__ == '__main__':
    # train
    # train(conf=conf)

    # Resolve the paths first so a bad command line fails before the model
    # is loaded.  No arguments: use the configured paths.  Two or more: the
    # first two are input and output (extra arguments are ignored, as
    # before).  Exactly one argument used to raise IndexError on sys.argv[2];
    # it is now reported as a usage error.
    cli_args = sys.argv[1:]
    if not cli_args:
        input_path = conf.input_path
        output_path = conf.output_path
    elif len(cli_args) >= 2:
        input_path = cli_args[0]
        output_path = cli_args[1]
    else:
        sys.exit('usage: %s [input_path output_path]' % sys.argv[0])

    # load (only spell2character is used below)
    words, spell2word, characters, spell2character = load_from_saved(
        path=conf.save_path)

    transfer(input_path=input_path,
             output_path=output_path,
             spell2character=spell2character,
             datanum=conf.data_num,
             unit_lambda=conf.unit_lambda)

    # test
    # test(input_path=conf.test_path, output_path=conf.output_path, character_list_path=conf.character_list_path,
    #    spell2character=spell2character, datanum=conf.data_num)
    # print('Success!')

    # "pause" is a Windows shell command; on other shells this call just
    # returns a non-zero status that is ignored here.
    os.system("pause")
Exemplo n.º 5
0
    def build(self):
        """Build day-level activity features per enrollment and pickle them.

        For every enrollment id the log records are grouped by calendar day
        (the part of the timestamp before ``T``).  Per day the code counts
        browser vs. other-source records and records per event index, and
        remembers the weekday, two-hour slot and course index of the last
        record seen that day.  Across days it sums the square roots of the
        per-day counts and counts days per weekday / hour slot / course index.
        The values are passed through ``transfer``, formatted with ``%.2f``
        and stored as one comma-joined string per id in
        ``_feature/day.level.info.model``.
        """
        print("start build DayLevelInfo...")
        # NOTE(review): coursetimeinfo, label, userinfo, module and
        # transfer_day are not read below.  They are still constructed and
        # loaded because those calls may have side effects that cannot be
        # ruled out from here.
        coursetimeinfo = CourseTimeInfo()
        log = Log("../data/merge/log.csv")
        enrollment = Enrollment("../data/merge/enrollment.csv")
        obj = Obj()
        label = Label()
        userinfo = Userinfo()
        userinfo.load()
        module = Module()
        module.load()
        transfer_day = Transfer()
        transfer_day.load()

        # Total number of values per feature row.  Computed up front so the
        # final report also works when there are no enrollments (it used to
        # rely on a loop variable that is undefined in that case).
        feature_len = (2 + EVENT_VEC_NUM + WEEKDAY_VEC_NUM + HOUR_VEC_NUM
                       + CIDX_VEC_NUM)

        processed = 0
        fs = {}
        for enrollment_id in enrollment.ids:
            processed += 1
            if processed % 5000 == 0:
                print(processed)
            infos = log.enrollment_loginfo.get(enrollment_id, [])
            username, course_id = enrollment.enrollment_info.get(enrollment_id)

            # Record layout, as noted by the original author:
            #   time,source,event,o
            #   source: browser,server
            #   event: access,discussion,nagivate,page_close,problem,video,wiki
            #   category: video,vertical,static_tab,sequential,problem,
            #     peergrading,outlink,html,discussion,dictation,course_info,
            #     course,combinedopenended,chapter,about
            #   time: 2014-06-13T09:52:49

            info_by_day = {}
            for info in infos:
                if "T" not in info[0]:
                    continue
                day, timehms = info[0].split("T")
                # weight = module.get_weight(info[3])
                weight = 1
                day_info = info_by_day.setdefault(day, {"event": {}})

                source = "browser" if info[1] == "browser" else "server"
                day_info[source] = day_info.get(source, 0) + weight

                event_idx = get_event_idx(info[2])
                day_events = day_info["event"]
                day_events[event_idx] = day_events.get(event_idx, 0) + weight

                # NOTE(review): `week` is not defined in this method; it is
                # presumably a module-level Week instance - confirm.
                # weekday / hour / cidx are overwritten by each record, so
                # the last record of the day wins.
                day_info["weekday"] = week.get(day)
                # Two-hour slot; floor division keeps it an int list index.
                day_info["hour"] = int(timehms[:2]) // 2
                day_info["cidx"] = obj.get_index(course_id,
                                                 week.times(info[0]))

            day_event_count = [0] * EVENT_VEC_NUM
            day_weekday_count = [0] * WEEKDAY_VEC_NUM
            day_hour_count = [0] * HOUR_VEC_NUM
            day_cidx_count = [0] * CIDX_VEC_NUM
            _browser = 0
            _server = 0
            for day_info in info_by_day.values():
                for (k, v) in day_info["event"].items():
                    day_event_count[k] = day_event_count[k] + math.sqrt(v)
                _browser = _browser + math.sqrt(day_info.get("browser", 0))
                _server = _server + math.sqrt(day_info.get("server", 0))
                day_weekday_count[day_info["weekday"]] += 1
                day_hour_count[day_info["hour"]] += 1
                day_cidx_count[day_info["cidx"]] += 1

            # Row layout: browser, server, then the four count vectors.
            f = [transfer(_browser), transfer(_server)]
            for vs in (day_event_count, day_weekday_count, day_hour_count,
                       day_cidx_count):
                f.extend([transfer(v) for v in vs])
            fs[enrollment_id] = ",".join(["%.2f" % k for k in f])

        print(feature_len)
        # Context manager so the file is closed even if pickling fails.
        with open('_feature/day.level.info.model', 'wb') as modelFileSave:
            pickle.dump(fs, modelFileSave)
        print("build DayLevelInfo over!")
    def build(self):
        """Build the time-based feature string for every enrollment and pickle it.

        Per enrollment id this combines: histograms (via ``add_vector_value``)
        of the gaps between consecutive active days and of the halved gaps to
        the last active day; a histogram of the user's days after that last
        day; ``get_gap_idx`` one-hot vectors for three day spans;
        ``get_vector`` encodings of several day counts; the vector returned
        by ``transfer_day.get_features`` for the last day; and five raw
        counts.  Each value goes through ``transfer``; the comma-joined
        strings are written with ``writepickle`` to
        ``BaseTimeFeature.feature_filename``.
        """
        print("start build BaseTimeFeature...")
        features = {}
        enrollment = Enrollment("../data/merge/enrollment.csv")
        lastdayinfo = LastDayInfo()
        lastdayinfo.load_id_days()
        user_statistic_info = UserStatisticInfo()
        user_statistic_info.load()
        # Constructed and loaded as before, although nothing below reads it.
        coursetimeinfo = CourseStatisticTimeInfo()
        coursetimeinfo.load()
        transfer_day = Transfer()
        transfer_day.load()
        for enrollment_id in enrollment.ids:
            username, course_id = enrollment.enrollment_info.get(enrollment_id)

            days = lastdayinfo.get_days(enrollment_id)
            # day gap info
            gap_day_vec = [0] * MAX_GAP_DAY_VEC_NUM
            gap_lastday_vec = [0] * MAX_GAP_DAY_VEC_NUM
            for pos in range(len(days) - 1):
                add_vector_value(gap_day_vec,
                                 TimeUtil.diff(days[pos + 1], days[pos]))
                add_vector_value(gap_lastday_vec,
                                 TimeUtil.diff(days[-1], days[pos]) / 2 + 1)

            alldays = user_statistic_info.get_unique_days(username)
            days_before_last = 0
            days_after_last = 0
            after_lastday_day_vec = [0] * MAX_GAP_DAY_VEC_NUM

            enrollment_span = 0
            site_span = 0
            site_tail_span = 0
            if len(days) > 0:
                lastday = days[-1]
                enrollment_span = TimeUtil.diff(lastday, days[0])
                ordered_alldays = sorted(alldays)
                site_span = TimeUtil.diff(lastday, ordered_alldays[0])
                site_tail_span = TimeUtil.diff(ordered_alldays[-1], lastday)
                for day in alldays:
                    half_gap = TimeUtil.diff(day, lastday) / 2
                    if half_gap > 0:
                        days_after_last = days_after_last + 1
                        add_vector_value(after_lastday_day_vec, half_gap)
                    else:
                        days_before_last = days_before_last + 1

            # Everything except the last histogram slot.
            after_in_maxgap = sum(after_lastday_day_vec[:-1])

            # One-hot (width 12) for each of the three spans.
            span_vecs = []
            for span_idx in [get_gap_idx(enrollment_span),
                             get_gap_idx(site_span),
                             get_gap_idx(site_tail_span)]:
                one_hot = [0] * 12
                one_hot[span_idx] = 1
                span_vecs.append(one_hot)
            enrollment_span_vec, site_span_vec, site_tail_vec = span_vecs

            before_num_vec = get_vector(days_before_last, DAYS_VEC_NUM)
            after_num_vec = get_vector(days_after_last, DAYS_VEC_NUM)
            after_in_maxgap_vec = get_vector(after_in_maxgap, DAYS_VEC_NUM)

            day_len_vec = get_vector(len(days), DAYS_VEC_NUM)
            alldays_len_vec = get_vector(len(alldays), DAYS_VEC_NUM)

            day_len_half_vec = get_vector((len(days) + 1) / 2,
                                          DAYS_VEC_NUM / 2)
            alldays_len_half_vec = get_vector((len(alldays) + 1) / 2,
                                              DAYS_VEC_NUM / 2)

            # An empty string stands in for the last day when there is none.
            transfer_vec = transfer_day.get_features(
                days[-1] if len(days) > 0 else "")

            raw_counts = [
                days_before_last, days_after_last, after_in_maxgap,
                len(days),
                len(alldays)
            ]
            # The three span vectors are listed twice, exactly as before;
            # the row width depends on it.
            groups = [
                gap_day_vec, gap_lastday_vec, after_lastday_day_vec,
                enrollment_span_vec, site_span_vec, site_tail_vec,
                enrollment_span_vec, site_span_vec, site_tail_vec,
                before_num_vec, after_num_vec, after_in_maxgap_vec,
                day_len_vec, alldays_len_vec,
                day_len_half_vec, alldays_len_half_vec, transfer_vec,
                raw_counts
            ]
            chunks = [
                ",".join(["%s" % transfer(value) for value in group])
                for group in groups
            ]

            features[enrollment_id] = ",".join(
                ["%s" % chunk for chunk in chunks])

        writepickle(BaseTimeFeature.feature_filename, features)
        print("build BaseTimeFeature over")
Exemplo n.º 7
0
    def get_features(self, infos, course_id, isDebug = False):
        """Turn one enrollment's log records into a comma-separated feature string.

        Args:
            infos: Sequence of log records.  From the indexing below, each
                record has a timestamp string at [0] (records without a
                ``T`` are skipped), the source at [1] (``"browser"`` or
                anything else, which is counted as server), the event name at
                [2], a value passed to ``self.obj.get_category_idx`` at [3]
                and a string at [-1] used to de-duplicate events.
            course_id: Course identifier passed to the index helpers.
            isDebug: When true, print intermediate values.

        Returns:
            A string of ``%.3f`` values joined by commas: first the smoothed
            browser ratio, then every value of every vector in ``fv`` after
            ``transfer``.
        """
        week = Week()
        event_count = [0] * EVENT_VEC_NUM
        # category_count is filled but not part of the output; the
        # get_category_idx call is kept in case it has side effects.
        category_count = [0] * CATEGORY_VEC_NUM
        u_event_count = [0] * EVENT_VEC_NUM
        uday_event_count = [0] * EVENT_VEC_NUM
        event_timesum = [0] * EVENT_VEC_NUM
        event_sqrt_timesum = [0] * EVENT_VEC_NUM
        weekday_count = [0] * WEEKDAY_VEC_NUM
        hour_count = [0] * HOUR_VEC_NUM
        cidx_count = [0] * CIDX_VEC_NUM
        cidx_by_stat_count = [0] * CIDX_VEC_NUM
        month_count = [0] * MONTH_VEC_NUM
        next_public = [0] * CIDX_VEC_NUM
        dx_by_date_count = [0] * 30
        spend_time_count = [0] * 10
        sqrt_spend_time_count = [0] * 10
        browser = 0
        server = 0
        timesum = 0
        sqrt_timesum = 0
        _p = 0            # time value of the previous record
        cc = 0            # number of records that start a new activity run
        _event_idx = -1   # event index of the previous record
        seen_events = set()      # event + id keys seen so far
        seen_day_events = set()  # day + event + id keys seen so far
        for info in infos:
            if "T" not in info[0]:
                continue
            p = week.times(info[0])
            # A record continues the current run when it follows the previous
            # one by less than 3 * 60 time units.  The `_p <= p` bound
            # mirrors get_features_no_courseid and keeps out-of-order
            # timestamps away from math.sqrt (negative input raises
            # ValueError).
            if p - 60 * 3 < _p <= p:
                _time = p - _p
                _sqrt_time = math.sqrt(p - _p)
            else:
                _time = 1
                _sqrt_time = 1
                cc += 1
            timesum = timesum + _time
            sqrt_timesum = sqrt_timesum + _sqrt_time
            # The elapsed time is credited to the previous record's event.
            if _event_idx != -1:
                event_timesum[_event_idx] = event_timesum[_event_idx] + _time/10.0
                event_sqrt_timesum[_event_idx] = event_sqrt_timesum[_event_idx] + _sqrt_time/10.0
            if isDebug:
                print("%s %s %s %s" % (timesum, p, _p, info[0]))
            _p = p
            day, timehms = info[0].split("T")
            if info[1] == "browser":
                browser += 1
            else:
                server += 1
            year, month, d = day.split("-")
            # Two slots per month: days 1-15 and days 16-31.
            month_idx = (int(month) - 1) * 2 + int(d) // 16
            month_count[month_idx] = 1

            event_idx = get_event_idx(info[2])
            _event_idx = event_idx
            category_idx = self.obj.get_category_idx(info[3])
            category_count[category_idx] = category_count[category_idx] + 1

            event_key = info[2] + info[-1]
            day_event_key = day + event_key
            if event_key not in seen_events:
                u_event_count[event_idx] = u_event_count[event_idx] + 1
            if day_event_key not in seen_day_events:
                uday_event_count[event_idx] = uday_event_count[event_idx] + 1
            seen_events.add(event_key)
            # The per-day key was previously tested but never stored, so the
            # per-day unique count always equalled the plain event count.
            seen_day_events.add(day_event_key)
            event_count[event_idx] = event_count[event_idx] + 1

            weekday = week.get(day)
            weekday_count[weekday] = weekday_count[weekday] + 1
            hour = int(timehms[:2]) // 2
            hour_count[hour] = hour_count[hour] + 1

            cidx = self.obj.get_index(course_id, week.times(info[0]))
            cidx_count[cidx] = cidx_count[cidx] + 1

            cidx_by_stat = self.coursetimeinfo.get_index(course_id, week.times(info[0]))
            cidx_by_stat_count[cidx_by_stat] = cidx_by_stat_count[cidx_by_stat] + 1

            dx_by_date = self.cdate.get_index(course_id, day)
            dx_by_date_count[dx_by_date] = dx_by_date_count[dx_by_date] + 1

        # Index for the last record's time, clamped to the final slot; the
        # final slot is also used when no record was processed.
        if _p > 1:
            next_public_diff = self.obj.get_index(course_id, _p)
        else:
            next_public_diff = CIDX_VEC_NUM
        if next_public_diff > CIDX_VEC_NUM - 1:
            next_public_diff = CIDX_VEC_NUM - 1
        next_public[next_public_diff] = 1

        spend_time_count[self.get_spend_time_idx(timesum)] = 1
        sqrt_spend_time_count[self.get_spend_time_idx(sqrt_timesum)] = 1

        buf = []
        objw = self.objweight.get_features(infos)
        fp = [cc, len(infos), browser, server, timesum/60.0, sqrt_timesum/60.0]
        # Browser share of all records, smoothed by fixed constants.
        buf.append("%.3f" % ((browser+3.1)/(float(len(infos))+6.5)))
        if isDebug:
            print(fp)
        # All-zero placeholder (the code filling it was disabled); it stays
        # in the output so the row width does not change.
        event_isclick_count = [0] * (EVENT_VEC_NUM+1)

        fv = [event_count,weekday_count,hour_count,cidx_count,cidx_by_stat_count, month_count, spend_time_count, sqrt_spend_time_count, fp, next_public, event_sqrt_timesum, event_timesum, u_event_count, objw, event_isclick_count, uday_event_count, dx_by_date_count]
        fv_debug = ["event_count","weekday_count","hour_count","cidx_count","cidx_by_stat_count", "month_count", "spend_time_count", "sqrt_spend_time_count", "fp", "next_public", "event_sqrt_timesum", "event_timesum", "u_event_count", "objw", "event_isclick_count", "uday_event_count", "dx_by_date_count"]
        for name, vs in zip(fv_debug, fv):
            if isDebug:
                print("%s %s" % (name, vs))
            for v in vs:
                buf.append("%.3f" % transfer(v))
        return ",".join(buf)
Exemplo n.º 8
0
    def get_features_no_courseid(self, infos, isDebug = False):
        """Build the course-independent feature string for a list of log records.

        Records whose timestamp (index 0) has no ``T`` are skipped.  For the
        others the method accumulates elapsed time between close-by records,
        browser / server counts (index 1), half-month flags, event counts
        (index 2), weekday counts and two-hour-slot counts.  The result is a
        comma-joined string of ``%.3f`` values: the smoothed browser ratio
        followed by every vector in ``fv`` after ``transfer``.
        """
        week = Week()
        event_count = [0] * EVENT_VEC_NUM
        weekday_count = [0] * WEEKDAY_VEC_NUM
        hour_count = [0] * HOUR_VEC_NUM
        month_count = [0] * MONTH_VEC_NUM
        spend_time_count = [0] * 10
        sqrt_spend_time_count = [0] * 10
        browser = 0
        server = 0
        timesum = 0
        sqrt_timesum = 0
        prev_stamp = 0
        run_starts = 0
        for record in infos:
            stamp_text = record[0]
            if stamp_text.find("T") < 0:
                continue
            stamp = week.times(stamp_text)
            # Within 3 * 60 units after the previous record: add the real gap;
            # otherwise count a new run and add a nominal 1.
            if prev_stamp <= stamp and prev_stamp > stamp - 60 * 3:
                timesum = timesum + stamp - prev_stamp
                sqrt_timesum = sqrt_timesum + math.sqrt(stamp - prev_stamp)
            else:
                run_starts += 1
                timesum = timesum + 1
                sqrt_timesum = sqrt_timesum + 1
            if isDebug:
                print("%s %s %s %s" % (timesum, stamp, prev_stamp, stamp_text))
            prev_stamp = stamp

            day, timehms = stamp_text.split("T")
            if record[1] == "browser":
                browser += 1
            else:
                server += 1

            year, month, d = day.split("-")
            # Two slots per month, split at day 16.
            month_count[(int(month) - 1) * 2 + int(d)/16] = 1

            event_slot = get_event_idx(record[2])
            event_count[event_slot] += 1

            weekday_slot = week.get(day)
            weekday_count[weekday_slot] += 1
            hour_slot = int(timehms[:2]) / 2
            hour_count[hour_slot] += 1

        spend_time_count[self.get_spend_time_idx(timesum)] = 1
        sqrt_spend_time_count[self.get_spend_time_idx(sqrt_timesum)] = 1

        # Bucketed record count; computed as before but not part of the
        # returned string.
        info_vec = [0] * INFO_VEC_NUM
        bucket = len(infos)
        if bucket > 5:
            bucket = int(math.sqrt(bucket - 5 )) + 5
        bucket = min(bucket, INFO_VEC_NUM-1)
        info_vec[bucket] = 1

        fp = [run_starts, len(infos), browser, server, timesum/60.0, sqrt_timesum/60.0]
        out = ["%.3f" % ((browser+3.1)/(float(len(infos))+6.5))]
        if isDebug:
            print(fp)
        fv = [event_count,weekday_count,hour_count, month_count, spend_time_count, sqrt_spend_time_count, fp]
        fv_debug = ["event_count","weekday_count","hour_count", "month_count", "spend_time_count", "sqrt_spend_time_count", "fp"]
        for vec_name, vec in zip(fv_debug, fv):
            if isDebug:
                print("%s %s" % (vec_name, vec))
            out.extend(["%.3f" % transfer(value) for value in vec])
        return ",".join(out)
Exemplo n.º 9
0
def get_features(id,IS_DEBUG=False):
    """Assemble the complete feature row for one enrollment as a text line.

    Reads module-level objects (``label``, ``enrollment``, ``coursetimeinfo``,
    ``lastdayinfo``, ``userinfo``, ``moreinfo``, ``transfer_day``,
    ``lastdayfeature``, ``daylevel``, ``alldayfeature``, ``wholesitefeature``,
    ``cor``, ``statistic``) that are defined outside this function.

    Args:
        id: Enrollment id to build the row for.
        IS_DEBUG: When true, print intermediate vectors with their names.

    Returns:
        One newline-terminated string: label, id, course id, number of days
        and the statistic start index, followed by the ``+``-prefixed feature
        groups from the helper objects and the 348-slot vector built here.
    """
    y = label.get(id)
    username, course_id = enrollment.enrollment_info.get(id)

    course_id_vec = [0] * COURSE_VEC_NUM
    course_id_vec[coursetimeinfo.get_course_id(course_id)] = 1

    days = lastdayinfo.get_days(id)

    is_last_vec = [0] * IS_LAST_VEC_NUM #0 not last, 1-5, 6 more than 5
    is_pre_vec = [0] * IS_LAST_VEC_NUM #0 not last, 1-5, 6 more than 5
    is_next_vec = [0] * IS_LAST_VEC_NUM #0 not last, 1-5, 6 more than 5
    next_daynum_vec = [0] * IS_LAST_VEC_NUM
    gap_vec = [0] * IS_LAST_VEC_NUM

    if len(days) < 2:
        is_last_vec[0] = 1
        is_pre_vec[0] = 1
    else:
        last_day = days[-1]

        isCC = 0
        _diff = 100
        # Histogram of gaps between consecutive days, clamped to the last slot.
        for i in range(len(days)-1):
            diff = TimeUtil.diff(days[i+1], days[i])
            if diff > (IS_LAST_VEC_NUM-1):
                diff = (IS_LAST_VEC_NUM-1)
            gap_vec[diff] = gap_vec[diff] + 1
        # Flag the halved distance of every earlier day to the last day and
        # remember the smallest one.
        for day in days[:-1]:
            diff = TimeUtil.diff(last_day, day) / 2 + 1
            if diff < (IS_LAST_VEC_NUM-1):
                is_last_vec[diff] = 1
                isCC = isCC + 1
                if diff < _diff:
                    _diff = diff
        if isCC == 0:
            is_last_vec[IS_LAST_VEC_NUM-1] = 1
            _diff = IS_LAST_VEC_NUM-1
        is_pre_vec[_diff] = 1
    alldays = userinfo.get_days(username)
    daynum = 0
    whole_site_pre_vec = [0] * IS_LAST_VEC_NUM
    whole_site_post_vec = [0] * IS_LAST_VEC_NUM
    whole_site_pre_enrollment_vec = [0] * IS_LAST_VEC_NUM
    whole_site_post_enrollment_vec = [0] * IS_LAST_VEC_NUM

    whole_site_pre_enrollment_id_vec = [0] * COURSE_VEC_NUM
    whole_site_post_enrollment_id_vec = [0] * COURSE_VEC_NUM
    k1,k2 = moreinfo.get_enrollment_features(username,id)
    kids1, kids2 = moreinfo.get_enrollment_ids(username,id)
    for k in kids1:
        _username, k = enrollment.enrollment_info.get(k)
        whole_site_pre_enrollment_id_vec[coursetimeinfo.get_course_id(k)] = 1
    for k in kids2:
        _username, k = enrollment.enrollment_info.get(k)
        whole_site_post_enrollment_id_vec[coursetimeinfo.get_course_id(k)] = 1
    if k1 > IS_LAST_VEC_NUM - 1:
        k1 = IS_LAST_VEC_NUM - 1
    if k2 > IS_LAST_VEC_NUM - 1:
        k2 = IS_LAST_VEC_NUM - 1
    #print k1, k2, kids1, kids2
    whole_site_pre_enrollment_vec[k1] = 1
    whole_site_post_enrollment_vec[k2] = 1
    pre_num = 0
    post_num = 0
    lastday = ""
    hold_day_in_enrollment = 0
    if len(days) > 0:
        lastday = days[-1]
        hold_day_in_enrollment = TimeUtil.diff(lastday, days[0])
        # Classify each of the user's days relative to the last enrollment
        # day (halved difference > 0 counts as "post").
        for day in alldays:
            diff = TimeUtil.diff(day,days[-1]) / 2
            if diff > 0:
                post_num = post_num + 1
            else:
                pre_num = pre_num + 1
            if diff > 0  and diff < IS_LAST_VEC_NUM-1:
                is_next_vec[diff] = 1
                daynum += 1
        if daynum >= IS_LAST_VEC_NUM:
            daynum = IS_LAST_VEC_NUM - 1
    else:
        print("%s %s" % (id, "X"))
    hold_day_in_site = 0
    hold_day_after_in_site = 0
    if len(alldays) > 1 and len(lastday) > 1:
        hold_day_in_site = TimeUtil.diff(lastday, sorted(alldays)[0])
        hold_day_after_in_site = TimeUtil.diff(sorted(alldays)[-1],lastday)
    # The three hold_day_* vectors are still computed but only appear in the
    # commented-out variant of `fv` below, not in the output.
    hold_day_in_enrollment_vec = [0] * 12
    hold_day_in_site_vec = [0] * 12
    hold_day_after_in_site_vec = [0] * 12
    hold_day_in_enrollment_idx = get_gap_idx(hold_day_in_enrollment)
    hold_day_in_site_idx = get_gap_idx(hold_day_in_site)
    hold_day_after_in_site_idx = get_gap_idx(hold_day_after_in_site)
    hold_day_in_enrollment_vec[hold_day_in_enrollment_idx] = 1
    hold_day_in_site_vec[hold_day_in_site_idx] = 1
    hold_day_after_in_site_vec[hold_day_after_in_site_idx] = 1
    #print username,hold_day_in_enrollment,hold_day_in_site,hold_day_after_in_site
    if daynum == 0:
        is_next_vec[IS_LAST_VEC_NUM-1] = 1
    if post_num > IS_LAST_VEC_NUM-1:
        post_num = IS_LAST_VEC_NUM-1
    if pre_num > IS_LAST_VEC_NUM-1:
        pre_num = IS_LAST_VEC_NUM-1
    if IS_DEBUG:
        print("post_num, %s pre_num %s" % (post_num, pre_num))
    whole_site_pre_vec[pre_num] = 1
    whole_site_post_vec[post_num] = 1
    next_daynum_vec[daynum] = 1

    use_vec = userinfo.get_features(username, course_id)
    if len(days) > 0:
        transfer_vec = transfer_day.get_features(days[-1])
    else:
        transfer_vec = transfer_day.get_features("")
    enr_ids = enrollment.user_enrollment_id.get(username, [])
    enrollment_num = len(enr_ids)
    # Days of all of the user's enrollments, duplicates included.
    non_unique_days = []
    for _id in enr_ids:
        _days = lastdayinfo.get_days(_id)
        non_unique_days = non_unique_days + _days

    f_last_day = lastdayfeature.get_features(id)
    f_day_level = daylevel.get_features(id)
    f_common = alldayfeature.get_features(id)
    f_user_site = wholesitefeature.get_features(username)
    _lasthour = lastdayinfo.get_lasthour(id)
    f_cor,nodropdays = cor.get_features(id)
    f_statistic = statistic.get_features(lastday, course_id, days, alldays, non_unique_days, _lasthour, y, nodropdays)
    f_statistic_start_idx = statistic.get_start_idx(lastday, course_id)
    f_dayindex = lastdayinfo.get_day_idx_features(non_unique_days)
    f_days = [0] * DAYS_VEC_NUM
    f_all_days = [0] * DAYS_VEC_NUM
    f_days_half = [0] * (DAYS_VEC_NUM/2)
    f_all_days_half = [0] * (DAYS_VEC_NUM/2)
    f_enrollment_num_vec = [0] * MAX_ENROLLMENT_VEC_NUM
    if enrollment_num > MAX_ENROLLMENT_VEC_NUM - 1:
        enrollment_num = MAX_ENROLLMENT_VEC_NUM - 1
    f_enrollment_num_vec[enrollment_num] = 1

    dy_num = len(days)
    if dy_num > DAYS_VEC_NUM-1:
        dy_num = DAYS_VEC_NUM-1
    f_days[dy_num] = 1
    if dy_num > 2:
        dy_num = min(dy_num, DAYS_VEC_NUM/2-1)
    f_days_half[dy_num] = 1
    dy_num = len(alldays)
    if dy_num > DAYS_VEC_NUM-1:
        dy_num = DAYS_VEC_NUM-1
    f_all_days[dy_num] = 1
    if dy_num > 2:
        dy_num = min(dy_num, DAYS_VEC_NUM/2-1)
    f_all_days_half[dy_num] = 1

    # Fixed-width output vector; slots that are never written stay 0.
    f = [0] * 348
    f[0] = transfer(len(enrollment.course_info.get(course_id, [])))
    f[1] = transfer(math.sqrt(len(enrollment.course_info.get(course_id, []))))
    f[2] = transfer(enrollment_num)
    f[3] = transfer(len(days))
    f[4] = transfer(len(alldays))

    fv_no_transfer = [transfer_vec]
    fv_no_transfer_debug = ["transfer_vec"]
    enrollment_vec = get_enrollment_features(lastday, enrollment, username, lastdayinfo, id)
    start = 5
    # These values are copied as they are (no transfer()).
    for j in range(len(fv_no_transfer)):
        vs = fv_no_transfer[j]
        if IS_DEBUG:
            print("%s %s" % (fv_no_transfer_debug[j], vs))
        for (i, v) in enumerate(vs):
            f[start+i] = v
        start = start + len(vs)

    #fv = [course_id_vec,is_last_vec, use_vec, is_next_vec,next_daynum_vec,is_pre_vec,f_days,f_all_days,whole_site_pre_vec,whole_site_post_vec,gap_vec, f_enrollment_num_vec, whole_site_pre_enrollment_vec, whole_site_post_enrollment_vec, whole_site_pre_enrollment_id_vec, whole_site_post_enrollment_id_vec, hold_day_in_enrollment_vec,hold_day_in_site_vec,hold_day_after_in_site_vec]
    fv = [course_id_vec,is_last_vec, use_vec, is_next_vec,next_daynum_vec,is_pre_vec,f_days,f_all_days,whole_site_pre_vec,whole_site_post_vec,gap_vec, f_enrollment_num_vec, whole_site_pre_enrollment_vec, whole_site_post_enrollment_vec, whole_site_pre_enrollment_id_vec, whole_site_post_enrollment_id_vec, enrollment_vec, f_days_half,f_all_days_half]
    # One label per entry of `fv`, in the same order.  The list used to carry
    # three extra hold_day_* names, which shifted the labels printed for the
    # last three vectors.
    fv_debug = ["course_id_vec","is_last_vec", "use_vec", "is_next_vec","next_daynum_vec","is_pre_vec","f_days","f_all_days","whole_site_pre_vec","whole_site_post_vec","gap_vec", "f_enrollment_num_vec", "whole_site_pre_enrollment_vec", "whole_site_post_enrollment_vec", "whole_site_pre_enrollment_id_vec", "whole_site_post_enrollment_id_vec", "enrollment_vec","f_days_half","f_all_days_half"]
    for j in range(len(fv)):
        vs = fv[j]
        if IS_DEBUG:
            print("%s %s" % (fv_debug[j], vs))
        for (i, v) in enumerate(vs):
            f[start+i] = transfer(v)
        start = start + len(vs)
    if IS_DEBUG:
        print(start)
    f = ",".join(["%.2f" % k for k in f])
    fs = "%s,%s,%s,%d,%d," \
         "+%s,+%s,+%s,+%s,+%s," \
         "+%s,+%s,+%s\n" % (y, id, course_id, len(days),f_statistic_start_idx,
                                                        f_common, f_last_day, f_day_level, f_user_site, f,
                                                        f_statistic, f_dayindex, f_cor)
    #fs = "%s,%s,%s,%d,%s\n" % (y, id, course_id, len(days), f_last_day )
    return fs
#!/usr/bin/python

from SynonymsTree import *
import cPickle as pickle
from transfer import *

def transfer(l, coeff=0.5):
	print l
	print " -> Transfert possible ? " + str(transferConditions(arbre, l, coeff))
	print l
	print "--------------------------------------------------------"

myfile = open("phages.data", "rb")

arbre = pickle.load(myfile)

l1 = ["integrase", "integrase", "recombinase", "recombinase spe0", "hypothetical protein", "recombinase spe2", "integrase spe0"]
l2 = ["integrase", "integrase", "recombinase", "recombinase spe0", "test2", "phage repressor protein", "hypothetical protein"]
l3 = ["integrase", "integrase", "recombinase spe0", "phage repressor protein", "phage repressor protein"]
l4 = ["integrase", "integrase", "recombinase", "hypothetical protein"]

print arbre.displayTree()

transfer(l1)
transfer(l2, 0.8)
transfer(l3)
transfer(l4)

myfile.close()
Exemplo n.º 11
0
        load_data.preprocess_fragment(cfrag))


    train_step = train_step_factory(extractor,
                                    content_targets,
                                    style_targets,
                                    style_content_loss,
                                    content_layers,
                                    style_layers,
                                    style_weight=1000,
                                    content_weight=1
    )

    epochs = 7
    np_data = transfer(transfer_chunk,
                       train_step,
                       epochs = epochs
    )
    
    data_diffs = []
    for j in range(epochs -1):
        npd = np_data[j+1] - np_data[j]
        data_diffs.append(np.abs(npd).sum().sum())

    print(f"Absolute difference per epoch: {data_diffs}")

    transfer_frag = Fragment(
        generated_song,
        i,
        np_data = load_data.postprocess_fragment(
            tf.Variable(np_data[-1]))
    )
					print "******************Executing in the same node",site,"***************"
					print "Parent cost",totalCostAtParent
					transfer(cur,db,site,None,cpuCost,diskCost,tables,totalCostAtParent)
					listOfQueries.append({"siteId":site, "cpuUtilization":cpuCost,
							"diskUtilization":diskCost,  "parentId":None, 
							"tables":tables, "time":int(totalCostAtParent/10)+15,
							"executionTime":int(totalCostAtParent/10)+1})
				else:
					print "******************ALL ARE 100",site,"***************"
					listOfWaititngQueries.append({"tables":tables,"site":site,"isServed":0})
					queueSize+= 1
			'''
			if cpuUsage+cpuCost < 100 and diskUsage+diskCost < 100:
				print "******************Executing in the same node",site,"***************"
				print "Parent cost",totalCostAtParent
				transfer(cur,db,site,None,cpuCost,diskCost,tables,totalCostAtParent)
				listOfQueries.append({"siteId":site, "cpuUtilization":cpuCost,
						"diskUtilization":diskCost,  "parentId":None, 
						"tables":tables, "time":int(totalCostAtParent/10)+15,
						"executionTime":int(totalCostAtParent/10)+1})
			else:
				print "******************ALL ARE 100",site,"***************"
				listOfWaititngQueries.append({"tables":tables,"site":site,"isServed":0})
				queueSize+= 1
				
f.close()
q.close()
c.close()
m.close()
cur.close()
db.close()
						childNode.append(node)
				flag = 0	

				###################### when a child node with the data is found ###############
				for child in childNode:
					if flag == 0:
						cpuCost = (calculateCPUcost(cur,child,tables))/1000
						diskCost =0# (calculateDiskCost(cur,child,tables))/100
						waitingTime = calculateWaitingTime(cur,child)
						totalCostAtChild = cpuCost + diskCost + waitingTime
						cpuUsage = getCPUUtilization(cur,child)
						diskUsage = getDiskUtilization(cur,child)
						if totalCostAtParent > totalCostAtChild and cpuUsage+cpuCost < 100 and diskUsage+diskCost < 100:
							print "Parent cost",totalCostAtParent,"Child cost",totalCostAtChild
							print "************* executing at child",child,"***************"
							transfer(cur,db,child,site,cpuCost,diskCost,tables,totalCostAtChild)
							listOfQueries.append({"siteId":child, "cpuUtilization":cpuCost,
									"diskUtilization":diskCost,  "parentId":site, 
									"tables":tables, "time":int(totalCostAtChild/10)+15,
									"executionTime":int(totalCostAtChild/10)+1})
							flag = 1
				
				###################### search the neighbouring node to execute the query  ###############
				neighbourNode = findNeighbourNode(cur,site)
				for neighbour in neighbourNode:
				   if flag == 0:
					cpuCost = (calculateCPUcost(cur,neighbour,tables))/1000
					diskCost = 0# (calculateDiskCost(cur,neighbour,tables))/100
					waitingTime = calculateWaitingTime(cur,neighbour)
					cpuUsage = getCPUUtilization(cur,neighbour)
					diskUsage = getDiskUtilization(cur,neighbour)
Exemplo n.º 14
0
def main(argv):
	"""Run the interactive command loop.

	Reads one command per line from standard input and dispatches it to the
	matching handler (login, logout, create, delete, deposit, withdraw,
	transfer). Commands other than 'login' are rejected until a login has
	succeeded, and 'create' / 'delete' are rejected while loginType is 2
	(retail mode, per the error messages).

	argv -- sequence of exactly two items; argv[0] is passed to login() and
	        argv[1] is passed to logout(). With any other length the
	        function returns immediately without reading input.

	The loop never terminates on its own; it ends only when reading input
	or a handler raises.
	"""

	loginType = 0

	accounts = []
	transactions = []
	deletions = []
	withdrawals = {}

	if len(argv) != 2:
		return

	# Infinite loop for receiving input.
	while True:

		# raw_input() returns the typed line as a plain string. Python 2's
		# input() would evaluate it as an expression, so a bare `login`
		# raised NameError and arbitrary code could be executed.
		command = raw_input()

		# Disallow any actions until login.
		if (not loginType and command != 'login'):
			print "Error, please login before entering other commands."

		# Disallow multiple logins.
		elif (loginType and command == 'login'):
			print "You are already logged in."

		# Execute login
		elif command == 'login':
			loginType = login(accounts, argv[0])

		# Execute logout
		elif command == 'logout':
			loginType = logout(transactions, argv[1])

		# Disallow create in retail mode
		elif loginType == 2 and command == 'create':
			print "Error, please use agent mode to create accounts."

		# Execute create
		elif command == 'create':
			create(accounts, deletions, transactions)

		# Disallow delete in retail mode
		elif loginType == 2 and command == 'delete':
			print "Error, please use agent mode to delete accounts."

		# Execute delete
		elif command == 'delete':
			delete(accounts, deletions, transactions)

		# Execute deposit
		elif command == 'deposit':
			deposit(loginType, accounts, deletions, transactions)

		# Execute withdraw
		elif command == 'withdraw':
			withdraw(loginType, accounts, deletions, transactions, withdrawals)

		# Execute transfer
		elif command == 'transfer':
			transfer(loginType, accounts, deletions, transactions)

		else:
			print "Error, command not recognized."

		# Drop the per-session bookkeeping whenever nobody is logged in.
		# This check used to sit after the infinite loop (space-indented
		# among tab-indented lines) and therefore could never run.
		# Presumably logout() returns 0 -- confirm against its definition.
		if loginType == 0:
			withdrawals = {}
			deletions = []
Exemplo n.º 15
0
    def build(self):
        """Build one time-feature string per enrollment id and save them.

        For every id in the enrollment file a list of vectors is computed
        from the enrollment's own active days and from the user's active
        days across the whole site. Each value is passed through
        transfer(), the vectors are joined with commas, and the resulting
        {id: feature string} dict is written with writepickle to
        BaseTimeFeature.feature_filename.

        Returns nothing; progress is reported with print statements.
        """
        print "start build BaseTimeFeature..."
        fs = {}
        enrollment = Enrollment("../data/merge/enrollment.csv")
        lastdayinfo = LastDayInfo()
        lastdayinfo.load_id_days()
        user_statistic_info = UserStatisticInfo()
        user_statistic_info.load()
        # NOTE(review): coursetimeinfo is loaded but never read in this method.
        coursetimeinfo = CourseStatisticTimeInfo()
        coursetimeinfo.load()
        transfer_day = Transfer()
        transfer_day.load()
        for id in enrollment.ids:
            # course_id is unpacked here but not used below.
            username, course_id = enrollment.enrollment_info.get(id)

            days = lastdayinfo.get_days(id)
            # Gap features: for each entry of `days` except the last, record
            # the difference to the next entry and the (halved, +1)
            # difference to the final entry.
            # Presumably add_vector_value increments a bucket of the vector -- confirm.
            gap_day_vec = [0] * MAX_GAP_DAY_VEC_NUM
            gap_lastday_vec = [0] * MAX_GAP_DAY_VEC_NUM
            for i in range(len(days)-1):
                gap_day = TimeUtil.diff(days[i+1], days[i])
                add_vector_value(gap_day_vec, gap_day)

                # If TimeUtil.diff returns an int, "/" is Python 2 integer division.
                gap_lastday = TimeUtil.diff(days[-1], days[i]) / 2 + 1
                add_vector_value(gap_lastday_vec, gap_lastday)

            # Active days of this user over the whole site, not only this enrollment.
            alldays = user_statistic_info.get_unique_days(username)
            before_lastday_day_num = 0
            after_lastday_day_num = 0
            after_lastday_day_vec = [0] * MAX_GAP_DAY_VEC_NUM

            # These spans keep the value 0 when the enrollment has no days at all.
            hold_day_in_enrollment = 0
            hold_day_in_site = 0
            hold_day_after_in_site = 0
            if len(days) > 0:
                lastday = days[-1]
                hold_day_in_enrollment = TimeUtil.diff(lastday, days[0])
                # NOTE(review): sorted(alldays)[0] raises IndexError if alldays is
                # empty while days is not -- confirm that cannot happen.
                hold_day_in_site = TimeUtil.diff(lastday, sorted(alldays)[0])
                hold_day_after_in_site = TimeUtil.diff(sorted(alldays)[-1],lastday)
                for day in alldays:
                    # Halved difference between each site day and the last
                    # enrollment day; only values > 0 count as "after".
                    diff = TimeUtil.diff(day, lastday) / 2
                    if diff > 0:
                        after_lastday_day_num = after_lastday_day_num + 1
                        add_vector_value(after_lastday_day_vec, diff)
                    else:
                        # Everything else, including lastday itself, counts as "before".
                        before_lastday_day_num = before_lastday_day_num + 1

            # Sum of every bucket except the final one.
            lastday_after_num_in_maxgap = sum(after_lastday_day_vec[:-1])

            # Length-12 vectors with a single 1 at the index returned by get_gap_idx.
            # Assumes get_gap_idx returns a valid index for a 12-element list -- TODO confirm.
            hold_day_in_enrollment_vec = [0] * 12
            hold_day_in_site_vec = [0] * 12
            hold_day_after_in_site_vec = [0] * 12
            hold_day_in_enrollment_idx = get_gap_idx(hold_day_in_enrollment)
            hold_day_in_site_idx = get_gap_idx(hold_day_in_site)
            hold_day_after_in_site_idx = get_gap_idx(hold_day_after_in_site)
            hold_day_in_enrollment_vec[hold_day_in_enrollment_idx] = 1
            hold_day_in_site_vec[hold_day_in_site_idx] = 1
            hold_day_after_in_site_vec[hold_day_after_in_site_idx] = 1

            # Counts turned into vectors by get_vector (encoding defined elsewhere).
            lastday_before_num_vec = get_vector(before_lastday_day_num, DAYS_VEC_NUM)
            lastday_after_num_vec = get_vector(after_lastday_day_num, DAYS_VEC_NUM)
            lastday_after_num_in_maxgap_vec = get_vector(lastday_after_num_in_maxgap, DAYS_VEC_NUM)

            day_len_vec = get_vector(len(days), DAYS_VEC_NUM)
            alldays_len_vec = get_vector(len(alldays), DAYS_VEC_NUM)

            # Same counts again with both the value and the vector size halved.
            day_len_half_vec = get_vector((len(days)+1)/2, DAYS_VEC_NUM/2)
            alldays_len_half_vec = get_vector((len(alldays)+1)/2, DAYS_VEC_NUM/2)

            # Features for the last enrollment day; "" is passed when there is none.
            if len(days) > 0:
                transfer_vec = transfer_day.get_features(days[-1])
            else:
                transfer_vec = transfer_day.get_features("")

            # The raw counts are also emitted as plain numbers.
            just_num_vec = [before_lastday_day_num, after_lastday_day_num, lastday_after_num_in_maxgap, len(days), len(alldays)]
            # NOTE(review): the three hold_day_* vectors appear twice in this list.
            # Removing the duplicates would change the width of the output --
            # confirm with consumers of the feature file before touching it.
            fv = [gap_day_vec, gap_lastday_vec, after_lastday_day_vec, hold_day_in_enrollment_vec, hold_day_in_site_vec, hold_day_after_in_site_vec,
                  hold_day_in_enrollment_vec, hold_day_in_site_vec, hold_day_after_in_site_vec, lastday_before_num_vec, lastday_after_num_vec,
                  lastday_after_num_in_maxgap_vec, day_len_vec, alldays_len_vec, day_len_half_vec, alldays_len_half_vec, transfer_vec, just_num_vec]
            # Each vector becomes one comma-joined string of transfer()-mapped values.
            f = []
            for arr in fv:
                f.append(",".join(["%s" % transfer(k) for k in arr]))

            fs[id] = ",".join(["%s" % k for k in f])

        writepickle(BaseTimeFeature.feature_filename, fs)
        print "build BaseTimeFeature over"
    def build(self):
        print "start build DayLevelFeature..."
        log = LogInfo("../data/merge/log.csv")
        enrollment = Enrollment("../data/merge/enrollment.csv")
        obj = Obj()

        ccc = 0
        fs = {}
        feature_num = 0
        for id in enrollment.ids:
            ccc += 1
            if ccc % 5000 == 0:
                print ccc
            infos = log.enrollment_loginfo.get(id, [])
            username, course_id = enrollment.enrollment_info.get(id)

            info_by_day = {}
            for info in infos:
                if info[0].find("T") < 0:
                    continue
                day, timehms = info[0].split("T")
                weight = 1
                if day not in info_by_day:
                    info_by_day[day] = {}
                    info_by_day[day]["event"] = {}
                if info[1] == "browser":
                    info_by_day[day]["browser"] = info_by_day[day].get(
                        "browser", 0) + weight
                else:
                    info_by_day[day]["server"] = info_by_day[day].get(
                        "server", 0) + weight

                event_idx = get_event_idx(info[2])
                info_by_day[day][
                    "event"][event_idx] = info_by_day[day]["event"].get(
                        event_idx, 0) + weight

                info_by_day[day]["dayofweek"] = TimeUtil.getDayWeek(day)

                hour = int(timehms[:2]) / 2
                info_by_day[day]["hour"] = hour

                cidx = obj.get_index(course_id, TimeUtil.timestamp(info[0]))
                info_by_day[day]["cidx"] = cidx

            day_event_count = [0] * EVENT_VEC_NUM
            day_weekday_count = [0] * WEEKDAY_VEC_NUM
            day_hour_count = [0] * HOUR_VEC_NUM
            day_cidx_count = [0] * CIDX_VEC_NUM
            _browser = 0
            _server = 0
            for (day, info) in info_by_day.items():
                for (k, v) in info["event"].items():
                    day_event_count[k] = day_event_count[k] + math.sqrt(v)
                _browser = _browser + math.sqrt(info.get("browser", 0))
                _server = _server + math.sqrt(info.get("server", 0))
                day_weekday_count[info["dayofweek"]] = day_weekday_count[
                    info["dayofweek"]] + 1
                day_hour_count[info["hour"]] = day_hour_count[info["hour"]] + 1
                day_cidx_count[info["cidx"]] = day_cidx_count[info["cidx"]] + 1

            just_num_vec = [_browser, _server]
            fv = [
                day_event_count, day_weekday_count, day_hour_count,
                day_cidx_count, just_num_vec
            ]

            f = []
            feature_num = 0
            for arr in fv:
                feature_num += len(arr)
                arr = ["%s" % transfer(k) for k in arr]
                f.append(",".join(arr))
            fs[id] = ",".join(f)
        writepickle(DayLevelFeature.feature_filename, fs)
        print "build DayLevelFeature over!", feature_num
Exemplo n.º 17
0
from index import *
from transfer import *

# Script entry point: runs transfer() twice on the same image pair, each time
# with a different set of U/V files, and prints the elapsed time of each run.
# NOTE(review): `time` is not imported in the visible lines -- presumably it
# comes in through one of the star imports; confirm.
if __name__ == '__main__':
    originImagePath = '../images/filter/stars_200.jpg'
    targetImagePath = '../images/filter/golden_gate_bridge.jpg'

    # The first timed section covers image loading and preparation as well
    # as the first transfer() call.
    start = time.clock()
    filterData, filterShape = loadImageData(targetImagePath, True, False)
    originData, originShape = loadImageData(originImagePath, True, False)
    # The ranges are taken before the data is normalized.
    filterRange = rangeMat(filterData)
    originRange = rangeMat(originData)
    filterData = normalization(filterData)
    originData = normalization(originData)
    # First run: U/V loaded from the 'mb7' / 'b7' files.
    originU, originV = loadUV('../tem/stars_200/mb7')
    targetU, targetV = loadUV('../tem/bridge_400/b7')
    transfer(originU, originV, originData, originRange, targetU, targetV,
             filterData, filterRange)
    print time.clock() - start

    # Second run: same image data, U/V loaded from the 'ma7' / 'a7' files.
    # Only the UV loading and the transfer() call are timed here.
    start = time.clock()
    originU, originV = loadUV('../tem/stars_200/ma7')
    targetU, targetV = loadUV('../tem/bridge_400/a7')
    transfer(originU, originV, originData, originRange, targetU, targetV,
             filterData, filterRange)
    print time.clock() - start