def getBehaviorData(self):
    """Accumulate per-day click counts for every tracked user.

    Adds five feature slots per (user, day):
    0 video, 1 problem, 2 sequential, 3 chapter, 4 ddl_hit.
    Slot 4 is incremented for every event whose element has a due date
    that is on or after the day of the event.
    """
    self.expand_feature(5)
    with open(os.path.join(self.path, 'behavior.json')) as f:
        behavior = json.load(f)
    with open(os.path.join(self.path, 'element.json')) as f:
        element = json.load(f, object_hook=json_util.object_hook)

    # Event category -> index of the counter it feeds.
    slot_of = {'video': 0, 'problem': 1, 'sequential': 2, 'chapter': 3}

    for uid in behavior:
        if uid not in self.feature:
            continue
        per_day = behavior[uid]
        for date in per_day:
            single_date = util.parseDate(date)
            # Only days inside the half-open course window [start, end).
            if not (self.start <= single_date < self.end):
                continue
            for log in per_day[date]:
                _, kind = util.parseLog(log)
                due = element[log]['due']
                if due is not None and single_date <= util.parseDate(due):
                    self.feature[uid][single_date][4] += 1
                slot = slot_of.get(kind)
                if slot is not None:
                    self.feature[uid][single_date][slot] += 1
def getForumData(self):
    """Accumulate per-day forum activity for every tracked user.

    Adds six feature slots per (user, day):
    0 post      - top-level posts written (item has no 'father')
    1 reply     - replies written
    2 replyed   - replies received from others
    3 length    - summed item['length'] of everything written
    4 upvoted   - summed item['vote_up'] of everything written
    5 cert-friend - for a reply, incremented on the author when the
      parent's author had score > .5 at the previous time step, and on
      the parent's author when the replier had score > .5 at the
      previous time step.

    Only items of ``self.course`` written by a tracked user inside
    [self.start, self.end) are counted.
    """
    self.expand_feature(6)
    with open(FORUM_DIR) as f:
        forum = json.load(f)
    for item in forum.itervalues():
        if item['course'] != self.course:
            continue
        single_date = util.parseDate(item['date'])
        uid = item['user']
        if uid not in self.feature:
            continue
        if single_date < self.start or single_date >= self.end:
            continue
        author_day = self.feature[uid][single_date]
        if item['father'] is None:
            author_day[0] += 1
        else:
            author_day[1] += 1
            # NOTE(review): raises KeyError if the parent item is absent
            # from the dump - confirm the dump is always complete.
            fid = forum[item['father']]['user']
            if fid in self.feature:
                parent_day = self.feature[fid][single_date]
                parent_day[2] += 1
                T = self.getTimeStamp(single_date)
                # Scores are read at the previous time step, so there is
                # nothing to look at when T == 0.
                if T > 0 and self.score[fid][T - 1] > .5:
                    author_day[5] += 1
                if T > 0 and self.score[uid][T - 1] > .5:
                    parent_day[5] += 1
        author_day[3] += item['length']
        author_day[4] += item['vote_up']
def getBehaviorData(self):
    """Accumulate per-day click counts for every tracked user.

    Adds six feature slots per (user, day):
    0 video, 1 problem, 2 sequential, 3 in-time sequential visit,
    4 chapter, 5 ddl hit.

    Slot 3 counts sequential events that fall in the same time stamp as
    the sequential's 'start' date; slot 5 counts events that fall in the
    same time stamp as the element's 'due' date. Only events of
    ``self.course`` inside [self.start, self.end) are counted.
    """
    self.expand_feature(6)
    with open(BEHAVIOR_DIR) as f:
        behavior = json.load(f)
    with open(MONGO_DIR) as f:
        mongo = json.load(f)
    for uid in behavior:
        if uid not in self.feature:
            continue
        for date in behavior[uid]:
            single_date = util.parseDate(date)
            if single_date < self.start or single_date >= self.end:
                continue
            for log in behavior[uid][date]:
                course, catagory = util.parseLog(log)
                if course != self.course:
                    continue
                counters = self.feature[uid][single_date]
                if log in mongo and mongo[log]['due'] is not None:
                    T_ddl = self.getTimeStamp(
                        util.parseDate(mongo[log]['due']))
                    T = self.getTimeStamp(single_date)
                    if T_ddl == T:
                        counters[5] += 1
                if catagory == 'video':
                    counters[0] += 1
                elif catagory == 'problem':
                    counters[1] += 1
                elif catagory == 'sequential':
                    counters[2] += 1
                    try:
                        date_obj = mongo[log]['start']
                    except (KeyError, TypeError):
                        # Element missing from the dump (or without a
                        # usable 'start'): report it and move on.
                        print(log)
                        continue
                    if date_obj is None:
                        continue
                    date_obj = util.parseDate(date_obj)
                    if self.getTimeStamp(date_obj) == self.getTimeStamp(
                            single_date):
                        counters[3] += 1
                elif catagory == 'chapter':
                    counters[4] += 1
def __init__(self):
    """Set up the empty per-user, per-day feature table.

    After construction ``self.feature[uid][T]`` is a list of features
    for user ``uid`` at time ``T`` (initially empty). Features should be
    additive. Register-only students are removed from the dataset.
    """
    self.feature = {}
    self.feature_num = 0
    with open(COURSE_INFO_DIR) as f:
        courses = json.load(f)
    self.getUser()

    course_info = courses[self.course]
    self.start = util.parseDate(course_info['start'])
    self.end = util.parseDate(course_info['end'])

    # One empty feature list per user for every date yielded by
    # util.daterange(start, end).
    for uid in self.feature:
        per_day = self.feature[uid]
        for single_date in util.daterange(self.start, self.end):
            per_day[single_date] = []

    logger.info('course: %s user: %d start: %s end: %s', self.course,
                len(self.feature), self.start.isoformat(),
                self.end.isoformat())
def queryData(self):
    """Stream matching raw log lines for one game server to the client.

    The log name is the fourth '/'-separated component of ``self.path``.
    Request arguments: 'game_id', 'server_id', 'start_time', 'end_time'
    and optionally 'loginname'. When 'loginname' is given, only lines
    whose second '|'-separated field equals it are written.

    The request is finished exactly once on every path: early for a
    too-short path, a missing game/server id, or a missing log directory
    (which also sets a NOT_FOUND response code), otherwise after all
    matching lines have been written.
    """
    path_parts = self.path.split('/')
    if len(path_parts) < 4:
        return self.finish()
    log_name = path_parts[3]

    game_id = self.getArgs('game_id')
    server_id = self.getArgs('server_id')
    if game_id is None or server_id is None:
        # Must return here: falling through would concatenate None into
        # the path below.
        return self.finish()

    # NOTE(review): game_id, server_id and log_name come from the request
    # and are joined into a filesystem path unchecked - confirm that
    # path traversal is prevented upstream.
    path = config.LOG_DIR + '/' + game_id + '/' + server_id
    if not os.path.exists(path):
        self.setResponseCode(http.NOT_FOUND)
        # Must return here: otherwise the request is finished a second
        # time at the end of this method.
        return self.finish()

    start_date = util.parseDate(self.getArgs('start_time'))
    end_date = util.parseDate(self.getArgs('end_time'))
    loginname = self.getArgs('loginname')
    search_dirs = util.listDirs(game_id, server_id, start_date, end_date)
    search_dirs.sort()
    for d in search_dirs:
        filename = path + '/' + d + '/' + log_name + '.dat'
        if not os.path.isfile(filename):
            continue
        with open(filename, 'r') as f:
            for line in f:
                if loginname is not None:
                    fields = line.split('|', 3)
                    if len(fields) < 3 or loginname != fields[1]:
                        continue
                self.write(line)
    self.finish()
def getLearningData(self):
    """Accumulate per-day learning durations for every tracked user.

    Adds four feature slots per (user, day):
    0 video_time, 1 assign_time, 2 video_day, 3 assign_day.
    Each *_day slot goes up by one when the matching duration is
    positive.
    """
    self.expand_feature(4)
    with open(os.path.join(self.path, 'duration.json')) as f:
        learn = json.load(f)
    for uid in learn:
        if uid not in self.feature:
            continue
        durations = learn[uid]
        for day_key in durations:
            single_date = util.parseDate(day_key)
            # Only days inside the half-open course window [start, end).
            if not (self.start <= single_date < self.end):
                continue
            spent = durations[day_key]
            slots = self.feature[uid][single_date]
            slots[0] += spent[0]
            slots[1] += spent[1]
            slots[2] += int(spent[0] > 0)
            slots[3] += int(spent[1] > 0)
def getSequentialRelease(self):
    """Count, per time stamp, the sequentials released in this course.

    Adds one slot to ``self.X`` and, for every element of ``self.course``
    whose key contains 'sequential' and that has a non-null 'start',
    increments slot 0 at the time stamp of that start date for every
    user in ``self.X``. Each counted element is also printed together
    with its start value.
    """
    with open(MONGO_DIR) as f:
        mongo = json.load(f)
    self.expand_X(1)
    for item in mongo:
        try:
            course, _ = util.parseLog(item)
        except Exception:
            # Keys that util.parseLog cannot handle are skipped.
            continue
        if course != self.course:
            continue
        start = mongo[item]['start']
        if start is None or item.find('sequential') == -1:
            continue
        print('%s %s' % (item, start))
        T = self.getTimeStamp(util.parseDate(start))
        for uid in self.X:
            self.X[uid][T][0] += 1
def getLearningData(self):
    """Accumulate per-day learning durations for every tracked user.

    Adds four feature slots per (user, day):
    0 video_time, 1 assign_time, 2 video_day, 3 assign_day.
    Each record of ``learn[uid][self.course]`` is read as
    (date, video_time, assign_time).
    """
    self.expand_feature(4)
    with open(LEARNING_TIME_DIR) as f:
        learn = json.load(f)
    for uid in learn:
        if uid not in self.feature or self.course not in learn[uid]:
            continue
        for record in learn[uid][self.course]:
            single_date = util.parseDate(record[0])
            # Only days inside the half-open course window [start, end).
            if not (self.start <= single_date < self.end):
                continue
            slots = self.feature[uid][single_date]
            slots[0] += record[1]
            slots[1] += record[2]
            # NOTE(review): both day counters go up for every record,
            # even when the recorded duration is zero - confirm intended.
            slots[2] += 1
            slots[3] += 1
def getDDL(self):
    """Collect the sorted due dates of this course into ``self.ddls``.

    Every element of ``self.course`` with a non-null 'due' contributes
    one parsed date. For the course "TsinghuaX/20220332_2X/_" the last
    (latest) deadline is dropped. Each remaining deadline is printed
    together with its position in the course, expressed as the fraction
    of whole days elapsed between ``self.start`` and ``self.end``.
    """
    self.ddls = []
    with open(MONGO_DIR) as f:
        mongo = json.load(f)
    for item in mongo:
        try:
            course, _ = util.parseLog(item)
        except Exception:
            # Keys that util.parseLog cannot handle are skipped.
            continue
        if course != self.course:
            continue
        due = mongo[item]['due']
        if due is not None:
            self.ddls.append(util.parseDate(due))
    self.ddls.sort()
    # NOTE(review): hard-coded special case for one course - the reason
    # for discarding its last deadline is not visible here.
    if self.course == "TsinghuaX/20220332_2X/_":
        self.ddls = self.ddls[:-1]
    for item in self.ddls:
        ratio = (item - self.start).days / float(
            (self.end - self.start).days)
        print('%s %s' % (item, ratio))
# run: answer an aggregate query over the pipe-delimited
# ``<log_name>.dat`` files of one game server and write the result to req.
#
# What the code below does, in order:
#   * Returns req.finish() immediately when the field-description file
#     LOG_DIR/<game_id>/desc/<log_name> is not a file.
#   * Maps every line of that file (minus its last character) to its
#     0-based line index in ``field_idx``.
#   * Resolves the comma-separated names in args['group'] to column
#     indexes; an unknown name is printed and ends the request.
#   * Parses args 'start_time' / 'end_time' with util.parseDate and asks
#     util.listDirs for the directories to scan.
#   * Builds (specialnodes, func) from args 'on' via
#     cp.generate_function_tree; a func of None ends the request.
#   * Scans LOG_DIR/<game_id>/<server_id>/<dir>/<log_name>.dat for each
#     directory where that file exists, splitting lines on '|', skipping
#     lines with more fields than ``field_idx`` has entries, converting
#     fields to int where possible, and building a group key by joining
#     the group-by columns, each followed by '|'.
#   * Writes one line per key to req (key + str(last element of that
#     key's list) + newline) and returns req.finish().
#
# NOTE(review): Python 2 only (file(), has_key, print statement, xrange,
#   iteritems).
# NOTE(review): the description file handle ``f`` is opened with file()
#   and not closed in this block.
# NOTE(review): each read loop stops at the first line that is empty
#   after dropping its last character, so a blank line ends the scan of
#   that file.
# NOTE(review): ``final_data`` and ``on`` are assigned but never read here.
# NOTE(review): this block reached review with its indentation collapsed,
#   so whether the single ``else:`` pairs with ``if len(specialnodes) > 0``
#   or with ``if cp.has_paramnode(func) == False`` cannot be determined
#   from this view - confirm against the original file before
#   restructuring.
def run(game_id, server_id, log_name, req, args): desc_path = config.LOG_DIR + '/' + game_id + '/desc/' + log_name if not os.path.isfile(desc_path): return req.finish() f = file(desc_path, 'r') lines = f.readlines() field_idx = dict() idx = 0 for line in lines: line = line[0:-1] field_idx[line] = idx idx = idx + 1 group_by = list() if args.has_key('group'): groups = args['group'].split(',') for group in groups: if not field_idx.has_key(group): print 'Invalid field name ' + group return req.finish() group_by.append(field_idx[group]) start_date = util.parseDate(args.get('start_time')) end_date = util.parseDate(args.get('end_time')) dirs = util.listDirs(game_id, server_id, start_date, end_date) path = config.LOG_DIR + '/' + game_id + '/' + server_id final_data = dict() on = -1 on_field = args.get('on') specialnodes, func = cp.generate_function_tree(on_field, field_idx) if func == None: return req.finish() ret_data = {} if len(group_by) == 0: ret_data[''] = [0] * (len(specialnodes) + 1) flag = True #if there are max, min, count, sum if len(specialnodes) > 0: for d in dirs: filename = path + '/' + d + '/' + log_name + '.dat' if not os.path.isfile(filename): continue f = file(filename, 'r') with (f): while True: line = f.readline()[:-1] if len(line) == 0: break parts = line.split('|') if len(parts) > len(field_idx.keys()): continue temp = [] for item in parts: try: temp.append(int(item)) except: temp.append(item) key = str() for idx in group_by: key = key + parts[idx] + '|' if key not in ret_data: ret_data[key] = [0] * (len(specialnodes) + 1) flag = True for i in xrange(len(specialnodes)): sn = specialnodes[i][0] if sn.name == 'min' and flag == True: ret_data[key][i] = 9999999999 elif sn.name == 'count' and flag == True: ret_data[key][i] = [] if sn.name == 'count': ret_data[key][i] = sn.evaluate( [temp, ret_data[key][i]]) else: ret_data[key][i] = sn.evaluate( [temp, [ret_data[key][i]]]) flag = False if cp.has_paramnode(func) == False: for key in ret_data: #adjust 'NA' 
nodes for i in xrange(len(specialnodes)): sn = specialnodes[i][1] sn.set_value(ret_data[key][i]) ret_data[key][-1] = func.evaluate([]) else: for d in dirs: filename = path + '/' + d + '/' + log_name + '.dat' if not os.path.isfile(filename): continue f = file(filename, 'r') with (f): while True: line = f.readline()[:-1] if len(line) == 0: break parts = line.split('|') if len(parts) > len(field_idx.keys()): continue temp = [] for item in parts: try: temp.append(int(item)) except: temp.append(item) key = str() for idx in group_by: key = key + parts[idx] + '|' #adjust 'NA' nodes for i in xrange(len(specialnodes)): sn = specialnodes[i][1] sn.set_value(ret_data[key][i]) ret_data[key][-1] = func.evaluate(temp) for k, v in ret_data.iteritems(): req.write(k + str(v[-1]) + '\n') return req.finish()
def __init__(self, startDateStr, endDateStr):
    """Store the parsed bounds of a date range.

    Both strings are passed through parseDate; the results are kept as
    ``self.start`` and ``self.end``.
    """
    lower_bound = parseDate(startDateStr)
    self.start = lower_bound
    upper_bound = parseDate(endDateStr)
    self.end = upper_bound
def apply(self, pfile):
    """Tell whether the file's original date-time tag is in range.

    Reads the TAG_DATETIME_ORIG tag from ``pfile``, parses its string
    form with parseDate and checks it against the inclusive range
    [self.start, self.end].
    """
    raw_stamp = pfile.getTag(exif_const.TAG_DATETIME_ORIG)
    taken_at = parseDate(str(raw_stamp))
    return self.start <= taken_at and taken_at <= self.end
'--end_date', required=True, type=str, help= "End of date range (format: YYYY-MM-DD) to generate date-based directories" ) parser.add_argument( '--keep_file', action='store_true', help= 'If set, an empty ".keep" file will be placed in every directory created' ) parser.add_argument('--verbose', action='store_true', help='If set, more verbose reporting will be done') args = parser.parse_args() keep_file_name = ".keep" start_date = parseDate(args.start_date, "start") end_date = parseDate(args.end_date, "end") start = time.time() global_start = start createDateDirectories(start_date, end_date, args.keep_file) if args.verbose: print("Total runtime: {}".format(time.time() - global_start))
def getDDL(self):
    """Return the entries of ``self.config['ddl']`` parsed into dates.

    The result is a new list in the same order as the configuration.
    """
    deadlines = []
    for raw in self.config['ddl']:
        deadlines.append(util.parseDate(raw))
    return deadlines
def getEnd(self):
    """Return the end of the course, parsed from ``self.config['end']``."""
    raw_end = self.config['end']
    return util.parseDate(raw_end)
def getStart(self):
    """Return the start of the course, parsed from ``self.config['start']``."""
    raw_start = self.config['start']
    return util.parseDate(raw_start)