示例#1
0
	def loadxlsx(self,submitter,xlsxread,nth,duration_start,duration_end):
		"""Convert an xlsx upload to CSV and register it as a parsed submission.

		Every row of the workbook's active sheet is processed (no row is
		skipped). For each row, the cells at the indexes returned by
		self.mapList() are paired with the columns of self.getSchema(),
		written to an in-memory CSV buffer, and checked for emptiness.

		Args:
			submitter: object whose ``id`` is stored as ``submitterid``.
			xlsxread: workbook source passed straight to ``load_workbook``.
			nth, duration_start, duration_end: forwarded unchanged to
				``self.parsedclass``.

		Returns:
			The new ``self.parsedclass`` instance. It has also been appended
			to ``self.parseds``, and the session has been committed.

		Side effects:
			Picks an evaluator via ``User.randomEvaluator()``, adds it to
			``self.taskrow`` and commits ``session``.
		"""
		wb = load_workbook(xlsxread)
		ws = wb.active
		csvwrite = io.BytesIO()
		writer = csv.writer(csvwrite, delimiter=',', quotechar="'")
		maplist = self.mapList()
		schema = self.getSchema()

		# Per-column count of empty cells, keyed by schema column name.
		nullcount = dict()
		for col in schema:
			nullcount[col.name] = 0

		counter = 0        # total number of rows written
		dupset = set()     # distinct row signatures, used to derive the duplicate count

		for rrow in ws.rows:
			crow = list()
			for mapnum, col in zip(maplist, schema):
				value = rrow[mapnum].value

				# Exact type match kept on purpose: `datetime` is imported
				# outside this block, so its import form is not visible here.
				if type(value) == datetime:
					crow.append(value.strftime("%Y-%m-%d %H:%M"))
				else:
					crow.append(value)

				# openpyxl reports an empty cell as None, not "", so both must
				# count as null; otherwise blank cells are never counted.
				if value is None or value == "":
					nullcount[col.name] += 1

			dupset.add(unicode(crow))

			# The Python 2 csv writer works on byte strings, so unicode cell
			# values are encoded to UTF-8 before being written.
			utfrow = list()
			for x in crow:
				if isinstance(x, unicode):
					utfrow.append(x.encode("utf8"))
				else:
					utfrow.append(x)
			writer.writerow(utfrow)
			counter += 1

		evaluator = User.randomEvaluator()

		# Last argument is the duplicate count: total rows minus distinct rows.
		parsedmodel = self.parsedclass(nth,duration_start,duration_end,csvwrite,counter, counter - len(dupset))
		parsedmodel.submitterid = submitter.id
		parsedmodel.evaluatorid = evaluator.id

		self.taskrow.addUser(evaluator)

		for col in schema:
			# A sheet without rows has nothing to divide by; report a 0.0
			# null ratio instead of raising ZeroDivisionError.
			if counter:
				ratio = nullcount[col.name] / float(counter)
			else:
				ratio = 0.0
			# The attribute name is "null_" plus the column name with its
			# first four characters dropped.
			setattr(parsedmodel, "null_" + col.name[4:], ratio)

		self.parseds.append(parsedmodel)

		session.commit()

		return parsedmodel
示例#2
0
	def loadcsv(self,submitter,csvread,nth,duration_start,duration_end):
		"""Re-map a CSV upload onto the schema and register it as a parsed submission.

		Every row read from ``csvread`` is processed (no row is skipped). For
		each row, the fields at the indexes returned by self.mapList() are
		paired with the columns of self.getSchema(), written to an in-memory
		CSV buffer, and checked for emptiness.

		Args:
			submitter: object whose ``id`` is stored as ``submitterid``.
			csvread: iterable of lines handed to ``csv.reader`` (comma
				delimiter, single-quote quote character).
			nth, duration_start, duration_end: forwarded unchanged to
				``self.parsedclass``.

		Returns:
			The new ``self.parsedclass`` instance. It has also been appended
			to ``self.parseds``, and the session has been committed.

		Side effects:
			Picks an evaluator via ``User.randomEvaluator()``, adds it to
			``self.taskrow`` and commits ``session``.
		"""
		reader = csv.reader(csvread, delimiter=',', quotechar="'")
		csvwrite = io.BytesIO()
		writer = csv.writer(csvwrite, delimiter=',', quotechar="'")
		maplist = self.mapList()
		schema = self.getSchema()

		# Per-column count of empty fields, keyed by schema column name.
		nullcount = dict()
		for col in schema:
			nullcount[col.name] = 0

		counter = 0        # total number of rows written
		dupset = set()     # distinct row signatures, used to derive the duplicate count

		for rrow in reader:
			crow = list()
			for mapnum, col in zip(maplist, schema):
				field = rrow[mapnum]
				crow.append(field)

				# csv.reader yields missing values as empty strings.
				if field == "":
					nullcount[col.name] += 1

			dupset.add(unicode(crow))
			writer.writerow(crow)
			counter += 1

		evaluator = User.randomEvaluator()

		# Last argument is the duplicate count: total rows minus distinct rows.
		parsedmodel = self.parsedclass(nth,duration_start,duration_end,csvwrite,counter, counter - len(dupset))
		parsedmodel.submitterid = submitter.id
		parsedmodel.evaluatorid = evaluator.id
		self.taskrow.addUser(evaluator)

		for col in schema:
			# An input without rows has nothing to divide by; report a 0.0
			# null ratio instead of raising ZeroDivisionError.
			if counter:
				ratio = nullcount[col.name] / float(counter)
			else:
				ratio = 0.0
			# The attribute name is "null_" plus the column name with its
			# first four characters dropped.
			setattr(parsedmodel, "null_" + col.name[4:], ratio)

		self.parseds.append(parsedmodel)

		session.commit()

		return parsedmodel