Пример #1
0
	def __iter__(self):
		"""Open the resource for line-by-line reading and return ``self``.

		When ``self.restype`` is "URL" the remote data is first copied, in
		1 MiB chunks, into a named temporary file kept on ``self.tempfile``;
		otherwise ``self.res`` is opened in place as a local path.  Depending
		on ``self.compression`` the data is read through ``FastGzip`` ("GZ"),
		``bz2.BZ2File`` ("BZ2") or as a plain file.  The line iterator of the
		opened object is stored on ``self.nextobj``.
		"""
		if self.restype=="URL":
			self.tempfile=self.resfile=tempfile.NamedTemporaryFile(mode="w+b")
			resource=urllib2.urlopen(self.res)
			try:
				# iter(callable, sentinel) stops at the empty chunk read() returns on EOF
				for data in iter(lambda: resource.read(1024*1024), b""):
					self.resfile.write(data)
			finally:
				# The connection is only needed while copying; release it even
				# if the download fails part-way.
				resource.close()
			# BZ2File below re-opens the temp file by name, so make sure the
			# whole payload has reached the file before rewinding.
			self.resfile.flush()
			self.resfile.seek(0)
			if self.compression=="GZ":
				self.resfile = FastGzip(fileobj=self.resfile)
			elif self.compression=="BZ2":
				self.resfile = bz2.BZ2File(self.resfile.name,mode='rb')

		else:
			if self.compression=="GZ":
				self.resfile = FastGzip(self.res)
			elif self.compression=="BZ2":
				self.resfile = bz2.BZ2File(self.res,mode='rb')
			else:
				self.resfile=open(self.res)

		self.nextobj=self.resfile.__iter__()
		return self
Пример #2
0
class Resource:
	"""Line iterator over a local file or a URL, optionally compressed.

	The compression is chosen from the file-name suffix (".gz", ".bz2",
	anything else is read raw).  While iterating, every fourth line --
	starting with the first one -- is prefixed with "@<individual>@".
	NOTE(review): the four-line period presumably matches FASTQ records;
	confirm against the callers.
	"""

	def __init__(self,individual,res):
		"""Check that ``res`` is reachable and record how it must be opened.

		individual -- label inserted into every fourth line by ``next``.
		res -- path of a local file, or an http/https/ftp URL.

		Raises ``Exception`` when the URL cannot be read or the path is not
		an existing regular file.
		"""
		self.individual=individual
		self.res=res
		self.linecount=0
		# Handles are created by __iter__; starting them as None lets close()
		# be called safely on a resource that was never iterated.
		self.resfile=None
		self.tempfile=None

		if re.match("^(http|ftp|https):",res,re.I):
			# Probe with a tiny read so an unreachable URL fails here rather
			# than in the middle of iteration.
			try:
				probe=urllib2.urlopen(res)
				try:
					probe.read(100)
				finally:
					probe.close()
			except Exception:
				# Not a bare except: Ctrl-C / SystemExit must still propagate.
				raise Exception("Resource not found: {0}".format(res))
			self.restype="URL"
		else:
			if not os.path.isfile(res):
				raise Exception("File not found: {0}".format(res))
			self.restype="FILE"

		suffix=res.lower()
		if suffix.endswith(".gz"):
			self.compression="GZ"
		elif suffix.endswith(".bz2"):
			self.compression="BZ2"
		else:
			self.compression="RAW"

	def __iter__(self):
		"""(Re)open the resource from its first line and return ``self``.

		A URL is first copied, in 1 MiB chunks, into a named temporary file
		kept on ``self.tempfile``; a local file is opened in place.  The
		data is then read through ``FastGzip`` ("GZ"), ``bz2.BZ2File``
		("BZ2") or as a plain file, and its line iterator is stored on
		``self.nextobj``.
		"""
		# Every pass starts at the first line again: release handles left
		# over from an earlier pass and realign the every-fourth-line counter.
		self.close()
		self.linecount=0

		if self.restype=="URL":
			self.tempfile=self.resfile=tempfile.NamedTemporaryFile(mode="w+b")
			resource=urllib2.urlopen(self.res)
			try:
				# iter(callable, sentinel) stops at the empty chunk read() returns on EOF
				for data in iter(lambda: resource.read(1024*1024), b""):
					self.resfile.write(data)
			finally:
				# The connection is only needed while copying; release it even
				# if the download fails part-way.
				resource.close()
			# BZ2File below re-opens the temp file by name, so make sure the
			# whole payload has reached the file before rewinding.
			self.resfile.flush()
			self.resfile.seek(0)
			if self.compression=="GZ":
				self.resfile = FastGzip(fileobj=self.resfile)
			elif self.compression=="BZ2":
				self.resfile = bz2.BZ2File(self.resfile.name,mode='rb')

		else:
			if self.compression=="GZ":
				self.resfile = FastGzip(self.res)
			elif self.compression=="BZ2":
				self.resfile = bz2.BZ2File(self.res,mode='rb')
			else:
				self.resfile=open(self.res)

		self.nextobj=self.resfile.__iter__()
		return self

	def next(self):
		"""Return the next line, prefixed with "@<individual>@" on lines 1, 5, 9, ...

		Raises ``StopIteration`` when the underlying stream is exhausted.
		"""
		# Inside the method body ``next`` is the builtin, which works with
		# both Python 2 (.next) and Python 3 (__next__) style iterators.
		line=next(self.nextobj)
		if self.linecount % 4 == 0:
			line="@"+self.individual+"@"+line
		self.linecount+=1
		return line

	# Python 3 spelling of the iterator protocol; harmless on Python 2.
	__next__=next

	def close(self):
		"""Close everything ``__iter__`` opened.

		Safe to call before iterating and safe to call more than once.
		"""
		reader,tmp=self.resfile,self.tempfile
		self.resfile=self.tempfile=None
		try:
			# For an uncompressed URL the reader *is* the temp file; it is
			# closed once, below.
			if reader is not None and reader is not tmp:
				reader.close()
		finally:
			# Closing the NamedTemporaryFile last keeps the original order
			# (decompressor first, backing file second).
			if tmp is not None:
				tmp.close()