def __init__(self, sqlquery, connection, first, names, types, *largs, **kargs):
    """
    Run ``sqlquery`` on a new cursor and expose its rows as ``self.iter``.

    Works only with one schema argument (``largs[0]``), which is split
    into column definitions by ``parsesplit``.

    Parameters:
        sqlquery   -- query text passed to ``cursor.execute``.
        connection -- object providing ``cursor()``.
        first      -- when true, the schema argument is parsed and
                      ``names`` / ``types`` are filled in place; when false
                      the already populated ``types`` list is reused.
        names      -- list that receives the declared column names.
        types      -- list that receives the column types; an entry of
                      'None' is replaced by the type reported by the query
                      when the query reports one.
        largs      -- positional operator arguments; only ``largs[0]`` is read.
        kargs      -- accepted but not used by this method.

    Attributes set: ``self.c`` (the cursor), ``self.openedc`` (False once
    the cursor has been closed because the result was empty) and
    ``self.iter``.

    Raises:
        functions.OperatorError -- missing or unparsable schema argument,
            or more columns declared than the query returns.
        apsw.SQLError -- re-raised unless ``checkexceptionisfromempty``
            accepts it.
    """
    opname = __name__.rsplit('.')[-1]

    if first:
        if len(largs) < 1:
            raise functions.OperatorError(opname, " Schema argument was not provided")
        try:
            schema = parsesplit(largs[0])
        except ParseBaseException:
            raise functions.OperatorError(opname, " Error in schema definition: %s" % (largs[0]))
        for el in schema:
            names.append(el[0])
            # A definition without an explicit type is recorded as 'None'
            # so that it can be filled from the query description below.
            types.append(el[1] if len(el) > 1 else 'None')

    self.c = connection.cursor()
    self.openedc = True

    def release_cursor():
        # Best-effort close: a failure to close must not mask the outcome
        # that is already being reported to the caller.
        try:
            self.c.close()
        except Exception:
            pass

    try:
        if first:
            ### Find names and types
            execit = peekable(self.c.execute(sqlquery))
            # The peeked row itself is not needed; presumably peek() raises
            # StopIteration on an empty result, which is handled below.
            execit.peek()
            qtypes = [str(v[1]) for v in self.c.getdescription()]
            if len(qtypes) < len(types):
                raise functions.OperatorError(opname, "Setting more columns than result query")
            for i, qtype in enumerate(qtypes[:len(types)]):
                if types[i] == "None" and qtype != "None":
                    types[i] = qtype
            self.iter = typed(types, execit)
        else:
            self.iter = typed(types, self.c.execute(sqlquery))
    except StopIteration:
        ### if exception keep schema
        try:
            self.iter = iter([])
            self.openedc = False
        finally:
            release_cursor()
    except apsw.SQLError as e:
        ### if exception SQLERROR check if it is from empty schema
        try:
            if not checkexceptionisfromempty(e):
                raise
            self.iter = iter([])
            self.openedc = False
        finally:
            release_cursor()
    except functions.OperatorError:
        # Column-count mismatch: the object is unusable, so release the
        # cursor before propagating instead of leaking it.
        try:
            raise
        finally:
            release_cursor()
def __init__(self, sqlquery, connection, first, names, types, *resttype, **destypes):
    """
    Run ``sqlquery`` and expose its rows, with per-column types, as ``self.iter``.

    Parameters:
        sqlquery   -- query text passed to ``cursor.execute``.
        connection -- object providing ``cursor()``.
        first      -- when true, column names and types are discovered from
                      the query and appended to ``names`` / ``types``.
        names      -- list extended in place with the query's column names
                      (also stored as ``self.cols``).
        types      -- list extended in place with the resolved column types
                      (also stored as ``self.types``).
        resttype   -- at most one positional type; when given it is used for
                      every column that ``destypes`` does not override,
                      otherwise the types reported by the query are used.
        destypes   -- ``column_name=type`` overrides.

    Raises:
        functions.OperatorError -- more than one positional type was given,
            or ``destypes`` names a column the query does not return.
        StopIteration -- propagated from ``peek()`` after the cursor has
            been closed.
    """
    opname = __name__.rsplit('.')[-1]
    if len(resttype) > 1:
        raise functions.OperatorError(opname, "Cannot resolve more than one unbound types")

    self.sqlquery = sqlquery
    self.connection = connection
    self.c = self.connection.cursor()
    self.cols = names
    self.types = types

    if not first:
        # Schema already known from an earlier instantiation: just stream rows.
        self.iter = typed(self.types, self.c.execute(self.sqlquery))
        return

    try:
        execit = peekable(self.c.execute(self.sqlquery))
        # The peeked row itself is not needed; the call is kept because a
        # StopIteration raised here is what the handler below reacts to.
        execit.peek()
        description = self.c.getdescription()
        qnames = [str(v[0]) for v in description]
        if resttype:
            ### fill types with resttype first element
            qtypes = [resttype[0]] * len(qnames)
        else:
            qtypes = [str(v[1]) for v in description]
        for el in destypes:
            try:
                p = qnames.index(el)
            except ValueError:
                raise functions.OperatorError(opname, "Unknown column name '%s'" % (el))
            qtypes[p] = destypes[el]
        self.cols.extend(qnames)
        self.types.extend(qtypes)
    except (StopIteration, functions.OperatorError):
        # Construction is failing: release the cursor (best effort) and let
        # the original exception propagate unchanged.
        try:
            raise
        finally:
            try:
                self.c.close()
            except Exception:
                pass
    self.iter = typed(self.types, execit)
# FileCursor.__init__ -- opens `filename` (local path, zip member or URL) and
# prepares `self.iter`, the iterator over its rows, while filling `namelist`
# in place with [name, type] column entries.
#
# NOTE(review): this definition is collapsed onto a few very long physical
# lines, so the original indentation/nesting cannot be read off this text.
# Restore the formatting from version control before changing any logic.
# The code is Python 2 (`except Exception, e`, `unicode`, `xrange`, `urllib2`,
# `.next`).
#
# Options read from **rest:
#   'encoding', 'strict' (converted with int()), 'fast' (presence enables it),
#   'toj' (int(); a ValueError falls back to 0) and 'useregexfilename' are
#   consumed and deleted from `rest`.  With useregexfilename == "True" the
#   filename is replaced by getFilenameMatchingRegex(filename).
#   'dialect' stays in `rest`; the names 'line', 'tsv' and 'csv' are mapped to
#   the objects returned by line(), tsv() and defaultcsv().
#   Any other key that is not in csvkeywordparams raises
#   functions.OperatorError("Invalid parameter ...").
#
# Opening the source (any Exception here is re-raised as
# functions.OperatorError):
#   * compression with compressiontype 'zip' -> ZipIter(filename, "r")
#   * not isurl -> open(); mode "rb" when fast, compression, or the stripped
#     path ends with .gz/.gzip/.avro, otherwise "r" when "MSPW" is in
#     functions.apsw_version and "rU" when it is not
#   * otherwise -> urllib2.Request(filename, None, extraurlheaders) opened with
#     urllib2.urlopen; a content-encoding/content-type header containing
#     'gzip' marks the stream as gzip
#   * a .gz/.gzip path, or compression with compressiontype 'gz', also marks
#     the stream as gzip; a gzip stream is wrapped in gzip.GzipFile and the
#     .gz/.gzip suffix is dropped from `filename` before the extension check.
#
# Format selection by lower-cased extension:
#   * .json / .js (or a string dialect equal to 'json'): the first line is
#     parsed with json.loads.  A list yields columns C1..Cn and the line is
#     pushed back as data; a dict with a 'schema' key supplies the columns;
#     anything else, or invalid JSON, yields a single C1 text column read
#     through directfile.  The method returns from this branch.
#   * .avro: rows come from lib.fastavro's reader, columns from its schema
#     fields; the method returns from this branch.
#   * .csv / .tsv: select the csv / tsv dialect (and delimiter in fast mode).
#   * when hasheader is set or csv keyword arguments remain, rows are parsed
#     as delimited text (a plain split on the delimiter in fast mode,
#     `reader` wrapped in `nullify` otherwise) and wrapped by strict0 /
#     strict1 / strictminus1 according to self.strict; otherwise the file
#     appears to be read line by line into a single C1 text column.
#
# When toj >= 0, `namelist` is truncated to `toj` entries, a 'Cjdict' (with
# header) or 'Cjlist' column is appended and rows pass through tojdict /
# tojlist.  In fast mode `self.next` is bound directly to `self.iter.next`.
class FileCursor: def __init__(self, filename, isurl, compressiontype, compression, hasheader, first, namelist, extraurlheaders, **rest): self.encoding = 'utf_8' self.fast = False self.strict = None self.toj = -1 self.namelist = None self.hasheader = hasheader self.namelist = namelist self.dialect = 'csv' if 'encoding' in rest: self.encoding = rest['encoding'] del rest['encoding'] if 'strict' in rest: self.strict = int(rest['strict']) del rest['strict'] if 'fast' in rest: self.fast = True del rest['fast'] if 'toj' in rest: try: self.toj = int(rest['toj']) except ValueError: self.toj = 0 del rest['toj'] if 'dialect' in rest: self.dialect = rest['dialect'] dialects = {'line': line(), 'tsv': tsv(), 'csv': defaultcsv()} if self.dialect in dialects: rest['dialect'] = dialects[self.dialect] if 'useregexfilename' in rest: if rest['useregexfilename'] == "True": filename = getFilenameMatchingRegex(filename) del rest['useregexfilename'] self.nonames = first for el in rest: if el not in csvkeywordparams: raise functions.OperatorError( __name__.rsplit('.')[-1], "Invalid parameter %s" % (el)) pathname = None gzipcompressed = False try: if compression and compressiontype == 'zip': self.fileiter = ZipIter(filename, "r") elif not isurl: pathname = filename.strip() if self.fast or compression or \ (pathname is not None and (pathname.endswith('.gz') or pathname.endswith('.gzip') or pathname.endswith('.avro'))): self.fileiter = open(filename, "rb", buffering=1000000) else: if "MSPW" in functions.apsw_version: self.fileiter = open(filename, "r", buffering=1000000) else: self.fileiter = open(filename, "rU", buffering=1000000) else: pathname = urlparse.urlparse(filename)[2] req = urllib2.Request(filename, None, extraurlheaders) hreq = urllib2.urlopen(req) if [ 1 for x, y in hreq.headers.items() if x.lower() in ('content-encoding', 'content-type') and y.lower().find('gzip') != -1 ]: gzipcompressed = True self.fileiter = hreq if pathname != None and (pathname.endswith('.gz') or 
pathname.endswith('.gzip')): gzipcompressed = True if compression and compressiontype == 'gz': gzipcompressed = True if gzipcompressed: if filename.endswith('.gz'): filename = filename[:-3] if filename.endswith('.gzip'): filename = filename[:-5] self.fileiter = gzip.GzipFile(mode='rb', fileobj=self.fileiter) except Exception, e: raise functions.OperatorError(__name__.rsplit('.')[-1], e) _, filenameExt = os.path.splitext(filename) filenameExt = filenameExt.lower() if filenameExt == '.json' or filenameExt == '.js' or ( 'dialect' in rest and type(rest['dialect']) == str and rest['dialect'].lower() == 'json'): self.fast = True firstline = self.fileiter.readline() try: schemaline = json.loads(firstline) except ValueError: namelist.append(['C1', 'text']) self.iter = directfile( itertools.chain([firstline], self.fileiter), self.encoding) return schemalinetype = type(schemaline) if schemalinetype == list: for i in xrange(1, len(schemaline) + 1): namelist.append(['C' + str(i), 'text']) self.fileiter = itertools.chain([firstline], self.fileiter) elif schemalinetype == dict and 'schema' in schemaline: namelist += schemaline['schema'] else: namelist.append(['C1', 'text']) self.iter = directfile( itertools.chain([firstline], self.fileiter), self.encoding) return if "MSPW" in functions.apsw_version: self.iter = (json.loads(x) for x in self.fileiter) else: jsonload = json.JSONDecoder().scan_once self.iter = (jsonload(x, 0)[0] for x in self.fileiter) return if filenameExt == '.avro': self.fast = True from lib import fastavro as avro afi = avro.reader(self.fileiter) fields = [x['name'] for x in afi.schema['fields']] namelist.extend([[x, ''] for x in fields]) self.iter = ([x[y] for y in fields] for x in afi) return if filenameExt == '.csv': if self.fast: rest['delimiter'] = ',' rest['dialect'] = lib.inoutparsing.defaultcsv() if filenameExt == '.tsv': if self.fast: rest['delimiter'] = '\t' rest['dialect'] = lib.inoutparsing.tsv() if self.fast: if 'delimiter' not in rest: 
rest['delimiter'] = ',' if self.dialect == 'tsv': rest['delimiter'] = '\t' if hasheader or len( rest ) > 0: #if at least one csv argument default dialect is csv else line if 'dialect' not in rest: rest['dialect'] = lib.inoutparsing.defaultcsv() linelen = 0 if first and not hasheader: if self.fast: delim = rest['delimiter'] self.iter = peekable( (unicode(r[:-1] if r[-1] == '\n' else r, 'utf_8').split(delim) for r in self.fileiter)) else: self.iter = peekable( nullify( reader(self.fileiter, encoding=self.encoding, **rest))) if self.strict == None: self.strict = 1 sample = self.iter.peek() linelen = len(sample) else: ###not first or header if self.fast: delim = rest['delimiter'] self.iter = (unicode(r[:-1] if r[-1] == '\n' else r, 'utf_8').split(delim) for r in self.fileiter) else: self.iter = nullify( reader(self.fileiter, encoding=self.encoding, **rest)) if self.strict == None: self.strict = 1 linelen = len(namelist) if hasheader: sample = self.iter.next() linelen = len(sample) if self.strict == 0: self.iter = strict0(self.iter, linelen) if self.strict == 1: self.iter = strict1(self.iter, linelen) if self.strict == -1: self.iter = strictminus1(self.iter, linelen, hasheader) namelist += [['linenumber', 'int'], ['foundcols', 'int'], ['expectedcols', 'int'], ['contents', 'text']] if first and namelist == []: if hasheader: for i in sample: namelist.append([cleanBOM(i), 'text']) else: for i in xrange(1, linelen + 1): namelist.append(['C' + str(i), 'text']) else: #### Default read lines if self.encoding == 'utf_8': self.iter = directfileutf8(self.fileiter) self.fast = True else: self.iter = directfile(self.fileiter, encoding=self.encoding) namelist.append(['C1', 'text']) if self.toj >= 0: header = [x[0] for x in namelist] while len(namelist) > self.toj: namelist.pop() header = header[self.toj:] if self.hasheader: namelist.append(['Cjdict', 'text']) self.iter = tojdict(self.iter, header, self.toj) else: namelist.append(['Cjlist', 'text']) self.iter = tojlist(self.iter, 
self.toj) if self.fast: self.next = self.iter.next