def __init__(self, sqlquery, connection, first, names, types, *largs, **kargs):
        """
        Works only with a single schema argument whose column definitions are comma-separated.
        """
        if first:
            if len(largs)<1:
                raise functions.OperatorError(__name__.rsplit('.')[-1]," Schema argument was not provided")
            try:
                schema=parsesplit(largs[0])
            except ParseBaseException:
                raise functions.OperatorError(__name__.rsplit('.')[-1]," Error in schema definition: %s" %(largs[0]))
            for el in schema:
                names.append(el[0])
                if len(el)>1:
                    types.append(el[1])
                else:
                    types.append('None')

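        # Open a cursor on the supplied connection and run the query; on an empty
        # result (StopIteration) or an "empty schema" SQLError, fall back to an
        # empty iterator while keeping the declared schema.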
        self.c=connection.cursor()
        self.openedc=True
        try:

            if first:
                ### Find names and types
                execit=peekable(self.c.execute(sqlquery))
                samplerow=execit.peek()
                qtypes=[str(v[1]) for v in self.c.getdescription()]
                if len(qtypes)<len(types):
                    raise functions.OperatorError(__name__.rsplit('.')[-1],"Setting more columns than result query")

                for i in xrange(len(types)):
                    if types[i]=="None" and qtypes[i]!="None":
                        types[i]=qtypes[i]
                self.iter=typed(types,execit)
            else:
                self.iter=typed(types,self.c.execute(sqlquery))
        except StopIteration: ### empty result: keep the provided schema
            try:
                self.iter=iter([])
                self.openedc=False
            finally:
                try:
                    self.c.close()
                except:
                    pass  # ignore errors while closing the cursor
        except apsw.SQLError, e: ### SQLError: check whether it comes from an empty schema
            try:
                if not checkexceptionisfromempty(e):
                    raise
                else:
                    self.iter=iter([])
                    self.openedc=False
            finally:
                try:
                    self.c.close()
                except:
                    pass
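    # Variant of the constructor above: instead of an explicit schema argument,
    # the column names are taken from the query description and their types are
    # resolved from an optional positional default (*resttype) plus per-column
    # overrides (**destypes).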
    def __init__(self, sqlquery, connection, first, names, types, *resttype, **destypes):
        if len(resttype)>1:
            raise functions.OperatorError(__name__.rsplit('.')[-1],"Cannot resolve more than one unbound type")
        
        self.sqlquery=sqlquery
        self.connection=connection
        self.c=self.connection.cursor()

        self.cols=names
        self.types=types
        if first:
            first = False
            try:
                execit=peekable(self.c.execute(self.sqlquery))
                samplerow=execit.peek()
                qnames=[str(v[0]) for v in self.c.getdescription()]
                if not resttype:
                    qtypes=[str(v[1]) for v in self.c.getdescription()]
                else:
                    qtypes=[resttype[0]]*len(qnames) ### fill every column's type with the single resttype element
                
                for el in destypes:
                    p=-1
                    try:
                        p=qnames.index(el)
                    except ValueError:
                        raise functions.OperatorError(__name__.rsplit('.')[-1],"Unknown column name '%s'" %(el))
                    qtypes[p]=destypes[el]
                for i in qnames:
                    self.cols.append(i)
                for i in qtypes:
                    self.types.append(i)
            except StopIteration:
                try:
                    raise
                finally:
                    try:
                        self.c.close()
                    except:
                        pass
                    
            self.iter=typed(self.types,execit)
        else:
            self.iter=typed(self.types,self.c.execute(self.sqlquery))
# Example: FileCursor from file.py (project: LSmyrnaios/madis)
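# FileCursor opens a local file, zip/gzip archive, or URL and exposes its rows
# as an iterator, detecting the format (json, avro, csv/tsv, plain lines) from
# the file extension and the keyword parameters passed in **rest.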
class FileCursor:
    def __init__(self, filename, isurl, compressiontype, compression,
                 hasheader, first, namelist, extraurlheaders, **rest):
        self.encoding = 'utf_8'
        self.fast = False
        self.strict = None
        self.toj = -1
        self.hasheader = hasheader
        self.namelist = namelist
        self.dialect = 'csv'

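        # Pop the recognised optional keyword parameters off **rest; whatever
        # remains is validated against csvkeywordparams and forwarded to the
        # CSV reader.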
        if 'encoding' in rest:
            self.encoding = rest['encoding']
            del rest['encoding']

        if 'strict' in rest:
            self.strict = int(rest['strict'])
            del rest['strict']

        if 'fast' in rest:
            self.fast = True
            del rest['fast']

        if 'toj' in rest:
            try:
                self.toj = int(rest['toj'])
            except ValueError:
                self.toj = 0
            del rest['toj']

        if 'dialect' in rest:
            self.dialect = rest['dialect']
            dialects = {'line': line(), 'tsv': tsv(), 'csv': defaultcsv()}
            if self.dialect in dialects:
                rest['dialect'] = dialects[self.dialect]

        if 'useregexfilename' in rest:
            if rest['useregexfilename'] == "True":
                filename = getFilenameMatchingRegex(filename)
            del rest['useregexfilename']

        self.nonames = first
        for el in rest:
            if el not in csvkeywordparams:
                raise functions.OperatorError(
                    __name__.rsplit('.')[-1], "Invalid parameter %s" % (el))

        pathname = None
        gzipcompressed = False

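        # Open the underlying byte stream: a zip member, a local file, or a URL.
        # gzip compression is detected from the HTTP headers, the file extension,
        # or an explicit compressiontype, and the stream is wrapped accordingly.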
        try:
            if compression and compressiontype == 'zip':
                self.fileiter = ZipIter(filename, "r")
            elif not isurl:
                pathname = filename.strip()
                if self.fast or compression or \
                        (pathname is not None and (pathname.endswith('.gz') or pathname.endswith('.gzip') or pathname.endswith('.avro'))):
                    self.fileiter = open(filename, "rb", buffering=1000000)
                else:
                    if "MSPW" in functions.apsw_version:
                        self.fileiter = open(filename, "r", buffering=1000000)
                    else:
                        self.fileiter = open(filename, "rU", buffering=1000000)
            else:
                pathname = urlparse.urlparse(filename)[2]
                req = urllib2.Request(filename, None, extraurlheaders)
                hreq = urllib2.urlopen(req)
                if any(x.lower() in ('content-encoding', 'content-type')
                       and y.lower().find('gzip') != -1
                       for x, y in hreq.headers.items()):
                    gzipcompressed = True
                self.fileiter = hreq

            if pathname is not None and (pathname.endswith('.gz')
                                         or pathname.endswith('.gzip')):
                gzipcompressed = True

            if compression and compressiontype == 'gz':
                gzipcompressed = True

            if gzipcompressed:
                if filename.endswith('.gz'):
                    filename = filename[:-3]
                if filename.endswith('.gzip'):
                    filename = filename[:-5]
                self.fileiter = gzip.GzipFile(mode='rb', fileobj=self.fileiter)

        except Exception, e:
            raise functions.OperatorError(__name__.rsplit('.')[-1], e)

        _, filenameExt = os.path.splitext(filename)
        filenameExt = filenameExt.lower()

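        # JSON input: peek at the first line. A dict with a 'schema' entry
        # supplies the column list, a JSON list yields generated names C1..Cn,
        # and anything else falls back to reading the file as plain text.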
        if filenameExt == '.json' or filenameExt == '.js' or (
                'dialect' in rest and type(rest['dialect']) == str
                and rest['dialect'].lower() == 'json'):
            self.fast = True
            firstline = self.fileiter.readline()
            try:
                schemaline = json.loads(firstline)
            except ValueError:
                namelist.append(['C1', 'text'])
                self.iter = directfile(
                    itertools.chain([firstline], self.fileiter), self.encoding)
                return
            schemalinetype = type(schemaline)

            if schemalinetype == list:
                for i in xrange(1, len(schemaline) + 1):
                    namelist.append(['C' + str(i), 'text'])
                self.fileiter = itertools.chain([firstline], self.fileiter)

            elif schemalinetype == dict and 'schema' in schemaline:
                namelist += schemaline['schema']

            else:
                namelist.append(['C1', 'text'])
                self.iter = directfile(
                    itertools.chain([firstline], self.fileiter), self.encoding)
                return

            if "MSPW" in functions.apsw_version:
                self.iter = (json.loads(x) for x in self.fileiter)
            else:
                jsonload = json.JSONDecoder().scan_once
                self.iter = (jsonload(x, 0)[0] for x in self.fileiter)
            return

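        # Avro input: read the schema from the container file and emit each
        # record's fields in schema order.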
        if filenameExt == '.avro':
            self.fast = True
            from lib import fastavro as avro

            afi = avro.reader(self.fileiter)
            fields = [x['name'] for x in afi.schema['fields']]
            namelist.extend([[x, ''] for x in fields])
            self.iter = ([x[y] for y in fields] for x in afi)
            return

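        # Map the .csv/.tsv extensions to the matching dialect; in fast mode
        # they also fix the delimiter used by the simple split-based parser.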
        if filenameExt == '.csv':
            if self.fast:
                rest['delimiter'] = ','
            rest['dialect'] = lib.inoutparsing.defaultcsv()

        if filenameExt == '.tsv':
            if self.fast:
                rest['delimiter'] = '\t'
            rest['dialect'] = lib.inoutparsing.tsv()

        if self.fast:
            if 'delimiter' not in rest:
                rest['delimiter'] = ','
            if self.dialect == 'tsv':
                rest['delimiter'] = '\t'

        # If a header or at least one CSV argument is present, parse as CSV
        # (default dialect); otherwise fall through to plain line reading.
        if hasheader or len(rest) > 0:
            if 'dialect' not in rest:
                rest['dialect'] = lib.inoutparsing.defaultcsv()

            linelen = 0
            if first and not hasheader:
                if self.fast:
                    delim = rest['delimiter']
                    self.iter = peekable(
                        (unicode(r[:-1] if r[-1] == '\n' else r,
                                 'utf_8').split(delim) for r in self.fileiter))
                else:
                    self.iter = peekable(
                        nullify(
                            reader(self.fileiter,
                                   encoding=self.encoding,
                                   **rest)))
                    if self.strict is None:
                        self.strict = 1
                sample = self.iter.peek()
                linelen = len(sample)
            else:  ### not the first pass, or the file has a header
                if self.fast:
                    delim = rest['delimiter']
                    self.iter = (unicode(r[:-1] if r[-1] == '\n' else r,
                                         'utf_8').split(delim)
                                 for r in self.fileiter)
                else:
                    self.iter = nullify(
                        reader(self.fileiter, encoding=self.encoding, **rest))
                    if self.strict is None:
                        self.strict = 1
                linelen = len(namelist)

                if hasheader:
                    sample = self.iter.next()
                    linelen = len(sample)

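            # Apply the requested strict mode to the row iterator; strict == -1
            # additionally reports the offending line number, the found/expected
            # column counts and the line contents as extra columns.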
            if self.strict == 0:
                self.iter = strict0(self.iter, linelen)

            if self.strict == 1:
                self.iter = strict1(self.iter, linelen)

            if self.strict == -1:
                self.iter = strictminus1(self.iter, linelen, hasheader)
                namelist += [['linenumber', 'int'], ['foundcols', 'int'],
                             ['expectedcols', 'int'], ['contents', 'text']]

            if first and namelist == []:
                if hasheader:
                    for i in sample:
                        namelist.append([cleanBOM(i), 'text'])
                else:
                    for i in xrange(1, linelen + 1):
                        namelist.append(['C' + str(i), 'text'])

        else:  #### Default read lines
            if self.encoding == 'utf_8':
                self.iter = directfileutf8(self.fileiter)
                self.fast = True
            else:
                self.iter = directfile(self.fileiter, encoding=self.encoding)
            namelist.append(['C1', 'text'])

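        # toj >= 0: keep the first toj columns and pack the remaining ones into
        # a single JSON column (a dict when a header is present, a list otherwise).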
        if self.toj >= 0:
            header = [x[0] for x in namelist]
            while len(namelist) > self.toj:
                namelist.pop()
            header = header[self.toj:]
            if self.hasheader:
                namelist.append(['Cjdict', 'text'])
                self.iter = tojdict(self.iter, header, self.toj)
            else:
                namelist.append(['Cjlist', 'text'])
                self.iter = tojlist(self.iter, self.toj)

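        # Fast path: expose the iterator's next() directly to avoid per-row overhead.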
        if self.fast:
            self.next = self.iter.next