示例#1
0
文件: headers.py 项目: DeanWay/petl
 def __iter__(self):
     """Yield the header with ``self.suffix`` appended to each field, then the rows.

     Every header field and the suffix are converted with ``text_type``
     before being concatenated.  Data rows from ``self.table`` are passed
     through untouched.  An empty table (no header row) yields nothing.
     """
     it = iter(self.table)
     try:
         hdr = next(it)
     except StopIteration:
         # A StopIteration escaping a generator body becomes RuntimeError
         # on Python 3.7+ (PEP 479); end the iteration cleanly instead.
         return
     yield tuple((text_type(f) + text_type(self.suffix)) for f in hdr)
     for row in it:
         yield row
示例#2
0
文件: regex.py 项目: juarezr/petl
def itersearch(table, pattern, field, flags, complement):
    """Yield the header of `table`, then the rows selected by a regex search.

    pattern, flags : passed to ``re.compile``.
    field : None to search every value in the row; otherwise a field
        selection resolved with ``asindices`` (one or several fields).
    complement : if false, rows where the search matches are yielded;
        if true, rows where it does not match are yielded.

    Values are converted with ``text_type`` before searching and every
    yielded row is a tuple.  An empty table (no header) yields nothing.
    """
    prog = re.compile(pattern, flags)
    it = iter(table)
    try:
        hdr = next(it)
    except StopIteration:
        # PEP 479: letting StopIteration escape a generator raises
        # RuntimeError on Python 3.7+, so finish cleanly on empty input.
        return
    yield tuple(hdr)

    if field is None:
        # search whole row
        def test(r):
            return any(prog.search(text_type(v)) for v in r)
    else:
        indices = asindices(hdr, field)
        if len(indices) == 1:
            index = indices[0]

            def test(r):
                return prog.search(text_type(r[index]))
        else:
            getvals = operator.itemgetter(*indices)

            def test(r):
                return any(prog.search(text_type(v)) for v in getvals(r))

    # A row is kept when "matched" differs from "complement": matching rows
    # for complement == False, non-matching rows for complement == True.
    want_nonmatching = bool(complement)
    for row in it:
        if bool(test(row)) != want_nonmatching:
            yield tuple(row)
示例#3
0
文件: regex.py 项目: DeanWay/petl
def itersearch(table, pattern, field, flags, complement):
    """Filter the rows of `table` with a regular expression.

    The header row is yielded first (as a tuple).  Each data row is then
    tested with ``re.compile(pattern, flags).search`` against the text
    form of its values:

    * ``field is None`` - any value in the row may match;
    * otherwise - only the field(s) resolved by ``asindices(hdr, field)``.

    With a false `complement` the matching rows are yielded; with a true
    `complement` the non-matching rows are.  A table without a header row
    produces no output.
    """
    prog = re.compile(pattern, flags)
    it = iter(table)
    try:
        hdr = next(it)
    except StopIteration:
        # Empty input: stop here rather than leak StopIteration, which
        # Python 3.7+ converts into RuntimeError inside generators (PEP 479).
        return
    yield tuple(hdr)

    if field is None:
        # search whole row
        def test(r):
            return any(prog.search(text_type(v)) for v in r)
    else:
        indices = asindices(hdr, field)
        if len(indices) == 1:
            index = indices[0]

            def test(r):
                return prog.search(text_type(r[index]))
        else:
            getvals = operator.itemgetter(*indices)

            def test(r):
                return any(prog.search(text_type(v)) for v in getvals(r))

    if complement:
        # complement==True, return rows that do not match
        selected = (row for row in it if not test(row))
    else:
        # complement==False, return rows that match
        selected = (row for row in it if test(row))
    for row in selected:
        yield tuple(row)
示例#4
0
 def __iter__(self):
     """Iterate over ``self.table`` with ``self.suffix`` appended to each header field.

     The first item yielded is the modified header tuple; both the field
     and the suffix go through ``text_type``.  The remaining rows are
     yielded exactly as they come from the wrapped table.  If the wrapped
     table has no header row, nothing is yielded.
     """
     it = iter(self.table)
     try:
         hdr = next(it)
     except StopIteration:
         # Without this guard an empty table raises RuntimeError on
         # Python 3.7+, because StopIteration may not escape a generator
         # (PEP 479).
         return
     outhdr = tuple((text_type(f) + text_type(self.suffix)) for f in hdr)
     yield outhdr
     for row in it:
         yield row
示例#5
0
def iterhashrightjoin(left, right, lkey, rkey, missing, llookup, lprefix,
                      rprefix):
    """Yield the rows of a right join driven by a pre-built left lookup.

    The header is yielded first: the left fields (prefixed with `lprefix`
    when it is not None) followed by the right table's non-key fields
    (prefixed with `rprefix` when it is not None).

    Each right row is then looked up in `llookup` by its key:

    * key present - one output row per left row stored under that key,
      each extended with the right row's non-key values;
    * key absent - a row of `missing` values as wide as the left header,
      with the key positions filled from the right row, extended with the
      right row's non-key values.
    """
    left_iter = iter(left)
    right_iter = iter(right)

    lhdr = next(left_iter)
    rhdr = next(right_iter)

    # positions of the key fields on each side
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # key extractor for right rows
    rgetk = operator.itemgetter(*rkind)

    # only non-key right fields are emitted, so key fields are not duplicated
    rvind = [pos for pos in range(len(rhdr)) if pos not in rkind]
    rgetv = rowgetter(*rvind)

    # assemble the output header
    if lprefix is None:
        left_names = list(lhdr)
    else:
        left_names = [(text_type(lprefix) + text_type(f)) for f in lhdr]
    right_names = rgetv(rhdr)
    if rprefix is not None:
        right_names = [(text_type(rprefix) + text_type(f))
                       for f in right_names]
    yield tuple(left_names) + tuple(right_names)

    for rrow in right_iter:
        key = rgetk(rrow)
        if key in llookup:
            # matched: every left row under this key, plus right non-key values
            for lrow in llookup[key]:
                yield tuple(lrow) + tuple(rgetv(rrow))
        else:
            # unmatched: placeholder left row carrying only the key values
            padded = [missing] * len(lhdr)
            for lpos, rpos in zip(lkind, rkind):
                padded[lpos] = rrow[rpos]
            padded.extend(rgetv(rrow))
            yield tuple(padded)
示例#6
0
文件: unpacks.py 项目: larissarmp/TCC
def iterunpack(source, field, newfields, include_original, missing):
    """Yield rows of `source` with a sequence-valued field unpacked into new fields.

    field : a field name (compared against the text form of the header)
        or an integer index into the header.
    newfields : list/tuple of new field names, an int N (names are then
        ``<field>1`` .. ``<field>N``), or None to unpack nothing.
    include_original : if false, the unpacked field is dropped from the
        header and from every row.
    missing : filler used when a value holds fewer items than requested.

    Raises ArgumentError for an unknown field or an unsupported
    `newfields` type.  An empty source (no header) yields nothing.
    """
    it = iter(source)

    try:
        hdr = next(it)
    except StopIteration:
        # PEP 479: do not let StopIteration escape the generator
        return
    flds = list(map(text_type, hdr))
    if field in flds:
        field_index = flds.index(field)
    elif isinstance(field, int) and field < len(flds):
        field_index = field
        field = flds[field_index]
        if field_index < 0:
            # normalise so the position test on data rows below can match
            field_index += len(flds)
    else:
        raise ArgumentError(
            'field invalid: must be either field name or index')

    # determine output fields
    outhdr = list(flds)
    if not include_original:
        # delete by position, not by name: with duplicate field names
        # ``remove(field)`` could drop a different column than the one
        # removed from the data rows
        del outhdr[field_index]
    if isinstance(newfields, (list, tuple)):
        outhdr.extend(newfields)
        nunpack = len(newfields)
    elif isinstance(newfields, int):
        nunpack = newfields
        newfields = [
            text_type(field) + text_type(i + 1) for i in range(newfields)
        ]
        outhdr.extend(newfields)
    elif newfields is None:
        nunpack = 0
    else:
        raise ArgumentError(
            'newfields argument must be list or tuple of field '
            'names, or int (number of values to unpack)')
    yield tuple(outhdr)

    # construct the output data
    for row in it:
        value = row[field_index]
        if include_original:
            out_row = list(row)
        else:
            out_row = [v for i, v in enumerate(row) if i != field_index]
        if nunpack > 0:
            # only measure the value when something is actually unpacked,
            # so unsized values are tolerated when nunpack == 0
            nvals = len(value)
            if nvals >= nunpack:
                newvals = value[:nunpack]
            else:
                newvals = list(value) + ([missing] * (nunpack - nvals))
            out_row.extend(newvals)
        yield tuple(out_row)
示例#7
0
def iterhashlookupjoin(left, right, lkey, rkey, missing, lprefix, rprefix):
    """Yield a left join of `left` against a one-row-per-key lookup of `right`.

    The right table is loaded through ``lookupone(..., strict=False)``.
    The header is yielded first: the left fields (prefixed with `lprefix`
    when it is not None) followed by the right table's non-key fields
    (prefixed with `rprefix` when it is not None).  Each left row is then
    extended with the non-key values of the right row stored under its
    key, or with `missing` values when the key is not in the lookup.
    """
    left_iter = iter(left)
    lhdr = next(left_iter)

    # the complete right table (header included) is handed to lookupone
    rhdr, right_all = iterpeek(right)
    rlookup = lookupone(right_all, rkey, strict=False)

    # positions of the key fields on each side
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # key extractor for left rows
    lgetk = operator.itemgetter(*lkind)

    # only non-key right fields are emitted, so key fields are not duplicated
    rvind = [pos for pos in range(len(rhdr)) if pos not in rkind]
    rgetv = rowgetter(*rvind)

    # assemble the output header
    if lprefix is None:
        left_names = list(lhdr)
    else:
        left_names = [(text_type(lprefix) + text_type(f)) for f in lhdr]
    right_names = rgetv(rhdr)
    if rprefix is not None:
        right_names = [(text_type(rprefix) + text_type(f))
                       for f in right_names]
    yield tuple(left_names) + tuple(right_names)

    for lrow in left_iter:
        key = lgetk(lrow)
        if key in rlookup:
            # matched: append the right row's non-key values
            tail = rgetv(rlookup[key])
        else:
            # unmatched: pad with the missing value
            tail = [missing] * len(rvind)
        yield tuple(lrow) + tuple(tail)
示例#8
0
def make_create_table_statement(table, tablename, schema=None, constraints=True, metadata=None, dialect=None):
    """
    Build the text of a CREATE TABLE statement inferred from `table`.

    Keyword arguments:

    table : table container
        Table data to use to infer types etc.
    tablename : text
        Name of the table
    schema : text
        Name of the database schema to create the table in
    constraints : bool
        If True use length and nullable constraints
    metadata : sqlalchemy.MetaData
        Custom table metadata
    dialect : text
        One of {'access', 'sybase', 'sqlite', 'informix', 'firebird', 'mysql',
        'oracle', 'maxdb', 'postgresql', 'mssql'}

    Returns the compiled statement as text with surrounding whitespace
    stripped.

    """

    import sqlalchemy

    sql_table = make_sqlalchemy_table(
        table, tablename, schema=schema, constraints=constraints, metadata=metadata
    )

    # with no dialect requested, compile() receives dialect=None
    sql_dialect = None
    if dialect:
        dialect_module = __import__("sqlalchemy.dialects.%s" % DIALECTS[dialect], fromlist=["dialect"])
        sql_dialect = dialect_module.dialect()

    compiled = sqlalchemy.schema.CreateTable(sql_table).compile(dialect=sql_dialect)
    return text_type(compiled).strip()
示例#9
0
def _display_html(table, limit=0, vrepr=None, index_header=None, caption=None,
                  tr_style=None, td_styles=None, encoding=None,
                  truncate=None, epilogue=None):
    """Render `table` to an HTML string via ``tohtml``.

    Arguments left at their defaults are filled in from ``config``
    (`limit`, `vrepr`, `index_header`) or from
    ``locale.getpreferredencoding()`` (`encoding`).  The table is cut by
    ``_vis_overflow`` before rendering.  A truthy `epilogue` is appended
    in a ``<p>`` element; otherwise an ellipsis paragraph is appended
    when ``_vis_overflow`` reported overflow.
    """

    # fall back to configured defaults
    limit = config.display_limit if limit == 0 else limit
    vrepr = config.display_vrepr if vrepr is None else vrepr
    if index_header is None:
        index_header = config.display_index_header
    if encoding is None:
        encoding = locale.getpreferredencoding()

    table, overflow = _vis_overflow(table, limit)

    # render into an in-memory buffer, then decode to text
    buf = MemorySource()
    tohtml(table, buf, encoding=encoding, index_header=index_header,
           vrepr=vrepr, caption=caption, tr_style=tr_style,
           td_styles=td_styles, truncate=truncate)
    html = text_type(buf.getvalue(), encoding)

    if epilogue:
        return html + '<p>%s</p>' % epilogue
    if overflow:
        return html + '<p><strong>...</strong></p>'
    return html
示例#10
0
文件: base.py 项目: alimanfoo/petl
 def __repr__(self):
     """Return ``"<field>: v1, v2, ..."`` showing at most five values.

     Up to six values are pulled from ``self``; the sixth is only used to
     decide whether a trailing ``', ...'`` is appended.
     """
     shown = [repr(v) for v in islice(self, 6)]
     text = text_type(self.field) + ': ' + ', '.join(shown[:5])
     if len(shown) > 5:
         text += ', ...'
     return text
示例#11
0
 def __repr__(self):
     """Describe the object as its field name followed by a preview of its values.

     The preview holds the ``repr`` of the first five values; if a sixth
     value exists, ``', ...'`` marks the truncation.
     """
     preview = [repr(value) for value in islice(self, 6)]
     has_more = len(preview) > 5
     parts = [text_type(self.field), ': ', ', '.join(preview[:5])]
     if has_more:
         parts.append(', ...')
     return ''.join(parts)
示例#12
0
    def __iter__(self):
        """Yield a header row (when available) and the fetched rows of a tabix file.

        ``self.filename`` is opened with ``pysam.Tabixfile``.  If
        ``self.header`` is not None it is yielded as the header; otherwise
        the last header line of the file, decoded as ASCII and split on
        tabs, is used.  When the file has no header lines, no header row
        is yielded.  Data rows come from ``fetch`` restricted by
        ``self.reference``, ``self.start``, ``self.stop`` and
        ``self.region``, each yielded as a tuple.  The file is closed when
        iteration ends, fails, or the generator is closed.
        """
        from pysam import Tabixfile, asTuple
        f = Tabixfile(self.filename, mode='r')
        # try/finally alone guarantees the close; the former bare
        # ``except: raise`` clause added nothing and has been dropped.
        try:
            # header row
            if self.header is not None:
                yield self.header
            else:
                # assume last header line has fields
                h = list(f.header)
                if len(h) > 0:
                    header_line = text_type(h[-1], encoding='ascii')
                    yield tuple(header_line.split('\t'))

            # data rows
            for row in f.fetch(reference=self.reference,
                               start=self.start,
                               end=self.stop,
                               region=self.region,
                               parser=asTuple()):
                yield tuple(row)

        finally:
            f.close()
示例#13
0
def iterunpack(source, field, newfields, include_original, missing):
    """Unpack a sequence-valued field of `source` into additional fields.

    Yields the output header first, then one tuple per data row.

    field : field name (matched against the text form of the header) or
        integer position in the header.
    newfields : names of the new fields (list or tuple), the number of
        values to unpack (int; the fields are then named ``<field>1``,
        ``<field>2``, ...), or None for no unpacking.
    include_original : keep the unpacked field in the output when true.
    missing : value used to pad when a row holds fewer items than the
        number being unpacked.

    Raises ArgumentError when `field` cannot be resolved or `newfields`
    has an unsupported type.  A source with no header row yields nothing.
    """
    it = iter(source)

    try:
        hdr = next(it)
    except StopIteration:
        # empty source: finish quietly (a leaked StopIteration would be
        # turned into RuntimeError by PEP 479)
        return
    flds = list(map(text_type, hdr))
    if field in flds:
        field_index = flds.index(field)
    elif isinstance(field, int) and field < len(flds):
        field_index = field
        field = flds[field_index]
        if field_index < 0:
            # a negative position would never equal an enumerate() index
            # below, leaving the column in the data rows; make it absolute
            field_index += len(flds)
    else:
        raise ArgumentError("field invalid: must be either field name or index")

    # determine output fields
    outhdr = list(flds)
    if not include_original:
        # positional delete keeps the header aligned with the data rows
        # even when several fields share the same name
        del outhdr[field_index]
    if isinstance(newfields, (list, tuple)):
        outhdr.extend(newfields)
        nunpack = len(newfields)
    elif isinstance(newfields, int):
        nunpack = newfields
        newfields = [text_type(field) + text_type(i + 1) for i in range(newfields)]
        outhdr.extend(newfields)
    elif newfields is None:
        nunpack = 0
    else:
        raise ArgumentError(
            "newfields argument must be list or tuple of field " "names, or int (number of values to unpack)"
        )
    yield tuple(outhdr)

    # construct the output data
    for row in it:
        value = row[field_index]
        if include_original:
            out_row = list(row)
        else:
            out_row = [v for i, v in enumerate(row) if i != field_index]
        if nunpack > 0:
            # len() is only needed (and only safe to require) when
            # something is being unpacked
            nvals = len(value)
            if nvals >= nunpack:
                newvals = value[:nunpack]
            else:
                newvals = list(value) + ([missing] * (nunpack - nvals))
            out_row.extend(newvals)
        yield tuple(out_row)
示例#14
0
 def __iter__(self):
     """Yield the header with every field passed through ``normalize_name``, then the rows.

     Each header field is converted with
     ``text_type(normalize_name(f, self.illegal_v))``.  Data rows from
     ``self.table`` are yielded unchanged.  A table with no header row
     yields nothing.
     """
     it = iter(self.table)
     try:
         hdr = next(it)
     except StopIteration:
         # PEP 479: StopIteration escaping a generator becomes RuntimeError
         # on Python 3.7+; treat an empty table as an empty iteration.
         return
     outhdr = tuple(
         (text_type(normalize_name(f, self.illegal_v))) for f in hdr)
     yield outhdr
     for row in it:
         yield row
示例#15
0
文件: joins.py 项目: DeanWay/petl
def itercrossjoin(sources, prefix):
    """Yield the cartesian product of the data rows of several tables.

    The header is the concatenation of every source header.  When `prefix`
    is truthy each field becomes ``'<n>_<field>'``, where n is the
    one-based position of its source.  Each output row is the
    concatenation of one row from every source, in ``itertools.product``
    order.
    """

    # build the combined header
    names = []
    for number, src in enumerate(sources, 1):
        if prefix:
            # one-based numbering of the sources
            names += [text_type(number) + '_' + text_type(f) for f in header(src)]
        else:
            names += header(src)
    yield tuple(names)

    # flatten each combination of rows into a single output row
    row_sets = [data(src) for src in sources]
    for combo in itertools.product(*row_sets):
        yield tuple(itertools.chain.from_iterable(combo))
示例#16
0
def itercrossjoin(sources, prefix):
    """Cross join `sources`: yield a combined header, then every row combination.

    Header fields keep their source order.  If `prefix` is truthy, each
    field is rewritten as the one-based source number, an underscore and
    the field's text form.  Data rows are produced by
    ``itertools.product`` over the data of every source and flattened
    into one tuple per combination.
    """

    def source_fields(position, src):
        # header fields of one source, prefixed when requested
        if not prefix:
            return header(src)
        return [text_type(position + 1) + '_' + text_type(f)
                for f in header(src)]

    combined = []
    for position, src in enumerate(sources):
        combined.extend(source_fields(position, src))
    yield tuple(combined)

    datasrcs = [data(src) for src in sources]
    for picked in itertools.product(*datasrcs):
        flat = []
        for part in picked:
            flat += part
        yield tuple(flat)
示例#17
0
def _ordered_dict_iterator(table):
    it = iter(table)
    hdr = next(it)
    flds = [text_type(f) for f in hdr]
    for row in it:
        items = list()
        for i, f in enumerate(flds):
            try:
                v = row[i]
            except IndexError:
                v = None
            items.append((f, v))
        yield OrderedDict(items)
示例#18
0
文件: base.py 项目: alimanfoo/petl
def asdict(hdr, row, missing=None):
    """Return `row` as a dict keyed by the text form of the fields in `hdr`.

    Values are taken from `row` by position.  When `row` is shorter than
    `hdr`, the fields it lacks are mapped to `missing`.
    """
    names = [text_type(f) for f in hdr]
    try:
        # fast path: one comprehension, valid when the row is long enough
        pairs = [(name, row[pos]) for pos, name in enumerate(names)]
    except IndexError:
        # short row: rebuild field by field, padding with `missing`
        pairs = []
        for pos, name in enumerate(names):
            try:
                val = row[pos]
            except IndexError:
                val = missing
            pairs.append((name, val))
    return dict(pairs)
示例#19
0
文件: base.py 项目: alimanfoo/petl
def fieldnames(table):
    """
    Return the header row of `table` with every field converted to text.
    For a header made only of strings the result is the same as header(),
    i.e.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2]]
        >>> etl.fieldnames(table)
        ('foo', 'bar')
        >>> etl.header(table)
        ('foo', 'bar')

    """

    return tuple(map(text_type, header(table)))
示例#20
0
def fieldnames(table):
    """
    Give the text form of each field in the header row, as a tuple. This
    matches header() whenever the header row holds only strings, i.e.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2]]
        >>> etl.fieldnames(table)
        ('foo', 'bar')
        >>> etl.header(table)
        ('foo', 'bar')

    """

    names = [text_type(field) for field in header(table)]
    return tuple(names)
示例#21
0
def asdict(hdr, row, missing=None):
    """Map the text form of each header field to the value at the same position in `row`.

    A row shorter than the header is tolerated: the trailing fields are
    given the `missing` value.
    """

    def value_at(position):
        # positional lookup that substitutes `missing` past the row's end
        try:
            return row[position]
        except IndexError:
            return missing

    keys = [text_type(f) for f in hdr]
    try:
        # the common case (row at least as long as the header) in one pass
        pairs = [(keys[n], row[n]) for n in range(len(keys))]
    except IndexError:
        # short row, fall back to the slower padded lookup
        pairs = [(key, value_at(n)) for n, key in enumerate(keys)]
    return dict(pairs)
示例#22
0
def issorted(table, key=None, reverse=False, strict=False):
    """
    Return True if the table is ordered (i.e., sorted) by the given key. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 1, True],
        ...           ['b', 3, True],
        ...           ['b', 2]]
        >>> etl.issorted(table1, key='foo')
        True
        >>> etl.issorted(table1, key='bar')
        False
        >>> etl.issorted(table1, key='foo', strict=True)
        False
        >>> etl.issorted(table1, key='foo', reverse=True)
        False

    With `key` None whole rows are compared. `reverse` checks for
    descending order and `strict` disallows equal neighbours. A table with
    no rows, or with fewer than two data rows, is reported as sorted.

    """

    # determine the operator to use when comparing rows
    if reverse and strict:
        op = operator.lt
    elif reverse and not strict:
        op = operator.le
    elif strict:
        op = operator.gt
    else:
        op = operator.ge

    it = iter(table)
    try:
        flds = [text_type(f) for f in next(it)]
    except StopIteration:
        # no header row at all: nothing can be out of order
        return True

    if key is None:
        # compare the rows themselves
        keys = it
    else:
        getkey = comparable_itemgetter(*asindices(flds, key))
        keys = (getkey(row) for row in it)

    try:
        prevkey = next(keys)
    except StopIteration:
        # header only: previously this leaked StopIteration to the caller
        return True
    for currkey in keys:
        if not op(currkey, prevkey):
            return False
        prevkey = currkey
    return True
示例#23
0
文件: random.py 项目: alimanfoo/petl
    def __iter__(self):
        """Yield a header row, then ``self.numrows`` rows of generated values.

        The header holds the text form of the keys of ``self.fields``;
        each data row calls every value of ``self.fields`` once, in key
        order.  The ``random`` module is re-seeded with ``self.seed`` at
        the start.  When ``self.wait`` is truthy, the generator sleeps
        that long before each data row.
        """
        # work on a snapshot of the field mapping
        generators = self.fields.copy()
        total = self.numrows

        # N.B., we want this to be stable, i.e., same data each time
        random.seed(self.seed)

        # header row
        yield tuple(text_type(name) for name in generators)

        # data rows
        for _ in xrange(total):
            delay = self.wait
            if delay:
                # artificial delay
                time.sleep(delay)
            yield tuple(generators[name]() for name in generators)
示例#24
0
    def __iter__(self):
        """Generate the table: a header row followed by ``self.numrows`` data rows.

        A copy of ``self.fields`` is taken up front.  Its keys, converted
        to text, form the header, and its values are called to produce
        each cell.  ``random.seed(self.seed)`` is applied before any row
        is produced.  A truthy ``self.wait`` is slept before every data
        row.
        """
        count = self.numrows
        fields = self.fields.copy()

        # N.B., we want this to be stable, i.e., same data each time
        random.seed(self.seed)

        # construct header row
        names = [text_type(key) for key in fields.keys()]
        yield tuple(names)

        # construct data rows
        for _ in xrange(count):
            if self.wait:
                # artificial delay
                time.sleep(self.wait)
            cells = [fields[key]() for key in fields]
            yield tuple(cells)
示例#25
0
def make_create_table_statement(table,
                                tablename,
                                schema=None,
                                constraints=True,
                                metadata=None,
                                dialect=None):
    """
    Produce a CREATE TABLE statement whose columns are inferred from the
    data in `table`.

    Keyword arguments:

    table : table container
        Table data to use to infer types etc.
    tablename : text
        Name of the table
    schema : text
        Name of the database schema to create the table in
    constraints : bool
        If True use length and nullable constraints
    metadata : sqlalchemy.MetaData
        Custom table metadata
    dialect : text
        One of {'access', 'sybase', 'sqlite', 'informix', 'firebird', 'mysql',
        'oracle', 'maxdb', 'postgresql', 'mssql'}

    The statement is returned as text, stripped of leading and trailing
    whitespace.

    """

    import sqlalchemy

    def load_dialect(name):
        # import sqlalchemy.dialects.<module> and instantiate its dialect
        module = __import__('sqlalchemy.dialects.%s' % DIALECTS[name],
                            fromlist=['dialect'])
        return module.dialect()

    sql_table = make_sqlalchemy_table(table,
                                      tablename,
                                      schema=schema,
                                      constraints=constraints,
                                      metadata=metadata)

    sql_dialect = load_dialect(dialect) if dialect else None

    statement = sqlalchemy.schema.CreateTable(sql_table)
    return text_type(statement.compile(dialect=sql_dialect)).strip()
示例#26
0
文件: tabix.py 项目: alimanfoo/petlx
    def __iter__(self):
        """Iterate over a tabix-indexed file as a table.

        Opens ``self.filename`` with ``pysam.Tabixfile``.  The header row
        is ``self.header`` when that is not None; otherwise it is the last
        header line of the file (decoded as ASCII, split on tabs), and no
        header row is yielded if the file has no header lines.  Data rows
        are fetched with ``self.reference``, ``self.start``,
        ``self.stop`` and ``self.region`` and yielded as tuples.  The
        file handle is always closed via ``finally``.
        """
        from pysam import Tabixfile, asTuple
        f = Tabixfile(self.filename, mode='r')
        # The bare ``except: raise`` that used to sit here only re-raised;
        # ``finally`` is what performs the cleanup, so it is kept alone.
        try:
            # header row
            if self.header is not None:
                yield self.header
            else:
                # assume last header line has fields
                h = list(f.header)
                if len(h) > 0:
                    header_line = text_type(h[-1], encoding='ascii')
                    yield tuple(header_line.split('\t'))

            # data rows
            for row in f.fetch(reference=self.reference, start=self.start,
                               end=self.stop, region=self.region,
                               parser=asTuple()):
                yield tuple(row)

        finally:
            f.close()
示例#27
0
文件: joins.py 项目: DeanWay/petl
def iterlookupjoin(left, right, lkey, rkey, missing=None, lprefix=None,
                   rprefix=None):
    """Yield the rows of a left join that takes one right row per key.

    The header is yielded first: the left fields (prefixed with `lprefix`
    when given) followed by the right table's non-key fields (prefixed
    with `rprefix` when given).  Rows of both tables are grouped by key
    with ``itertools.groupby`` and the two group streams are merged:

    * left groups whose key has no right counterpart are yielded padded
      with `missing` in place of the right values;
    * left groups with a matching right group are extended with the
      non-key values of the FIRST row of that right group.

    Presumably both inputs must already be sorted by their key fields
    (the groupby + ``<``/``>`` merge relies on it) - confirm with callers.
    """
    lit = iter(left)
    rit = iter(right)

    # NOTE(review): if either table has no header row, next() raises
    # StopIteration inside this generator, which Python 3.7+ reports as
    # RuntimeError (PEP 479) - confirm whether callers can pass such input.
    lhdr = next(lit)
    rhdr = next(rit)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)

    # determine indices of non-key fields in the right table
    # (in the output, we only include key fields from the left table - we
    # don't want to duplicate fields)
    rvind = [i for i in range(len(rhdr)) if i not in rkind]
    rgetv = rowgetter(*rvind)

    # determine the output fields
    if lprefix is None:
        outhdr = list(lhdr)
    else:
        outhdr = [(text_type(lprefix) + text_type(f)) for f in lhdr]
    if rprefix is None:
        outhdr.extend(rgetv(rhdr))
    else:
        outhdr.extend([(text_type(rprefix) + text_type(f)) for f in rgetv(rhdr)])
    yield tuple(outhdr)

    # define a function to join two groups of rows
    # (_rrowgrp is None means "no matching right group")
    def joinrows(_lrowgrp, _rrowgrp):
        if _rrowgrp is None:
            for lrow in _lrowgrp:
                outrow = list(lrow)  # start with the left row
                # extend with missing values in place of the right row
                outrow.extend([missing] * len(rvind))
                yield tuple(outrow)
        else:
            rrow = next(iter(_rrowgrp))  # pick first arbitrarily
            for lrow in _lrowgrp:
                # start with the left row
                outrow = list(lrow)
                # extend with non-key values from the right row
                outrow.extend(rgetv(rrow))
                yield tuple(outrow)

    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)
    lrowgrp = []

    # loop until *either* of the iterators is exhausted
    lkval, rkval = None, None  # initialise here to handle empty tables
    try:

        # pick off initial row groups
        lkval, lrowgrp = next(lgit)
        rkval, rrowgrp = next(rgit)

        while True:
            if lkval < rkval:
                # left key has no match on the right
                for row in joinrows(lrowgrp, None):
                    yield tuple(row)
                # advance left
                lkval, lrowgrp = next(lgit)
            elif lkval > rkval:
                # right key has no match on the left; it is skipped
                # advance right
                rkval, rrowgrp = next(rgit)
            else:
                for row in joinrows(lrowgrp, rrowgrp):
                    yield tuple(row)
                # advance both
                lkval, lrowgrp = next(lgit)
                rkval, rrowgrp = next(rgit)

    except StopIteration:
        # one of the group iterators ran out; the code below flushes
        # whatever is left on the left side
        pass

    # make sure any left rows remaining are yielded
    # NOTE(review): when a table has no data rows, lkval and/or rkval is
    # still None here; on Python 3 ordering comparisons with None raise
    # TypeError - verify how empty inputs are expected to behave.
    if lkval > rkval:
        # yield anything that got left hanging
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
    # yield the rest
    for lkval, lrowgrp in lgit:
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
示例#28
0
 def __unicode__(self):
     """Return the text form of the wrapped ``self.obj``."""
     wrapped = self.obj
     return text_type(wrapped)
示例#29
0
def iterlookupjoin(left,
                   right,
                   lkey,
                   rkey,
                   missing=None,
                   lprefix=None,
                   rprefix=None):
    """Left-join `left` with `right`, using a single right row per key.

    Yields the output header, then the joined rows.  The header consists
    of the left fields (with `lprefix` prepended when it is not None) and
    the non-key fields of the right table (with `rprefix` prepended when
    it is not None).

    Both tables are grouped by key with ``itertools.groupby`` and merged
    group by group.  A left group with a matching right group is extended
    with the non-key values of the first row of the right group; a left
    group without a match is padded with `missing`.  Right groups without
    a matching left group are dropped.

    Presumably the caller supplies both tables sorted by key, since the
    merge compares keys with ``<`` and ``>`` - confirm against callers.
    """
    lit = iter(left)
    rit = iter(right)

    # NOTE(review): a table with no header row makes next() raise
    # StopIteration here, which surfaces as RuntimeError from a generator
    # on Python 3.7+ (PEP 479) - confirm whether that input can occur.
    lhdr = next(lit)
    rhdr = next(rit)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)

    # determine indices of non-key fields in the right table
    # (in the output, we only include key fields from the left table - we
    # don't want to duplicate fields)
    rvind = [i for i in range(len(rhdr)) if i not in rkind]
    rgetv = rowgetter(*rvind)

    # determine the output fields
    if lprefix is None:
        outhdr = list(lhdr)
    else:
        outhdr = [(text_type(lprefix) + text_type(f)) for f in lhdr]
    if rprefix is None:
        outhdr.extend(rgetv(rhdr))
    else:
        outhdr.extend([(text_type(rprefix) + text_type(f))
                       for f in rgetv(rhdr)])
    yield tuple(outhdr)

    # define a function to join two groups of rows
    # (passing None as the right group means "no match")
    def joinrows(_lrowgrp, _rrowgrp):
        if _rrowgrp is None:
            for lrow in _lrowgrp:
                outrow = list(lrow)  # start with the left row
                # extend with missing values in place of the right row
                outrow.extend([missing] * len(rvind))
                yield tuple(outrow)
        else:
            rrow = next(iter(_rrowgrp))  # pick first arbitrarily
            for lrow in _lrowgrp:
                # start with the left row
                outrow = list(lrow)
                # extend with non-key values from the right row
                outrow.extend(rgetv(rrow))
                yield tuple(outrow)

    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)
    lrowgrp = []

    # loop until *either* of the iterators is exhausted
    lkval, rkval = None, None  # initialise here to handle empty tables
    try:

        # pick off initial row groups
        lkval, lrowgrp = next(lgit)
        rkval, rrowgrp = next(rgit)

        while True:
            if lkval < rkval:
                # unmatched left group
                for row in joinrows(lrowgrp, None):
                    yield tuple(row)
                # advance left
                lkval, lrowgrp = next(lgit)
            elif lkval > rkval:
                # unmatched right group: skipped without output
                # advance right
                rkval, rrowgrp = next(rgit)
            else:
                for row in joinrows(lrowgrp, rrowgrp):
                    yield tuple(row)
                # advance both
                lkval, lrowgrp = next(lgit)
                rkval, rrowgrp = next(rgit)

    except StopIteration:
        # either side is exhausted; remaining left rows are handled below
        pass

    # make sure any left rows remaining are yielded
    # NOTE(review): lkval and/or rkval can still be None at this point if
    # a table had no data rows; on Python 3 ``>`` with None raises
    # TypeError - verify the intended behaviour for empty inputs.
    if lkval > rkval:
        # yield anything that got left hanging
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
    # yield the rest
    for lkval, lrowgrp in lgit:
        for row in joinrows(lrowgrp, None):
            yield tuple(row)
示例#30
0
def make_sqlalchemy_column(col, colname, constraints=True):
    """
    Build a SQLAlchemy column whose type is inferred from sample values.

    Keyword arguments:

    col : sequence
        Values used to infer the type, length and nullability
    colname : string
        Name given to the column
    constraints : bool
        If True, add length and nullable constraints

    None values are ignored when choosing the type. A column holding only
    None values becomes a String.

    """

    import sqlalchemy

    values = [v for v in col if v is not None]
    type_kwargs = {}
    column_kwargs = {}

    def every(kinds):
        # True when each non-None value is an instance of `kinds`
        return all(isinstance(v, kinds) for v in values)

    # the order of the checks matters: bool is tested before int and
    # datetime before date
    if not values:
        column_type = sqlalchemy.String
        if constraints:
            type_kwargs["length"] = NULL_COLUMN_MAX_LENGTH
    elif every(bool):
        column_type = sqlalchemy.Boolean
    elif every(int):
        out_of_range = max(values) > SQL_INTEGER_MAX or min(values) < SQL_INTEGER_MIN
        column_type = sqlalchemy.BigInteger if out_of_range else sqlalchemy.Integer
    elif every((int, long)):
        # covers both the all-long and the mixed int/long case
        column_type = sqlalchemy.BigInteger
    elif every((int, long, float)):
        column_type = sqlalchemy.Float
    elif every(datetime.datetime):
        column_type = sqlalchemy.DateTime
    elif every(datetime.date):
        column_type = sqlalchemy.Date
    elif every(datetime.time):
        column_type = sqlalchemy.Time
    else:
        column_type = sqlalchemy.String
        if constraints:
            # the longest text form over ALL values, None included
            type_kwargs["length"] = max(len(text_type(v)) for v in col)

    if constraints:
        # nullable exactly when at least one None was present
        column_kwargs["nullable"] = len(values) < len(col)

    return sqlalchemy.Column(colname, column_type(**type_kwargs), **column_kwargs)
示例#31
0
文件: random.py 项目: alimanfoo/petl
 def __setitem__(self, item, value):
     """Store `value` in ``self.fields`` under the text form of `item`."""
     key = text_type(item)
     self.fields[key] = value
示例#32
0
 def __unicode__(self):
     """Convert ``self.obj`` with ``text_type`` and return the result."""
     as_text = text_type(self.obj)
     return as_text
示例#33
0
def make_sqlalchemy_column(col, colname, constraints=True):
    """
    Create a SQLAlchemy column, choosing its type from a sequence of values.

    Keyword arguments:

    col : sequence
        A sequence of values to use to infer type, length etc.
    colname : string
        Name of column
    constraints : bool
        If True use length and nullable constraints

    The type is picked from the non-None values only. When there are
    none, the column is a String.

    """

    import sqlalchemy

    present = [v for v in col if v is not None]
    sql_type_kwargs = {}
    sql_column_kwargs = {}

    def all_are(kinds):
        # every non-None value is an instance of `kinds`
        return all(isinstance(v, kinds) for v in present)

    def pick_type():
        # the checks are order-sensitive: bool before int, datetime
        # before date
        if not present:
            if constraints:
                sql_type_kwargs['length'] = NULL_COLUMN_MAX_LENGTH
            return sqlalchemy.String
        if all_are(bool):
            return sqlalchemy.Boolean
        if all_are(int):
            if max(present) > SQL_INTEGER_MAX \
                    or min(present) < SQL_INTEGER_MIN:
                return sqlalchemy.BigInteger
            return sqlalchemy.Integer
        if all_are((int, long)):
            # all-long and mixed int/long both map to BigInteger
            return sqlalchemy.BigInteger
        if all_are((int, long, float)):
            return sqlalchemy.Float
        if all_are(datetime.datetime):
            return sqlalchemy.DateTime
        if all_are(datetime.date):
            return sqlalchemy.Date
        if all_are(datetime.time):
            return sqlalchemy.Time
        if constraints:
            # the length covers the text form of every value, None included
            sql_type_kwargs['length'] = max([len(text_type(v)) for v in col])
        return sqlalchemy.String

    sql_column_type = pick_type()

    if constraints:
        # nullable when at least one value was None
        sql_column_kwargs['nullable'] = len(present) < len(col)

    return sqlalchemy.Column(colname, sql_column_type(**sql_type_kwargs),
                             **sql_column_kwargs)
示例#34
0
 def __setitem__(self, item, value):
     """Set ``self.fields[text_type(item)]`` to `value`."""
     name = text_type(item)
     self.fields[name] = value