示例#1
0
 def __init__(self, n_samples, out_file, pid, lid, keep_cols,
              n_retry, delimiter, dataset, pred_name, ui, file_context,
              fast_mode, encoding, skip_row_id, output_delimiter):
     """Store the configuration for one batch scoring run.

     ``pid``/``lid`` are stored as ``project_id``/``model_id``.  Raises
     csv.Error unless the 'dataset_dialect' and 'writer_dialect'
     dialects have already been registered (done by
     investigate_encoding_and_dialect in utils).
     """
     self.n_samples = n_samples
     self.out_file = out_file
     self.project_id = pid
     self.model_id = lid
     self.keep_cols = keep_cols
     self.n_retry = n_retry
     self.delimiter = delimiter
     self.dataset = dataset
     self.pred_name = pred_name
     self.out_stream = None  # populated later; None until then
     self._ui = ui
     self.file_context = file_context
     self.fast_mode = fast_mode
     self.encoding = encoding
     self.skip_row_id = skip_row_id
     self.output_delimiter = output_delimiter
     #  dataset_dialect and writer_dialect are set by
     #  investigate_encoding_and_dialect in utils
     self.dialect = csv.get_dialect('dataset_dialect')
     self.writer_dialect = csv.get_dialect('writer_dialect')
     self.scoring_succeeded = False  # Removes shelves when True
     self.is_open = False  # Tracks whether open() has been called
示例#2
0
 def __init__(self):
     """Wire up the printer driver: hardware callbacks, the print
     process, a CSV command channel over stdio, and the verb dispatch
     table.
     """
     # The external `printer` module is handed this object's callbacks;
     # their exact contract is defined by that module.
     self.printer = printer.printer(self.printer_onlineCallback,
                                    self.printer_offlineCallback,
                                    self.printer_positionCallback,
                                    self.printer_temperatureCallback,
                                    self.printer_receiveCallback,
                                    self.printer_sendCallback)
     self.printProcess = printer.printprocess(
         self.printer, self.printProcess_showImageCallback,
         self.printProcess_hideImageCallback,
         self.printProcess_startedPrintingCallback,
         self.printProcess_finishedPrintingCallback)
     self.isOnline = False
     self.isPrinting = False
     # Commands arrive/leave as CSV rows over stdin/stdout; the
     # 'StdioPrinterDriverCustom' dialect must already be registered
     # (csv.get_dialect raises csv.Error otherwise).
     self.reader = csv.reader(sys.stdin,
                              csv.get_dialect('StdioPrinterDriverCustom'))
     self.writer = csv.writer(sys.stdout,
                              csv.get_dialect('StdioPrinterDriverCustom'))
     # Maps incoming verb strings to their handler methods.
     self.verbMap = {
         'move': self.move,
         'moveTo': self.moveTo,
         'home': self.home,
         'lift': self.lift,
         'askTemp': self.askTemp,
         'send': self.send,
         'queryOnline': self.queryOnline,
         'queryPrinting': self.queryPrinting,
         'stopPrinting': self.stopPrinting,
         'terminate': self.terminate
     }
示例#3
0
文件: csv.py 项目: esc/blaze
def discover_dialect(sample, dialect=None, **kwargs):
    """Discover the CSV dialect of *sample* and return it as a dict.

    *dialect* may be a registered dialect name or a dialect object; when
    omitted it is sniffed from the sample, falling back to 'excel'.
    Individual parameters can be overridden via keyword arguments,
    e.g. ``delimiter=','``.

    >>> discover_dialect('1,1\\n2,2')['delimiter']  # doctest: +SKIP
    ','
    """
    if isinstance(dialect, py2help._strtypes):
        dialect = csv.get_dialect(dialect)

    if not dialect:
        try:
            dialect = csv.Sniffer().sniff(sample)
        except csv.Error:
            # Sniffer.sniff raises csv.Error when it cannot determine a
            # dialect; a bare except here would also swallow unrelated
            # errors (KeyboardInterrupt, typos, ...).
            dialect = csv.get_dialect("excel")

    # Expose the dialect's public attributes as a plain dict.
    dialect = dict((key, getattr(dialect, key))
                   for key in dir(dialect) if not key.startswith("_"))

    # Update dialect with any keyword arguments passed in
    # E.g. allow user to override with delimiter=','
    for k, v in kwargs.items():
        if k in dialect:
            dialect[k] = v

    return dialect
示例#4
0
 def __init__(self, n_samples, out_file, pid, lid, keep_cols,
              n_retry, delimiter, dataset, pred_name, ui, file_context,
              fast_mode, encoding, skip_row_id, output_delimiter,
              pred_threshold_name, pred_decision_name,
              max_prediction_explanations,
              ):
     """Store the configuration for one batch scoring run.

     ``pid``/``lid`` are stored as ``project_id``/``model_id``.  Raises
     csv.Error unless the 'dataset_dialect' and 'writer_dialect'
     dialects have already been registered (done by
     investigate_encoding_and_dialect in utils).
     """
     self.n_samples = n_samples
     self.out_file = out_file
     self.project_id = pid
     self.model_id = lid
     self.keep_cols = keep_cols
     self.n_retry = n_retry
     self.delimiter = delimiter
     self.dataset = dataset
     self.pred_name = pred_name
     self.pred_threshold_name = pred_threshold_name
     self.pred_decision_name = pred_decision_name
     self.out_stream = None  # populated later; None until then
     self._ui = ui
     self.file_context = file_context
     self.fast_mode = fast_mode
     self.encoding = encoding
     self.skip_row_id = skip_row_id
     self.output_delimiter = output_delimiter
     #  dataset_dialect and writer_dialect are set by
     #  investigate_encoding_and_dialect in utils
     self.dialect = csv.get_dialect('dataset_dialect')
     self.writer_dialect = csv.get_dialect('writer_dialect')
     self.scoring_succeeded = False  # Removes shelves when True
     self.is_open = False  # Tracks whether open() has been called
     self.max_prediction_explanations = max_prediction_explanations
示例#5
0
文件: csv.py 项目: aterrel/blaze
def discover_dialect(sample, dialect=None, **kwargs):
    """ Discover CSV dialect from string sample

    *dialect* may be a registered name or dialect object; when omitted
    it is sniffed from the sample, falling back to 'excel'.  Keyword
    arguments override individual parameters, e.g. ``delimiter=','``.

    Returns dict
    """
    if isinstance(dialect, compatibility._strtypes):
        dialect = csv.get_dialect(dialect)

    if not dialect:
        try:
            dialect = csv.Sniffer().sniff(sample)
        except csv.Error:
            # Sniffer.sniff raises csv.Error when it cannot determine a
            # dialect; a bare except here would also swallow unrelated
            # errors (KeyboardInterrupt, typos, ...).
            dialect = csv.get_dialect('excel')

    # Convert dialect to dictionary of its public attributes
    dialect = dict((key, getattr(dialect, key))
                   for key in dir(dialect) if not key.startswith('_'))

    # Update dialect with any keyword arguments passed in
    # E.g. allow user to override with delimiter=','
    for k, v in kwargs.items():
        if k in dialect:
            dialect[k] = v

    return dialect
示例#6
0
    def register_dialect(cls):
        """Register this class's CSV dialect with the csv module.

        Idempotent: nothing happens when the dialect name is already
        registered.
        """
        dialect_cls = cls.Meta.dialect
        assert issubclass(dialect_cls, BaseTimeRecordDialect)
        # Only register when the name is not yet known to the registry.
        if dialect_cls.dialect_name not in csv.list_dialects():
            csv.register_dialect(dialect_cls.dialect_name, dialect_cls)
    def __init__(self,
                 path,
                 mode='r',
                 schema=None,
                 dialect=None,
                 has_header=None,
                 **kwargs):
        """CSV data descriptor.

        path: existing CSV file (ValueError otherwise).
        schema: blaze record dshape, or a string convertible to one
            (TypeError otherwise).
        dialect: registered dialect name; sniffed when omitted, with a
            fallback to 'excel'.  **kwargs override single parameters.
        has_header: sniffed from the first 1 KiB when omitted.
        """
        if os.path.isfile(path) is not True:
            raise ValueError('CSV file "%s" does not exist' % path)
        self.path = path
        self.mode = mode

        # Handle Schema -- validated BEFORE opening the file so a bad
        # schema can no longer leak an open file handle.
        if isinstance(schema, py2help._strtypes):
            schema = datashape.dshape(schema)
        if isinstance(schema, datashape.DataShape) and len(schema) == 1:
            schema = schema[0]
        if not isinstance(schema, datashape.Record):
            raise TypeError(
                'schema cannot be converted into a blaze record dshape')
        self.schema = str(schema)

        csvfile = open(path, mode=self.mode)
        try:
            # Handle Dialect
            if dialect is None:
                # Guess the dialect from a sample; Sniffer raises
                # csv.Error when it cannot decide.
                sniffer = csv.Sniffer()
                try:
                    dialect = sniffer.sniff(csvfile.read(1024))
                except csv.Error:
                    # Cannot guess dialect.  Assume Excel.
                    dialect = csv.get_dialect('excel')
                csvfile.seek(0)
            else:
                dialect = csv.get_dialect(dialect)
            # Expose the dialect's public attributes as a plain dict.
            self.dialect = dict((key, getattr(dialect, key))
                                for key in dir(dialect)
                                if not key.startswith('_'))

            # Update dialect with any keyword arguments passed in
            # E.g. allow user to override with delimiter=','
            for k, v in kwargs.items():
                if k in self.dialect:
                    self.dialect[k] = v

            # Handle Header
            if has_header is None:
                # Guess whether the file has a header or not
                sniffer = csv.Sniffer()
                csvfile.seek(0)
                sample = csvfile.read(1024)
                self.has_header = sniffer.has_header(sample)
            else:
                self.has_header = has_header
        finally:
            # Always release the handle, even when sniffing fails.
            csvfile.close()
示例#8
0
 def _get_dialect(self) -> csv.Dialect:
     """Resolve ``self._dialect`` into a csv.Dialect.

     Accepts None (falls back to 'excel'), an existing csv.Dialect
     instance, or a registered dialect name.  Raises ValueError for an
     unknown name.
     """
     # Get default format parameters from dialect
     attr = self._dialect
     if attr is None:
         dialect = csv.get_dialect('excel')  # Default dialect
     elif isinstance(attr, csv.Dialect):
         dialect = attr  # type: ignore
     elif attr in csv.list_dialects():
         dialect = csv.get_dialect(attr)
     else:
         # Fixed typo in the error message ("unkown" -> "unknown").
         raise ValueError(f"unknown CSV-dialect '{attr}'")
     return dialect  # type: ignore
示例#9
0
def sniff(filestream):
    """Return ``(lines, dialect)`` for an iterable of text lines.

    Comment lines (starting with '#') and empty lines are dropped.
    When every remaining line contains a tab, the 'excel-tab' dialect
    is chosen; otherwise the dialect is sniffed from the joined sample,
    falling back to 'excel' on failure.
    """
    ##sample = csv.Sniffer().sniff(filestream.read(1024))
    lines = [ln for ln in filestream if ln and not ln.startswith("#")]
    if all("\t" in ln for ln in lines):
        return (lines, csv.get_dialect("excel-tab"))
    sample = "\n".join(lines)
    try:
        dialect = csv.Sniffer().sniff(sample)
    except Exception:
        print("Could not determine delimiter type, proceedings as excel csv", file=sys.stderr)
        dialect = csv.get_dialect("excel")
    return (lines, dialect)
示例#10
0
    def get_reader(self, reader_class=namedtuple_csv_reader):
        """Build a CSV reader over the uploaded form file.

        The dialect comes from the form's 'dialect' field; empty or
        'autodetect' sniffs the first 1 KiB, falling back to 'excel'
        when the sniffed delimiter is not tab/comma/semicolon.  The
        'encoding' field is only forwarded to the default
        namedtuple_csv_reader class.
        """
        f = self.files['file']
        d = self.cleaned_data['dialect']
        if not d: d = "autodetect"
        if d == 'autodetect':
            dialect = csv.Sniffer().sniff(f.read(1024))
            f.seek(0)  # rewind so the reader starts at the top
            if dialect.delimiter not in "\t,;":
                # Implausible sniff result: fall back to plain excel CSV.
                dialect = csv.get_dialect('excel')
        else:
            dialect = csv.get_dialect(d)

        enc = self.cleaned_data['encoding']
        encoding = {'encoding' : ENCODINGS[int(enc)]} if enc and reader_class == namedtuple_csv_reader else {}
        return reader_class(f, dialect=dialect, **encoding)
示例#11
0
 def open(self):
     """Open the run context: register the CSV dialects, open the
     shelve checkpoint DB and the append-mode output stream.  No-op
     when already open.
     """
     if self.is_open:
         self._ui.debug('OPEN CALLED ON ALREADY OPEN RUNCONTEXT')
         return
     self.is_open = True
     self._ui.debug('OPEN CALLED ON RUNCONTEXT')
     # Re-register the dialects, then fetch the canonical Dialect
     # objects back from the registry.
     # NOTE(review): assumes self.dialect / self.writer_dialect are
     # **kwargs-able mappings at this point -- confirm against __init__.
     csv.register_dialect('dataset_dialect', **self.dialect)
     csv.register_dialect('writer_dialect', **self.writer_dialect)
     self.dialect = csv.get_dialect('dataset_dialect')
     self.writer_dialect = csv.get_dialect('writer_dialect')
     # writeback=True caches mutated entries until sync/close.
     self.db = shelve.open(self.file_context.file_name, writeback=True)
     if six.PY2:
         self.out_stream = open(self.out_file, 'ab')
     elif six.PY3:
         # newline='' so csv.writer controls line terminators itself.
         self.out_stream = open(self.out_file, 'a', newline='')
示例#12
0
 def open(self):
     """Open the run context: register the CSV dialects, open the
     shelve checkpoint DB and the append-mode output stream.  No-op
     when already open.
     """
     if self.is_open:
         self._ui.debug('OPEN CALLED ON ALREADY OPEN RUNCONTEXT')
         return
     self.is_open = True
     self._ui.debug('OPEN CALLED ON RUNCONTEXT')
     # Re-register the dialects, then fetch the canonical Dialect
     # objects back from the registry.
     # NOTE(review): assumes self.dialect / self.writer_dialect are
     # **kwargs-able mappings at this point -- confirm against __init__.
     csv.register_dialect('dataset_dialect', **self.dialect)
     csv.register_dialect('writer_dialect', **self.writer_dialect)
     self.dialect = csv.get_dialect('dataset_dialect')
     self.writer_dialect = csv.get_dialect('writer_dialect')
     # writeback=True caches mutated entries until sync/close.
     self.db = shelve.open(self.file_context.file_name, writeback=True)
     if six.PY2:
         self.out_stream = open(self.out_file, 'ab')
     elif six.PY3:
         # newline='' so csv.writer controls line terminators itself.
         self.out_stream = open(self.out_file, 'a', newline='')
示例#13
0
def test_build_csv_multiple_cols():
    """build_csv emits a header row plus one excel-dialect row per record."""
    excel = csv.get_dialect('excel')
    lt = excel.lineterminator
    dm = excel.delimiter
    header = ['col1', 'col2', 'col3']
    values = [
        ['value', 'another value', 'more'],
        ['one value', 'two value', 'three'],
    ]
    data = [co.OrderedDict(zip(header, row)) for row in values]
    expected = lt.join(dm.join(row) for row in [header] + values) + lt
    assert build_csv(data) == expected
示例#14
0
 def __init__(self, queue, batch_gen_args, ui):
     """Per-worker batcher state.

     Requires the 'dataset_dialect' csv dialect to be registered
     beforehand (csv.get_dialect raises csv.Error otherwise).
     """
     self._ui = ui
     self.queue = queue
     self.batch_gen_args = batch_gen_args
     self.dialect = csv.get_dialect('dataset_dialect')
     #  The following should only impact Windows
     self._ui.set_next_UI_name('batcher')
示例#15
0
def main():
    """Copy a CSV file to a new file, normalising its formatting.

    The input dialect is built from --in-delimiter/--in-quote when a
    delimiter is given; otherwise it is sniffed from the input file.
    Both files are always closed, even on error.
    """
    parser = argparse.ArgumentParser(description="Fix csv files")
    parser.add_argument('infile', type=argparse.FileType('rt'))
    parser.add_argument('outfile', type=argparse.FileType('wt'))
    parser.add_argument('extras', nargs='*', help=argparse.SUPPRESS)
    parser.add_argument('--in-delimiter', action='store', nargs='?', type=str)
    parser.add_argument('--in-quote', action='store', default='"', type=str)
    args = parser.parse_args()
    try:
        if len(args.extras) > 0:
            # ValueError instead of BaseException: still caught by any
            # caller handling BaseException, but no longer swallows the
            # interpreter-level exception space.
            raise ValueError('Too many arguments')
        if args.in_delimiter is not None:
            # Decide on the parsed option itself, not len(sys.argv):
            # the argv heuristic registered a dialect with delimiter=None
            # (a TypeError) when only --in-quote was passed.
            csv.register_dialect('in',
                                 delimiter=args.in_delimiter,
                                 quotechar=args.in_quote,
                                 quoting=csv.QUOTE_MINIMAL)
            d_in = csv.get_dialect('in')
        else:
            # No explicit delimiter: sniff the first 1 KiB.
            d_in = csv.Sniffer().sniff(args.infile.read(1024))
            args.infile.seek(0)
        csv_in = csv.reader(args.infile, dialect=d_in)
        csv_out = csv.writer(args.outfile)
        csv_out.writerows(csv_in)
    finally:
        args.infile.close()
        args.outfile.close()
    def set_dialect(self):
        """Return the name of a usable csv dialect, or None.

        First tries the dialect named by the 'use_dialect' setting; if
        it is not already registered, attempts to register it from the
        user-defined 'dialects' mapping in the settings.
        """
        dialectname = self.settings.get('use_dialect')
        try:
            # Probe the registry; get_dialect raises for unknown names.
            csv.get_dialect(dialectname)
            return dialectname
        except Exception:
            user_dialects = self.settings.get('dialects')

        try:
            # Register from the user-supplied format parameters.
            csv.register_dialect(dialectname, **user_dialects[dialectname])
            print("DataConverter: Using custom dialect", dialectname)
            return dialectname

        except Exception:
            print("DataConverter: Couldn't register custom dialect named", dialectname)
            return None
示例#17
0
def sqlite_to_csv(
    input_filename,
    table_name,
    output_filename,
    dialect=csv.excel,
    batch_size=10000,
    encoding="utf-8",
    callback=None,
    query=None,
):
    """Export a table inside a SQLite database to CSV.

    dialect: registered dialect name or dialect object.
    batch_size: rows fetched/written per batch.
    callback: optional ``callback(written, total_written)`` invoked
        after each batch.
    query: custom SELECT; defaults to ``SELECT * FROM table_name``.
    """

    # TODO: should be able to specify fields
    # TODO: should be able to specify custom query

    if isinstance(dialect, six.text_type):
        dialect = csv.get_dialect(dialect)

    if query is None:
        query = "SELECT * FROM {}".format(table_name)
    connection = sqlite3.Connection(input_filename)
    try:
        cursor = connection.cursor()
        result = cursor.execute(query)
        header = [item[0] for item in cursor.description]
        fobj = open_compressed(output_filename, mode="w", encoding=encoding)
        try:
            writer = csv.writer(fobj, dialect=dialect)
            writer.writerow(header)
            total_written = 0
            for batch in rows.plugins.utils.ipartition(result, batch_size):
                writer.writerows(batch)
                written = len(batch)
                total_written += written
                if callback:
                    callback(written, total_written)
        finally:
            # Close the output even when the export fails mid-way.
            fobj.close()
    finally:
        # The connection (and its cursor) were previously leaked.
        connection.close()
示例#18
0
    def __call__(self, value, error_callback, convertor_fmt_str):
        """Parse *value* as one CSV row and return its (optionally
        converted) elements as a list.

        Raises ConvertorError when any element fails conversion.
        NOTE(review): error_callback and convertor_fmt_str are unused
        here -- presumably required by the convertor-protocol signature.
        """
        buffer = StringIO(value)

        if self._delimeter is None:
            # No explicit delimiter configured: sniff it from the value.
            dialect = csv.Sniffer().sniff(value)
            dialect.skipinitialspace = True
        else:
            # NOTE(review): re-registers 'my_dialect' globally on every
            # call -- confirm this is not used concurrently.
            csv.register_dialect('my_dialect', delimiter=self._delimeter, quoting=csv.QUOTE_MINIMAL, skipinitialspace=True)
            dialect = csv.get_dialect('my_dialect')

        reader = csv.reader(buffer, dialect)
        lst = next(reader)  # only the first row of *value* is used

        try:
            if self._elem_get_input:
                # Validate/convert each element; any invalid element
                # aborts the whole row.
                converted_list = []
                for item in lst:
                    valid, value = self._elem_get_input.process_value(item)

                    if valid is True:
                        converted_list.append(value)
                    else:
                        raise ConvertorError
            else:
                converted_list = lst
        except ConvertorError:
            raise ConvertorError(self.value_error_str)

        return converted_list
示例#19
0
class Manufacturer(db.Model):
    """The normalized information about a manufacturer.

    Ideally users should use the names from this list when submitting
    devices.
    """
    # Comma -- the stock 'excel' dialect delimiter, exposed for CSV
    # consumers of this table.
    CSV_DELIMITER = csv.get_dialect('excel').delimiter

    name = db.Column(CIText(), primary_key=True)
    name.comment = """The normalized name of the manufacturer."""
    url = db.Column(URL(), unique=True)
    url.comment = """An URL to a page describing the manufacturer."""
    logo = db.Column(URL())
    logo.comment = """An URL pointing to the logo of the manufacturer."""

    __table_args__ = (
        # from https://niallburkley.com/blog/index-columns-for-like-in-postgres/
        db.Index('name_index',
                 text('name gin_trgm_ops'),
                 postgresql_using='gin'),
        {
            'schema': 'common'
        })

    @classmethod
    def add_all_to_session(cls, session: db.Session):
        """Adds all manufacturers to session."""
        # Bulk-load via PostgreSQL COPY on the raw psycopg2 cursor --
        # much faster than per-row ORM inserts.
        cursor = session.connection().connection.cursor()
        #: Dialect used to write the CSV

        with pathlib.Path(__file__).parent.joinpath(
                'manufacturers.csv').open() as f:
            cursor.copy_expert(
                'COPY common.manufacturer FROM STDIN (FORMAT csv)', f)
示例#20
0
    def __init__(self, *args, **kwds):
        """CSV preview dialog.

        Expects the data to preview in ``kwds['data']`` (popped before
        the remaining args/kwds are forwarded to wx.Dialog).
        """
        self.data = kwds.pop('data')

        kwds["style"] = \
            wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.THICK_FRAME

        wx.Dialog.__init__(self, *args, **kwds)

        self.csvwidgets = CsvParameterWidgets(self, None)
        # Initial preview dialect: simply the first registered dialect.
        dialect = csv.get_dialect(csv.list_dialects()[0])
        self.has_header = False

        style = wx.TE_MULTILINE | wx.TE_READONLY | wx.HSCROLL
        self.preview_textctrl = CSVPreviewTextCtrl(self, -1, style=style)

        self.button_cancel = wx.Button(self, wx.ID_CANCEL, "")
        self.button_apply = wx.Button(self, wx.ID_APPLY, "")
        self.button_ok = wx.Button(self, wx.ID_OK, "")

        self._set_properties()
        self._do_layout()

        # Render the initial preview with the chosen dialect.
        self.preview_textctrl.fill(data=self.data, dialect=dialect)

        self.Bind(wx.EVT_BUTTON, self.OnButtonApply, self.button_apply)
示例#21
0
    def infer(location, default=None):
        """
        Return a format to use for the filename provided.

        Extensions are peeled right-to-left (so e.g. 'data.csv.gz' is
        eventually recognised as CSV).  Falls back to *default* when no
        extension matches.
        """
        url = urllib.parse.urlparse(location)
        path = url.path

        # try and infer using an extension
        while path and '.' in path:
            path, ext = os.path.splitext(path)
            ext = ext[1:].lower()

            # is it JSON?
            if ext.startswith('json'):
                return JSONLines() if 'l' in ext else JSON()

            # is it a registered CSV dialect?  csv.get_dialect never
            # returns None -- it raises csv.Error for unknown names, so
            # the old `is not None` check crashed on any unknown
            # extension instead of trying the next one.
            try:
                csv_dialect = csv.get_dialect(ext)
            except csv.Error:
                continue
            return CSV(
                sep=csv_dialect.delimiter,
                linesep=csv_dialect.lineterminator,
                quotechar=csv_dialect.quotechar,
                escapechar=csv_dialect.escapechar,
            )

        # unable to infer
        return default
示例#22
0
def process_recording(recording, csv_out, out_directory, overwrite=False):
    """Process a single recording

    recordings: List of recording folders
    csv_out: CSV file name under which the result will be saved
    overwrite: Boolean indicating if an existing csv file should be overwritten
    """
    # Empty out_directory means "write next to the recording".
    target_dir = recording if len(out_directory) == 0 else out_directory
    csv_out_path = os.path.join(target_dir, csv_out)

    if os.path.exists(csv_out_path):
        if overwrite:
            logger.warning(
                "{} exists already! Overwriting.".format(csv_out_path))
        else:
            logger.warning(
                "{} exists already! Not overwriting.".format(csv_out_path))
            return

    # newline='' so csv.writer controls the line terminators itself.
    with open(csv_out_path, "w", newline='') as csv_file:
        writer = csv.writer(csv_file, dialect=csv.get_dialect('excel'))
        writer.writerow(csv_header())
        writer.writerows(load_and_yield_data(recording))
    return
示例#23
0
def get_dialect(dialect, **fmtparams):
    """Resolve *dialect* (registered name or dialect object) plus
    per-call format overrides into this module's Dialect type.

    CR / CRLF line terminators are normalised to LF.  (Python 2 code:
    uses basestring.)
    """
    if isinstance(dialect, basestring):
        dialect = csv.get_dialect(dialect)

    # Unlike the standard csv module, this module does not have its own
    # universal newline handling, but instead expects the provided file objects
    # to be opened in universal newline mode. We therefore convert all newline
    # line terminators to '\n'.
    lineterminator = fmtparams.get('lineterminator', dialect.lineterminator)
    if lineterminator in {'\r', '\r\n'}:
        lineterminator = '\n'

    #XXX csv.Dialect does for some reason not expose strict. We set strict=False
    # by default, but this will of course be wrong if the original dialect had
    # strict=True.
    strict = False
    if hasattr(dialect, 'strict'):
        strict = dialect.strict

    # For each parameter an explicit fmtparams override wins, otherwise
    # the value from the source dialect is used.
    return Dialect(
        delimiter=fmtparams.get('delimiter', dialect.delimiter),
        quotechar=fmtparams.get('quotechar', dialect.quotechar),
        escapechar=fmtparams.get('escapechar', dialect.escapechar),
        doublequote=fmtparams.get('doublequote', dialect.doublequote),
        skipinitialspace=fmtparams.get('skipinitialspace', dialect.skipinitialspace),
        lineterminator=lineterminator,
        quoting=fmtparams.get('quoting', dialect.quoting),
        strict=fmtparams.get('strict', strict)
    )
示例#24
0
文件: reader.py 项目: twardoch/yaplon
def csv(input, dialect=None, header=True, key=None, sort=False):
    """Read CSV data from the open file *input*.

    dialect: optional registered dialect name; sniffed from the first
        line when omitted.
    header: treat the first row as field names and build OrderedDicts.
    key: 1-based column index; when given (and valid) the result is an
        OrderedDict keyed by that column instead of a list.
    sort: sort the resulting mapping via sort_ordereddict.
    """
    obj = []
    fields = None
    if dialect:
        dialect = ocsv.get_dialect(dialect)
    else:
        sniffer = ocsv.Sniffer()
        # Sniffer.sniff returns a Dialect subclass; instantiate it.
        dialect = sniffer.sniff(input.readline())()
        input.seek(0)
    reader = ocsv.reader(input, dialect=dialect)
    if key:
        header = True  # keyed output requires field names
    if header:
        fields = next(reader)
        if key and key <= len(fields):
            obj = OrderedDict()
        else:
            key = None  # invalid key column: fall back to a plain list
    for row in reader:
        if header:
            row = OrderedDict(zip(fields, row))
            if key:
                # Remove the key column from the row and use its value
                # as the mapping key.
                rowkey = row.pop(fields[key - 1])
                obj[rowkey] = row
            else:
                obj.append(row)
        else:
            obj.append(row)
    if sort:
        obj = sort_ordereddict(obj)
    return obj
    def sniff(self, sample):
        """Best-effort detection of the CSV dialect of *sample*.

        Fallback order: csv.Sniffer, then a bare dialect built from the
        'delimiter' setting, then the stock 'excel' dialect.
        (Python 2 code: print statements, bytes() of a 1-char str.)
        """
        try:
            dialect = csv.Sniffer().sniff(sample)
            print dialect.delimiter
        except Exception as e:
            # settings value is presumably a list -- .pop() takes its
            # last element; confirm against the settings schema.
            delimiter = self.settings.get('delimiter', ',').pop()
            delimiter = bytes(delimiter)  # dialect definition takes a 1-char bytestring
            print "DataConverter had trouble sniffing:", e
            try:
                csv.register_dialect('barebones', delimiter=delimiter)
                dialect = csv.get_dialect('barebones')

            except Exception as e:
                dialect = csv.get_dialect('excel')

        return dialect
示例#26
0
 def __init__(self, filename, renmwo=True):
     """Create a tab-separated CSV writer for *filename*.

     When *renmwo* is true, output goes to a pid-suffixed temporary
     file '<filename>.~renmwo<pid>~' (presumably renamed over the
     target later -- confirm against the rest of the class); otherwise
     the target file is written directly.
     """
     self.filename = filename
     self.renmwo = "%s.~renmwo%d~" % (filename,
                                      os.getpid()) if renmwo else filename
     self.file = open(self.renmwo, "w")
     self.writer = csv.writer(self.file,
                              dialect=csv.get_dialect("excel-tab"))
示例#27
0
def reportcrossval(args, *cvresults):
    """Report per-parameter linear-regression stats of cross-validation
    results as CSV on stdout, and draw one observed-vs-estimated
    scatter subplot per parameter.

    cvresults: one (x, y) array pair per parameter, in
    args.paramnames order.  (Python 2 code: xrange.)
    """
    h, w = rect(args.parameters)
    fw, fh = pp.rcParams['figure.figsize']
    figsize = h * fh, w * fw
    fig = pp.figure(figsize=figsize)
    fields = ['parameter','slope', 'intercept', 'R^2', 'P-value', 'error']
    writer = csv.DictWriter(sys.stdout, fields,
            dialect=csv.get_dialect('excel'))
    # Idiomatic replacement for writerow(dict(zip(fieldnames, fieldnames))).
    writer.writeheader()
    for i in xrange(args.parameters):
        name = args.paramnames[i]
        x, y = cvresults[i]
        slope, intercept, r, pvalue, err = linregress(x, y)
        row = { 'parameter' : name }
        row.update(zip(fields[1:], (slope, intercept, r ** 2, pvalue, err)))
        writer.writerow(row)
        # subplot indices are 1-based; passing i raised for i == 0.
        ax = pp.subplot(h, w, i + 1)
        ax.plot(x, y, ' o', c='white', figure=fig, axes=ax)
        xlim = x.min(), x.max()
        # Diagonal reference line: perfect estimate.
        ax.plot(xlim, xlim, 'r-', alpha=.75)
        pp.xlim(*xlim)
        pp.ylim(*xlim)
        pp.xlabel(r'observed', fontsize='small')
        pp.ylabel(r'estimated', fontsize='small')
        pp.title(sanetext(name), fontsize='small')
        pp.draw()
    fig.subplots_adjust(hspace=.5, wspace=.3)
    pp.draw()
    def handle(self, *args, **options):
        """Import GOVI stops from the CSV file given as first argument.

        Python 2 / Django management command: creates or updates
        UserStop rows (plus per-column SourceAttribute records) inside
        one reversion revision.
        """
        if (len(args) < 1):
            return 

        # Semicolon-separated, quote-less dialect used by the GOVI export.
        csv.register_dialect('quotescolon', quotechar='"', delimiter=';', doublequote=False, lineterminator='\n', quoting=csv.QUOTE_NONE)
        f = codecs.open(args[0], mode='rU') 
        stops = file.UnicodeDictReader(f, 'utf-8', dialect=csv.get_dialect('quotescolon'))

        with reversion.create_revision(): 
            source, created = Source.objects.get_or_create(source_id=u'govi', defaults={u'name': "GOVI"})          
            for stop in stops:
                # 'TimingPointName' may embed the city as "City, Stop name".
                split = unicode(stop['TimingPointName']).split(',')
                if len(split) > 1:
                    city = split[0]
                    name = split[1].lstrip()
                else:
                    city = stop['TimingPointTown'].capitalize()
                    name = stop['TimingPointName']
                # Coordinates are Rijksdriehoek (srid 28992); transform them.
                point = geo.transform_rd(Point(x=int(stop['LocationX_EW']), y=int(stop['LocationY_NS']), srid=28992))
        
                s, created = UserStop.objects.get_or_create(tpc=stop[u"TimingPointCode"], 
                                                            defaults={u'common_name' : name, u'common_city' : city, 'point' : point.wkt})
                
                # Get or create our source
                for attr in stop.keys():
                    self.get_create_update(SourceAttribute, {'stop' : s, 'source' : source, 'key' : attr.capitalize()}, {'value' : stop[attr]} )
                
            reversion.set_comment(u"GOVI Import")
        f.close()
示例#29
0
def print_info(args, cv=False):
    """Print run configuration as CSV rows on stdout.

    cv: True for a cross-validation run; bootstrap rows are only
    emitted for plain fits.  (Python 2 code: eagerly-evaluated map()
    used for side effects, bare trailing print.)
    """
    writer = csv.writer(sys.stdout, dialect=csv.get_dialect('excel'))
    rows = []
    rows.append(('Command', 'cross-validation' if cv else 'fit'))
    rows.append(('Data', args.datasetname))
    rows.append(('Date', datetime.now()))
    rows.append(('Mixture Truncation', ('yes' if args.truncated else 'no')))
    rows.append(('Mixture components', args.components))
    r = ['GP params']
    # map() used purely for its side effect of extending r (Py2 only).
    map(r.extend,args.gpparams.items())
    rows.append(r)
    rows.append(('Optimization method', 
            'fmin_l_bfgs_b' if args.bounds else 'fmin'))
    if cv is False:
        rows.append(('Bootstrap', ('yes' if args.bootstrap else 'no')))
        rows.append(('Bootstrap repetitions', args.bootstrap_reps))
        rows.append(('Bootstrap sample', (args.bootstrap_size if args.bootstrap_size
                else 'same as dataset')))
    if args.weights is not None:
        r = ['Weights']
        map(r.extend, zip(args.auxiliary, args.weights))
        rows.append(r)
    else:
        rows.append(('Weights', 'no'))
    if args.bounds is not None:
        r = ['Bounds']
        map(r.extend, map(lambda k,b : (k,) + b, args.paramnames, args.bounds))
        rows.append(r)
    else:
        rows.append(('Bounds', 'no'))
    writer.writerows(rows)
    print
    sys.stdout.flush()
示例#30
0
def makeHVsLon(outFilename, dataFile):
    """Plot two series from *dataFile* against longitude with GMT
    (psxy/pstext), appending PostScript to *outFilename*.

    Python 2 code (csv reader .next(), os.popen pipes to GMT).
    """
    out = ' >> ' + outFilename
    dialect = csv.get_dialect('excel')
    # NOTE(review): the input file handle is never closed explicitly.
    results = csv.reader(open(dataFile, 'r'), dialect=dialect)
    row = results.next()  # skip two header rows
    row = results.next()
    rows = []
    for row in results:
        # keep rows where column 12 exceeds 5 (meaning unconfirmed)
        if (int(row[12]) > 5):
            rows.append(row)
    # Blue points: column 4 vs column 5.
    gmt = os.popen(
        'psxy -B2WESn -P -JX6i/-6i -R-111.0/-102.7/24/61 -Sp.08i -G0/0/255 -K '
        + out, 'w')
    for row in rows:
        gmt.write("%s %s\n" % (row[4], row[5]))
    gmt.close()
    # Red triangles: column 4 vs column 14.
    gmt = os.popen('psxy -P -JX -R -St.1i -G255/0/0 -O -K ' + out, 'w')
    for row in rows:
        gmt.write("%s %s\n" % (row[4], row[14]))
    gmt.close()
    # Text labels.
    gmt = os.popen('pstext -P -JX -R -O -W' + out, 'w')
    for row in rows:
        thick = row[5].replace(' km', '')
        thick = '30'  # NOTE(review): unconditionally overrides the parsed value
        gmt.write(row[4] + "  " + thick + " 12 90 4 BL " + row[0] + '.' +
                  row[1] + "\n")
    gmt.close()
示例#31
0
    def __init__(self, *args, **kwds):
        """CSV preview dialog.

        Expects the data to preview in ``kwds['data']`` (popped before
        the remaining args/kwds are forwarded to wx.Dialog).
        """
        self.data = kwds.pop('data')

        kwds["style"] = \
            wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.THICK_FRAME

        wx.Dialog.__init__(self, *args, **kwds)

        self.csvwidgets = CsvParameterWidgets(self, None)
        # Initial preview dialect: simply the first registered dialect.
        dialect = csv.get_dialect(csv.list_dialects()[0])
        self.has_header = False

        style = wx.TE_MULTILINE | wx.TE_READONLY | wx.HSCROLL
        self.preview_textctrl = CSVPreviewTextCtrl(self, -1, style=style)

        self.button_cancel = wx.Button(self, wx.ID_CANCEL, "")
        self.button_apply = wx.Button(self, wx.ID_APPLY, "")
        self.button_ok = wx.Button(self, wx.ID_OK, "")

        self._set_properties()
        self._do_layout()

        # Render the initial preview with the chosen dialect.
        self.preview_textctrl.fill(data=self.data, dialect=dialect)

        self.Bind(wx.EVT_BUTTON, self.OnButtonApply, self.button_apply)
示例#32
0
def makeMagVsDist(outFilename, dataFile):
    """Bin events from a '|'-separated data file by (distance,
    magnitude) and plot the counts as 3-D bars with GMT psxyz,
    appending PostScript to *outFilename*.

    Python 2 code (csv reader .next(), os.popen pipe to GMT).
    """
    out = ' >> ' + outFilename
    #gmt= os.popen('psxy -B2WESn -P -JX6i/-6i -R20/100/5/10 -Sp.05i -G0 -K '+out, 'w')
    # csv.get_dialect() returns an immutable dialect, so the old
    # `dialect.delimiter = '|'` raised TypeError; pass the delimiter to
    # csv.reader as a format-parameter override instead.
    distMagBin = {}
    results = csv.reader(open(dataFile, 'r'), dialect='excel', delimiter='|')
    row = results.next()  # skip two header rows
    row = results.next()
    for row in results:
        distaz = DistAz(float(row[4]), float(row[5]), float(row[2]),
                        float(row[3]))
        # Distance bucketed into 10-degree bins (centred at x5),
        # magnitude rounded to one decimal.
        distBin = 5 + math.ceil(distaz.getDelta() / 10) * 10
        magBin = round(float(row[7]) * 10) / 10.0
        distMagBin["%i %f" % (distBin, magBin)] = distMagBin.setdefault(
            "%i %f" % (distBin, magBin), 0) + 1

    gmt = os.popen(
        'psxyz -R30/100/5/9/1/1000 -P -JX6.5 -JZ2.5i -So0.3ib1 -Ggray -W0.5p  -E150/50 -B10/1/20:"Num Eq for Dist, Mag":WSneZ'
        + out, 'w')
    for key in distMagBin:
        gmt.write("%s %i\n" % (key, distMagBin[key]))
    gmt.close()
示例#33
0
def sniff_dialect(sample, encoding, sep, skip_dialect, ui):
    """Detect the CSV dialect of a raw *sample* of the dataset.

    sample: undecoded bytes read from the dataset file.
    encoding: codec name used to decode the sample for sniffing.
    sep: optional known delimiter; it constrains sniffing and always
        wins over a detected delimiter.
    skip_dialect: when true, skip sniffing and use the excel dialect
        (with *sep* as delimiter when given).
    ui: progress/logging object with debug()/info().

    Returns a csv Dialect.  Side effect: may (re)register the
    'dataset_dialect' entry in the csv dialect registry.  Raises
    csv.Error when no delimiter can be determined.
    """
    t1 = time()
    try:
        if skip_dialect:
            ui.debug('investigate_encoding_and_dialect - skip dialect detect')
            if sep:
                csv.register_dialect('dataset_dialect', csv.excel,
                                     delimiter=sep)
            else:
                csv.register_dialect('dataset_dialect', csv.excel)
            dialect = csv.get_dialect('dataset_dialect')
        else:
            sniffer = csv.Sniffer()
            dialect = sniffer.sniff(sample.decode(encoding), delimiters=sep)
            ui.debug('investigate_encoding_and_dialect - seconds to detect '
                     'csv dialect: {}'.format(time() - t1))
    except csv.Error:
        # The stdlib sniffer gave up; fall back to the project Detector,
        # which also yields a resampled chunk suitable for re-sniffing.
        decoded_one = sample.decode(encoding)
        t2 = time()
        detector = Detector()
        delimiter, resampled = detector.detect(decoded_one)

        if len(delimiter) == 1:
            delimiter = delimiter[0]
            ui.info("Detected delimiter as %s" % delimiter)

            # An explicitly provided separator always wins.
            if sep is not None and sep != delimiter:
                delimiter = sep
        else:
            raise csv.Error(
                "The csv module failed to detect the CSV dialect. "
                "Try giving hints with the --delimiter argument, "
                "E.g  --delimiter=','"
            )

        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(resampled, delimiters=delimiter)
        ui.debug('investigate_encoding_and_dialect v2 - seconds to detect '
                 'csv dialect: {}'.format(time() - t2))

    if dialect.escapechar is None:
        # Normalise: re-register with plain-str delimiter/quotechar and
        # doublequote handling forced on.
        csv.register_dialect('dataset_dialect', dialect,
                             delimiter=str(dialect.delimiter),
                             quotechar=str(dialect.quotechar),
                             doublequote=True)
        dialect = csv.get_dialect('dataset_dialect')
    return dialect
示例#34
0
def sniff_dialect(sample, sep, skip_dialect, ui):
    """Detect the CSV dialect of *sample* (already-decoded text) and
    register it under the name 'dataset_dialect'.

    Args:
        sample: text sampled from the dataset file.
        sep: optional delimiter hint; restricts the sniffer's candidates.
        skip_dialect: when True, skip sniffing and use csv.excel
            (with *sep* as its delimiter when one is given).
        ui: logger-like object providing debug()/info().

    Returns:
        A csv dialect object.

    Raises:
        csv.Error: when no single delimiter can be determined at all.
    """
    started = time()
    try:
        if skip_dialect:
            ui.debug('investigate_encoding_and_dialect - skip dialect detect')
            overrides = {'delimiter': sep} if sep else {}
            csv.register_dialect('dataset_dialect', csv.excel, **overrides)
            dialect = csv.get_dialect('dataset_dialect')
        else:
            dialect = csv.Sniffer().sniff(sample, delimiters=sep)
            ui.debug('investigate_encoding_and_dialect - seconds to detect '
                     'csv dialect: {}'.format(time() - started))
    except csv.Error:
        # Sniffer failed; retry with the project's Detector fallback.
        restarted = time()
        candidates, resampled = Detector().detect(sample)

        if len(candidates) != 1:
            raise csv.Error(
                "The csv module failed to detect the CSV dialect. "
                "Try giving hints with the --delimiter argument, "
                "E.g  --delimiter=','"
            )
        found = candidates[0]
        ui.info("Detected delimiter as %s" % found)
        if sep is not None and sep != found:
            found = sep  # an explicit hint overrides detection

        dialect = csv.Sniffer().sniff(resampled, delimiters=found)
        ui.debug('investigate_encoding_and_dialect v2 - seconds to detect '
                 'csv dialect: {}'.format(time() - restarted))

    if dialect.escapechar is None:
        # Re-register with explicit quote doubling for downstream writers.
        csv.register_dialect('dataset_dialect', dialect,
                             delimiter=str(dialect.delimiter),
                             quotechar=str(dialect.quotechar),
                             doublequote=True)
        dialect = csv.get_dialect('dataset_dialect')
    return dialect
示例#35
0
def get_all_tweets(screen_name):
    """Download a user's recent timeline via Tweepy and dump it to CSV.

    Writes '<screen_name>_tweets.csv' with columns id/created_at/text/media_url
    using the registered 'unix' csv dialect.

    NOTE(review): relies on module-level globals `consumer_key`,
    `consumer_secret`, `access_key`, `access_secret` and `first_tweet_id`
    defined elsewhere in the file -- verify they are set before calling.
    """
    #Twitter only allows access to a users most recent 3240 tweets with this method

    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    #initialize a list to hold all the tweepy Tweets
    alltweets = []

    #make initial request for most recent tweets (200 is the maximum allowed count)
    # NOTE(review): count=1 here despite the comment above -- confirm intent.
    new_tweets = api.user_timeline(screen_name=screen_name, count=1)

    #save most recent tweets
    alltweets.extend(new_tweets)

    #save the id of the oldest tweet less one
    # NOTE(review): raises IndexError when the account has no tweets.
    oldest = alltweets[-1].id - 1

    #keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print("getting tweets before %s" % (oldest))

        #all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name,
                                       count=200,
                                       max_id=oldest,
                                       since_id=first_tweet_id)

        #save most recent tweets
        alltweets.extend(new_tweets)

        #update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % (len(alltweets)))

    #go through all found tweets and remove the ones with no images
    outtweets = []  #initialize master list to hold our ready tweets
    for tweet in alltweets:
        #not all tweets will have media url, so lets skip them
        try:
            print(tweet.entities['media'][0]['media_url'])
        except (NameError, KeyError):
            # Missing 'media' entity -> KeyError; an empty url is recorded.
            media_url = ''
        else:
            #got media_url - means add it to the output
            media_url = tweet.entities['media'][0]['media_url']
        outtweets.append([
            tweet.id_str, tweet.created_at,
            tweet.text.replace("\n", '\\n'), media_url
        ])

    #write the csv
    with open('%s_tweets.csv' % screen_name, 'w') as f:
        writer = csv.writer(f, dialect=csv.get_dialect("unix"))
        writer.writerow(["id", "created_at", "text", "media_url"])
        writer.writerows(outtweets)
def readCsv(pathToCsv, ui):
    """
    Reads csv file to a dict containing lists, each representing a column.
    The keys of the dictionary represent the column names, and the value
    contains the corresponding list of the column.

    Args:
        pathToCsv (string): a path to the csv file to be parsed
        ui: optional UI object with showErrorMessage(); may be None
    Returns:
        a dictionary with for every key the corresponding column list of
        data, an empty dict for an empty file, or None on parse failure
    """

    encoding = 'utf-8'

    with open(pathToCsv, 'rt', newline='', encoding=encoding) as fp:

        # Sniff the dialect from the first line only; fall back to excel.
        try:
            dialect = csv.Sniffer().sniff(fp.readline())
        except Exception:
            dialect = csv.get_dialect('excel')
        fp.seek(0)

        try:
            # BUG FIX: parse errors are raised while *iterating* the reader,
            # not when constructing it, so the full read must be inside the
            # try block (the old code had list(data) outside of it).
            rowDataList = list(csv.reader(fp, dialect=dialect))
        except Exception as e:
            errorMessage = ("Cannot process csv file, unknown format, see the log file for more information")
            if ui is not None:
                logging.exception("Cannot process csv file: %s", e)
                ui.showErrorMessage(errorMessage)
            return None

    # BUG FIX: an empty file previously raised IndexError on rowDataList[0].
    if not rowDataList:
        return {}

    headerList = rowDataList[0]
    dataTupleList = list(zip(*rowDataList[1:]))

    dataDict = {}

    try:
        for index, headerString in enumerate(headerList):
            # Headers beyond the data width (ragged rows) raise IndexError
            # here and are reported as a format error, as before.
            dataDict[headerString] = list(dataTupleList[index])

    except Exception as e:
        errorMessage = ("Cannot process csv file, unknown format")
        if ui is not None:
            logging.exception("Cannot process csv file: %s", e)
            ui.showErrorMessage(errorMessage)
        return None

    return dataDict
示例#37
0
    def set_dialect(self):
        """Resolve the dialect named in the 'use_dialect' setting.

        Returns the name unchanged when csv already knows it; otherwise
        tries to register a custom dialect from the 'dialects' setting.
        Returns None when registration fails.
        """
        name = self.settings.get('use_dialect')
        try:
            csv.get_dialect(name)
        except Exception:
            pass
        else:
            return name

        custom = self.settings.get('dialects')
        try:
            csv.register_dialect(name, **custom[name])
            print("DataConverter: Using custom dialect", name)
            return name

        except Exception:
            print("DataConverter: Couldn't register custom dialect named",
                  name)
            return None
示例#38
0
def determine_dialect(file):
    try:
        result = csv.Sniffer().sniff(file.read(1024))
        file.seek(0)
        return result
    except:
        file.seek(0)
        return csv.get_dialect('excel')
    def sniff(self, sample):
        """Return a csv dialect for *sample* (Python 2 code).

        Tries csv.Sniffer first; on failure builds a minimal 'barebones'
        dialect from the 'delimiter' setting, finally falling back to the
        built-in 'excel' dialect.
        """
        try:
            dialect = csv.Sniffer().sniff(sample)
            print dialect.delimiter
        except Exception as e:
            # NOTE(review): .pop() implies the setting holds a list/set of
            # delimiters and mutates it; if the default ',' (a str) is used,
            # str has no pop() and this raises AttributeError -- verify.
            delimiter = self.settings.get('delimiter', ',').pop()
            delimiter = bytes(
                delimiter)  # dialect definition takes a 1-char bytestring
            print "DataConverter had trouble sniffing:", e
            try:
                csv.register_dialect('barebones', delimiter=delimiter)
                dialect = csv.get_dialect('barebones')

            except Exception as e:
                dialect = csv.get_dialect('excel')

        return dialect
示例#40
0
文件: io.py 项目: spamlab-iee/os
 def sniff_dialect(self):
     """Sniff the dialect of a freshly read sample, defaulting to 'excel'.

     Stores the sample on self.sample and the resulting dialect data on
     self.dialect, which is also returned.
     """
     self.sample = self.read_sample()
     try:
         sniffed = self.csv_sniffer.sniff(self.sample)
         self.dialect = get_dialect_data(sniffed)
     except csv.Error:
         self.dialect = get_dialect_data(csv.get_dialect("excel"))
     return self.dialect
示例#41
0
文件: csvman.py 项目: azelenov/csvMan
 def detect_csv_dialect(self):
     """Sniff self.path's dialect, register it as 'auto', and print a
     summary (Python 2 code).

     Sets self.dialect to the registered name 'auto'; the line terminator
     is forced to '\\n' regardless of what the sniffer found.

     NOTE(review): the file handle `f` is never closed here.
     """
     try:
         f = open(self.path, "rb")
     except IOError:
         sys.exit('No such file or directory:' + self.path)
     # Sniff from the first KiB only.
     d = csv.Sniffer().sniff(f.read(1024))
     d.lineterminator = "\n"
     csv.register_dialect('auto', d)
     self.dialect = 'auto'
     print "Detected dialect:"
     print "delimiter=", csv.get_dialect(self.dialect).delimiter
     print "quote char=", csv.get_dialect(self.dialect).quotechar
     print "quoting=", csv.get_dialect(self.dialect).quoting
     print "line terminator=", csv.get_dialect(self.dialect)\
         .lineterminator.replace('\n', '\\n').replace('\r', "\\r")
     print "escape char=", csv.get_dialect(self.dialect).escapechar
     print "----------------------------"
示例#42
0
文件: test_csv.py 项目: zeus911/9miao
 def test_register_kwargs(self):
     """register_dialect(name, **kwargs) must apply the keyword overrides."""
     name = 'fedcba'
     csv.register_dialect(name, delimiter=';')
     try:
         # FIX: failUnless(x, msg) only checked truthiness (the '\t' was a
         # bogus "message" argument) and csv.reader('X;Y;Z', ...) iterates
         # the string character by character. Assert the real values, with
         # the input wrapped as a one-line iterable.
         self.assertEqual(csv.get_dialect(name).delimiter, ';')
         self.assertEqual(list(csv.reader(['X;Y;Z'], name)),
                          [['X', 'Y', 'Z']])
     finally:
         csv.unregister_dialect(name)
示例#43
0
def determine_dialect(file):
    try:
        result = csv.Sniffer().sniff(file.read(1024))
        file.seek(0)
        return result
    except:
        file.seek(0)
        return csv.get_dialect('excel')
示例#44
0
文件: test_csv.py 项目: B-Rich/breve
 def test_register_kwargs(self):
     """register_dialect(name, **kwargs) must apply the keyword overrides."""
     name = "fedcba"
     csv.register_dialect(name, delimiter=";")
     try:
         # FIX: failUnless(x, msg) only checked truthiness (the "\t" was a
         # bogus "message" argument) and csv.reader("X;Y;Z", ...) iterates
         # the string per character. Assert the actual expected values.
         self.assertEqual(csv.get_dialect(name).delimiter, ";")
         self.assertEqual(list(csv.reader(["X;Y;Z"], name)),
                          [["X", "Y", "Z"]])
     finally:
         csv.unregister_dialect(name)
示例#45
0
 def test_register_kwargs(self):
     """Keyword arguments to register_dialect must override the defaults."""
     name = 'fedcba'
     csv.register_dialect(name, delimiter=';')
     try:
         # FIX: the old failUnless(x, msg) form never compared anything --
         # it asserted truthiness with '\t' as the failure message, and
         # passed a raw string to csv.reader (iterated char by char).
         self.assertEqual(csv.get_dialect(name).delimiter, ';')
         self.assertEqual(list(csv.reader(['X;Y;Z'], name)),
                          [['X', 'Y', 'Z']])
     finally:
         csv.unregister_dialect(name)
示例#46
0
文件: utils.py 项目: aniquez/mod
def send_sms(to, msg, mask="ICICIPRU"):
    """Send SMS through the SMSGupShup HTTP gateway (Python 2 code).

    Bulk path: when `to` is a list with more than 10 entries, (number,
    message) pairs are written to a temporary CSV and uploaded via the
    gateway's xlsUpload method in buckets. Otherwise a single sendMessage
    request is issued (recipients comma-joined when a list).

    Args:
        to: phone number string, or list of numbers.
        msg: message string, or list of messages aligned with `to`.
        mask: sender id shown to recipients.

    Returns:
        The gateway's raw response body (of the last request issued).

    NOTE(review): account credentials are hard-coded below -- move them to
    configuration/secret storage. The temp CSV is never deleted, and if
    the bulk loop body never runs, the final `return res` raises NameError.
    """
    p = "http://enterprise.smsgupshup.com/GatewayAPI/rest"

    if len(to) > 10 and type(to) == type([]):
        register_openers()
        at_one_time = 100000

        # Broadcast a single message to every recipient.
        if type(msg) != type([]):
            msg = [msg for i in range(len(to))]

        # Python 2: zip() returns a list, so the slicing below is valid.
        zipped = zip(to, msg)
        for bucket in [zipped[i:i+at_one_time] for i in range(0, len(zipped)) if i%at_one_time == 0]:
            csv.register_dialect('gupshup', delimiter=',', quoting=csv.QUOTE_ALL)
            # Unique temp file name per bucket, derived from the clock.
            filename = os.path.join("/tmp/", "%s.csv" % hashlib.md5(str(time.time())).hexdigest())
            file_stream = open(filename,'wb')
            writer = UnicodeWriter(file_stream, dialect=csv.get_dialect('gupshup'))
            writer.writerow(["PHONE","MESSAGE"])
            if type(msg) == type([]):
                for i_to, i_msg in bucket:
                    writer.writerow([i_to, "%s" % i_msg])

            file_stream.close()
            wfile_stream = open(filename,'rb')

            datagen, headers = multipart_encode({
                "file": wfile_stream,
                'method' : 'xlsUpload',
                'filetype' : 'csv',
                'msg_type' : 'text',
                'mask' : mask,
                'v' : '1.1',
                'userid' : '2000058874',
                'password' : 'glitterfuck',
            })
            request = urllib2.Request(url=p, data=datagen, headers=headers)
            res = urllib2.urlopen(request).read()
            response_logger.info("Response %s" % (res))

    else:
        if type(to) == type([]):
            to = ",".join(to)

        data = {
            'msg' : msg,
            'send_to' : to,
            'v' : '1.1',
            'userid' : '2000058874',
            'password' : 'glitterfuck',
            'msg_type' : 'text',
            'method' : 'sendMessage',
            'mask' : mask,
        }
        querystring = urllib.urlencode(data)
        request = urllib2.Request(url=p, data=querystring)
        res = urllib2.urlopen(request).read()
        response_logger.info("Response %s" % (res))

    return res
示例#47
0
文件: DSV.py 项目: vishalbelsare/kdvs
 def test_init2(self):
     """A predefined tab delimiter must resolve to the 'excel-tab' dialect
     without creating the underlying table."""
     dsv1_fh = DSV.getHandle(self.num_dsv_path)
     # predefined delimiter, resolved successfully
     # NOTE: class does not check if delimiter is valid at this point
     dsv1 = DSV(self.dbm, self.testdb, dsv1_fh, dtname=self.test_dtname, delimiter='\t')
     self.assertFalse(dsv1.isCreated())
     self.assertEqual(csv.get_dialect('excel-tab'), dsv1.dialect)
     self.assertEqual('\t', dsv1.dialect.delimiter)
     dsv1.close()
    def sniff(self, sample):
        """Return a csv dialect for *sample*, short-circuiting to
        self.dialect when one is already set (Python 2 code).

        Falls back to a 'barebones' dialect built from the 'delimiter'
        setting, then to 'excel', when sniffing fails.
        """
        if self.dialect:
            return self.dialect

        try:
            dialect = csv.Sniffer().sniff(sample)
            print 'DataConverter is using this delimiter:', dialect.delimiter
            return dialect
        except Exception as e:
            print "DataConverter had trouble sniffing:", e
            delimiter = self.settings.get('delimiter', ',')
            delimiter = bytes(delimiter)  # dialect definition takes a 1-char bytestring
            try:
                csv.register_dialect('barebones', delimiter=delimiter)
                return csv.get_dialect('barebones')

            except Exception as e:
                return csv.get_dialect('excel')
示例#49
0
 def close(self):
     """Close this run context exactly once.

     Snapshots the registered csv dialects into plain attribute dicts (so
     they survive shelving), syncs and closes the shelve db, and closes
     the output stream when one was opened.
     """
     if not self.is_open:
         self._ui.debug('CLOSE CALLED ON CLOSED RUNCONTEXT')
         return
     self.is_open = False
     self._ui.debug('CLOSE CALLED ON RUNCONTEXT')
     dataset_dialect = csv.get_dialect('dataset_dialect')
     writer_dialect = csv.get_dialect('writer_dialect')
     attr_names = ['delimiter', 'doublequote', 'escapechar',
                   'lineterminator', 'quotechar', 'quoting',
                   'skipinitialspace', 'strict']

     def snapshot(dialect):
         # Keep only the attributes the dialect actually defines.
         return {name: getattr(dialect, name) for name in attr_names
                 if hasattr(dialect, name)}

     self.dialect = snapshot(dataset_dialect)
     self.writer_dialect = snapshot(writer_dialect)
     self.db.sync()
     self.db.close()
     if self.out_stream is not None:
         self.out_stream.close()
def read_xsv(
    file: IO,
    dialect: str,
    fieldnames: List[str] = None,
    first_line_is_column_header: bool = True,
    discard: int = None,
    load_at_most: int = None,
) -> Iterable[Dict]:
    """Returns an iterable of dict. Must be iterated while file is still open.

    Args:
        file:
            An open file.
        dialect:
            As used in built-in module `csv`.
        fieldnames:
            Explicit column names; mutually exclusive with
            first_line_is_column_header=True.
        first_line_is_column_header:
            If True, parses first line as column headers.
        discard:
            Non-negative integer or None. Initial rows of _data_ to discard.
        load_at_most:
            Non-negative integer or None. Rows of _data_ to load.

    Notes:
        Use 'excel' dialect for CSV. Use 'excel-tab' for TSV.

    Warnings:
        Must be iterated while file is still open.
    """
    kwargs = {
        'fieldnames': fieldnames,
        'dialect': dialect,
    }

    if not first_line_is_column_header and fieldnames is None:
        # use 'Column X' as fieldnames like in OpenRefine
        # FIX: readline(1) returned at most ONE character (not one line),
        # and seek(-1) defaults to SEEK_SET, which is invalid (negative
        # offset) and raises on text files. Read the full first line to
        # count columns, then rewind so DictReader sees every row.
        first_line = file.readline()
        file.seek(0)
        delimiter = csv.get_dialect(dialect).delimiter
        num_cols = len(first_line.split(delimiter))
        kwargs['fieldnames'] = [f'Column {i + 1}' for i in range(num_cols)]

    if first_line_is_column_header and fieldnames is not None:
        raise NotImplementedError(
            "Changing column names isn't supported for simplicity")

    # Drop None-valued entries so DictReader applies its own defaults
    # (inlined from the project-local `select_not_null` helper).
    reader = csv.DictReader(
        file, **{k: v for k, v in kwargs.items() if v is not None})

    stop = None
    if load_at_most is not None:
        stop = load_at_most
        if discard is not None:
            stop += discard

    return islice(reader, discard, stop)
示例#51
0
def test_investigate_encoding_and_dialect():
    """investigate_encoding_and_dialect must detect the fixture's
    iso-8859-2 encoding and register a 'dataset_dialect' with CRLF line
    endings, double-quote quoting and a comma delimiter.

    NOTE(review): depends on the project fixture
    tests/fixtures/windows_encoded.csv and the project's UI helper.
    """
    with UI(None, logging.DEBUG, stdout=False) as ui:
        data = 'tests/fixtures/windows_encoded.csv'
        encoding = investigate_encoding_and_dialect(data, None, ui)
        dialect = csv.get_dialect('dataset_dialect')
        assert encoding == 'iso-8859-2'
        assert dialect.lineterminator == '\r\n'
        assert dialect.quotechar == '"'
        assert dialect.delimiter == ','
示例#52
0
def test_investigate_encoding_and_dialect():
    """Detecting the windows-encoded fixture must yield iso-8859-2 and a
    registered 'dataset_dialect' of comma-delimited, double-quoted CRLF
    csv.

    NOTE(review): requires the project fixture
    tests/fixtures/windows_encoded.csv and the project's UI class.
    """
    with UI(None, logging.DEBUG, stdout=False) as ui:
        data = 'tests/fixtures/windows_encoded.csv'
        encoding = investigate_encoding_and_dialect(data, None, ui)
        dialect = csv.get_dialect('dataset_dialect')
        assert encoding == 'iso-8859-2'
        assert dialect.lineterminator == '\r\n'
        assert dialect.quotechar == '"'
        assert dialect.delimiter == ','
示例#53
0
文件: fileupload.py 项目: BBie/amcat
    def get_reader(self, reader_class=namedtuple_csv_reader):
        """Build a reader over the uploaded file.

        xlsx uploads are handed to the dedicated xlsx reader (only valid
        with the default reader_class). For CSV, the dialect comes from
        the form's cleaned data, or is sniffed from the first line when
        set to 'autodetect'.
        """
        file = self.decode_file(self.files['file'])
        
        if file.name.endswith(".xlsx"):
            if reader_class != namedtuple_csv_reader:
                raise ValueError("Cannot handle xlsx files with non-default reader, sorry!")
            return namedtuple_xlsx_reader(file)

        d = self.cleaned_data['dialect'] or "autodetect"

        if d == 'autodetect':
            dialect = csv.Sniffer().sniff(file.readline())
            file.seek(0)
            # Only tab/comma/semicolon are plausible; anything else is
            # treated as a mis-sniff and falls back to the excel dialect.
            if dialect.delimiter not in "\t,;":
                dialect = csv.get_dialect('excel')
        else:
            dialect = csv.get_dialect(d)

        return reader_class(file, dialect=dialect)
示例#54
0
 def go(self):
     """Launch the shovel worker in a separate process and return it.

     The registered 'dataset_dialect' is converted to a picklable form so
     it can cross the process boundary.
     """
     picklable = SerializableDialect.from_dialect(
         csv.get_dialect('dataset_dialect'))
     worker = multiprocessing.Process(
         target=self._shove,
         args=[self.batch_gen_args, picklable, self.queue],
         name='Shovel_Proc')
     self.p = worker
     worker.start()
     return worker
示例#55
0
def set_dialect(dialectname, user_dialects):
    '''Get a CSV dialect from csv.dialects or a register one from passed dict.

    Args:
        dialectname: name of a built-in/registered dialect, or a key into
            user_dialects describing a custom one.
        user_dialects: mapping of dialect name -> kwargs for
            csv.register_dialect; 'quoting' may be given as a QUOTE_*
            constant name string.

    Returns:
        The usable dialect name, or None when registration fails.
    '''
    try:
        csv.get_dialect(dialectname)
        return dialectname

    except _csv.Error:
        try:
            # FIX: work on a copy -- the old .pop() mutated the caller's
            # user_dialects entry, losing 'quoting' for later calls.
            options = dict(user_dialects[dialectname])
            user_quoting = options.pop('quoting', 'QUOTE_MINIMAL')

            # Unknown QUOTE_* names silently fall back to QUOTE_MINIMAL.
            quoting = getattr(csv, user_quoting, csv.QUOTE_MINIMAL)

            csv.register_dialect(dialectname, quoting=quoting, **options)

            print("DataConverter: Using custom dialect", dialectname)
            return dialectname

        except _csv.Error:
            print("DataConverter: Couldn't register custom dialect named", dialectname)
            return None
示例#56
0
文件: utils.py 项目: turicas/rows
def pgexport(
    database_uri,
    table_name,
    filename,
    encoding="utf-8",
    dialect=csv.excel,
    callback=None,
    timeout=0.1,
    chunk_size=8388608,
):
    """Export data from PostgreSQL into a CSV file using the fastest method

    Required: psql command

    Args:
        database_uri: PostgreSQL connection URI handed to `psql`.
        table_name: table to export.
        filename: destination file (opened via open_compressed).
        encoding: client encoding for the COPY.
        dialect: csv dialect instance, or a registered dialect name.
        callback: optional fn(written, total_written) progress hook.
        timeout: unused in this code path; kept for interface compatibility.
        chunk_size: bytes read from psql's stdout per iteration.

    Returns:
        dict with 'bytes_written'.

    Raises:
        RuntimeError: if psql is missing, fails, or the pipe breaks.
    """

    if isinstance(dialect, six.text_type):
        dialect = csv.get_dialect(dialect)

    # Prepare the `psql` command to be executed to export data
    command = get_psql_copy_command(
        database_uri=database_uri,
        direction="TO",
        encoding=encoding,
        header=None,  # Needed when direction = 'TO'
        table_name=table_name,
        dialect=dialect,
    )
    fobj = open_compressed(filename, mode="wb")
    try:
        process = subprocess.Popen(
            shlex.split(command),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        total_written = 0
        data = process.stdout.read(chunk_size)
        while data != b"":
            written = fobj.write(data)
            total_written += written
            if callback:
                callback(written, total_written)
            data = process.stdout.read(chunk_size)
        stdout, stderr = process.communicate()
        if stderr != b"":
            raise RuntimeError(stderr.decode("utf-8"))

    except FileNotFoundError:
        raise RuntimeError("Command `psql` not found")

    except BrokenPipeError:
        raise RuntimeError(process.stderr.read().decode("utf-8"))

    finally:
        # FIX: the output file was never closed, leaking the handle and --
        # for compressed outputs -- risking unflushed trailing data.
        fobj.close()

    return {"bytes_written": total_written}
示例#57
0
 def test_registry(self):
     """Registering a dialect class must expose it via get_dialect and
     list_dialects, honoring the subclass's delimiter override."""
     class myexceltsv(csv.excel):
         delimiter = "\t"
     name = "myexceltsv"
     expected_dialects = sorted(csv.list_dialects() + [name])
     csv.register_dialect(name, myexceltsv)
     self.addCleanup(csv.unregister_dialect, name)
     self.assertEqual(csv.get_dialect(name).delimiter, '\t')
     self.assertEqual(expected_dialects, sorted(csv.list_dialects()))
示例#58
0
    def get_reader(self, reader_class=namedtuple_csv_reader):
        """Build a reader over the uploaded file.

        xlsx uploads go to the dedicated xlsx reader (default reader_class
        only). The CSV dialect comes from cleaned form data, or is sniffed
        from the first line when absent/'autodetect'; an optional encoding
        choice is honored only for the default csv reader.

        NOTE(review): ENCODINGS appears to be a project-level index->codec
        mapping -- verify the form stores an index into it.
        """
        f = self.files['file']
        
        if f.name.endswith(".xlsx"):
            if reader_class != namedtuple_csv_reader:
                raise Exception("Cannot handle xlsx files with non-default reader, sorry!")
            return namedtuple_xlsx_reader(f)
            
        d = self.cleaned_data['dialect']
        if not d: d = "autodetect"
        if d == 'autodetect':
            dialect = csv.Sniffer().sniff(f.readline())
            f.seek(0)
            # Implausible sniffed delimiters fall back to the excel dialect.
            if dialect.delimiter not in "\t,;":
                dialect = csv.get_dialect('excel')
        else:
            dialect = csv.get_dialect(d)

        enc = self.cleaned_data['encoding']
        encoding = {'encoding' : ENCODINGS[int(enc)]} if enc and reader_class == namedtuple_csv_reader else {}
        return reader_class(f, dialect=dialect, **encoding)
示例#59
0
    def initialize(self):
        """Initialize CSV source stream:
        
        #. perform autodetection if required:
            #. detect encoding from a sample data (if requested)
            #. detect whether CSV has headers from a sample data (if requested)
        #.  create CSV reader object
        #.   read CSV headers if requested and initialize stream fields
        
        """

        self.file, self.close_file = base.open_resource(self.resource)

        handle = None
        
        if self._autodetection:
            
            sample = self.file.read(self.sample_size)

            # Encoding test
            if self.detect_encoding and type(sample) == unicode:
                self.encoding = "utf-8"

            if self.detect_header:
                sample = sample.encode('utf-8')
                sniffer = csv.Sniffer()
                self.read_header = sniffer.has_header(sample)

            self.file.seek(0)
            
        if self.dialect:
            if type(self.dialect) == str:
                dialect = csv.get_dialect(self.dialect)
            else:
                dialect = self.dialect
                
            self.reader_args["dialect"] = dialect

        # self.reader = csv.reader(handle, **self.reader_args)
        self.reader = UnicodeReader(self.file, encoding = self.encoding, 
                                    **self.reader_args)

        if self.skip_rows:
            for i in range(0, self.skip_rows):
                self.reader.next()
                
        # Initialize field list
        if self.read_header:
            field_names = self.reader.next()
            
            fields = [ (name, "string", "default") for name in field_names]
            
            self._fields = brewery.metadata.FieldList(fields)