def submitdata(address):
    """Convert a TSV file of per-class scores into one-hot rows and write
    them to ``submit_data.csv`` in the current directory.

    For each input row, the column holding the largest float value becomes
    '1' and every other column becomes '0' (ties keep the first maximum,
    matching the original strict ``>`` comparison).

    :param address: path to the tab-separated input file.
    """
    csv.register_dialect('mydialect', delimiter='\t', quoting=csv.QUOTE_ALL)
    try:
        with open(address, newline='', encoding='utf8') as submit:
            rows = []
            for row in csv.reader(submit, 'mydialect'):
                if not row:
                    continue  # robustness: skip blank lines instead of crashing
                # max() returns the first maximal index, preserving the
                # original tie-breaking behaviour.
                best = max(range(len(row)), key=lambda k: float(row[k]))
                rows.append(['1' if j == best else '0' for j in range(len(row))])
    finally:
        # Always drop the dialect, even if the input file is malformed.
        csv.unregister_dialect('mydialect')
    with open('submit_data.csv', 'w', newline='') as csv_submit:
        csv.writer(csv_submit).writerows(rows)
def test_dialect_apply(self):
    """Dialects may be passed as a class, an instance, a registered name,
    omitted entirely, or combined with keyword overrides."""

    class TabDialect(csv.excel):
        delimiter = "\t"

    class ColonDialect(csv.excel):
        delimiter = ":"

    class PipeDialect(csv.excel):
        delimiter = "|"

    class LambdaDialect(csv.excel):
        delimiter = "\u039B"

    class Unspecified:
        # A class to pass as dialect but with no dialect attributes.
        pass

    csv.register_dialect('testC', PipeDialect)
    try:
        self.compare_dialect_123("1,2,3\r\n")
        self.compare_dialect_123("1,2,3\r\n", dialect=None)
        self.compare_dialect_123("1,2,3\r\n", dialect=Unspecified)
        self.compare_dialect_123("1\t2\t3\r\n", TabDialect)
        self.compare_dialect_123("1:2:3\r\n", dialect=ColonDialect())
        self.compare_dialect_123("1|2|3\r\n", dialect='testC')
        self.compare_dialect_123("1;2;3\r\n", dialect=TabDialect, delimiter=';')
        self.compare_dialect_123("1\u039B2\u039B3\r\n", dialect=LambdaDialect)
    finally:
        csv.unregister_dialect('testC')
def test_dialect_apply(self):
    """Dialects can be given as a class, an instance, a registered name,
    or combined with keyword overrides."""

    class tab_dialect(csv.excel):
        delimiter = '\t'

    class colon_dialect(csv.excel):
        delimiter = ':'

    class pipe_dialect(csv.excel):
        delimiter = '|'

    class lambda_dialect(csv.excel):
        delimiter = 'Λ'

    csv.register_dialect('testC', pipe_dialect)
    try:
        self.compare_dialect_123('1,2,3\r\n')
        self.compare_dialect_123('1\t2\t3\r\n', tab_dialect)
        self.compare_dialect_123('1:2:3\r\n', dialect=colon_dialect())
        self.compare_dialect_123('1|2|3\r\n', dialect='testC')
        self.compare_dialect_123('1;2;3\r\n', dialect=tab_dialect, delimiter=';')
        self.compare_dialect_123('1Λ2Λ3\r\n', dialect=lambda_dialect)
    finally:
        csv.unregister_dialect('testC')
def with_csv_dialect(name, **kwargs):
    """
    Context manager to temporarily register a CSV dialect for parsing CSV.

    Parameters
    ----------
    name : str
        The name of the dialect.
    kwargs : mapping
        The parameters for the dialect.

    Raises
    ------
    ValueError : the name of the dialect conflicts with a builtin one.

    See Also
    --------
    csv : Python's CSV library.
    """
    import csv

    # The builtin dialects must never be shadowed.
    if name in ("excel", "excel-tab", "unix"):
        raise ValueError("Cannot override builtin dialect.")

    csv.register_dialect(name, **kwargs)
    try:
        yield
    finally:
        csv.unregister_dialect(name)
def test_write_dialect(tmp_path: Path) -> None:
    """Write a CSV file in the 'unixpasswd' dialect."""
    rows = [
        ["bin", "x", "2", "2", "bin", "/bin", "/usr/sbin/nologin"],
        [
            "hplip",
            "x",
            "117",
            "7",
            "HPLIP user,,,",
            "/var/run/hplip",
            "/bin/false",
        ],
    ]
    csv.register_dialect("unixpasswd", delimiter=":", quoting=csv.QUOTE_NONE)
    out_path = tmp_path / "csv_passwd"
    with open(out_path, "w", newline="") as out_file:
        csv.writer(out_file, "unixpasswd").writerows(rows)
    csv.unregister_dialect("unixpasswd")
    expected = (
        "bin:x:2:2:bin:/bin:/usr/sbin/nologin\n"
        "hplip:x:117:7:HPLIP user,,,:/var/run/hplip:/bin/false\n"
    )
    with open(out_path) as sample:
        assert sample.read() == expected
def test_register_kwargs(self): name = 'fedcba' csv.register_dialect(name, delimiter=';') try: self.failUnless(csv.get_dialect(name).delimiter, '\t') self.failUnless(list(csv.reader('X;Y;Z', name)), ['X', 'Y', 'Z']) finally: csv.unregister_dialect(name)
def test_register_kwargs(self): name = "fedcba" csv.register_dialect(name, delimiter=";") try: self.failUnless(csv.get_dialect(name).delimiter, "\t") self.failUnless(list(csv.reader("X;Y;Z", name)), ["X", "Y", "Z"]) finally: csv.unregister_dialect(name)
def test_read_dialect() -> None:
    """Read a CSV file in the 'unixpasswd' dialect.

    Uses a subclass of `Dialect`.
    """

    class UnixPasswdDialect(csv.Dialect):
        """Dialect for Unix `passwd` files."""

        # Every attribute is set explicitly because the base `Dialect`
        # class defaults them all to None.
        delimiter = ":"
        quotechar = '"'
        escapechar = None
        doublequote = True
        skipinitialspace = False
        lineterminator = "\r\n"
        quoting = csv.QUOTE_NONE

    csv.register_dialect("unixpasswd", UnixPasswdDialect)
    columns = [
        "Login Name",
        "Encrypted Password",
        "User ID",
        "Group ID",
        "User Name or Comment",
        "Home Directory",
        "Command Interpreter",
    ]
    # The sample file lives next to this test module.
    sample_path = Path(__file__).parent / "csv_passwd"
    with open(sample_path, newline="") as sample_file:
        passwd_rows = list(
            csv.DictReader(sample_file, fieldnames=columns, dialect="unixpasswd"))
    csv.unregister_dialect("unixpasswd")
    assert passwd_rows == [
        {
            "Login Name": "bin",
            "Encrypted Password": "******",
            "User ID": "2",
            "Group ID": "2",
            "User Name or Comment": "bin",
            "Home Directory": "/bin",
            "Command Interpreter": "/usr/sbin/nologin",
        },
        {
            "Login Name": "hplip",
            "Encrypted Password": "******",
            "User ID": "117",
            "Group ID": "7",
            "User Name or Comment": "HPLIP system user,,,",
            "Home Directory": "/var/run/hplip",
            "Command Interpreter": "/bin/false",
        },
    ]
def test_dialect_str(self):
    """read_csv honours a csv dialect registered by name.

    Fixed: the dialect is now unregistered in a ``finally`` block so a
    failing read or assertion no longer leaks the 'mydialect'
    registration into later tests; the unused ``dia`` local (register
    returns None) is gone.
    """
    data = """\
fruit:vegetable
apple:brocolli
pear:tomato
"""
    exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["brocolli", "tomato"]})
    csv.register_dialect("mydialect", delimiter=":")
    try:
        df = self.read_csv(StringIO(data), dialect="mydialect")
        tm.assert_frame_equal(df, exp)
    finally:
        csv.unregister_dialect("mydialect")
def read_test(path):
    """Read a headered TSV file and return the whitespace-split tokens of
    the second column of every data row.

    Fixed: the header is skipped with ``next()`` instead of a first-row
    flag, and the dialect is unregistered in a ``finally`` block so it no
    longer leaks when the file is malformed.

    :param path: path to a tab-separated file whose first row is a header.
    :returns: list of token lists, one per data row.
    """
    data = []
    csv.register_dialect('my', delimiter='\t', quoting=csv.QUOTE_ALL)
    try:
        with open(path) as tsvfile:
            reader = csv.reader(tsvfile, "my")
            next(reader, None)  # skip the header row
            for line in reader:
                data.append(line[1].strip().split(" "))
    finally:
        csv.unregister_dialect('my')
    return data
def test_registry(self):
    """Registering a dialect adds it to list_dialects(); unregistering
    restores the original set."""

    class myexceltsv(csv.excel):
        delimiter = "\t"

    name = "myexceltsv"
    expected_dialects = sorted(csv.list_dialects() + [name])
    csv.register_dialect(name, myexceltsv)
    try:
        self.failUnless(isinstance(csv.get_dialect(name), myexceltsv))
        self.assertEqual(expected_dialects, sorted(csv.list_dialects()))
    finally:
        csv.unregister_dialect(name)
def test_dialect_str(self):
    """read_csv honours a csv dialect registered by name.

    Fixed: the dialect is unregistered in a ``finally`` block so a
    failing read or assertion no longer leaks the 'mydialect'
    registration; the unused ``dia`` local (register_dialect returns
    None) is removed.
    """
    data = """\
fruit:vegetable
apple:brocolli
pear:tomato
"""
    exp = DataFrame({
        'fruit': ['apple', 'pear'],
        'vegetable': ['brocolli', 'tomato']
    })
    csv.register_dialect('mydialect', delimiter=':')
    try:
        df = self.read_csv(StringIO(data), dialect='mydialect')
        tm.assert_frame_equal(df, exp)
    finally:
        csv.unregister_dialect('mydialect')
def read_data(path):
    """Read a headered TSV file of (label, text) rows.

    Fixed: the header is skipped with ``next()`` instead of a first-row
    flag, and the dialect is unregistered in a ``finally`` block so it
    no longer leaks when reading fails.

    :param path: path to a tab-separated file whose first row is a header
        and whose data rows hold an integer label in column 0 and text in
        column 1.
    :returns: tuple ``(data, label)`` of parallel lists.
    """
    data = []
    label = []
    csv.register_dialect('my', delimiter='\t', quoting=csv.QUOTE_ALL)
    try:
        with open(path) as tsvfile:
            reader = csv.reader(tsvfile, "my")
            next(reader, None)  # skip the header row
            for line in reader:
                data.append(line[1])
                label.append(int(line[0]))
    finally:
        csv.unregister_dialect('my')
    return data, label
def dbf2txt(self, fname):
    """Convert one DBF file to text output.

    Registers the 1S dialect, writes the header and body records via
    ``dbf2hdr``/``dbf2body``, then drops the dialect registration.
    """
    src = self._pathFrom + "\\" + fname
    dst = self._pathTo + "\\" + fname
    csv.register_dialect("_1s", _1S_Dialect())
    db = dbfpy.dbf.Dbf()
    db.openFile(src + dbf_ext)
    ver_stamp, act_cnt = self.dbf2hdr(db, dst)
    self.dbf2body(db, dst, ver_stamp, act_cnt)
    db.close()
    csv.unregister_dialect("_1s")
def load_data2(path):
    """Load a two-column (en, fr) TSV file into parallel lists.

    Rows that are missing a column or have an empty cell are skipped.
    The count of kept rows is printed, preserving the original behaviour.

    Fixed: the bare ``except`` (which silently swallowed every error,
    including KeyboardInterrupt) is narrowed to the one expected failure,
    a short row raising ``IndexError``.

    :param path: path to the tab-separated input file.
    :returns: tuple ``(en, fr)`` of parallel sentence lists.
    """
    csv.register_dialect('mydialect', delimiter='\t', quoting=csv.QUOTE_ALL)
    input_file = os.path.join(path)
    en = []
    fr = []
    count = 0
    with open(input_file, "r") as f:
        for line in csv.reader(f, 'mydialect'):
            try:
                if line[0] != '' and line[1] != '':
                    en.append(line[0])
                    fr.append(line[1])
                    count += 1
            except IndexError:
                # Short row: keep scanning the rest of the file.
                continue
    print(count)
    csv.unregister_dialect('mydialect')
    return en, fr
def test_dialect_str(all_parsers):
    """read_csv honours a dialect registered by name, warning when the
    dialect delimiter conflicts with the parser default.

    Fixed: the dialect is unregistered in a ``finally`` block so a
    failing assertion no longer leaks the registration into other tests.
    """
    dialect_name = "mydialect"
    parser = all_parsers
    data = """\
fruit:vegetable
apple:broccoli
pear:tomato
"""
    exp = DataFrame({
        "fruit": ["apple", "pear"],
        "vegetable": ["broccoli", "tomato"]
    })
    csv.register_dialect(dialect_name, delimiter=":")
    try:
        # Conflicting dialect delimiter.
        with tm.assert_produces_warning(ParserWarning):
            df = parser.read_csv(StringIO(data), dialect=dialect_name)
        tm.assert_frame_equal(df, exp)
    finally:
        csv.unregister_dialect(dialect_name)
def read(self, file_name, sheet_name='', read_type='r', dict_name='', to_json=True, **kwargs):
    """Read a file under ``self.base_dir`` and return parsed content.

    Python 2 code: uses the ``print`` statement and byte-string
    ``.decode()/.encode()`` re-encoding — do not run under Python 3.

    :param file_name: file to read; its extension selects the parser.
    :param sheet_name: sheet to return for Excel ('xlsx'/'xls') files.
    :param read_type: mode passed to ``open`` (default ``'r'``).
    :param dict_name: optional sub-directory under ``self.base_dir``.
    :param to_json: for delimited files, return dict rows when True,
        plain lists when False.
    :param kwargs: ``sep`` overrides the delimiter for delimited files.
    :returns: an xlrd sheet, a list of rows, a parsed JSON object, the
        raw text, or ``None`` when the file does not exist.
    """
    file_type = file_name.split('.')[-1]
    url = os.path.join(self.base_dir, dict_name, file_name)
    if os.path.exists(url) is False:
        print 'File not exists, %s. Please check.' % url
        return None
    # Excel workbooks are delegated to xlrd.
    if file_type in ['xlsx', 'xls']:
        data = xlrd.open_workbook(url)
        return data.sheet_by_name(sheet_name)
    f = open(url, read_type)
    text = f.read()
    f.close()
    if file_type in ['csv', 'tsv'] or 'sep' in kwargs:
        # Guess the source encoding so each cell can be re-encoded UTF-8.
        encoding = chardet.detect(text)['encoding']
        sep = ',' if file_type == 'csv' else '\t'
        if 'sep' in kwargs:
            sep = kwargs['sep']
        delimiter = {'delimiter': sep}
        # NOTE(review): this second file handle is never closed.
        f = open(url, read_type)
        csv.register_dialect('my_dialect', **delimiter)
        cons = csv.reader(f, 'my_dialect')
        items = []
        keys = next(cons)  # first row is the header
        for c in cons:
            if to_json:
                item = {}
                for i in range(len(keys)):
                    k = keys[i].decode(encoding).encode('utf-8')
                    item[k] = c[i].decode(encoding).encode('utf-8')
                items.append(item)
            else:
                items.append([x.decode(encoding).encode('utf-8') for x in c])
        csv.unregister_dialect('my_dialect')
        return items
    if file_type == 'json':
        return json.loads(text)
    return text
def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" class testB(csv.excel): delimiter = ":" class testC(csv.excel): delimiter = "|" csv.register_dialect('testC', testC) try: fileobj = StringIO() writer = csv.writer(fileobj) writer.writerow([1, 2, 3]) self.assertEqual(fileobj.getvalue(), "1,2,3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, testA) writer.writerow([1, 2, 3]) self.assertEqual(fileobj.getvalue(), "1\t2\t3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect=testB()) writer.writerow([1, 2, 3]) self.assertEqual(fileobj.getvalue(), "1:2:3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect='testC') writer.writerow([1, 2, 3]) self.assertEqual(fileobj.getvalue(), "1|2|3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect=testA, delimiter=';') writer.writerow([1, 2, 3]) self.assertEqual(fileobj.getvalue(), "1;2;3\r\n") finally: csv.unregister_dialect('testC')
def test_dialect_apply(self):
    """Dialects may be given as a class, an instance, a registered name,
    or combined with a keyword override."""

    class TabDialect(csv.excel):
        delimiter = "\t"

    class ColonDialect(csv.excel):
        delimiter = ":"

    class PipeDialect(csv.excel):
        delimiter = "|"

    class LambdaDialect(csv.excel):
        delimiter = "\u039B"

    csv.register_dialect('testC', PipeDialect)
    try:
        self.compare_dialect_123("1,2,3\r\n")
        self.compare_dialect_123("1\t2\t3\r\n", TabDialect)
        self.compare_dialect_123("1:2:3\r\n", dialect=ColonDialect())
        self.compare_dialect_123("1|2|3\r\n", dialect='testC')
        self.compare_dialect_123("1;2;3\r\n", dialect=TabDialect, delimiter=';')
        self.compare_dialect_123("1\u039B2\u039B3\r\n", dialect=LambdaDialect)
    finally:
        csv.unregister_dialect('testC')
def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" class testB(csv.excel): delimiter = ":" class testC(csv.excel): delimiter = "|" csv.register_dialect('testC', testC) try: fileobj = StringIO() writer = csv.writer(fileobj) writer.writerow([1,2,3]) self.assertEqual(fileobj.getvalue(), "1,2,3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, testA) writer.writerow([1,2,3]) self.assertEqual(fileobj.getvalue(), "1\t2\t3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect=testB()) writer.writerow([1,2,3]) self.assertEqual(fileobj.getvalue(), "1:2:3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect='testC') writer.writerow([1,2,3]) self.assertEqual(fileobj.getvalue(), "1|2|3\r\n") fileobj = StringIO() writer = csv.writer(fileobj, dialect=testA, delimiter=';') writer.writerow([1,2,3]) self.assertEqual(fileobj.getvalue(), "1;2;3\r\n") finally: csv.unregister_dialect('testC')
# Read 'example7.csv' with the 'myDialect' dialect.
# NOTE(review): 'myDialect' must have been registered earlier in the file;
# it is not registered in this excerpt — confirm before running standalone.
with open('example7.csv', newline='') as myFile:
    reader = csv.reader(myFile, dialect='myDialect')
    for row in reader:
        print(row)

### 4. Write a python program using csv.unregister_dialect(name)
import csv

# Register a comma-delimited dialect, use it, then remove it.
csv.register_dialect("comma", delimiter=",")

with open('example7.csv', newline='') as myFile:
    reader = csv.reader(myFile, dialect='comma')
    for row in reader:
        print(row)

csv.unregister_dialect('comma')

### 5. Write a python program using csv.get_dialect
# get_dialect() returns the dialect object registered under the name.
csv.register_dialect("comma", delimiter=",")
dialect = csv.get_dialect('comma')

with open('example7.csv', newline='') as myFile:
    reader = csv.reader(myFile, dialect=dialect)
    for row in reader:
        print(row)

### 6. Write a python program using csv.list_dialects()
# NOTE(review): the loop variable shadows the builtin `list`; rename it
# when this code is next touched.
for list in csv.list_dialects():
    print(list)
def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" class testB(csv.excel): delimiter = ":" class testC(csv.excel): delimiter = "|" csv.register_dialect('testC', testC) try: fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj) writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1,2,3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, testA) writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1\t2\t3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect=testB()) writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1:2:3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect='testC') writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1|2|3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect=testA, delimiter=';') writer.writerow([1, 2, 3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1;2;3\r\n") finally: fileobj.close() os.unlink(name) finally: csv.unregister_dialect('testC')
def cleanup() -> None:
    """Unregister the 'custom' CSV dialect registered during setup.

    Raises csv.Error if 'custom' is not currently registered.
    """
    csv.unregister_dialect('custom')
req = urllib.Request(url=datei, headers = { 'User-Agent' : 'elmar2goodrelations/1.0' }) dat2 = urllib.urlopen(req, timeout=self.paramenter.timeout) reader = csv.reader(dat2, "short_life") self.tmpProdMetaData = listen.createHttpMetaDat(dat2.info(), self.paramenter, self.foldername, "products") for row in reader: try: linie = [] for each in row: linie.append(each.decode("latin-1")) self.produkte.append(linie) self.produktecnt+=1 except Exception, e: print "Error while appending row." csv.unregister_dialect("short_life") def writeShopRDF(self): # Head if not os.path.isdir(self.paramenter.outputdir+os.sep+self.foldername+os.sep+"rdf"+os.sep): os.makedirs(self.paramenter.outputdir+os.sep+self.foldername+os.sep+"rdf"+os.sep) fobj = codecs.open(self.paramenter.outputdir+os.sep+self.foldername+os.sep+"rdf"+os.sep+"shop.rdf", "w+", "utf8") fobj.write("""<?xml version=\"1.0\" encoding=\"UTF-8\"?> <!DOCTYPE rdf:RDF [<!ENTITY vcard \"http://www.w3.org/2006/vcard/ns#\"> <!ENTITY xsd \"http://www.w3.org/2001/XMLSchema#\"> ]> <rdf:RDF xmlns:dct=\"http://purl.org/dc/terms/\"
def parse_csv(input_data, delimiter='|', wrap_all_strings=False, null_text='None', wrapper_character="'", double_wrapper_character_when_nested=False, escape_character="\\", line_terminator='\r\n'):
    """De-serialize CSV data into a Python :class:`dict <python:dict>` object.

    .. versionadded:: 0.3.0

    .. tip::

      Unwrapped empty column values are automatically interpreted as null
      (:obj:`None <python:None>`).

    :param input_data: The CSV data to de-serialize: a string, a file path,
      or an iterable of lines. Should include column headers and at least
      **one** row of data. Will ignore any rows of data beyond the first row.
    :type input_data: :class:`str <python:str>`

    :param delimiter: The delimiter used between columns. Defaults to ``|``.
    :type delimiter: :class:`str <python:str>`

    :param wrap_all_strings: If ``True``, non-numeric values are expected to
      be quoted. Defaults to ``False``.
    :type wrap_all_strings: :class:`bool <python:bool>`

    :param null_text: The string used to indicate an empty value if empty
      values are wrapped. Defaults to ``'None'``.
    :type null_text: :class:`str <python:str>`

    :param wrapper_character: The string used to wrap string values when
      wrapping is applied. Defaults to ``'``.
    :type wrapper_character: :class:`str <python:str>`

    :param double_wrapper_character_when_nested: If ``True``, a nested
      wrapper character is expected doubled rather than escaped.
    :type double_wrapper_character_when_nested: :class:`bool <python:bool>`

    :param escape_character: The character used to escape nested wrapper
      characters. Defaults to ``\\``.
    :type escape_character: :class:`str <python:str>`

    :param line_terminator: The character marking the end of a row.
      Defaults to ``\\r\\n``.
    :type line_terminator: :class:`str <python:str>`

    :returns: A :class:`dict <python:dict>` representation of the CSV record.
    :rtype: :class:`dict <python:dict>`

    :raises DeserializationError: if ``input_data`` is not a valid
      :class:`str <python:str>`
    :raises CSVStructureError: if there are less than 2 (two) rows in
      ``input_data`` or if column headers are not valid Python variable names
    """
    use_file = False
    # Plain strings are validated and wrapped in a list so DictReader can
    # iterate them; file paths are opened below; other iterables pass
    # straight through to DictReader.
    if not checkers.is_file(input_data) and not checkers.is_iterable(
            input_data):
        try:
            input_data = validators.string(input_data, allow_empty=False)
        except (ValueError, TypeError):
            raise DeserializationError("input_data expects a 'str', received '%s'" \
                % type(input_data))
        input_data = [input_data]
    elif checkers.is_file(input_data):
        use_file = True

    if not wrapper_character:
        wrapper_character = '\''

    if wrap_all_strings:
        quoting = csv.QUOTE_NONNUMERIC
    else:
        quoting = csv.QUOTE_MINIMAL

    # Re-register the scratch dialect with the current format parameters.
    if 'sqlathanor' in csv.list_dialects():
        csv.unregister_dialect('sqlathanor')

    csv.register_dialect('sqlathanor',
                         delimiter=delimiter,
                         doublequote=double_wrapper_character_when_nested,
                         escapechar=escape_character,
                         quotechar=wrapper_character,
                         quoting=quoting,
                         lineterminator=line_terminator)

    if not use_file:
        csv_reader = csv.DictReader(input_data,
                                    dialect='sqlathanor',
                                    restkey=None,
                                    restval=None)
        rows = [x for x in csv_reader]
    else:
        # Python 3 wants newline='' so the dialect's line terminator is
        # honoured by the csv module; Python 2 open() has no such keyword.
        if not is_py2:
            with open(input_data, 'r', newline='') as input_file:
                csv_reader = csv.DictReader(input_file,
                                            dialect='sqlathanor',
                                            restkey=None,
                                            restval=None)
                rows = [x for x in csv_reader]
        else:
            with open(input_data, 'r') as input_file:
                csv_reader = csv.DictReader(input_file,
                                            dialect='sqlathanor',
                                            restkey=None,
                                            restval=None)
                rows = [x for x in csv_reader]

    if len(rows) < 1:
        raise CSVStructureError('expected 1 row of data and 1 header row, missing 1')
    else:
        data = rows[0]
        # Column names must be legal Python identifiers; cells holding the
        # null_text marker are converted to None.
        for key in data:
            try:
                validators.variable_name(key)
            except ValueError:
                raise CSVStructureError(
                    'column (%s) is not a valid Python variable name' % key)

            if data[key] == null_text:
                data[key] = None

    csv.unregister_dialect('sqlathanor')

    return data
# Obtener información del dialecto "personal" dialecto = csv.get_dialect('personal') print('delimiter', dialecto.delimiter) print('skipinitialspace', dialecto.skipinitialspace) print('doublequote', dialecto.doublequote) print('quoting', dialecto.quoting) print('quotechar', dialecto.quotechar) print('lineterminator', dialecto.lineterminator) ##Suprimir dialecto # Suprimir dialecto "personal" csv.unregister_dialect('personal') print(csv.list_dialects()) # Listar dialectos después #####Deducir con Sniffer() el dialecto de un archivo csv with open('salida.csv') as csvarchivo: dialecto = csv.Sniffer().sniff(csvarchivo.read(48)) csvarchivo.seek(0) print("Dialecto:", dialecto) csvarchivo.seek(0) entrada = csv.reader(csvarchivo, dialecto) for reg in entrada: print(reg) #Deducir con Sniffer() si un archivo tiene encabezado
def _get_attribute_csv_header(cls, attributes, delimiter='|', wrap_all_strings=False, wrapper_character="'", double_wrapper_character_when_nested=False, escape_character="\\", line_terminator='\r\n'): r"""Retrieve a header string for a CSV representation of the model. :param attributes: List of :term:`model attributes <model attribute>` to include. :type attributes: :class:`list <python:list>` of :class:`str <python:str>` :param delimiter: The character(s) to utilize between columns. Defaults to a pipe (``|``). :type delimiter: :class:`str <python:str>` :param wrap_all_strings: If ``True``, wraps any string data in the ``wrapper_character``. If ``None``, only wraps string data if it contains the ``delimiter``. Defaults to ``False``. :type wrap_all_strings: :class:`bool <python:bool>` :param wrapper_character: The string used to wrap string values when wrapping is necessary. Defaults to ``'``. :type wrapper_character: :class:`str <python:str>` :param double_wrapper_character_when_nested: If ``True``, will double the ``wrapper_character`` when it is found inside a column value. If ``False``, will precede the ``wrapper_character`` by the ``escape_character`` when it is found inside a column value. Defaults to ``False``. :type double_wrapper_character_when_nested: :class:`bool <python:bool>` :param escape_character: The character to use when escaping nested wrapper characters. Defaults to ``\``. :type escape_character: :class:`str <python:str>` :param line_terminator: The character used to mark the end of a line. Defaults to ``\r\n``. :type line_terminator: :class:`str <python:str>` :returns: A string ending in ``line_terminator`` with the model's CSV column names listed, separated by the ``delimiter``. 
:rtype: :class:`str <python:str>` """ if not wrapper_character: wrapper_character = '\'' if wrap_all_strings: quoting = csv.QUOTE_NONNUMERIC else: quoting = csv.QUOTE_MINIMAL if 'sqlathanor' in csv.list_dialects(): csv.unregister_dialect('sqlathanor') csv.register_dialect('sqlathanor', delimiter=delimiter, doublequote=double_wrapper_character_when_nested, escapechar=escape_character, quotechar=wrapper_character, quoting=quoting, lineterminator=line_terminator) output = StringIO() csv_writer = csv.DictWriter(output, fieldnames=attributes, dialect='sqlathanor') csv_writer.writeheader() header_string = output.getvalue() output.close() csv.unregister_dialect('sqlathanor') return header_string
def _get_attribute_csv_data(self, attributes, is_dumping=False, delimiter='|', wrap_all_strings=False, null_text='None', wrapper_character="'", double_wrapper_character_when_nested=False, escape_character="\\", line_terminator='\r\n', config_set=None):
    r"""Return one CSV row serializing ``attributes`` from this instance.

    :param attributes: names of model attributes to include as columns
    :type attributes: :class:`list <python:list>` of :class:`str <python:str>`
    :param is_dumping: if ``True``, fall back to the raw attribute value
      when an attribute does not support CSV serialization instead of
      re-raising. Defaults to ``False``.
    :type is_dumping: :class:`bool <python:bool>`
    :param delimiter: delimiter placed between columns, default ``|``
    :param wrap_all_strings: if ``True``, quote every non-numeric value
    :param null_text: text written in place of empty values, default
      ``'None'``
    :param wrapper_character: quote character, default ``'``
    :param double_wrapper_character_when_nested: if ``True``, double a
      nested quote character instead of escaping it
    :param escape_character: escape character, default ``\``
    :param line_terminator: row terminator, default ``\r\n``
    :param config_set: named serialization configuration set to use, or
      :obj:`None <python:None>`
    :returns: the serialized row, ending in ``line_terminator``
    :rtype: :class:`str <python:str>`
    :raises SerializableAttributeError: if ``attributes`` is empty
    :raises UnsupportedSerializationError: if an attribute cannot be
      serialized and ``is_dumping`` is ``False``
    """
    if not wrapper_character:
        wrapper_character = '\''
    if not attributes:
        raise SerializableAttributeError("attributes cannot be empty")

    if wrap_all_strings:
        quoting = csv.QUOTE_NONNUMERIC
    else:
        quoting = csv.QUOTE_MINIMAL

    # Re-register the scratch dialect with the current format parameters.
    if 'sqlathanor' in csv.list_dialects():
        csv.unregister_dialect('sqlathanor')

    csv.register_dialect('sqlathanor',
                         delimiter=delimiter,
                         doublequote=double_wrapper_character_when_nested,
                         escapechar=escape_character,
                         quotechar=wrapper_character,
                         quoting=quoting,
                         lineterminator=line_terminator)

    data = []
    for item in attributes:
        try:
            value = self._get_serialized_value(format='csv',
                                               attribute=item,
                                               config_set=config_set)
        except UnsupportedSerializationError as error:
            # When dumping, un-serializable attributes are emitted raw.
            if is_dumping:
                value = getattr(self, item)
            else:
                raise error

        data.append(value)

    # Normalize: empty-ish cells become null_text; values that are neither
    # strings nor numbers are stringified.
    for index, item in enumerate(data):
        if item == '' or item is None or item == 'None':
            data[index] = null_text
        elif not checkers.is_string(item) and not checkers.is_numeric(
                item):
            data[index] = str(item)

    data_dict = dict_()
    for index, column_name in enumerate(attributes):
        data_dict[column_name] = data[index]

    output = StringIO()
    csv_writer = csv.DictWriter(output,
                                fieldnames=attributes,
                                dialect='sqlathanor')

    csv_writer.writerow(data_dict)

    data_row = output.getvalue()
    output.close()

    csv.unregister_dialect('sqlathanor')

    return data_row
(str(rows).str.maketrans('',''),'[]\''))
#(str(rows).translate(string.maketrans('', ''), '[]\''))

3. Write a Python program using csv.register_dialect(name, [dialect, ]**fmtparams)

import csv
csv.register_dialect('mydialect', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True)
with open('z:/csvtrial.csv', 'r') as trial:
    reader = csv.reader(trial, dialect='mydialect')
    for row in reader:
        print(row)

4. Write a Python program using csv.unregister_dialect(name)

import csv
csv.register_dialect("hashes", delimiter="#")
csv.unregister_dialect('hashes')

5. Write a Python program using csv.get_dialect

csv.get_dialect('mydialect')

6. Write a Python program using csv.list_dialects()

csv.list_dialects()  # returns a list of all the dialects; this call is a complete program by itself

7. Write a Python program using csv.field_size_limit([new_limit])
def done(self) -> None:
    """Finish processing: drop the registered dialect and clear header state."""
    csv.unregister_dialect(self.dialect)
    self.header, self.header_line = None, None
fp.close()
print('----数据预处理开始')
# Read the dataset and split each row into separate item and type files.
csv.register_dialect('mydialect', delimiter='\t', quoting=csv.QUOTE_ALL)
train_item = open('../data/train_item.tsv', 'w', encoding='utf-8')
train_type = open('../data/train_type.tsv', 'w', encoding='utf-8')
with open('../data/train.tsv', encoding='utf-8') as csvfile:
    file_list = csv.reader(csvfile, 'mydialect')
    for line in file_list:
        # Column 0 is the item text, column 1 is its type label.
        train_item.write(line[0] + '\n')
        train_type.write(line[1] + '\n')
csv.unregister_dialect('mydialect')
print('----数据集初始化结束')
# Tokenize with jieba (original note: cut_for_search), stripping ASCII
# alphanumerics and Chinese/Latin punctuation from the joined tokens.
jieba.setLogLevel(logging.INFO)
with open('../data/train_item.tsv', encoding='utf-8') as train_items:
    seged_train_item = [
        re.sub('[a-zA-Z0-9’!"#$%&\'()()*+,-./:;<=>?@,。?★、…【】《》?“”‘’![\\]^_`{|}~]+', '', ' '.join(jieba.cut(r)))
        for r in train_items.readlines()]
with open('../data/test.tsv', encoding='utf-8') as train_items:
    seged_test = [
        re.sub('[a-zA-Z0-9’!"#$%&\'()()*+,-./:;<=>?@,。?★、…【】《》?“”‘’![\\]^_`{|}~]+', '', ' '.join(jieba.cut(r)))
        for r in train_items.readlines()]
savefile('../data/seged_train_item.tsv', ''.join(seged_train_item))
savefile('../data/seged_test.tsv', ''.join(seged_test))
print('----分词去停用词结束')
def reset():
    """Clear all registered handlers and unregister every tracked CSV dialect."""
    handlers.clear()
    for entry in csv_dialects:
        csv.unregister_dialect(entry['name'])
    csv_dialects.clear()
# Program for a registered dialect
import csv
csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE)
csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE)
with open(r'D:\tablueau\circuits.csv', newline='') as f:
    reader = csv.reader(f, 'unixpwd')
    for i in reader:
        print(i)

# Program for unregistering a dialect
csv.list_dialects()
csv.unregister_dialect("excel-tab")
print(csv.list_dialects())

# Write a Python program using csv.get_dialect
# get_dialect(...): return the dialect instance associated with name.
dialect = csv.get_dialect(name)
c = csv.get_dialect(name)

# Write a Python program using csv.list_dialects()
def test_dialect_apply(self): class testA(csv.excel): delimiter = "\t" class testB(csv.excel): delimiter = ":" class testC(csv.excel): delimiter = "|" csv.register_dialect('testC', testC) try: fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj) writer.writerow([1,2,3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1,2,3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, testA) writer.writerow([1,2,3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1\t2\t3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect=testB()) writer.writerow([1,2,3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1:2:3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect='testC') writer.writerow([1,2,3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1|2|3\r\n") finally: fileobj.close() os.unlink(name) fd, name = tempfile.mkstemp() fileobj = os.fdopen(fd, "w+b") try: writer = csv.writer(fileobj, dialect=testA, delimiter=';') writer.writerow([1,2,3]) fileobj.seek(0) self.assertEqual(fileobj.read(), "1;2;3\r\n") finally: fileobj.close() os.unlink(name) finally: csv.unregister_dialect('testC')
def _parse_csv(cls, csv_data, delimiter='|', wrap_all_strings=False, null_text='None', wrapper_character="'", double_wrapper_character_when_nested=False, escape_character="\\", line_terminator='\r\n', config_set=None):
    """Generate a :class:`dict <python:dict>` from a CSV record.

    .. tip::

      Unwrapped empty column values are automatically interpreted as null
      (:obj:`None <python:None>`).

    Fixed: the too-many-rows error path called ``len(csv_reader)`` —
    ``csv.DictReader`` has no ``__len__``, so it raised ``TypeError``
    instead of the intended ``CSVStructureError``; it now uses
    ``len(rows)``.

    :param csv_data: The CSV record. Should be a single row and should
      **not** include column headers.
    :type csv_data: :class:`str <python:str>`
    :param delimiter: The delimiter used between columns. Defaults to ``|``.
    :type delimiter: :class:`str <python:str>`
    :param wrapper_character: The string used to wrap string values when
      wrapping is applied. Defaults to ``'``.
    :type wrapper_character: :class:`str <python:str>`
    :param null_text: The string used to indicate an empty value if empty
      values are wrapped. Defaults to ``'None'``.
    :type null_text: :class:`str <python:str>`
    :param config_set: If not :obj:`None <python:None>`, the named
      configuration set to use. Defaults to :obj:`None <python:None>`.
    :type config_set: :class:`str <python:str>` / :obj:`None <python:None>`

    :returns: A :class:`dict <python:dict>` representation of the CSV record.
    :rtype: :class:`dict <python:dict>`

    :raises DeserializationError: if ``csv_data`` is not a valid
      :class:`str <python:str>`
    :raises CSVStructureError: if the columns in ``csv_data`` do not match
      the expected columns returned by
      :func:`get_csv_column_names() <BaseModel.get_csv_column_names>`
    :raises ValueDeserializationError: if a value extracted from the CSV
      failed when executing its :term:`de-serialization function`.
    """
    try:
        csv_data = validators.string(csv_data, allow_empty=False)
    except (ValueError, TypeError):
        raise DeserializationError("csv_data expects a 'str', received '%s'" \
            % type(csv_data))

    if not wrapper_character:
        wrapper_character = '\''

    if wrap_all_strings:
        quoting = csv.QUOTE_NONNUMERIC
    else:
        quoting = csv.QUOTE_MINIMAL

    # Re-register the scratch dialect with the current format parameters.
    if 'sqlathanor' in csv.list_dialects():
        csv.unregister_dialect('sqlathanor')

    csv.register_dialect('sqlathanor',
                         delimiter=delimiter,
                         doublequote=double_wrapper_character_when_nested,
                         escapechar=escape_character,
                         quotechar=wrapper_character,
                         quoting=quoting,
                         lineterminator=line_terminator)

    csv_column_names = [
        x for x in cls.get_csv_column_names(deserialize=True,
                                            serialize=None,
                                            config_set=config_set)
    ]

    csv_reader = csv.DictReader([csv_data],
                                fieldnames=csv_column_names,
                                dialect='sqlathanor',
                                restkey=None,
                                restval=None)

    rows = [x for x in csv_reader]

    if len(rows) > 1:
        # FIX: was len(csv_reader), which raised TypeError.
        raise CSVStructureError('expected 1 row of data, received %s' % len(rows))
    elif len(rows) == 0:
        data = dict_()
        for column_name in csv_column_names:
            data[column_name] = None
    else:
        data = rows[0]

    # Extra fields beyond the declared columns land under the None restkey.
    if data.get(None, None) is not None:
        raise CSVStructureError('expected %s fields, found %s' % (len(csv_column_names), len(data.keys())))

    deserialized_data = dict_()
    for key in data:
        if data[key] == null_text:
            deserialized_data[key] = None
            continue

        attribute_name = cls._get_attribute_name(key)
        deserialized_value = cls._get_deserialized_value(data[key],
                                                         'csv',
                                                         key,
                                                         config_set=config_set)

        deserialized_data[attribute_name] = deserialized_value

    csv.unregister_dialect('sqlathanor')

    return deserialized_data