def addElementsmapsDetails(catalog, file1, file2):
    """
    Load the map elements related to the movie details.

    Reads two ';'-delimited CSV files (details and casting), appends each
    row to the corresponding list in catalog['Data'] and feeds the rows to
    the map-building helpers in `md`.  The casting file is walked in
    lockstep with the details list through an iterator, so both files are
    assumed to have the same number of rows in the same movie order —
    TODO confirm with the data source.
    """
    file1 = config.file_dir + file1
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file1 = csv.DictReader(open(file1, encoding="utf-8"), dialect=dialect)
    file2 = config.file_dir + file2
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file2 = csv.DictReader(open(file2, encoding="utf-8"), dialect=dialect)
    # First pass: store every details row and index companies/genres.
    for movie1 in input_file1:
        lt.addLast(catalog['Data']['details'], movie1)
        md.addCompany(movie1, catalog)
        md.addGenre(movie1, catalog)
    # Second pass: pair each casting row with its details row by position.
    iterator = it.newIterator(catalog['Data']['details'])
    for movie2 in input_file2:
        movie1 = it.next(iterator)  # details row matching this casting row
        lt.addLast(catalog['Data']['casting'], movie2)
        md.addDirector(movie2, movie1, catalog)
        md.addActor(movie2, movie1, catalog)
        md.addCountry(movie2, movie1, catalog)
def loadTrips(catalog, sep=';'):
    """
    Load the trips graph from the edges CSV file.

    The same file is read twice: one pass creates the vertices and a second
    pass creates the edges.  The execution time of each pass is printed.

    Args:
        catalog: catalog holding the trips graph.
        sep: CSV field delimiter (default ';').
    """
    flightsfile = cf.data_dir + 'tripxday_edges1/tripday_edges.csv'

    def _load_pass(row_handler):
        # One full pass over the CSV, applying row_handler to every row.
        dialect = csv.excel()
        dialect.delimiter = sep
        with open(flightsfile, encoding="utf-8-sig") as csvfile:
            for row in csv.DictReader(csvfile, dialect=dialect):
                row_handler(catalog, row)

    t1 = process_time()  # start of the vertex pass
    _load_pass(addTripNode)
    t2 = process_time()  # end of the vertex pass
    print("Tiempo de ejecución carga de vértices en el grafo de viajes:", t2 - t1, " segundos")
    t3 = process_time()  # start of the edge pass
    _load_pass(addTripEdge)
    t4 = process_time()  # end of the edge pass
    print("Tiempo de ejecución carga de arcos en el grafo de viajes:", t4 - t3, " segundos")
    print("Tiempo de ejecución total para carga del grafo de viajes:", t4 - t1, " segundos")
def check_and_return_header(uuids_fd, delimiter='\t', quotechar=''):
    """
    Inspect the first line of a CCC_DID/UUID file and determine its CSV dialect.

    Args:
        uuids_fd: open text file object positioned at the start.
        delimiter: fallback delimiter when sniffing fails (default TAB).
        quotechar: fallback quote character when sniffing fails.

    Returns:
        (dialect, identifier_field_name, fieldnames) where
        identifier_field_name is 'CCC_DID' or 'UUID' if found in the header.
        Exits via print_error_and_exit when no header row is detected.
    """
    uuid_field_names = {'CCC_DID', 'UUID'}
    first_line = uuids_fd.readline()  # read the (potential) header line

    def _fallback_dialect():
        # TSV-style default used whenever sniffing fails or is implausible.
        d = csv.excel()
        d.delimiter = delimiter
        d.quotechar = quotechar
        d.quoting = csv.QUOTE_NONE
        return d

    has_header = False
    identifer_field_name = None
    try:
        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(first_line)
        has_header = sniffer.has_header(first_line)
    except csv.Error:
        dialect = _fallback_dialect()
    # If delim is None/empty or definitely incorrect (alphanumeric), use the default
    if not dialect.delimiter or dialect.delimiter.isalnum():
        dialect = _fallback_dialect()
    print('INFO: for UUID file, delimiter is %s and quote char is %s'
          % ('<TAB>' if dialect.delimiter == '\t' else dialect.delimiter,
             '<NONE>' if dialect.quoting == csv.QUOTE_NONE else dialect.quotechar))
    fieldnames = first_line.strip().split(dialect.delimiter)
    for token in fieldnames:
        if token in uuid_field_names:
            has_header = True
            identifer_field_name = token
            break
    if not has_header:
        # NOTE(review): `uuids_filename` is not defined in this function; this
        # error path relies on a module-level global — confirm it exists.
        print_error_and_exit('TSV/CSV file with CCC_DIDs/UUIDs: %s does not seem to have a header row' % (uuids_filename))
    return dialect, identifer_field_name, fieldnames
def test_edge_then_vertex(self):
    """Build a multigraph from an edge CSV, then enrich it from a vertex CSV;
    verify the merged graph object, edge weights, per-edge attribute
    collections and vertex metadata."""
    with open(data_file("tiny-multigraph.csv")) as edge_file:
        edge_dataset = CsvDataset(edge_file, True, csv.excel())
        proj = projection.edge_with_collection_metadata(
            edge_dataset.headers(), 1, 2, 4)
        graph = from_dataset(edge_dataset, proj)
    with open(data_file("tiny-graph-vertex.csv")) as vertex_file:
        vertex_dataset = CsvDataset(vertex_file, True, csv.excel())
        vertex_proj = projection.vertex_with_single_metadata(
            vertex_dataset.headers(), 0, ignored_values=["NULL"])
        # Passing the existing graph: vertex data is merged into it and the
        # same object is returned.
        same_graph = from_dataset(vertex_dataset, vertex_proj, graph)
    self.assertTrue(same_graph == graph)
    self.assertEqual(7, len(graph.nodes))
    self.assertEqual(2, graph["jon"]["john"]["weight"])
    attributes = graph["jon"]["john"]["attributes"]
    self.assertDictEqual(
        {
            "date": "7/1/2018",
            "subject": "Graphs are great",
            "replyCount": "1"
        }, attributes[0])
    self.assertDictEqual(
        {
            "date": "7/1/2018",
            "subject": "Going to need to ask you to stay late tonight",
            "replyCount": "1"
        }, attributes[1])
    self.assertDictEqual(
        {
            "date": "7/2/2018",
            "subject": "RE: Graphs are great",
            "replyCount": "0"
        }, attributes[2])
    self.assertDictEqual(
        {
            "date": "7/2/2018",
            "subject": "RE: Going to need to ask you to stay late tonight",
            "replyCount": "0"
        }, attributes[3])
    self.assertDictEqual(
        {
            "date": "7/2/2018",
            "subject": "No I'm not Lumberg",
            "replyCount": "0"
        }, attributes[4])
    self.assertDictEqual({"lastName": "larson"},
                         graph.nodes["jon"]["attributes"][0])
    self.assertDictEqual(
        {
            "lastName": "redhot",
            "sandwichPreference": "buffalo chicken"
        }, graph.nodes["frank"]["attributes"][0])
def test_load_csv(self):
    """load_csv must honour the declared column types, and the skip
    specifications for both columns and rows."""
    contents = (
        b'1/1/1990,1.0,[,one,\n'
        b'1/1/1990,2.0,],two,\n'
        b'1/1/1990,3.0,{,three,'
    )
    ColumnType = owcsvimport.Options.ColumnType
    RowSpec = owcsvimport.Options.RowSpec
    # Only 4 of the 5 columns are typed; the trailing empty column is
    # left for automatic inference (expected numeric/NaN).
    opts = owcsvimport.Options(
        encoding="ascii", dialect=csv.excel(),
        columntypes=[
            (range(0, 1), ColumnType.Time),
            (range(1, 2), ColumnType.Numeric),
            (range(2, 3), ColumnType.Text),
            (range(3, 4), ColumnType.Categorical),
        ],
        rowspec=[]
    )
    df = owcsvimport.load_csv(io.BytesIO(contents), opts)
    self.assertEqual(df.shape, (3, 5))
    self.assertSequenceEqual(
        list(df.dtypes),
        [np.dtype("M8[ns]"), np.dtype(float), np.dtype(object),
         "category", np.dtype(float)],
    )
    # Second run: skip three columns and the middle row.
    opts = owcsvimport.Options(
        encoding="ascii", dialect=csv.excel(),
        columntypes=[
            (range(0, 1), ColumnType.Skip),
            (range(1, 2), ColumnType.Numeric),
            (range(2, 3), ColumnType.Skip),
            (range(3, 4), ColumnType.Categorical),
            (range(4, 5), ColumnType.Skip),
        ],
        rowspec=[
            (range(1, 2), RowSpec.Skipped)
        ]
    )
    df = owcsvimport.load_csv(io.BytesIO(contents), opts)
    self.assertEqual(df.shape, (2, 2))
    self.assertSequenceEqual(
        list(df.dtypes), [np.dtype(float), "category"]
    )
    self.assertSequenceEqual(
        list(df.iloc[:, 0]), [1.0, 3.0]
    )
    self.assertSequenceEqual(
        list(df.iloc[:, 1]), ["one", "three"]
    )
def test_load_csv(self):
    """load_csv must honour the declared column types (including explicit
    Auto), and the skip specifications for both columns and rows.

    NOTE(review): ColumnType and RowSpec are not defined locally here —
    presumably imported at module level; confirm.
    """
    contents = (
        b'1/1/1990,1.0,[,one,\n'
        b'1/1/1990,2.0,],two,\n'
        b'1/1/1990,3.0,{,three,'
    )
    opts = owcsvimport.Options(
        encoding="ascii", dialect=csv.excel(),
        columntypes=[
            (range(0, 1), ColumnType.Time),
            (range(1, 2), ColumnType.Numeric),
            (range(2, 3), ColumnType.Text),
            (range(3, 4), ColumnType.Categorical),
            (range(4, 5), ColumnType.Auto),
        ],
        rowspec=[]
    )
    df = owcsvimport.load_csv(io.BytesIO(contents), opts)
    self.assertEqual(df.shape, (3, 5))
    self.assertSequenceEqual(
        list(df.dtypes),
        [np.dtype("M8[ns]"), np.dtype(float), np.dtype(object),
         "category", np.dtype(float)],
    )
    # Second run: skip three columns and the middle row.
    opts = owcsvimport.Options(
        encoding="ascii", dialect=csv.excel(),
        columntypes=[
            (range(0, 1), ColumnType.Skip),
            (range(1, 2), ColumnType.Numeric),
            (range(2, 3), ColumnType.Skip),
            (range(3, 4), ColumnType.Categorical),
            (range(4, 5), ColumnType.Skip),
        ],
        rowspec=[
            (range(1, 2), RowSpec.Skipped)
        ]
    )
    df = owcsvimport.load_csv(io.BytesIO(contents), opts)
    self.assertEqual(df.shape, (2, 2))
    self.assertSequenceEqual(
        list(df.dtypes), [np.dtype(float), "category"]
    )
    self.assertSequenceEqual(
        list(df.iloc[:, 0]), [1.0, 3.0]
    )
    self.assertSequenceEqual(
        list(df.iloc[:, 1]), ["one", "three"]
    )
def read_file(cls, filename, wrapper=None):
    """Read a CSV dataset from filename, trying several encodings.

    Args:
        cls: class providing open(), DELIMITERS and data_table().
        filename: path of the dataset.
        wrapper: optional callable applied to the parsed table.

    Returns:
        wrapper(cls.data_table(reader)) for the first encoding that decodes
        and parses; raises ValueError when every attempt fails.
    """
    wrapper = wrapper or _IDENTITY
    import csv
    # Bug fix: `error` was previously unbound when every encoding failed to
    # decode, turning the final raise into a NameError.
    error = None
    for encoding in (lambda: 'us-ascii',                 # fast
                     lambda: detect_encoding(filename),  # precise
                     lambda: 'utf-8'):                   # fallback
        with cls.open(filename, mode='rt', newline='',
                      encoding=encoding()) as file:
            # Sniff the CSV dialect (delimiter, quotes, ...)
            try:
                dialect = csv.Sniffer().sniff(file.read(1024), cls.DELIMITERS)
            except UnicodeDecodeError:
                continue
            except csv.Error:
                dialect = csv.excel()
                dialect.delimiter = cls.DELIMITERS[0]
            file.seek(0)
            dialect.skipinitialspace = True
            try:
                reader = csv.reader(file, dialect=dialect)
                return wrapper(cls.data_table(reader))
            except Exception as e:
                error = e
                continue
    raise ValueError('Cannot parse dataset {}: {}'.format(filename, error))
def read_csv(filename, delimiter=',', ig_blank_char=True, ig_blank_line=True,
             header_cnt=0, ig_chars=r'-/#', check_cnt=0):
    """Open a csv file returning list of lines (delimited list of strings).

    Args:
        filename: path of the CSV file.
        delimiter: field separator.
        ig_blank_char: strip each item and drop the empty ones.
        ig_blank_line: drop lines that end up empty.
        header_cnt: number of leading header lines to skip.
        ig_chars: lines whose first field starts with one of these chars
            are treated as comments and skipped.
        check_cnt: if non-zero, report lines whose item count differs.

    Returns:
        List of parsed lines; empty list on I/O error.
    """
    props = csv.excel()
    props.delimiter = delimiter
    lines = []
    try:
        # newline='' is the documented way to open files for the csv module.
        # (Also removed a leftover debug print of every parsed line.)
        with open(filename, 'r', newline='') as infile:
            read = csv.reader(infile, props)
            for lineno, line in enumerate(read):
                # Skip header lines and commented-out lines
                if (lineno < header_cnt) or \
                   (line and line[0] and line[0][0] in ig_chars):
                    continue
                # Cleanup in the case delimiter contains spaces
                if ig_blank_char:
                    line = [item.strip() for item in line if item.strip()]
                # Ignore blank lines
                if line or not ig_blank_line:
                    lines.append(line)
                # Check count of items
                if check_cnt and check_cnt != len(line):
                    perror(">>> open_csv: Bad item count on {0}:{1}.".format(
                        filename, lineno))
    except IOError:
        perror(">>> open_csv: Error opening file {0}.".format(filename))
    return lines
def test_options_widget(self):
    """CSVOptionsWidget: programmatic setters must emit optionsChanged but
    never optionsEdited, and a custom dialect must round-trip through the
    UI controls."""
    w = textimport.CSVOptionsWidget()
    schanged = QSignalSpy(w.optionsChanged)
    sedited = QSignalSpy(w.optionsEdited)
    w.setDialect(csv.excel())
    self.assertEqual(len(schanged), 1)
    self.assertEqual(len(sedited), 0)
    w.setSelectedEncoding("iso8859-1")
    self.assertEqual(len(schanged), 2)
    self.assertEqual(len(sedited), 0)
    d = w.dialect()
    self.assertEqual(d.delimiter, csv.excel.delimiter)
    self.assertEqual(d.doublequote, csv.excel.doublequote)
    self.assertEqual(w.encoding(), "iso8859-1")
    # Custom dialect: delimiter "a" is not a stock choice, so the combo box
    # must switch to "Other" with the delimiter in the free-text edit.
    d = textimport.Dialect("a", "b", "c", True, True)
    w.setDialect(d)
    cb = w.findChild(QComboBox, "delimiter-combo-box")
    self.assertEqual(cb.currentIndex(),
                     textimport.CSVOptionsWidget.DelimiterOther)
    le = w.findChild(QWidget, "custom-delimiter-edit")
    self.assertEqual(le.text(), "a")
    cb = w.findChild(QWidget, "quote-edit-combo-box")
    self.assertEqual(cb.currentText(), "b")
    d1 = w.dialect()
    self.assertEqual(d.delimiter, d1.delimiter)
    self.assertEqual(d.quotechar, d1.quotechar)
def __init__(self, parent: QWidget = None,
             dialect: csv.Dialect = None) -> None:
    """Initialise the writer widget.

    Args:
        parent: optional parent widget.
        dialect: CSV dialect to use; defaults to a fresh csv.excel().

    Note: the original default `dialect=csv.excel()` was evaluated once at
    definition time, so every instance shared (and could mutate) the same
    Dialect object.  A per-call default avoids that.
    """
    super().__init__(parent)
    self.tags: List[Tag] = []
    self._file: Optional[TextIO] = None
    self._writer: Optional[_CSVWriter] = None
    # Fresh default per instance instead of one shared mutable Dialect.
    self.dialect = dialect if dialect is not None else csv.excel()
def save(self):
    """Parse the uploaded CSV bank file (Python 2 code).

    Record layout per line: date, account, amount, beneficiary, BIC,
    name, address, code, statement.  IBAN conversion failures are
    collected into `messages` and the record is skipped.

    NOTE(review): the function appears truncated in this view (no return
    statement, `success` never set True) — confirm against the full file.
    """
    messages = []
    success = False
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    records = csv.reader(self.cleaned_data["file"], dialect=dialect)
    for line in records:
        date = line[0]
        account = line[1]
        amount = line[2]
        beneficiary = line[3]
        BIC = line[4]
        name = line[5]
        address = line[6]
        code = line[7]
        statement = line[8]
        try:
            account = Banking_Account.clean(account)
        except NameError, e:  # Python 2 except syntax
            messages.append("Error => Could not convert to IBAN. " + str(e))
            success = False
            continue
        if beneficiary:
            # payment to bank
            try:
                beneficiary = Banking_Account.clean(beneficiary)
            except NameError, e:
                messages.append("Error => Could not convert to IBAN. " + str(e))
                success = False
                continue
def __init__(self, pfile, verbose=True):
    """Read a p-file and store each profile as a named tuple attribute.

    Args:
        pfile: path to the space-separated p-file.
        verbose: print the loaded parameters when True.

    Each block of the file starts with a header line
    `npts abscissa param(units)` followed by npts data lines of
    `x value gradient`.  Every parameter becomes an attribute
    `self._<param>` and its name is appended to `self._params`.
    """
    self._pfile = pfile
    self._params = []
    with open(pfile, 'r') as readfile:
        dia = csv.excel()
        dia.skipinitialspace = True
        reader = csv.reader(readfile, dia, delimiter=' ')
        # define data structure as named tuple for storing parameter values
        data = namedtuple('DataStruct',
                          ['name', 'npts', 'units', 'xunits', 'x', 'y', 'dydx'])
        # iterate through lines of file, checking for a header line;
        # at each header, read the next npts lines of data into
        # appropriate arrays.
        # continue until no headerline is found (throws StopIteration).
        # Populate list of params with available variables.
        while True:
            try:
                headerline = next(reader)
            except StopIteration:
                break
            npts = int(headerline[0])   # size of abscissa, data arrays
            abscis = headerline[1]      # string name of abscissa variable (e.g. 'psinorm')
            var = re.split('[\(\)]', headerline[2])
            param = var[0]              # string name of parameter (e.g. 'ne')
            units = var[1]              # string name of units (e.g. '10^20/m^3')
            # read npts next lines, populate arrays
            x = []
            val = []
            gradval = []
            for j in range(npts):
                dataline = next(reader)
                x.append(float(dataline[0]))
                val.append(float(dataline[1]))
                gradval.append(float(dataline[2]))
            x = np.array(x)
            val = np.array(val)
            gradval = np.array(gradval)
            # collate into storage structure
            vars(self)['_'+param] = data(name=param, npts=npts, units=units,
                                         xunits=abscis, x=x, y=val,
                                         dydx=gradval)
            self._params.append(param)
    if verbose:
        print('P-file data loaded from '+self._pfile)
        print('Available parameters:')
        for par in self._params:
            un = vars(self)['_'+par].units
            xun = vars(self)['_'+par].xunits
            print(str(par).ljust(8)+str(xun).ljust(12)+str(un))
def _make_reader(self, f):
    """Build the row reader for file object f and store it in self.data.

    When the configured delimiter is None or a single character, a csv
    reader is used (sniffing the delimiter from the first non-skipped line
    when it is None).  A multi-character delimiter is treated as a regular
    expression and each line is split manually.
    """
    import csv
    sep = self.delimiter
    if sep is None or len(sep) == 1:
        sniff_sep = True
        # default dialect
        dia = csv.excel()
        if sep is not None:
            sniff_sep = False
            dia.delimiter = sep
        # attempt to sniff the delimiter
        if sniff_sep:
            line = f.readline()
            while self.pos in self.skiprows:
                self.pos += 1
                line = f.readline()
            self.pos += 1
            sniffed = csv.Sniffer().sniff(line)
            dia.delimiter = sniffed.delimiter
            # The sniffed line was already consumed: parse it and keep its
            # rows in the buffer so they are not lost.
            if self.encoding is not None:
                self.buf.extend(list(com.UnicodeReader(StringIO(line),
                                                       dialect=dia,
                                                       encoding=self.encoding)))
            else:
                self.buf.extend(list(csv.reader(StringIO(line), dialect=dia)))
        if self.encoding is not None:
            reader = com.UnicodeReader(f, dialect=dia,
                                       encoding=self.encoding)
        else:
            reader = csv.reader(f, dialect=dia)
    else:
        # multi-char separator: fall back to a per-line regex split
        reader = (re.split(sep, line.strip()) for line in f)
    self.data = reader
def load_details(catalog, details_file):
    """
    Load each line of the details file into the catalog.

    Every movie row is stripped of surrounding whitespace in both keys and
    values, added to the catalog, and indexed by production company, genre
    (including '|'-separated subgenres) and production country.
    """
    dialect = csv.excel()
    dialect.delimiter = ';'
    with open(details_file, encoding='utf-8-sig') as input_file:
        file_reader = csv.DictReader(input_file, dialect=dialect)
        for raw_movie in file_reader:
            movie = {key.strip(): value.strip()
                     for key, value in raw_movie.items()}
            model.add_details(catalog, movie)
            for producer in movie['production_companies'].split(","):
                model.add_movie_production_companies(catalog,
                                                     producer.lower(), movie)
            for genre in movie['genres'].split(","):
                for subgenre in genre.split('|'):
                    model.add_movie_genre(catalog, subgenre, movie)
            for country in movie['production_countries'].split(','):
                model.add_movie_production_countries(catalog,
                                                     country.lower(), movie)
def loadActors(catalog):
    """Load every actor from the casting file into the catalog, updating the
    record of an actor that is already present."""
    start = process_time()  # start time
    castingfile = cf.data_dir + 'themoviesdb/MoviesCastingRaw-small.csv'
    dialect = csv.excel()
    dialect.delimiter = ';'
    actor_columns = ('actor1_name', 'actor2_name', 'actor3_name',
                     'actor4_name', 'actor5_name')
    with open(castingfile, encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile, dialect=dialect)
        movie_counter = 1
        for row in reader:
            for column in actor_columns:
                actor_name = row[column]
                if actor_name == 'none':
                    continue  # empty slot in the casting row
                director_name = row['director_name']
                pos = lt.isPresent(actor_name, catalog['actors'], equal)
                if pos != 0:
                    model.updateActor(catalog, pos, movie_counter,
                                      director_name)
                else:
                    model.addActor(catalog, row, movie_counter)
            movie_counter += 1
    stop = process_time()  # end time
    print('Tiempo de ejecución carga actores', stop - start, ' segundos')
    endActorslist_controller(catalog)
def cargar_listaActores(file, sep=";"): lst = lt.newList('SINGLE_LINKED', comparar_actores) dialect = csv.excel() dialect.delimiter = sep nombres_actores = [ "actor1_name", "actor2_name", "actor3_name", "actor4_name", "actor5_name" ] try: with open(file, encoding="utf-8") as csvfile: spamreader = csv.DictReader(csvfile, dialect=dialect) for row in spamreader: #print(row) # agregar una lista para los directores #directores ={} #directores["director"] =lt.newList('SINGLE_LINKED', comparar_directores) #lista directores for nombreCol in nombres_actores: actor = {} actor["nombre"] = row[nombreCol] posicion1 = lt.isPresent(lst, actor["nombre"]) if posicion1 == 0: actor["peliculas"] = lt.newList( ) #ids Peliculas del actor actor["director"] = lt.newList( 'SINGLE_LINKED', comparar_director) #lista directores lt.addLast(actor["peliculas"], row["id"]) director = {} director["nombre"] = row["director_name"] director["count"] = 1 lt.addLast(actor["director"], director) lt.addLast(lst, actor) else: actores = lt.getElement(lst, posicion1) lt.addLast(actores["peliculas"], row["id"]) #validra si ya esta el director o no pos_director = lt.isPresent(actores["director"], row["director_name"]) if pos_director == 0: # no esta crear director director = {} director["nombre"] = row["director_name"] director["count"] = 1 lt.addLast(actores["director"], director) else: # ya esta ese director aumnetar count en uno director = lt.getElement(actores["director"], pos_director) director["count"] = director["count"] + 1 except: print("Hubo un error con la carga del archivo") return lst
def test_carga():
    """Loading into the ADT list must mirror a plain Python list exactly."""
    plain_list = []
    adt_list = lst.newList(list_type)
    file = 'Data/theMoviesdb/AllMoviesCastingRaw.csv'
    sep = ';'
    dialect = csv.excel()
    dialect.delimiter = sep
    assert (lst.size(adt_list) == 0), "La lista no empieza en cero"
    try:
        with open(file, encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile, dialect=dialect):
                plain_list.append(row)
                lst.addLast(adt_list, row)
    except:
        assert False, 'Se presentó un error al cargar el archivo'
    assert len(plain_list) == lst.size(adt_list), 'Son de distionto tamaño'
    for idx in range(len(plain_list)):
        assert lst.getElement(
            adt_list, idx + 1) == plain_list[idx], 'Las listas no estan en el mismo orden'
def loadCSVFileCasting(file, catalog):
    """Read the ';'-separated casting CSV and add every row to the catalog."""
    dialect = csv.excel()
    dialect.delimiter = ";"
    path = config.data_dir + file
    with open(path, encoding="utf-8") as csvfile:
        reader = csv.DictReader(csvfile, dialect=dialect)
        for record in reader:
            model.addcasting(catalog, record)
def parse(self, cr, data):
    """Parse exported CSV bank data into a single statement object.

    All transactions are grouped into one statement whose id is derived
    from the execution date of the first transaction (year + week number).
    """
    result = []
    stmnt = None
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    # Transaction lines are not numbered, so keep a tracer
    subno = 0
    statement_id = False
    for fields in csv.reader(data.split('\n'), dialect=dialect):
        # Skip empty (last) lines and header line
        if not fields or fields[0] == 'Datum':
            continue
        subno += 1
        msg = transaction_message(fields, subno)
        if not statement_id:
            statement_id = self.get_unique_statement_id(
                cr, msg.execution_date.strftime('%Yw%W'))
        msg.statement_id = statement_id
        if stmnt is None:
            stmnt = statement(msg)
        else:
            stmnt.import_transaction(msg)
    result.append(stmnt)
    return result
def export_members(request):
    """Return every Person record as a zipped CSV attachment (Python 2 code).

    The CSV is written into an in-memory buffer, zipped directly into the
    HttpResponse, and the Content-Length header is set from the response
    size.
    """
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename=transactions.csv.zip'
    zip_file = zipfile.ZipFile(response, "w", zipfile.ZIP_DEFLATED)
    csv_file = StringIO.StringIO()
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    csv_writer = csv.writer(csv_file, dialect=dialect)
    for person in Person.objects.order_by("postal_code"):
        # generate chunk
        csv_writer.writerow([person.firstname.encode("utf-8"),
                             person.lastname.encode("utf-8"),
                             person.email_address,
                             person.street.encode("utf-8"),
                             person.postal_code,
                             person.city.encode("utf-8"),
                             person.telephone,
                             person.language,
                             person.notas.encode("utf-8"),
                             person.last_payment_date])
    zip_file.writestr("transactions.csv", csv_file.getvalue())
    csv_file.close()
    zip_file.close()
    # generate the file
    response['Content-Length'] = response.tell()
    return response
def read_file(cls, filename, wrapper=None):
    """Read a CSV dataset from filename, trying several encodings.

    Args:
        cls: class providing open(), DELIMITERS and data_table().
        filename: path of the dataset.
        wrapper: optional callable applied to the parsed table.

    Returns:
        wrapper(cls.data_table(reader)) for the first encoding that decodes
        and parses; raises ValueError when every attempt fails.
    """
    wrapper = wrapper or _IDENTITY
    import csv
    # Bug fix: `error` was previously unbound when every encoding failed to
    # decode, turning the final raise into a NameError.
    error = None
    for encoding in (
            lambda: 'us-ascii',                 # fast
            lambda: detect_encoding(filename),  # precise
            lambda: 'utf-8'):                   # fallback
        with cls.open(filename, mode='rt', newline='',
                      encoding=encoding()) as file:
            # Sniff the CSV dialect (delimiter, quotes, ...)
            try:
                dialect = csv.Sniffer().sniff(file.read(1024), cls.DELIMITERS)
            except UnicodeDecodeError:
                continue
            except csv.Error:
                dialect = csv.excel()
                dialect.delimiter = cls.DELIMITERS[0]
            file.seek(0)
            dialect.skipinitialspace = True
            try:
                reader = csv.reader(file, dialect=dialect)
                return wrapper(cls.data_table(reader))
            except Exception as e:
                error = e
                continue
    raise ValueError('Cannot parse dataset {}: {}'.format(filename, error))
def test_carga():
    """Load a CSV into both a Python list and the ADT list and print both."""
    python_list = []
    adt = lt.newList()
    file = 'Data/test.csv'
    sep = ','
    dialect = csv.excel()
    dialect.delimiter = sep
    try:
        with open(file, encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile, dialect=dialect):
                python_list.append(row)
                lt.addLast(adt, row)
    except:
        print("Se presento un error al cargar los archivos")
    print("Lista python")
    for element in python_list:
        print(element)
    print("Lista de DTA")
    iterator = it.newIterator(adt)
    while it.hasNext(iterator):
        print(it.next(iterator))
def __init__(self, encoding=None):
    """CSV formatter writing fully-quoted rows into self.

    Args:
        encoding: output encoding; defaults to the site encoding.
    """
    super(CSVformatter, self).__init__()
    quoting_dialect = csv.excel()
    quoting_dialect.quoting = csv.QUOTE_ALL  # quote every field
    self._encoding = encoding if encoding else i18n.get_site_encoding()
    self._csv_writer = csv.writer(self, dialect=quoting_dialect)
    self._line = None
def loadmoviesCastingRaw(catalog, moviesfile):
    """Load the casting CSV, indexing every movie by each of its actors and
    directors (names are stripped and lower-cased before indexing).

    Args:
        catalog: the movie catalog.
        moviesfile: file name relative to cf.data_dir.
    """
    moviesfile = cf.data_dir + moviesfile
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file = csv.DictReader(open(moviesfile, encoding='utf-8-sig'),
                                dialect=dialect)
    # The five actor columns get identical handling: iterate over them
    # instead of repeating the same split/add loop five times.
    actor_columns = ('actor1_name', 'actor2_name', 'actor3_name',
                     'actor4_name', 'actor5_name')
    for movie in input_file:
        model.addMovieCasting(catalog, movie)
        for column in actor_columns:
            for autor in movie[column].split(","):
                model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for director in movie['director_name'].split(","):
            model.addMovieByDirector(catalog, director.strip().lower(), movie)
def loadBookReviews(catalog, sep=';'):
    """
    Load the flights graph: nodes from flights_nodes.csv and edges from
    flights_edges.csv, timing each phase separately.

    NOTE(review): the name and the original docstring talk about books and
    authors, but the body loads a flight graph — presumably copied from a
    template; confirm before renaming.
    """
    t1_start = process_time()  # total start time
    nodesfile = cf.data_dir + 'flights_nodes.csv'
    edgesfile = cf.data_dir + 'flights_edges.csv'
    dialect = csv.excel()
    dialect.delimiter = sep
    with open(nodesfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        t2_start = process_time()  # node-load start
        for row in spamreader:
            model.addNode(catalog, row)
        t2_stop = process_time()  # node-load end
    with open(edgesfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        t3_start = process_time()  # edge-load start
        for row in spamreader:
            model.addEdge(catalog, row)
        t3_stop = process_time()  # edge-load end
    t1_stop = process_time()  # total end time
    print("Tiempo de ejecución carga de grafo de vuelos", t1_stop - t1_start,
          " segundos\n"
          "Tiempo de carga de nodos", t2_stop - t2_start, "segundos\n"
          "Tiempo de carga de arcos", t3_stop - t3_start, "segundos")
def cargarArchivosUnificados(details, casting, cmpfunction=None):
    """Load the details file, then merge the casting columns into each movie
    record.  Rows are matched by position (1-based index i) and verified by
    comparing the 'id' field.  A progress percentage is printed while
    loading; 3290 rows are assumed to be ~1% of the dataset — TODO confirm.
    """
    lst = iniciarCatalogo()
    dialect = csv.excel()
    dialect.delimiter = ";"
    i = 0
    p = 0
    print("Cargando archivos...")
    with open(cf.data_dir + details, encoding="utf-8-sig") as csvfile1:
        row = csv.DictReader(csvfile1, dialect=dialect)
        for elemento in row:
            if i % 3290 == 0:
                print(" " + str(p) + "%" + " completado", end="\r")
                p += 1
            model.agregarFinal(lst, elemento)
            i += 1
    print(" 100%" + " completado\n")
    print("Uniendo datos...")
    # utf-8-sig avoids a decode error seen with the large files
    with open(cf.data_dir + casting, encoding="utf-8-sig") as csvfile2:
        row = csv.DictReader(csvfile2, dialect=dialect)
        i = 1
        p = 0
        for elemento in row:
            if i % 3290 == 0:
                print(" " + str(p) + "%" + " completado", end="\r")
                p += 1
            # Merge the casting columns into the matching details record.
            if elemento["id"] == model.buscarPeliculas(lst, i)["id"]:
                for column in elemento:
                    if column != "id":
                        model.buscarPeliculas(lst, i)[column] = elemento[column]
            i += 1
    print(" 100%" + " completado\n")
    return lst
def loadCSVFile (file, sep=";"): """ Carga un archivo csv a una lista Args: file Archivo csv del cual se importaran los datos sep = ";" Separador utilizado para determinar cada objeto dentro del archivo Try: Intenta cargar el archivo CSV a la lista que se le pasa por parametro, si encuentra algun error Borra la lista e informa al usuario Returns: None """ #lst = lt.newList("ARRAY_LIST") #Usando implementacion arraylist lst = lt.newList() #Usando implementacion linkedlist print("Cargando archivo ....") t1_start = process_time() #tiempo inicial dialect = csv.excel() dialect.delimiter=sep try: with open(file, encoding="utf-8") as csvfile: spamreader = csv.DictReader(csvfile, dialect=dialect) for row in spamreader: lt.addLast(lst,row) except: print("Hubo un error con la carga del archivo") t1_stop = process_time() #tiempo final print("Tiempo de ejecución ",t1_stop-t1_start," segundos") return lst
def test_carga():
    """The ADT list content must mirror a plain Python list of the rows."""
    plain = []
    adt = lt.newList('ARRAY_LIST', cmpfunction)
    file = config.data_dir + 'MoviesCastingRaw-small.csv'
    sep = ';'
    dialect = csv.excel()
    dialect.delimiter = sep
    assert (lt.size(adt) == 0), "La lista no empieza en cero."
    try:
        with open(file, encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile, dialect=dialect):
                plain.append(row)
                lt.addLast(adt, row)
    except:
        assert False, "Se presento un error al cargar el archivo."
    assert len(plain) == lt.size(adt), "Son diferentes tamaños."
    for idx in range(len(plain)):
        assert lt.getElement(
            adt, idx + 1) == plain[idx], "Las listas no estan en el mismo orden."
def cargar_directores(file, sep=";"): lst = lt.newList( 'SINGLE_LINKED', comparar_director) #Usando implementacion linkedlist print("Cargando archivo ....") t1_start = process_time() #tiempo inicial dialect = csv.excel() dialect.delimiter = sep try: with open(file, encoding="utf-8") as csvfile: spamreader = csv.DictReader(csvfile, dialect=dialect) for row in spamreader: director = {} director["nombre"] = row["director_name"] posicion1 = lt.isPresent( lst, director["nombre"]) if posicion1 == 0: director["peliculas"] = lt.newList() lt.addLast(director["peliculas"], row["id"]) lt.addLast(lst, director) else: directores = lt.getElement(lst, posicion1) lt.addLast(directores["peliculas"], row["id"]) except: print("Hubo un error con la carga del archivo") t1_stop = process_time() #tiempo final print("Tiempo de ejecución ", t1_stop - t1_start, " segundos") return lst
def test_carga():
    """Load test: the ADT list content must mirror a plain Python list.

    Bug fixes: the original path literal "Data\\theMoviesdb\\..." contained
    the TAB escape "\\t", and open() was called with the misspelled keyword
    `enconding`, so the load always failed into the except branch.
    """
    lista = []
    lst = lt.newList()
    file = "Data/theMoviesdb/MoviesCastingRaw-small.csv"
    sep = ";"
    dialect = csv.excel()
    dialect.delimiter = sep
    assert (lt.size(lst) == 0), "la lista no empieza en cero"
    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)
            for row in reader:
                lista.append(row)
                lt.addLast(lst, row)
    except:
        assert False, "Se presento un error al cargar el archivo"
    assert len(lista) == lt.size(lst), "son diferentes tamaños"
    for i in range(len(lista)):
        assert lt.getElement(
            lst, i + 1) == lista[i], "las listas no estan en el mismo orden"
def export_csv(request):
    """Return every Transaction record as a zipped CSV attachment (Python 2).

    The CSV is written into an in-memory buffer, zipped directly into the
    HttpResponse, and the Content-Length header is set from the response
    size.
    """
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename=transactions.csv.zip'
    zip_file = zipfile.ZipFile(response, "w", zipfile.ZIP_DEFLATED)
    csv_file = StringIO.StringIO()
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    csv_writer = csv.writer(csv_file, dialect=dialect)
    for transaction in Transaction.objects.order_by("date"):
        # generate chunk
        csv_writer.writerow([
            transaction.date,
            transaction.pirate_account.account.iban,
            transaction.amount,
            # beneficiary may have no current banking account
            transaction.beneficiary.current_banking_account.iban
            if transaction.beneficiary.current_banking_account else "",
            transaction.BIC,
            transaction.beneficiary.lastname + " " + transaction.beneficiary.firstname,
            "%s %s %s" % (transaction.beneficiary.street,
                          transaction.beneficiary.postal_code,
                          transaction.beneficiary.city),
            transaction.code,
            transaction.statement.encode("utf-8")])
    zip_file.writestr("transactions.csv", csv_file.getvalue())
    csv_file.close()
    zip_file.close()
    # generate the file
    response['Content-Length'] = response.tell()
    return response
def get_company(citys):
    """Query the Haval dealer endpoint for every city and dump the dealers
    (one row per car model) into haval.csv.

    Args:
        citys: iterable of (province, city) pairs.
    """
    columns = ['车系', '省', '城市', '销售公司']
    s = requests.Session()
    url = 'http://mall.haval.com.cn/cars/getDealerByType.html'
    with open('haval.csv', 'w', newline='') as csvfile:
        detail_writer = csv.writer(csvfile, dialect=csv.excel())
        detail_writer.writerow(columns)
        for city in citys:
            # Retry the POST until it succeeds with HTTP 200.
            while True:
                try:
                    r = s.post(url, data={'city': city[1]}, headers=headers,
                               timeout=10)
                    time.sleep(1)
                    if r.status_code == 200:
                        break
                except Exception as e:
                    print(e)
            data = r.json()['list']
            print(city)
            for x in data:
                cars = x['carModel'].split(',')
                company = x['storeName']
                for car in cars:
                    detail_writer.writerow([car, city[0], city[1], company])
def test_carga():
    """Load test: the ADT list content must mirror a plain Python list.

    Bug fix: the original bound the name `lst` to a plain list and then
    immediately rebound it to the ADT list, so every later comparison
    (len(lst) vs lt.size(lst), getElement vs indexing) compared the ADT
    with itself.  A separate name is now used for the plain list.
    """
    plain = []
    lst = lt.newList()
    file = "Data/GoodReads/books.csv"
    sep = ','
    dialect = csv.excel()
    dialect.delimiter = sep
    assert (lt.size(lst) == 0), "la lista no empieza en cero"
    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)
            for row in reader:
                plain.append(row)
                lt.addLast(lst, row)
    except:
        assert False, "Se presento un error al cargar el archivo"
    assert len(plain) == lt.size(lst), "Son diferentes tamaños"
    for i in range(len(plain)):
        assert lt.getElement(
            lst, i + 1) == plain[i], "Las listas no estan en el mismo orden"
def loadCSVFile(file, lst):
    """Append every row of the ';'-separated CSV file to lst."""
    dialect = csv.excel()
    dialect.delimiter = ';'
    with open(file, encoding='utf-8-sig') as data:
        reader = csv.DictReader(data, dialect=dialect)
        for record in reader:
            lt.addLast(lst, record)
def parse(self, data):
    """Parse CSV bank-statement data into a list of statement objects.

    The first record is probed to decide between the ';'/'"' format and
    the ','/"'" format.  A new statement object is started whenever the
    statement id changes.
    """
    result = []
    stmnt = None
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ';'
    lines = data.split('\n')
    # Probe first record to find out which format we are parsing.
    if lines and lines[0].count(',') > lines[0].count(';'):
        dialect.delimiter = ','
        dialect.quotechar = "'"
    for record in csv.reader(lines, dialect=dialect):
        if not record:
            continue  # skip empty (last) lines
        msg = transaction_message(record)
        if stmnt and stmnt.id != msg.statement_id:
            # Statement boundary: flush the finished statement.
            result.append(stmnt)
            stmnt = None
        if stmnt is None:
            stmnt = statement(msg)
        else:
            stmnt.import_transaction(msg)
    result.append(stmnt)
    return result
def parse(self, cr, data):
    """Parse CSV bank-statement data into a list of statement objects.

    The first record is probed to detect the delimiter (';' vs ',') and
    the quote character ('"' vs "'").  A new statement object is started
    whenever the statement id changes; transaction lines are not numbered,
    so a per-statement counter (subno) is maintained.

    NOTE(review): the first line of a new statement is numbered with the
    previous statement's counter value because subno is reset only after
    the message is built — confirm this is intended.
    """
    result = []
    stmnt = None
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ';'
    lines = data.split('\n')
    # Probe first record to find out which format we are parsing.
    if lines and lines[0].count(',') > lines[0].count(';'):
        dialect.delimiter = ','
    if lines and lines[0].count("'") > lines[0].count('"'):
        dialect.quotechar = "'"
    # Transaction lines are not numbered, so keep a tracer
    subno = 0
    for line in csv.reader(lines, dialect=dialect):
        # Skip empty (last) lines
        if not line:
            continue
        subno += 1
        msg = transaction_message(line, subno)
        if stmnt and stmnt.id != msg.statement_id:
            result.append(stmnt)
            stmnt = None
            subno = 0
        if not stmnt:
            stmnt = statement(msg)
        else:
            stmnt.import_transaction(msg)
    result.append(stmnt)
    return result
def loadCSVFile(file, lst, sep=";"): """ Carga un archivo csv a una lista Args: file Archivo de texto del cual se cargaran los datos requeridos. lst :: [] Lista a la cual quedaran cargados los elementos despues de la lectura del archivo. sep :: str Separador escodigo para diferenciar a los distintos elementos dentro del archivo. Try: Intenta cargar el archivo CSV a la lista que se le pasa por parametro, si encuentra algun error Borra la lista e informa al usuario Returns: None """ del lst[:] print("Cargando archivo ....") t1_start = process_time() #tiempo inicial dialect = csv.excel() dialect.delimiter = sep try: with open(file, encoding="utf-8") as csvfile: spamreader = csv.DictReader(csvfile, dialect=dialect) for row in spamreader: lst.append(row) except: del lst[:] print("Se presento un error en la carga del archivo") t1_stop = process_time() #tiempo final print("Tiempo de ejecución ", t1_stop - t1_start, " segundos")
def loadPeliculas(lst, file):
    """Load the movies from the ';'-separated CSV, dropping the columns that
    were not requested for the lab, and add each movie to lst.

    Args:
        lst: the target list/catalog passed to model.addMovie.
        file: file name relative to cf.data_dir.

    Returns:
        lst (also on load error, after printing a message).
    """
    dialect = csv.excel()
    dialect.delimiter = ";"
    # Columns not requested for the lab — removed from every row.
    unused_columns = (
        "id", "budget", "genres", "imdb_id", "original_language", "overview",
        "popularity", "production_companies", "production_countries",
        "revenue", "runtime", "status", "tagline", "original_title",
        "production_companies_number", "spoken_languages_number",
        "production_countries_number",
    )
    try:
        with open(cf.data_dir + file, encoding="utf-8") as csvfile:
            for elemento in csv.DictReader(csvfile, dialect=dialect):
                for column in unused_columns:
                    elemento.pop(column)
                model.addMovie(lst, elemento)
    except:
        print("Hubo un error con la carga del archivo")
    return lst
def loadMovies(catalog, moviesfile):
    """
    Load every line of the movies file into the catalog.

    For each movie: adds it to the catalog, registers its production
    companies, builds a small [title, year, id] list that is indexed by
    production country, and registers the movie under each of its
    '|'-separated genres.
    """
    moviesfile = cf.data_dir + moviesfile
    dialect = csv.excel()
    dialect.delimiter = ';'
    try:
        with open(moviesfile, encoding="utf-8-sig") as csvfile:
            for movie in csv.DictReader(csvfile, dialect=dialect):
                lst = model.nueva_lista("ARRAY_LIST")
                model.addMovie(catalog, movie)
                producers = movie['production_companies']   # production companies
                countries = movie['production_countries']   # production countries
                release_date = movie['release_date']
                year = release_date.split("/")
                model.addMovieProducer(catalog, producers, movie)
                # [title, year, id] record indexed by country below
                model.añanir_pelicula(lst, movie['title'])
                model.añanir_pelicula(lst, year[-1])
                model.añanir_pelicula(lst, movie['id'])
                model.addCountry(catalog, countries, lst)
                for genero in movie['genres'].split('|'):
                    model.addMovieGenre(catalog, genero, movie)
    except Exception:
        # BUG FIX: narrowed from a bare ``except:`` -- a bare except also
        # caught KeyboardInterrupt/SystemExit and hid genuine coding errors.
        print("Hubo un error en la carga de archivos")
def loadBooks(catalog, sep=';'):
    """
    Load the movies file into the catalog.

    Each row is added to the catalog list, to the title-keyed map, and to
    the genre index, timing the whole pass with process_time.

    BUG FIX: ``sep`` used to be ignored (the delimiter was hard-coded to
    ';' while the default said ','). It is now honored, and the default is
    ';' so calls that relied on the default behave exactly as before.
    """
    t1_start = process_time()  # start time
    booksfile = cf.data_dir + 'GoodReads/AllMoviesDetailsCleaned.csv'
    dialect = csv.excel()
    dialect.delimiter = sep
    with open(booksfile, encoding="utf-8-sig") as csvfile:
        for row in csv.DictReader(csvfile, dialect=dialect):
            # Row goes to the list, the title map, and the genre index.
            model.addMovieList(catalog, row)
            model.addMovieMap(catalog, row)
            model.add_gen(catalog, row)
    t1_stop = process_time()  # end time
    print("Tiempo de ejecución carga películas:", t1_stop - t1_start, " segundos")
def report_MLST_result_in_csv_file(output_directory, ids, locusList, FinalResults):
    """
    Write the MLST typing result for sample ``ids`` to
    ``<output_directory>/<ids>_MLST_result.csv``.

    The file contains the ST value, the predicted serotype, a header row,
    and one row per locus in ``locusList`` with coverage/depth metrics plus
    a human-readable description of the reported SNP and INDEL (if any).

    ``FinalResults`` must map "ST", "predicted_serotype" and each locus
    name to its per-locus result dict (see the key lookups below).
    """
    indel_pos_including_flanking_region = 0  # NOTE(review): written but never read here
    all_allele_variants = []                 # NOTE(review): collected but never read here
    SNPData = None
    INDELdata = None
    Results_file = output_directory + "/" + ids + "_MLST_result.csv"
    # BUG FIX: the file was opened in "wb", which makes csv.writer fail under
    # Python 3; open in text mode and let the dialect supply the CRLF endings.
    with open(Results_file, "w", newline="") as csv_fp:
        dial = csv.excel()
        dial.lineterminator = '\r\n'
        csvWriter = csv.writer(csv_fp, dialect=dial)
        st_value = FinalResults["ST"]
        csvWriter.writerow(["st value:", st_value])
        csvWriter.writerow(["Predicted Serotype", FinalResults["predicted_serotype"]])
        header_row = ["locus name", "allele variant", "Percentage coverage",
                      "Max percentage of non consensus bases", "minimum total depth",
                      "mean consensus depth",
                      "numberOfSNPs", "SNPsLists", "INDELs", "INDELsLists"]
        csvWriter.writerow(header_row)
        for locus in locusList:
            array = []
            locus_result = FinalResults[locus]
            allele_variant = locus_result["ReportedVariantNumber"]
            all_allele_variants.append(allele_variant)
            percentage_coverage = locus_result["percentage_coverage"]
            max_percentage_of_non_consensus_bases = locus_result["max_percentage_of_non_consensus_bases"]
            minimum_total_depth = locus_result["minimum_total_depth"]
            mean_consensus_depth = locus_result["mean_consensus_depth"]
            SNPsListsHash = locus_result["SNPsListsHash"]
            numberOfSNPs = locus_result["numberOfSNPs"]
            numberOfINDELs = locus_result["numberOfINDELs"]
            INDELsListsHash = locus_result["INDELsListsHash"]
            if int(numberOfSNPs) >= 1:
                # NOTE(review): only the description of the *last* SNP survives
                # the loop, matching the original behaviour -- confirm intended.
                for (pos, ref, dist) in SNPsListsHash:
                    ds = 'A(' + str(dist['a']) + ') C(' + str(dist['c']) + ') G(' + str(dist['g']) + ') T(' + str(dist['t']) + ')'
                    pos = pos - 100  # remove flanking region
                    # BUG FIX: the original had a comma before str(ds), which
                    # made SNPData a tuple instead of one concatenated string.
                    SNPData = "SNP-position:" + str(pos) + " reference base:" + ref + " SNP type:" + str(ds)
            else:
                SNPData = None
            if int(numberOfINDELs) >= 1:
                for (pos, ref, TypeOfINDELs) in INDELsListsHash:
                    ds = ','.join(str(dist) for dist in TypeOfINDELs)
                    # Keep the raw (flanked) position; the original recorded it
                    # in a second, otherwise redundant loop over INDELsListsHash.
                    indel_pos_including_flanking_region = pos
                    pos = pos - 100  # remove flanking region
                    INDELdata = "INDEL-position:" + str(pos) + " reference base:" + ref + " INDEL type:" + str(ds)
            else:
                INDELdata = None
            array.extend((locus, allele_variant, percentage_coverage,
                          max_percentage_of_non_consensus_bases,
                          minimum_total_depth, mean_consensus_depth,
                          numberOfSNPs, SNPData, numberOfINDELs, INDELdata))
            csvWriter.writerow(array)
def restoreDefaults(self):
    """
    Restore the options to default state.
    """
    # Keep whatever `_options` a client assigned (needed by `reset`)
    # while everything else is reset through setOptions.
    saved_options = self._options
    self.setOptions(Options("utf-8", csv.excel()))
    self._options = saved_options
def writeloops(self, loopnum=None, dir=None, append='--Corr', log=True):
    '''
    Write H, M arrays for loops to a file.

    The selected loops are written as interleaved H, M, H, M, ... columns to
    a CSV file placed next to the source data file (or in `dir`).

    loopnum : which loop(s) to write; if None, guessed from the file name
        ('easy'/'hard' -> 2, 'minor' -> 'all'), defaulting to 'all'.
    dir     : output directory; defaults to the directory of self.filepath.
    append  : suffix appended to the source file name for the output name.
    log     : if True, also persist the log via self.writelog().
    '''
    if loopnum is None:
        # if loopnum not given, make choice based on file name
        loopnummap = {'easy':2, 'hard':2, 'minor':'all'}
        lfilename = os.path.split(self.filepath)[1].lower()
        for k in loopnummap:
            if k in lfilename:
                loopnum = loopnummap[k]
        if loopnum is None:
            # if none of the words in loopnummap are found, default to 'all'
            loopnum = 'all'
    loopind = self._loopind(loopnum)
    indir, fn = os.path.split(self.filepath)
    outdir = indir if dir is None else dir
    loopfn = os.path.splitext(fn)[0] + append + '.csv'
    looppath = os.path.join(outdir, loopfn)
    # if file exists, start appending numbers
    if os.path.isfile(looppath):
        # glob-style pattern 'name_?--Corr.csv' matches previously numbered outputs
        matches = fnmatch.filter(os.listdir(outdir), '??'.join(os.path.splitext(loopfn)))
        if not any(matches):
            looppath = '_2'.join(os.path.splitext(looppath))
        else:
            # NOTE(review): p[-5] reads the single digit before '.csv', so this
            # numbering scheme supports at most 9 files -- confirm acceptable.
            n = np.max([int(p[-5]) for p in matches])
            looppath = ('_'+str(n+1)).join(os.path.splitext(looppath))
    # Output will be alternating H, M, H, M, ...
    # not straightforward because loops may have different lengths
    # filter out unwanted loops, convert to kOe
    H, M = [], []
    for i, [h, m] in enumerate(zip(self.H, self.M)):
        if i in loopind:
            H.append(h/1000)  # convert field to kOe
            M.append(m)
    # Append the interpolated minor loop at the end if it exists
    if hasattr(self, 'H_zminor'):
        H.append(self.H_zminor)
        M.append(self.M_zminor)
    # interleave loops, with padding empty spaces with None
    # don't ask...
    raggedlooparray = zipl(*[x for t in zip(H, M) for x in t])
    # NOTE(review): "wb" mode with csv.writer is a Python 2 idiom; under
    # Python 3 this would need mode "w" with newline='' -- confirm runtime.
    with open(looppath, "wb") as f:
        # lines terminate with \r\n by default, change to \n
        excelmod = csv.excel()
        excelmod.lineterminator = '\n'
        writer = csv.writer(f, dialect=excelmod)
        writer.writerows(raggedlooparray)
    print('Loop(s) {} written to {}'.format(loopnum, looppath))
    self.log += '{}: Wrote loop(s) {} to disk: {}\n'.format(_now(), loopnum, looppath)
    if log:
        self.writelog(dir=dir)
def toCSV(self, fields, data):
    """
    Serialize ``data`` (an iterable of dicts keyed by ``fields``) to a CSV
    string using the configured delimiter, optionally preceded by a header
    row when getShowHeader() is true.
    """
    dialect = csv.excel()
    dialect.delimiter = self.getDelimiter()
    buffer = StringIO()
    writer = DictWriter(buffer, fieldnames=fields, dialect=dialect)
    if self.getShowHeader():
        # Idiomatic replacement for writerow(dict(zip(fields, fields))).
        writer.writeheader()
    writer.writerows(data)
    return buffer.getvalue()
def __init__(self, encoding='utf-8', dialect=None, columntypes=None,
             rowspec=None,
             decimal_separator=".", group_separator=""):
    # type: (str, csv.Dialect, List[Tuple[range, ColumnType]], ...) -> None
    """
    Import options for a CSV source.

    BUG FIX: the original signature used mutable/shared defaults -- one
    ``csv.excel()`` instance, one ``[]`` and one rowspec list shared by every
    default-constructed Options -- so mutating one instance's dialect or
    rowspec silently changed all the others. None sentinels restore
    per-instance defaults without changing default-call behaviour.
    """
    self.encoding = encoding
    self.dialect = dialect if dialect is not None else csv.excel()
    self.columntypes = columntypes if columntypes is not None else []
    if rowspec is None:
        # RowSpec is reachable as Options.RowSpec (see the type comment
        # below); NOTE(review): confirm it is not only a module-level name.
        rowspec = [(range(0, 1), Options.RowSpec.Header)]
    self.rowspec = rowspec  # type: List[Tuple[range, Options.RowSpec]]
    self.decimal_separator = decimal_separator
    self.group_separator = group_separator
def read(self):
    """
    Parse self.filename as CSV and return the resulting data table.

    Tries a sequence of encodings from cheapest to most permissive; for each
    candidate the CSV dialect is sniffed and the whole file parsed, falling
    through to the next encoding on failure. Raises ValueError when every
    attempt fails.
    """
    for encoding in (lambda: ('us-ascii', None),                  # fast
                     lambda: (detect_encoding(self.filename), None),  # precise
                     lambda: (locale.getpreferredencoding(False), None),
                     lambda: (sys.getdefaultencoding(), None),    # desperate
                     lambda: ('utf-8', None),                     # ...
                     lambda: ('utf-8', 'ignore')):                # fallback
        encoding, errors = encoding()
        # Clear the error flag for all except the last check, because
        # the error of second-to-last check is stored and shown as warning in owfile
        if errors != 'ignore':
            error = ''
        with self.open(self.filename, mode='rt', newline='',
                       encoding=encoding, errors=errors) as file:
            # Sniff the CSV dialect (delimiter, quotes, ...)
            try:
                dialect = csv.Sniffer().sniff(
                    # Take first couple of *complete* lines as sample
                    ''.join(file.readline() for _ in range(5)),
                    self.DELIMITERS)
            except UnicodeDecodeError as e:
                # Wrong encoding guess: remember it and try the next one.
                error = e
                continue
            except csv.Error:
                # Sniffing failed: fall back to the excel dialect with the
                # first preferred delimiter.
                dialect = csv.excel()
                dialect.delimiter = self.DELIMITERS[0]
            file.seek(0)
            dialect.skipinitialspace = True
            try:
                reader = csv.reader(file, dialect=dialect)
                data = self.data_table(reader)
                # TODO: Name can be set unconditionally when/if
                # self.filename will always be a string with the file name.
                # Currently, some tests pass StringIO instead of
                # the file name to a reader.
                if isinstance(self.filename, str):
                    data.name = path.splitext(path.split(self.filename)[-1])[0]
                if error and isinstance(error, UnicodeDecodeError):
                    # A previous stricter attempt failed; surface which bytes
                    # were skipped by the permissive retry.
                    pos, endpos = error.args[2], error.args[3]
                    warning = ('Skipped invalid byte(s) in position '
                               '{}{}').format(pos, ('-' + str(endpos)) if (endpos - pos) > 1 else '')
                    warnings.warn(warning)
                self.set_table_metadata(self.filename, data)
                return data
            except Exception as e:
                # Parsing failed: remember the error and try the next encoding.
                error = e
                continue
    raise ValueError('Cannot parse dataset {}: {}'.format(self.filename, error)) from error
def test_serializable_dialect_fields():
    """
    Round-trip a csv.excel dialect through SerializableDialect and verify
    that every dialect attribute survives both conversions.
    """
    source = csv.excel()
    serializable = SerializableDialect.from_dialect(source)
    restored = serializable.to_dialect()
    for attr in ('delimiter', 'doublequote', 'escapechar',
                 'lineterminator', 'quotechar', 'quoting',
                 'skipinitialspace'):
        assert hasattr(serializable, attr)
        assert getattr(serializable, attr) == getattr(source, attr)
        assert getattr(source, attr) == getattr(restored, attr)
def get_csv(self):
    """
    GET self.url, require a 200 response, parse the body as strict
    excel-dialect CSV, and return the rows as a list of dicts whose
    values are decoded from UTF-8.
    """
    response = self.client.get(self.url)
    self.assertEqual(200, response.status_code)
    strict_dialect = csv.excel()
    strict_dialect.strict = True
    rows = []
    for record in csv.DictReader(StringIO(response.content), dialect=strict_dialect):
        rows.append({key: value.decode('utf-8')
                     for key, value in record.iteritems()})
    return rows
def read_from_file(fin, is_firstline_title=True, encoding='shift_jis'):
    """
    Read a CSV stream into a list of dicts (one per row), with every key and
    value whitespace-stripped.

    Args:
        fin: text stream; if its encoding differs from ``encoding``, the
            underlying buffer is detached and re-wrapped with a codecs reader.
        is_firstline_title: must be True -- the first line is used as header.
        encoding: expected stream encoding (default 'shift_jis').

    Raises:
        Exception: when is_firstline_title is False (unsupported).
    """
    if not is_firstline_title:  # idiomatic form of `== False`
        raise Exception("Does not support for is_firstline_title=False."
                        "Please add title line at first.")
    if fin.encoding != encoding:
        # Re-decode the raw buffer with the requested encoding.
        fin = codecs.getreader(encoding)(fin.detach())
    rows = csv.DictReader(fin, dialect=csv.excel(), quotechar='"',
                          restkey="rest")
    # Strip surrounding whitespace from both keys and values.
    return [{key.strip(): value.strip() for key, value in row.items()}
            for row in rows]
def test_import_widget(self):
    """
    Exercise CSVImportWidget end to end: sample parsing with the excel
    dialect, column-type and row-state assignment, and incremental row
    fetching with the tab dialect. Steps are order-dependent Qt state.
    """
    w = textimport.CSVImportWidget()
    w.setDialect(csv.excel())
    # DATA1: expected to parse as a single 4-column row.
    w.setSampleContents(io.BytesIO(DATA1))
    view = w.dataview
    model = view.model()
    self.assertEqual(model.columnCount(), 4)
    self.assertEqual(model.rowCount(), 1)
    self.assertEqual(model.canFetchMore(), False)
    # DATA2: expected to produce an empty preview model.
    w.setSampleContents(io.BytesIO(DATA2))
    model = view.model()
    self.assertEqual(model.columnCount(), 0)
    self.assertEqual(model.rowCount(), 0)
    self.assertEqual(model.canFetchMore(), False)
    # DATA4: 4 columns x 3 rows; used to test type/state assignment below.
    w.setSampleContents(io.BytesIO(DATA4))
    model = view.model()
    self.assertEqual(model.columnCount(), 4)
    self.assertEqual(model.rowCount(), 3)
    types = {
        0: ColumnTypes.Categorical,
        1: ColumnTypes.Numeric,
        2: ColumnTypes.Text,
        3: ColumnTypes.Time,
    }
    # Column types should round-trip unchanged through the widget.
    w.setColumnTypes(types)
    self.assertEqual(w.columnTypes(), types)
    rs = w.rowStates()
    self.assertEqual(rs, {})
    # Setting the same header state twice (single + bulk) must be idempotent.
    w.setStateForRow(0, textimport.TablePreview.Header)
    w.setRowStates({0: textimport.TablePreview.Header})
    self.assertEqual(w.rowStates(), {0: textimport.TablePreview.Header})
    w.setStateForRow(1, textimport.TablePreview.Skipped)
    view.grab()  # force a paint pass over the preview
    # DATA5 with excel dialect: collapses to one column ...
    w.setSampleContents(io.BytesIO(DATA5))
    model = view.model()
    self.assertEqual(model.columnCount(), 1)
    # ... but splits into two columns with the tab dialect.
    w.setDialect(csv.excel_tab())
    w.setSampleContents(io.BytesIO(DATA5))
    model = view.model()
    self.assertEqual(model.columnCount(), 2)
    self.assertTrue(model.canFetchMore())
    # fetchMore must append rows and emit rowsInserted exactly once.
    rows = model.rowCount()
    spy = QSignalSpy(model.rowsInserted)
    model.fetchMore()
    self.assertGreater(model.rowCount(), rows)
    self.assertEqual(len(spy), 1)
def asCsvDialect(self):
    """
    Build and return a ``csv.Dialect`` mirroring this format's settings.

    Doubled quoting is used exactly when the escape character equals the
    quote character; otherwise the escape character (possibly None) is
    installed directly.
    """
    dialect = csv.excel()
    dialect.lineterminator = self.lineDelimiter
    dialect.delimiter = str(self.itemDelimiter)
    dialect.quotechar = str(self.quoteChar)
    dialect.doublequote = self.escapeChar == self.quoteChar
    dialect.escapechar = None if self.escapeChar is None else str(self.escapeChar)
    dialect.skipinitialspace = self.blanksAroundItemDelimiter
    return dialect
def _make_reader(self, f):
    """
    Install a row reader for stream ``f`` on self.data.

    For a None or single-character delimiter, a csv-based reader is built
    (optionally sniffing the delimiter from the first usable line); for a
    multi-character delimiter, a regex-splitting generator is used instead.
    Python 2 era code: note `basestring` and com.UnicodeReader.
    """
    sep = self.delimiter
    if sep is None or len(sep) == 1:
        sniff_sep = True
        # default dialect
        if self.dialect is None:
            dia = csv.excel()
        elif isinstance(self.dialect, basestring):
            # named dialect registered with the csv module
            dia = csv.get_dialect(self.dialect)
        else:
            dia = self.dialect
        if sep is not None:
            # explicit single-char delimiter: no sniffing needed
            sniff_sep = False
            dia.delimiter = sep
        # attempt to sniff the delimiter
        if sniff_sep:
            line = f.readline()
            # advance past skipped rows, tracking position in self.pos
            while self.pos in self.skiprows:
                self.pos += 1
                line = f.readline()
            line = self._check_comments([line])[0]
            self.pos += 1
            sniffed = csv.Sniffer().sniff(line)
            dia.delimiter = sniffed.delimiter
            # the sniffed line was consumed: parse it now and buffer the
            # result so it is not lost to the main reader below
            if self.encoding is not None:
                self.buf.extend(list(
                    com.UnicodeReader(StringIO(line),
                                      dialect=dia,
                                      encoding=self.encoding)))
            else:
                self.buf.extend(list(csv.reader(StringIO(line), dialect=dia)))
        if self.encoding is not None:
            reader = com.UnicodeReader(f, dialect=dia, encoding=self.encoding)
        else:
            reader = csv.reader(f, dialect=dia)
    else:
        # multi-character separator: csv cannot handle it, split with a regex
        reader = (re.split(sep, line.strip()) for line in f)
    self.data = reader
def __init__(self, stream, types, header, sep=",", skipHeader=False):
    """
    CSV-style record reader over ``stream``.

    stream     : file-like object rows are read from.
    types      : dict of field name -> converter callable (used as-is), or
                 dict of field name -> type name looked up in the ``cast``
                 table, or None to treat every field as "string".
    header     : separator-joined line naming the fields.
    sep        : field separator; None keeps the csv.excel default.
                 NOTE(review): ``header.split(None)`` then splits on runs of
                 whitespace -- confirm that is the intended behaviour.
    skipHeader : stored for the consumer; not used in this constructor.
    """
    self.stream = stream
    self.types = types
    self.skipHeader = skipHeader
    self.explicitHeader = header
    # Python 2 csv requires a byte-string delimiter.
    if sep is not None and sys.version_info < (3,):
        sep = asciistr(sep)
    self.dialect = csv.excel()
    if sep is not None:
        self.dialect.delimiter = sep
    self.fields = header.split(sep)
    if isinstance(self.types, dict) and all(map(callable, self.types.values())):
        # Caller supplied ready-made converter callables; use them directly.
        self._types = self.types
    else:
        # Build name -> converter map via the ``cast`` lookup table,
        # defaulting every field to "string" when no types were given.
        self._types = dict([(f, cast[t]) for f, t in (self.types.items() if self.types is not None else [(f, "string") for f in self.fields])])
def main(f1, f2, out):
    """
    Compare two gb18030-encoded line streams position by position and write
    one CSV row per line pair to ``out``.

    Each row contains the segments of the first stream's line obtained by
    cutting it at every character position where the two lines differ (the
    differing characters themselves are dropped). The last two bytes of each
    raw line (the line ending) are stripped before decoding. Both lines of a
    pair must decode to the same length.
    """
    dialect = csv.excel()
    dialect.lineterminator = "\n"
    writer = csv.writer(out, dialect)
    for raw1, raw2 in zip(f1, f2):
        line1 = raw1[:-2].decode("gb18030", "ignore")
        line2 = raw2[:-2].decode("gb18030", "ignore")
        assert len(line1) == len(line2)
        segments = []
        start = 0
        for idx, (ch1, ch2) in enumerate(zip(line1, line2)):
            if ch1 != ch2:
                # Cut before the mismatch and skip the differing character.
                segments.append(line1[start:idx])
                start = idx + 1
        segments.append(line1[start:])
        writer.writerow(segments)
def test_dialect(self): data = """\ label1,label2,label3 index1,"a,c,e index2,b,d,f """ dia = csv.excel() dia.quoting = csv.QUOTE_NONE df = read_csv(StringIO(data), dialect=dia) data = '''\ label1,label2,label3 index1,a,c,e index2,b,d,f ''' exp = read_csv(StringIO(data)) exp.replace('a', '"a', inplace=True) assert_frame_equal(df, exp)
def read_file(cls, filename, wrapper=None):
    """
    Parse ``filename`` as CSV and return the resulting data table, passed
    through ``wrapper`` (identity when wrapper is falsy or is Table).

    Encodings are tried from cheapest to most permissive; for each candidate
    the dialect is sniffed from the first 1024 characters and the whole file
    parsed, falling through to the next encoding on failure. Raises
    ValueError when every attempt fails.
    """
    wrapper = wrapper if wrapper and wrapper != Table else _IDENTITY
    import csv, sys, locale
    for encoding in (lambda: ('us-ascii', None),                 # fast
                     lambda: (detect_encoding(filename), None),  # precise
                     lambda: (locale.getpreferredencoding(False), None),
                     lambda: (sys.getdefaultencoding(), None),   # desperate
                     lambda: ('utf-8', None),                    # ...
                     lambda: ('utf-8', 'ignore')):               # fallback
        encoding, errors = encoding()
        # Clear the error flag for all except the last check, because
        # the error of second-to-last check is stored and shown as warning in owfile
        if errors != 'ignore':
            error = ''
        with cls.open(filename, mode='rt', newline='',
                      encoding=encoding, errors=errors) as file:
            # Sniff the CSV dialect (delimiter, quotes, ...)
            try:
                dialect = csv.Sniffer().sniff(file.read(1024), cls.DELIMITERS)
            except UnicodeDecodeError as e:
                # Wrong encoding guess: remember it and try the next one.
                error = e
                continue
            except csv.Error:
                # Sniffing failed: fall back to the excel dialect with the
                # first preferred delimiter.
                dialect = csv.excel()
                dialect.delimiter = cls.DELIMITERS[0]
            file.seek(0)
            dialect.skipinitialspace = True
            try:
                reader = csv.reader(file, dialect=dialect)
                data = cls.data_table(reader)
                if error and isinstance(error, UnicodeDecodeError):
                    # A stricter attempt failed earlier; surface which bytes
                    # were skipped by the permissive retry.
                    pos, endpos = error.args[2], error.args[3]
                    warning = ('Skipped invalid byte(s) in position '
                               '{}{}').format(pos, ('-' + str(endpos)) if (endpos - pos) > 1 else '')
                    warnings.warn(warning)
                return wrapper(data)
            except Exception as e:
                # Parsing failed: remember the error and try the next encoding.
                error = e
                continue
    raise ValueError('Cannot parse dataset {}: {}'.format(filename, error))