Example #1
def addElementsmapsDetails(catalog, file1, file2):
    """
    Loads the elements of the maps related to the details.
    """
    file1 = config.file_dir + file1
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file1 = csv.DictReader(open(file1, encoding="utf-8"),
                                 dialect=dialect)
    file2 = config.file_dir + file2
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file2 = csv.DictReader(open(file2, encoding="utf-8"),
                                 dialect=dialect)
    for movie1 in input_file1:
        lt.addLast(catalog['Data']['details'], movie1)
        md.addCompany(movie1, catalog)
        md.addGenre(movie1, catalog)
    iterator = it.newIterator(catalog['Data']['details'])
    for movie2 in input_file2:
        movie1 = it.next(iterator)
        lt.addLast(catalog['Data']['casting'], movie2)
        md.addDirector(movie2, movie1, catalog)
        md.addActor(movie2, movie1, catalog)
        md.addCountry(movie2, movie1, catalog)
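The loaders in these examples share one pattern: start from the default Excel dialect, override only the delimiter, and hand the result to csv.DictReader. A minimal, self-contained sketch of that pattern (the file name is hypothetical):

import csv

def read_semicolon_csv(path):
    # Start from the default Excel dialect and override only the delimiter.
    dialect = csv.excel()
    dialect.delimiter = ";"
    with open(path, encoding="utf-8") as f:
        return list(csv.DictReader(f, dialect=dialect))

# rows = read_semicolon_csv("movies.csv")  # hypothetical file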
Example #2
def loadTrips(catalog, sep=';'):
    """
    Loads the trips from the file.
    """
    t1 = process_time()  # start time
    flightsfile = cf.data_dir + 'tripxday_edges1/tripday_edges.csv'
    dialect = csv.excel()
    dialect.delimiter = sep
    with open(flightsfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        for row in spamreader:
            addTripNode(catalog, row)
    t2 = process_time()  # end time for vertices
    print("Execution time for loading vertices into the trips graph:", t2 - t1, " seconds")

    t3 = process_time()  # start time for edges
    flightsfile = cf.data_dir + 'tripxday_edges1/tripday_edges.csv'
    dialect = csv.excel()
    dialect.delimiter = sep
    with open(flightsfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        for row in spamreader:
            addTripEdge(catalog, row)
    t4 = process_time()  # end time for loading vertices and edges
    print("Execution time for loading edges into the trips graph:", t4 - t3, " seconds")

    print("Total execution time for loading the trips graph:", t4 - t1, " seconds")
Example #3
def check_and_return_header(uuids_fd, delimiter='\t', quotechar=''):
    uuid_field_names = set(['CCC_DID', 'UUID'])
    first_line = uuids_fd.readline()  # Read first line
    dialect = None
    has_header = False
    identifier_field_name = None
    try:
        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(first_line)
        has_header = sniffer.has_header(first_line)
    except csv.Error:
        dialect = csv.excel()
        dialect.delimiter = delimiter
        dialect.quotechar = quotechar
        dialect.quoting = csv.QUOTE_NONE
    # If delim is None, an empty string, or a definitely incorrect delim, set to TSV default
    if not dialect.delimiter or dialect.delimiter.isalnum():
        dialect = csv.excel()
        dialect.delimiter = delimiter
        dialect.quotechar = quotechar
        dialect.quoting = csv.QUOTE_NONE
    print('INFO: for UUID file, delimiter is %s and quote char is %s'
          % ('<TAB>' if dialect.delimiter == '\t' else dialect.delimiter,
             '<NONE>' if dialect.quoting == csv.QUOTE_NONE else dialect.quotechar))
    first_line = first_line.strip()
    fieldnames = first_line.split(dialect.delimiter)
    for token in fieldnames:
        if token in uuid_field_names:
            has_header = True
            identifier_field_name = token
            break
    if not has_header:
        print_error_and_exit('TSV/CSV file with CCC_DIDs/UUIDs: %s does not seem to have a header row'
                             % getattr(uuids_fd, 'name', '<unknown>'))
    return dialect, identifier_field_name, fieldnames
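The fallback dialect built above is the usual way to read tab-separated data with quoting disabled; a minimal sketch of just that piece (the file name is hypothetical):

import csv

def tsv_dialect():
    # Same fallback as above: Excel defaults, tab delimiter, quoting disabled.
    dialect = csv.excel()
    dialect.delimiter = '\t'
    dialect.quoting = csv.QUOTE_NONE  # quotechar is ignored in this mode
    return dialect

# with open('uuids.tsv', newline='') as f:  # hypothetical file
#     for row in csv.DictReader(f, dialect=tsv_dialect()):
#         print(row)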
Example #4
    def test_edge_then_vertex(self):
        with open(data_file("tiny-multigraph.csv")) as edge_file:
            edge_dataset = CsvDataset(edge_file, True, csv.excel())
            proj = projection.edge_with_collection_metadata(
                edge_dataset.headers(), 1, 2, 4)
            graph = from_dataset(edge_dataset, proj)

            with open(data_file("tiny-graph-vertex.csv")) as vertex_file:
                vertex_dataset = CsvDataset(vertex_file, True, csv.excel())
                vertex_proj = projection.vertex_with_single_metadata(
                    vertex_dataset.headers(), 0, ignored_values=["NULL"])
                same_graph = from_dataset(vertex_dataset, vertex_proj, graph)

                self.assertTrue(same_graph == graph)

            self.assertEqual(7, len(graph.nodes))
            self.assertEqual(2, graph["jon"]["john"]["weight"])
            attributes = graph["jon"]["john"]["attributes"]
            self.assertDictEqual(
                {
                    "date": "7/1/2018",
                    "subject": "Graphs are great",
                    "replyCount": "1"
                }, attributes[0])
            self.assertDictEqual(
                {
                    "date": "7/1/2018",
                    "subject": "Going to need to ask you to stay late tonight",
                    "replyCount": "1"
                }, attributes[1])
            self.assertDictEqual(
                {
                    "date": "7/2/2018",
                    "subject": "RE: Graphs are great",
                    "replyCount": "0"
                }, attributes[2])
            self.assertDictEqual(
                {
                    "date": "7/2/2018",
                    "subject":
                    "RE: Going to need to ask you to stay late tonight",
                    "replyCount": "0"
                }, attributes[3])
            self.assertDictEqual(
                {
                    "date": "7/2/2018",
                    "subject": "No I'm not Lumberg",
                    "replyCount": "0"
                }, attributes[4])

            self.assertDictEqual({"lastName": "larson"},
                                 graph.nodes["jon"]["attributes"][0])
            self.assertDictEqual(
                {
                    "lastName": "redhot",
                    "sandwichPreference": "buffalo chicken"
                }, graph.nodes["frank"]["attributes"][0])
Example #5
 def test_load_csv(self):
     contents = (
         b'1/1/1990,1.0,[,one,\n'
         b'1/1/1990,2.0,],two,\n'
         b'1/1/1990,3.0,{,three,'
     )
     ColumnType = owcsvimport.Options.ColumnType
     RowSpec = owcsvimport.Options.RowSpec
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Time),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Text),
             (range(3, 4), ColumnType.Categorical),
         ],
         rowspec=[]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (3, 5))
     self.assertSequenceEqual(
         list(df.dtypes),
         [np.dtype("M8[ns]"), np.dtype(float), np.dtype(object),
          "category", np.dtype(float)],
     )
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Skip),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Skip),
             (range(3, 4), ColumnType.Categorical),
             (range(4, 5), ColumnType.Skip),
         ],
         rowspec=[
             (range(1, 2), RowSpec.Skipped)
         ]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (2, 2))
     self.assertSequenceEqual(
         list(df.dtypes), [np.dtype(float), "category"]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 0]), [1.0, 3.0]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 1]), ["one", "three"]
     )
Example #6
 def test_load_csv(self):
     contents = (
         b'1/1/1990,1.0,[,one,\n'
         b'1/1/1990,2.0,],two,\n'
         b'1/1/1990,3.0,{,three,'
     )
     ColumnType = owcsvimport.Options.ColumnType
     RowSpec = owcsvimport.Options.RowSpec
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Time),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Text),
             (range(3, 4), ColumnType.Categorical),
             (range(4, 5), ColumnType.Auto),
         ],
         rowspec=[]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (3, 5))
     self.assertSequenceEqual(
         list(df.dtypes),
         [np.dtype("M8[ns]"), np.dtype(float), np.dtype(object),
          "category", np.dtype(float)],
     )
     opts = owcsvimport.Options(
         encoding="ascii",
         dialect=csv.excel(),
         columntypes=[
             (range(0, 1), ColumnType.Skip),
             (range(1, 2), ColumnType.Numeric),
             (range(2, 3), ColumnType.Skip),
             (range(3, 4), ColumnType.Categorical),
             (range(4, 5), ColumnType.Skip),
         ],
         rowspec=[
             (range(1, 2), RowSpec.Skipped)
         ]
     )
     df = owcsvimport.load_csv(io.BytesIO(contents), opts)
     self.assertEqual(df.shape, (2, 2))
     self.assertSequenceEqual(
         list(df.dtypes), [np.dtype(float), "category"]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 0]), [1.0, 3.0]
     )
     self.assertSequenceEqual(
         list(df.iloc[:, 1]), ["one", "three"]
     )
Example #7
    def read_file(cls, filename, wrapper=None):
        wrapper = wrapper or _IDENTITY
        import csv
        for encoding in (lambda: 'us-ascii',                 # fast
                         lambda: detect_encoding(filename),  # precise
                         lambda: 'utf-8'):                   # fallback
            with cls.open(filename, mode='rt', newline='', encoding=encoding()) as file:
                # Sniff the CSV dialect (delimiter, quotes, ...)
                try:
                    dialect = csv.Sniffer().sniff(file.read(1024), cls.DELIMITERS)
                except UnicodeDecodeError:
                    continue
                except csv.Error:
                    dialect = csv.excel()
                    dialect.delimiter = cls.DELIMITERS[0]

                file.seek(0)
                dialect.skipinitialspace = True

                try:
                    reader = csv.reader(file, dialect=dialect)
                    return wrapper(cls.data_table(reader))
                except Exception as e:
                    error = e
                    continue
        raise ValueError('Cannot parse dataset {}: {}'.format(filename, error))
Example #8
def read_csv(filename, delimiter=',', ig_blank_char=True, ig_blank_line=True,
             header_cnt=0, ig_chars=r'-/#', check_cnt=0):
    """Open a csv file returning list of lines (delimited list of strings)."""
    props = csv.excel()
    props.delimiter = delimiter
    lines = []
    try:
        with open(filename, 'r') as infile:
            read = csv.reader(infile, props)
            for lineno, line in enumerate(read):
                print(line)
                # Skip header lines and comment-prefixed lines
                if (lineno < header_cnt) or \
                   (line and line[0] and line[0][0] in ig_chars):
                    continue
                # Cleanup in the case delimiter contains spaces
                if ig_blank_char:
                    line = [item.strip() for item in line if item.strip()]
                # Ignore blank lines
                if line or not ig_blank_line:
                    lines.append(line)
                # Check count of items
                if check_cnt and check_cnt != len(line):
                    perror(">>> open_csv: Bad item count on {0}:{1}.".format(
                           filename, lineno))
    except IOError:
        perror(">>> open_csv: Error opening file {0}.".format(filename))
    return lines
Example #9
    def test_options_widget(self):
        w = textimport.CSVOptionsWidget()
        schanged = QSignalSpy(w.optionsChanged)
        sedited = QSignalSpy(w.optionsEdited)
        w.setDialect(csv.excel())
        self.assertEqual(len(schanged), 1)
        self.assertEqual(len(sedited), 0)
        w.setSelectedEncoding("iso8859-1")

        self.assertEqual(len(schanged), 2)
        self.assertEqual(len(sedited), 0)

        d = w.dialect()
        self.assertEqual(d.delimiter, csv.excel.delimiter)
        self.assertEqual(d.doublequote, csv.excel.doublequote)
        self.assertEqual(w.encoding(), "iso8859-1")

        d = textimport.Dialect("a", "b", "c", True, True)
        w.setDialect(d)

        cb = w.findChild(QComboBox, "delimiter-combo-box")
        self.assertEqual(cb.currentIndex(),
                         textimport.CSVOptionsWidget.DelimiterOther)
        le = w.findChild(QWidget, "custom-delimiter-edit")
        self.assertEqual(le.text(), "a")

        cb = w.findChild(QWidget, "quote-edit-combo-box")
        self.assertEqual(cb.currentText(), "b")
        d1 = w.dialect()
        self.assertEqual(d.delimiter, d1.delimiter)
        self.assertEqual(d.quotechar, d1.quotechar)
Example #10
    def __init__(self, parent: QWidget=None, dialect: csv.Dialect=csv.excel()) -> None:
        super().__init__(parent)

        self.tags: List[Tag] = []
        self._file: Optional[TextIO] = None
        self._writer: Optional[_CSVWriter] = None
        self.dialect = dialect
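One caveat with this signature: the default dialect: csv.Dialect = csv.excel() is evaluated once, at definition time, so every instance that relies on the default shares a single dialect object. A small sketch of the pitfall, using a hypothetical Options-like class:

import csv

class Opts:
    def __init__(self, dialect=csv.excel()):  # one shared instance!
        self.dialect = dialect

a = Opts()
a.dialect.delimiter = ";"   # mutates the shared default...
b = Opts()
print(b.dialect.delimiter)  # ';' -- ...and leaks into later instances

# Safer: default to None and build a fresh dialect per instance.
class SafeOpts:
    def __init__(self, dialect=None):
        self.dialect = dialect if dialect is not None else csv.excel()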
Example #11
 def save(self):
     messages = []
     success = False
     dialect = csv.excel()
     dialect.quotechar = '"'
     dialect.delimiter = ','
     
     records = csv.reader(self.cleaned_data["file"], dialect=dialect)
     for line in records:
         date = line[0]
         account = line[1]
         amount = line[2]
         beneficiary = line[3]
         BIC = line[4]
         name = line[5]
         address = line[6]
         code = line[7]
         statement = line[8]
         
         try:
             account = Banking_Account.clean(account)
         except NameError as e:
             messages.append("Error => Could not convert to IBAN. " + str(e))
             success = False
             continue
         
         if beneficiary: # payment to bank
             try:
                 beneficiary = Banking_Account.clean(beneficiary)
             except NameError as e:
                 messages.append("Error => Could not convert to IBAN. " + str(e))
                 success = False
                 continue
Example #12
    def __init__(self,pfile,verbose=True):
        self._pfile = pfile
        self._params = []

        with open(pfile,'r') as readfile:
            dia = csv.excel()
            dia.skipinitialspace = True
            reader = csv.reader(readfile,dia,delimiter=' ')

            # define data structure as named tuple for storing parameter values
            data = namedtuple('DataStruct',
                             ['name','npts','units','xunits','x','y','dydx'])

            # iterate through lines of file, checking for a header line; 
            # at each header, read the next npts lines of data into 
            # appropriate arrays.
            # continue until no headerline is found (throws StopIteration).  
            # Populate list of params with available variables.
            while True:
                try:
                    headerline = next(reader)
                except StopIteration:
                    break

                npts = int(headerline[0])               # size of abscissa, data arrays
                abscis = headerline[1]                  # string name of abscissa variable (e.g. 'psinorm')
                var = re.split(r'[()]', headerline[2])
                param = var[0]                          # string name of parameter (e.g. 'ne')
                units = var[1]                          # string name of units (e.g. '10^20/m^3')

                # read npts next lines, populate arrays
                x = []
                val = []
                gradval = []
                for j in range(npts):
                    dataline = next(reader)
                    x.append(float(dataline[0]))
                    val.append(float(dataline[1]))
                    gradval.append(float(dataline[2]))
                x = np.array(x)
                val = np.array(val)
                gradval = np.array(gradval)

                # collate into storage structure
                vars(self)['_'+param] = data(name=param,
                                             npts=npts,
                                             units=units,
                                             xunits=abscis,
                                             x=x,
                                             y=val,
                                             dydx=gradval)
                self._params.append(param)

        if verbose:
            print('P-file data loaded from '+self._pfile)
            print('Available parameters:')
            for par in self._params:
                un = vars(self)['_'+par].units
                xun = vars(self)['_'+par].xunits
                print(str(par).ljust(8)+str(xun).ljust(12)+str(un))
Example #13
    def _make_reader(self, f):
        import csv

        sep = self.delimiter

        if sep is None or len(sep) == 1:
            sniff_sep = True
            # default dialect
            dia = csv.excel()
            if sep is not None:
                sniff_sep = False
                dia.delimiter = sep
            # attempt to sniff the delimiter
            if sniff_sep:
                line = f.readline()
                while self.pos in self.skiprows:
                    self.pos += 1
                    line = f.readline()

                self.pos += 1
                sniffed = csv.Sniffer().sniff(line)
                dia.delimiter = sniffed.delimiter
                if self.encoding is not None:
                    self.buf.extend(list(com.UnicodeReader(StringIO(line), dialect=dia, encoding=self.encoding)))
                else:
                    self.buf.extend(list(csv.reader(StringIO(line), dialect=dia)))

            if self.encoding is not None:
                reader = com.UnicodeReader(f, dialect=dia, encoding=self.encoding)
            else:
                reader = csv.reader(f, dialect=dia)
        else:
            reader = (re.split(sep, line.strip()) for line in f)

        self.data = reader
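csv.Sniffer infers a dialect from a sample of text and raises csv.Error when it cannot decide, which is why readers like this one pair it with a csv.excel() fallback. A minimal sketch:

import csv

sample = "a;b;c\n1;2;3\n"
try:
    dialect = csv.Sniffer().sniff(sample, delimiters=";,\t")
except csv.Error:
    dialect = csv.excel()   # fall back to the default dialect
    dialect.delimiter = ";"
print(dialect.delimiter)    # ';'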
Example #14
def load_details(catalog, details_file):
    """
    Loads each of the lines of the details file.
    - Each movie is added to the movie catalog.
    - For each book its authors are found and, for each
      author, a list with their books is created
    """
    dialect = csv.excel()
    dialect.delimiter = ';'
    with open(details_file, encoding='utf-8-sig') as input_file:
        file_reader = csv.DictReader(input_file, dialect=dialect)
        for movie in file_reader:
            strip_movie = {}
            for key, value in movie.items():
                strip_movie[key.strip()] = value.strip()
            movie = strip_movie
            model.add_details(catalog, movie)
            producer_names = movie['production_companies'].split(",")
            producer_countries = movie['production_countries'].split(',')
            genres = movie['genres'].split(",")
            for producer in producer_names:
                model.add_movie_production_companies(catalog, producer.lower(),
                                                     movie)
            for genre in genres:
                genre = genre.split('|')
                for subgenre in genre:
                    model.add_movie_genre(catalog, subgenre, movie)
            for country in producer_countries:
                model.add_movie_production_countries(catalog, country.lower(),
                                                     movie)
Example #15
def loadActors(catalog):
    '''
    Loads all the actors
    '''
    t1_start = process_time()  # start time
    castingfile = cf.data_dir + 'themoviesdb/MoviesCastingRaw-small.csv'
    
    dialect = csv.excel()
    dialect.delimiter=';'
    with open(castingfile, encoding='utf-8') as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        movie_counter = 1
        casting = ['actor1_name', 'actor2_name', 'actor3_name', 'actor4_name', 'actor5_name']
        for row in spamreader:
            for actor in casting:
                actor_name = row[actor]
                if not actor_name == 'none':
                    director_name = row['director_name']
                    pos = lt.isPresent(actor_name, catalog['actors'], equal)
                    if pos != 0:
                        model.updateActor(catalog, pos, movie_counter, director_name)
                    else:
                        model.addActor(catalog, row, movie_counter)
            movie_counter += 1
    t1_stop = process_time()  # end time
    print('Actor load execution time', t1_stop - t1_start, ' seconds')
    endActorslist_controller(catalog)
Example #16
def cargar_listaActores(file, sep=";"):
    lst = lt.newList('SINGLE_LINKED', comparar_actores)

    dialect = csv.excel()
    dialect.delimiter = sep
    nombres_actores = [
        "actor1_name", "actor2_name", "actor3_name", "actor4_name",
        "actor5_name"
    ]
    try:
        with open(file, encoding="utf-8") as csvfile:
            spamreader = csv.DictReader(csvfile, dialect=dialect)
            for row in spamreader:
                #print(row)

                # add a list for the directors
                #directores = {}
                #directores["director"] = lt.newList('SINGLE_LINKED', comparar_directores)  # list of directors
                for nombreCol in nombres_actores:
                    actor = {}
                    actor["nombre"] = row[nombreCol]

                    posicion1 = lt.isPresent(lst, actor["nombre"])
                    if posicion1 == 0:
                        actor["peliculas"] = lt.newList(
                        )  #ids Peliculas del actor
                        actor["director"] = lt.newList(
                            'SINGLE_LINKED',
                            comparar_director)  #lista directores

                        lt.addLast(actor["peliculas"], row["id"])
                        director = {}
                        director["nombre"] = row["director_name"]
                        director["count"] = 1

                        lt.addLast(actor["director"], director)

                        lt.addLast(lst, actor)
                    else:
                        actores = lt.getElement(lst, posicion1)
                        lt.addLast(actores["peliculas"], row["id"])

                        # check whether the director is already there
                        pos_director = lt.isPresent(actores["director"],
                                                    row["director_name"])

                        if pos_director == 0:  # not there: create the director
                            director = {}
                            director["nombre"] = row["director_name"]
                            director["count"] = 1

                            lt.addLast(actores["director"], director)
                        else:  # director already there: increase the count by one
                            director = lt.getElement(actores["director"],
                                                     pos_director)
                            director["count"] = director["count"] + 1

    except Exception:
        print("There was an error loading the file")
    return lst
Example #17
def test_carga():
    List = []
    List_ADT = lst.newList(list_type)

    file = 'Data/theMoviesdb/AllMoviesCastingRaw.csv'
    sep = ';'
    dialect = csv.excel()
    dialect.delimiter = sep

    assert (lst.size(List_ADT) == 0), "The list does not start at zero"

    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)

            for row in reader:
                List.append(row)
                lst.addLast(List_ADT, row)
    except Exception:
        assert False, 'An error occurred while loading the file'

    assert len(List) == lst.size(List_ADT), 'They are different sizes'

    for x in range(len(List)):
        assert lst.getElement(
            List_ADT,
            x + 1) == List[x], 'The lists are not in the same order'
Example #18
def loadCSVFileCasting(file, catalog):
    dialect = csv.excel()
    dialect.delimiter = ";"
    with open(config.data_dir + file, encoding="utf-8") as csvfile:
        row = csv.DictReader(csvfile, dialect=dialect)
        for elemento in row:
            model.addcasting(catalog, elemento)
Example #19
 def parse(self, cr, data):
     result = []
     stmnt = None
     dialect = csv.excel()
     dialect.quotechar = '"'
     dialect.delimiter = ','
     lines = data.split('\n')
     # Transaction lines are not numbered, so keep a tracer
     subno = 0
     statement_id = False
     for line in csv.reader(lines, dialect=dialect):
         # Skip empty (last) lines and header line
         if not line or line[0] == 'Datum':
             continue
         subno += 1
         msg = transaction_message(line, subno)
         if not statement_id:
             statement_id = self.get_unique_statement_id(
                 cr, msg.execution_date.strftime('%Yw%W'))
         msg.statement_id = statement_id
         if stmnt:
             stmnt.import_transaction(msg)
         else:
             stmnt = statement(msg)
     result.append(stmnt)
     return result
Example #20
def export_members(request):
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename=transactions.csv.zip'
    
    zip_file = zipfile.ZipFile( response, "w", zipfile.ZIP_DEFLATED)
    csv_file = StringIO.StringIO()
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    csv_writer = csv.writer(csv_file, dialect=dialect)
    
    for person in Person.objects.order_by("postal_code"):    # generate chunk
        csv_writer.writerow([person.firstname.encode("utf-8"),
                             person.lastname.encode("utf-8"),
                             person.email_address,
                             person.street.encode("utf-8"),
                             person.postal_code,
                             person.city.encode("utf-8"),
                             person.telephone,
                             person.language,
                             person.notas.encode("utf-8"),
                             person.last_payment_date])

    zip_file.writestr("transactions.csv",csv_file.getvalue())
    csv_file.close()
    zip_file.close()
    # generate the file
    response['Content-Length'] = response.tell()
    return response
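This view is Python 2 code (StringIO.StringIO, manual .encode calls). A rough Python 3 sketch of the same zip-a-CSV idea, with hypothetical row data:

import csv
import io
import zipfile

def zip_csv_bytes(rows, inner_name="transactions.csv"):
    # Write rows to an in-memory CSV, then wrap it in a zip archive.
    csv_buffer = io.StringIO()
    csv.writer(csv_buffer, dialect=csv.excel()).writerows(rows)
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr(inner_name, csv_buffer.getvalue())
    return zip_buffer.getvalue()

# payload = zip_csv_bytes([["Doe", "John", "jd@example.org"]])  # hypothetical row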
Example #22
def test_carga():
    lista=[]
    lst = lt.newList()

    file='Data/test.csv'
    sep=','
    dialect= csv.excel()
    dialect.delimiter = sep

    try:
        with open(file,encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile,dialect=dialect)

            for row in reader:
                lista.append(row)
                lt.addLast(lst,row)
    except Exception:
        print("An error occurred while loading the files")
    print("Python list")
    for i in lista:
        print(i)

    print("Lista de DTA")
    iterator=it.newIterator(lst)
    while it.hasNext(iterator):
        element= it.next(iterator)
        print(element)
Example #23
 def __init__(self, encoding=None):
   super(CSVformatter, self).__init__()
   dialect = csv.excel()
   dialect.quoting = csv.QUOTE_ALL
   self._encoding = encoding or i18n.get_site_encoding()
   self._csv_writer = csv.writer(self, dialect=dialect)
   self._line = None
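csv.QUOTE_ALL makes the writer quote every field, not only fields that contain the delimiter or quote character. A quick illustration:

import csv
import io

buf = io.StringIO()
dialect = csv.excel()
dialect.quoting = csv.QUOTE_ALL
csv.writer(buf, dialect=dialect).writerow(["a", "b,c", "1"])
print(buf.getvalue())  # "a","b,c","1"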
Example #24
def loadmoviesCastingRaw(catalog, moviesfile):
    moviesfile = cf.data_dir + moviesfile
    dialect = csv.excel()
    dialect.delimiter = ";"
    input_file = csv.DictReader(open(moviesfile, encoding='utf-8-sig'),
                                dialect=dialect)
    for movie in input_file:
        model.addMovieCasting(catalog, movie)
        actors1 = movie['actor1_name'].split(",")
        actors2 = movie['actor2_name'].split(",")
        actors3 = movie['actor3_name'].split(",")
        actors4 = movie['actor4_name'].split(",")
        actors5 = movie['actor5_name'].split(",")
        directors = movie['director_name'].split(",")
        for autor in actors1:
            model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for autor in actors2:
            model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for autor in actors3:
            model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for autor in actors4:
            model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for autor in actors5:
            model.addMovieByActor(catalog, autor.strip().lower(), movie)
        for director in directors:
            model.addMovieByDirector(catalog, director.strip().lower(), movie)
Example #25
def loadBookReviews(catalog, sep=';'):
    """
    Loads the books from the file. For each book its authors are taken and,
    for each of them, an author tree is created with that author and a
    reference to the book being processed.
    """
    t1_start = process_time()  # start time
    nodesfile = cf.data_dir + 'flights_nodes.csv'
    edgesfile = cf.data_dir + 'flights_edges.csv'
    dialect = csv.excel()
    dialect.delimiter = sep
    with open(nodesfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        t2_start = process_time()  # start time
        for row in spamreader:
            model.addNode(catalog, row)
        t2_stop = process_time()  # end time
    with open(edgesfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        t3_start = process_time()  # start time
        for row in spamreader:
            model.addEdge(catalog, row)
        t3_stop = process_time()  # end time
    t1_stop = process_time()  # end time
    print("Flight graph load execution time", t1_stop - t1_start,
          " seconds\n"
          "Node load time", t2_stop - t2_start, "seconds\n"
          "Edge load time", t3_stop - t3_start, "seconds")
Example #26
def cargarArchivosUnificados(details, casting, cmpfunction=None):
    lst = iniciarCatalogo()
    dialect = csv.excel()
    dialect.delimiter = ";"
    i = 0
    p = 0
    print("Loading files...")
    with open(cf.data_dir + details, encoding="utf-8-sig") as csvfile1:
        row = csv.DictReader(csvfile1, dialect=dialect)
        for elemento in row:
            if i % 3290 == 0:
                print(" " + str(p) + "%" + " complete", end="\r")
                p += 1
            model.agregarFinal(lst, elemento)
            i += 1
    print(" 100%" + " complete\n")
    print("Merging data...")
    with open(cf.data_dir + casting, encoding="utf-8-sig") as csvfile2:  # Changed the encoding since it caused
        row = csv.DictReader(csvfile2, dialect=dialect)                  # an error with the large files
        i = 1
        p = 0
        for elemento in row:
            if i % 3290 == 0:
                print(" " + str(p) + "%" + " complete", end="\r")
                p += 1
            if elemento["id"] == model.buscarPeliculas(lst, i)["id"]:
                for column in elemento:
                    if column != "id":
                        model.buscarPeliculas(lst, i)[column] = elemento[column]
            i += 1
    print(" 100%" + " complete\n")
    return lst
Example #27
def loadCSVFile(file, sep=";"):
    """
    Loads a csv file into a list
    Args:
        file
            csv file from which the data will be imported
        sep = ";"
            Separator used to delimit each object within the file
    Try:
        Tries to load the CSV file into the list passed as a parameter; if an error is found,
        it clears the list and informs the user
    Returns: None
    """
    #lst = lt.newList("ARRAY_LIST")  # Using the arraylist implementation
    lst = lt.newList()  # Using the linkedlist implementation
    print("Loading file ....")
    t1_start = process_time()  # start time
    dialect = csv.excel()
    dialect.delimiter = sep
    try:
        with open(file, encoding="utf-8") as csvfile:
            spamreader = csv.DictReader(csvfile, dialect=dialect)
            for row in spamreader:
                lt.addLast(lst, row)
    except Exception:
        print("There was an error loading the file")
    t1_stop = process_time()  # end time
    print("Execution time ", t1_stop - t1_start, " seconds")
    return lst
Example #28
def test_carga():
    lista = []
    lst = lt.newList('ARRAY_LIST', cmpfunction)

    file = config.data_dir + 'MoviesCastingRaw-small.csv'
    sep = ';'
    dialect = csv.excel()
    dialect.delimiter = sep

    assert (lt.size(lst) == 0), "The list does not start at zero."

    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)

            for row in reader:
                lista.append(row)
                lt.addLast(lst, row)

    except Exception:
        assert False, "An error occurred while loading the file."

    assert len(lista) == lt.size(lst), "They are different sizes."

    for i in range(len(lista)):
        assert lt.getElement(
            lst, i + 1) == lista[i], "The lists are not in the same order."
Example #29
def cargar_directores(file, sep=";"):
    lst = lt.newList(
        'SINGLE_LINKED',
        comparar_director)  # Using the linkedlist implementation
    print("Loading file ....")
    t1_start = process_time()  # start time
    dialect = csv.excel()
    dialect.delimiter = sep
    try:
        with open(file, encoding="utf-8") as csvfile:
            spamreader = csv.DictReader(csvfile, dialect=dialect)
            for row in spamreader:
                director = {}
                director["nombre"] = row["director_name"]
                posicion1 = lt.isPresent(lst, director["nombre"])
                if posicion1 == 0:
                    director["peliculas"] = lt.newList()
                    lt.addLast(director["peliculas"], row["id"])
                    lt.addLast(lst, director)
                else:
                    directores = lt.getElement(lst, posicion1)
                    lt.addLast(directores["peliculas"], row["id"])
    except Exception:
        print("There was an error loading the file")
    t1_stop = process_time()  # end time
    print("Execution time ", t1_stop - t1_start, " seconds")
    return lst
Example #30
def test_carga():
    lista = []
    lst = lt.newList()

    file = "Data/theMoviesdb/MoviesCastingRaw-small.csv"
    sep = ";"
    dialect = csv.excel()
    dialect.delimiter = sep

    assert (lt.size(lst) == 0), "the list does not start at zero"

    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)

            for row in reader:
                lista.append(row)
                lt.addLast(lst, row)
    except Exception:
        assert False, "An error occurred while loading the file"

    assert len(lista) == lt.size(lst), "they are different sizes"

    for i in range(len(lista)):
        assert lt.getElement(
            lst, i + 1) == lista[i], "the lists are not in the same order"
Example #32
def export_csv(request):
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename=transactions.csv.zip'
    
    zip_file = zipfile.ZipFile( response, "w", zipfile.ZIP_DEFLATED)
    csv_file = StringIO.StringIO()
    dialect = csv.excel()
    dialect.quotechar = '"'
    dialect.delimiter = ','
    csv_writer = csv.writer(csv_file, dialect=dialect)
    
    for transaction in Transaction.objects.order_by("date"):    # generate chunk
        csv_writer.writerow([transaction.date, 
                             transaction.pirate_account.account.iban, 
                             transaction.amount, 
                             transaction.beneficiary.current_banking_account.iban if transaction.beneficiary.current_banking_account else "",
                             transaction.BIC,
                             transaction.beneficiary.lastname+" "+transaction.beneficiary.firstname,
                             "%s %s %s"%(transaction.beneficiary.street, transaction.beneficiary.postal_code, transaction.beneficiary.city),
                             transaction.code,
                             transaction.statement.encode("utf-8")])

    zip_file.writestr("transactions.csv",csv_file.getvalue())
    csv_file.close()
    zip_file.close()
    # generate the file
    response['Content-Length'] = response.tell()
    return response
Example #33
def get_company(citys):
    columns = ['车系', '省', '城市', '销售公司']
    s = requests.Session()
    url = 'http://mall.haval.com.cn/cars/getDealerByType.html'
    with open('haval.csv', 'w', newline='') as csvfile:
        detail_writer = csv.writer(csvfile, dialect=csv.excel())
        detail_writer.writerow(columns)
        for city in citys:
            done = True
            while done:
                try:
                    r = s.post(url, data={'city': city[1]}, headers=headers,
                               timeout=10)
                    time.sleep(1)
                    done = False
                    if r.status_code != 200:
                        done = True
                except Exception as e:
                    print(e)
            data = r.json()['list']
            print(city)
            for x in data:
                cars = x['carModel'].split(',')
                company = x['storeName']
                for car in cars:
                    detail_writer.writerow([car, city[0], city[1], company])
Example #34
def test_carga():
    lista = []
    lst = lt.newList()

    file = "Data/GoodReads/books.csv"
    sep = ','
    dialect = csv.excel()
    dialect.delimiter = sep

    assert (lt.size(lst) == 0), "the list does not start at zero"

    try:
        with open(file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, dialect=dialect)

            for row in reader:
                lista.append(row)
                lt.addLast(lst, row)
    except Exception:
        assert False, "An error occurred while loading the file"
    assert len(lista) == lt.size(lst), "They are different sizes"

    for i in range(len(lista)):
        assert lt.getElement(
            lst, i + 1) == lista[i], "The lists are not in the same order"
Example #35
def loadCSVFile(file, lst):
    dialect = csv.excel()
    dialect.delimiter = ';'
    with open(file, encoding='utf-8-sig') as data:
        input_file = csv.DictReader(data, dialect=dialect)
        for row in input_file:
            lt.addLast(lst, row)
Example #36
 def parse(self, data):
     result = []
     stmnt = None
     dialect = csv.excel()
     dialect.quotechar = '"'
     dialect.delimiter = ';'
     lines = data.split('\n')
     # Probe first record to find out which format we are parsing.
     if lines and lines[0].count(',') > lines[0].count(';'):
         dialect.delimiter = ','
         dialect.quotechar = "'"
     for line in csv.reader(lines, dialect=dialect):
         # Skip empty (last) lines
         if not line:
             continue
         msg = transaction_message(line)
         if stmnt and stmnt.id != msg.statement_id:
             result.append(stmnt)
             stmnt = None
         if not stmnt:
             stmnt = statement(msg)
         else:
             stmnt.import_transaction(msg)
     result.append(stmnt)
     return result
Example #37
 def parse(self, cr, data):
     result = []
     stmnt = None
     dialect = csv.excel()
     dialect.quotechar = '"'
     dialect.delimiter = ';'
     lines = data.split('\n')
     # Probe first record to find out which format we are parsing.
     if lines and lines[0].count(',') > lines[0].count(';'):
         dialect.delimiter = ','
     if lines and lines[0].count("'") > lines[0].count('"'):
         dialect.quotechar = "'"
     # Transaction lines are not numbered, so keep a tracer
     subno = 0
     for line in csv.reader(lines, dialect=dialect):
         # Skip empty (last) lines
         if not line:
             continue
         subno += 1
         msg = transaction_message(line, subno)
         if stmnt and stmnt.id != msg.statement_id:
             result.append(stmnt)
             stmnt = None
             subno = 0
         if not stmnt:
             stmnt = statement(msg)
         else:
             stmnt.import_transaction(msg)
     result.append(stmnt)
     return result
Example #38
def loadCSVFile(file, lst, sep=";"):
    """
    Loads a csv file into a list
    Args:
        file
            Text file from which the required data will be loaded.
        lst :: []
            List into which the elements will be stored after reading the file.
        sep :: str
            Separator chosen to distinguish the different elements within the file.
    Try:
        Tries to load the CSV file into the list passed as a parameter; if an error is found,
        it clears the list and informs the user
    Returns: None
    """
    del lst[:]
    print("Loading file ....")
    t1_start = process_time()  # start time
    dialect = csv.excel()
    dialect.delimiter = sep
    try:
        with open(file, encoding="utf-8") as csvfile:
            spamreader = csv.DictReader(csvfile, dialect=dialect)
            for row in spamreader:
                lst.append(row)
    except Exception:
        del lst[:]
        print("An error occurred while loading the file")

    t1_stop = process_time()  # end time
    print("Execution time ", t1_stop - t1_start, " seconds")
Example #39
def loadPeliculas(lst, file):
    dialect = csv.excel()
    dialect.delimiter = ";"
    try:
        with open(cf.data_dir + file, encoding="utf-8") as csvfile:
            row = csv.DictReader(csvfile, dialect=dialect)
            for elemento in row:
                ## Remove the information that was not requested for the lab
                elemento.pop("id")
                elemento.pop("budget")
                elemento.pop("genres")
                elemento.pop("imdb_id")
                elemento.pop("original_language")
                elemento.pop("overview")
                elemento.pop("popularity")
                elemento.pop("production_companies")
                elemento.pop("production_countries")
                elemento.pop("revenue")
                elemento.pop("runtime")
                elemento.pop("status")
                elemento.pop("tagline")
                elemento.pop("original_title")
                elemento.pop("production_companies_number")
                elemento.pop("spoken_languages_number")
                elemento.pop("production_countries_number")
                model.addMovie(lst, elemento)
    except Exception:
        print("There was an error loading the file")
    return lst
Example #40
def loadMovies(catalog, moviesfile):
    """
    Loads each of the lines of the movies file.
    - Each movie is added to the movie catalog
    """
    moviesfile = cf.data_dir + moviesfile
    dialect = csv.excel()
    dialect.delimiter = ';'
    try:
        with open(moviesfile, encoding="utf-8-sig") as csvfile:
            row = csv.DictReader(csvfile, dialect=dialect)
            for movie in row:
                lst = model.nueva_lista("ARRAY_LIST")
                model.addMovie(catalog, movie)
                producers = movie[
                    'production_companies']  # Get the production companies
                countries = movie[
                    'production_countries']  # Get the countries
                release_date = movie['release_date']
                year = release_date.split("/")
                model.addMovieProducer(catalog, producers, movie)
                model.añanir_pelicula(lst, movie['title'])
                model.añanir_pelicula(lst, year[-1])
                model.añanir_pelicula(lst, movie['id'])
                model.addCountry(catalog, countries, lst)
                genre = movie['genres']
                genre_sep = genre.split('|')
                for genero in genre_sep:
                    model.addMovieGenre(catalog, genero, movie)
    except Exception:
        print("There was an error loading the files")
Example #41
def loadBooks(catalog, sep=','):
    """
    Loads the books from the file. For each book its authors are taken and,
    for each of them, that author is created in the author list with a
    reference to the book being processed.
    """
    t1_start = process_time()  # start time
    booksfile = cf.data_dir + 'GoodReads/AllMoviesDetailsCleaned.csv'
    dialect = csv.excel()
    dialect.delimiter = ';'
    with open(booksfile, encoding="utf-8-sig") as csvfile:
        spamreader = csv.DictReader(csvfile, dialect=dialect)
        for row in spamreader:
            # Add the book to the book list
            model.addMovieList(catalog, row)
            # Add the book to the book map (key=title)
            model.addMovieMap(catalog, row)
            model.add_gen(catalog, row)
            # Get the authors of the book
            #authors = row['authors'].split(",")
            # Each author is created in the catalog's author list, and a
            # book is added to that author's list (pointer to the book)
            #for author in authors:
            #model.addAuthor (catalog, author.strip(), row)
    t1_stop = process_time()  # end time
    print("Movie load execution time:", t1_stop - t1_start,
          " seconds")
Example #45
def report_MLST_result_in_csv_file(output_directory, ids, locusList, FinalResults):

    indel_pos_including_flanking_region = 0
    all_allele_variants = []
    SNPData = None
    INDELdata = None
    Results_file = output_directory + "/" + ids + "_MLST_result.csv"
    
    with open(Results_file, "wb") as csv_fp:
        dial = csv.excel()
        dial.lineterminator = '\r\n'
        csvWriter = csv.writer(csv_fp, dialect=dial)
        st_value = FinalResults["ST"]
        csvWriter.writerow(["st value:", st_value])
        csvWriter.writerow(["Predicted Serotype", FinalResults["predicted_serotype"]])
        header_row = ["locus name","allele variant","Percentage coverage",
                      "Max percentage of non consensus bases","minimum total depth",
                      "mean consensus depth", "numberOfSNPs","SNPsLists","INDELs","INDELsLists"]

        csvWriter.writerow(header_row)
        for locus in locusList:
            array = []
            allele_variant = FinalResults[locus]["ReportedVariantNumber"]
            all_allele_variants.append(allele_variant)
            percentage_coverage = FinalResults[locus]["percentage_coverage"]
            max_percentage_of_non_consensus_bases = FinalResults[locus]["max_percentage_of_non_consensus_bases"]
            minimum_total_depth = FinalResults[locus]["minimum_total_depth"]
            mean_consensus_depth = FinalResults[locus]["mean_consensus_depth"]
            SNPsListsHash = FinalResults[locus]["SNPsListsHash"]
            numberOfSNPs = FinalResults[locus]["numberOfSNPs"]
            numberOfINDELs = FinalResults[locus]["numberOfINDELs"]
            INDELsListsHash = FinalResults[locus]["INDELsListsHash"]

            if int(numberOfSNPs) >= 1:
                for (pos, ref, dist) in SNPsListsHash:
                    ds = 'A(' + str(dist['a']) + ') C(' + str(dist['c']) + ') G(' + str(dist['g']) + ') T(' + str(dist['t']) + ')'
                    pos = pos - 100  # remove flanking region
                    SNPData = "SNP-position:" + str(pos) + "   reference base:" + ref + "   SNP type:" + str(ds)
            else:
                SNPData = None
            if int(numberOfINDELs) >= 1:
                for (pos, ref, TypeOfINDELs) in INDELsListsHash:
                    ds = ','.join(str(dist) for dist in TypeOfINDELs)
                    pos = pos - 100  # remove flanking region
                    INDELdata = "INDEL-position:" + str(pos) + "  reference base:" + ref + "   INDEL type:" + str(ds)
            else:
                INDELdata = None

            if int(numberOfINDELs) >= int(1):
                for (pos, ref, TypeOfINDELs) in INDELsListsHash:
                    indel_pos_including_flanking_region = pos

            array.extend((locus, allele_variant, percentage_coverage,
                          max_percentage_of_non_consensus_bases,
                          minimum_total_depth, mean_consensus_depth,
                          numberOfSNPs, SNPData,
                          numberOfINDELs, INDELdata ))
            csvWriter.writerow(array)
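Opening the output as "wb" and forcing lineterminator = '\r\n' is a Python 2 idiom; on Python 3 the csv module expects a text-mode file opened with newline=''. A sketch of the equivalent setup, assuming Python 3 (the path is hypothetical):

import csv

def open_csv_writer(path):
    # Text mode with newline='' lets the dialect supply the line
    # terminator ('\r\n' is already csv.excel's default).
    fp = open(path, "w", newline="")
    return fp, csv.writer(fp, dialect=csv.excel())

# fp, writer = open_csv_writer("result.csv")
# writer.writerow(["st value:", 42])
# fp.close()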
Example #46
 def restoreDefaults(self):
     """
     Restore the options to default state.
     """
     # preserve `_options` if set by clients (for `reset`).
     opts = self._options
     self.setOptions(Options("utf-8", csv.excel()))
     self._options = opts
Example #47
    def writeloops(self, loopnum=None, dir=None, append='--Corr', log=True):
        ''' Write H, M arrays for loops to a file '''
        if loopnum is None:
            # if loopnum not given, make choice based on file name
            loopnummap = {'easy':2, 'hard':2, 'minor':'all'}
            lfilename = os.path.split(self.filepath)[1].lower()
            for k in loopnummap:
                if k in lfilename:
                    loopnum = loopnummap[k]
            if loopnum is None:
                # if none of the words in loopnummap are found, default to 'all'
                loopnum = 'all'

        loopind = self._loopind(loopnum)
        indir, fn = os.path.split(self.filepath)
        outdir = indir if dir is None else dir
        loopfn = os.path.splitext(fn)[0] + append + '.csv'
        looppath = os.path.join(outdir, loopfn)

        # if file exists, start appending numbers
        if os.path.isfile(looppath):
            matches = fnmatch.filter(os.listdir(outdir), '??'.join(os.path.splitext(loopfn)))
            if not any(matches):
                looppath = '_2'.join(os.path.splitext(looppath))
            else:
                n = np.max([int(p[-5]) for p in matches])
                looppath = ('_'+str(n+1)).join(os.path.splitext(looppath))

        # Output will be alternating H, M, H, M, ...
        # not straightforward because loops may have different lengths

        # filter out unwanted loops, convert to kOe
        H, M = [], []
        for i, [h, m] in enumerate(zip(self.H, self.M)):
            if i in loopind:
                H.append(h/1000)
                M.append(m)

        # Append the interpolated minor loop at the end if it exists
        if hasattr(self, 'H_zminor'):
            H.append(self.H_zminor)
            M.append(self.M_zminor)

        # interleave loops, with padding empty spaces with None
        # don't ask...
        raggedlooparray = zipl(*[x for t in zip(H, M) for x in t])

        # Python 2 csv convention: binary mode (on Python 3 use open(looppath, 'w', newline=''))
        with open(looppath, "wb") as f:
            # lines terminate with \r\n by default, change to \n
            excelmod = csv.excel()
            excelmod.lineterminator = '\n'
            writer = csv.writer(f, dialect=excelmod)
            writer.writerows(raggedlooparray)

        print('Loop(s) {} written to {}'.format(loopnum, looppath))
        self.log += '{}: Wrote loop(s) {} to disk: {}\n'.format(_now(), loopnum, looppath)

        if log: self.writelog(dir=dir)
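writeloops depends on a zipl helper that is not shown; it presumably behaves like itertools.zip_longest so the ragged H, M columns come out aligned. A sketch under that assumption:

from itertools import zip_longest

def zipl(*iterables):
    # Pad shorter loops with empty cells so csv.writer emits equal-length rows.
    return zip_longest(*iterables, fillvalue='')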
Example #48
    def toCSV(self, fields, data):
        dialect = csv.excel()
        dialect.delimiter = self.getDelimiter()
        buffer = StringIO()
        writer = DictWriter(buffer, fieldnames=fields, dialect=dialect)
        if self.getShowHeader():
            writer.writeheader()  # same as writerow(dict(zip(fields, fields)))
        writer.writerows(data)
        return buffer.getvalue()
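A hedged, standalone sketch of what toCSV produces, assuming getDelimiter() returns ';' and getShowHeader() is true:

from io import StringIO
from csv import DictWriter
import csv

fields = ['name', 'age']
data = [{'name': 'Ada', 'age': 36}, {'name': 'Alan', 'age': 41}]
dialect = csv.excel()
dialect.delimiter = ';'
buffer = StringIO()
writer = DictWriter(buffer, fieldnames=fields, dialect=dialect)
writer.writeheader()
writer.writerows(data)
print(buffer.getvalue())
# name;age
# Ada;36
# Alan;41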
Example #49
    def __init__(self, encoding='utf-8', dialect=None, columntypes=None,
                 rowspec=None, decimal_separator=".", group_separator=""):
        # type: (str, csv.Dialect, List[Tuple[range, ColumnType]], ...) -> None
        # None sentinels avoid sharing mutable default objects across instances.
        self.encoding = encoding
        self.dialect = dialect if dialect is not None else csv.excel()
        self.columntypes = columntypes if columntypes is not None else []
        if rowspec is None:
            rowspec = [(range(0, 1), RowSpec.Header)]
        self.rowspec = rowspec  # type: List[Tuple[range, Options.RowSpec]]
        self.decimal_separator = decimal_separator
        self.group_separator = group_separator
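A hedged usage sketch for the Options constructor above (restoreDefaults earlier on this page builds Options("utf-8", csv.excel())):

opts = Options(encoding='utf-8', dialect=csv.excel_tab(),
               decimal_separator=',', group_separator='.')
assert opts.dialect.delimiter == '\t'
assert opts.columntypes == []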
Example #50
    def read(self):
        for encoding in (lambda: ('us-ascii', None),                 # fast
                         lambda: (detect_encoding(self.filename), None),  # precise
                         lambda: (locale.getpreferredencoding(False), None),
                         lambda: (sys.getdefaultencoding(), None),   # desperate
                         lambda: ('utf-8', None),                    # ...
                         lambda: ('utf-8', 'ignore')):               # fallback
            encoding, errors = encoding()
            # Clear the error flag for all except the last check, because
            # the error of second-to-last check is stored and shown as warning in owfile
            if errors != 'ignore':
                error = ''
            with self.open(self.filename, mode='rt', newline='',
                           encoding=encoding, errors=errors) as file:
                # Sniff the CSV dialect (delimiter, quotes, ...)
                try:
                    dialect = csv.Sniffer().sniff(
                        # Take first couple of *complete* lines as sample
                        ''.join(file.readline() for _ in range(5)),
                        self.DELIMITERS)
                except UnicodeDecodeError as e:
                    error = e
                    continue
                except csv.Error:
                    dialect = csv.excel()
                    dialect.delimiter = self.DELIMITERS[0]

                file.seek(0)
                dialect.skipinitialspace = True

                try:
                    reader = csv.reader(file, dialect=dialect)
                    data = self.data_table(reader)

                    # TODO: Name can be set unconditionally when/if
                    # self.filename will always be a string with the file name.
                    # Currently, some tests pass StringIO instead of
                    # the file name to a reader.
                    if isinstance(self.filename, str):
                        data.name = path.splitext(
                            path.split(self.filename)[-1])[0]
                    if error and isinstance(error, UnicodeDecodeError):
                        pos, endpos = error.args[2], error.args[3]
                        warning = ('Skipped invalid byte(s) in position '
                                   '{}{}').format(pos,
                                                  ('-' + str(endpos)) if (endpos - pos) > 1 else '')
                        warnings.warn(warning)
                    self.set_table_metadata(self.filename, data)
                    return data
                except Exception as e:
                    error = e
                    continue
        raise ValueError('Cannot parse dataset {}: {}'.format(self.filename, error)) from error
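The fallback chain above is the core idiom: try cheap encodings first, sniff the dialect from a sample, and fall back to a plain excel dialect with a fixed delimiter when sniffing fails. A condensed standalone sketch (hypothetical delimiter candidates):

import csv

def open_csv(path, delimiters=',;:\t '):
    with open(path, newline='', encoding='utf-8', errors='replace') as f:
        sample = ''.join(f.readline() for _ in range(5))
        try:
            dialect = csv.Sniffer().sniff(sample, delimiters)
        except csv.Error:
            dialect = csv.excel()             # sniffing failed: use defaults
            dialect.delimiter = delimiters[0]
        f.seek(0)
        return list(csv.reader(f, dialect=dialect))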
Example #51
def test_serializable_dialect_fields():
    def same_attr(key, coll1, coll2):
        return getattr(coll1, key) == getattr(coll2, key)
    original_dialect = csv.excel()
    serializable_dialect = SerializableDialect.from_dialect(original_dialect)
    converted_dialect = serializable_dialect.to_dialect()
    attributes = 'delimiter doublequote escapechar ' \
                 'lineterminator quotechar quoting skipinitialspace'.split(' ')
    for key in attributes:
        assert hasattr(serializable_dialect, key)
        assert same_attr(key, serializable_dialect, original_dialect)
        assert same_attr(key, original_dialect, converted_dialect)
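SerializableDialect itself is not shown. A hypothetical namedtuple-based implementation that would satisfy the assertions above:

from collections import namedtuple
import csv

_FIELDS = ('delimiter', 'doublequote', 'escapechar',
           'lineterminator', 'quotechar', 'quoting', 'skipinitialspace')

class SerializableDialect(namedtuple('SerializableDialect', _FIELDS)):
    @classmethod
    def from_dialect(cls, dialect):
        return cls(*(getattr(dialect, f) for f in _FIELDS))

    def to_dialect(self):
        # Build an anonymous csv.Dialect subclass carrying the same attributes.
        return type('_Dialect', (csv.Dialect,), dict(zip(_FIELDS, self)))()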
Example #52
    def get_csv(self):
        # Call the URL, get the response, parse it strictly as CSV,
        # and return the list of dictionaries.
        # Python 2: rsp.content is a bytestring, hence the per-value decode.
        rsp = self.client.get(self.url)
        self.assertEqual(200, rsp.status_code)
        dialect = csv.excel()
        dialect.strict = True
        reader = csv.DictReader(StringIO(rsp.content), dialect=dialect)
        result = []
        for item in reader:
            for k, v in item.iteritems():
                item[k] = v.decode('utf-8')
            result.append(item)
        return result
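get_csv above is Python 2 code (iteritems, byte content fed to StringIO). A hedged Python 3 equivalent, assuming the same test-client attributes:

from io import StringIO
import csv

def get_csv(self):
    rsp = self.client.get(self.url)
    self.assertEqual(200, rsp.status_code)
    dialect = csv.excel()
    dialect.strict = True
    # Decode once up front; DictReader then yields str values directly.
    reader = csv.DictReader(StringIO(rsp.content.decode('utf-8')),
                            dialect=dialect)
    return list(reader)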
Example #53
def read_from_file(fin, is_firstline_title=True, encoding='shift_jis'):
    if not is_firstline_title:
        raise Exception("is_firstline_title=False is not supported. "
                        "Please add a title line at the top of the file.")

    if fin.encoding != encoding:
        fin = codecs.getreader(encoding)(fin.detach())
    items = []
    rows = csv.DictReader(fin, dialect=csv.excel(), quotechar='"', restkey="rest")
    for row in rows:
        rowdict = {}
        for k, v in row.items():
            rowdict[k.strip()] = v.strip()
        items.append(rowdict)
    return items
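A hedged usage sketch (hypothetical file name), assuming a Shift_JIS file whose first line is the header:

with open('records.csv', encoding='shift_jis') as fin:
    items = read_from_file(fin)
# items is a list of dicts with whitespace-stripped keys and values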
Example #54
    def test_import_widget(self):
        w = textimport.CSVImportWidget()
        w.setDialect(csv.excel())
        w.setSampleContents(io.BytesIO(DATA1))
        view = w.dataview
        model = view.model()
        self.assertEqual(model.columnCount(), 4)
        self.assertEqual(model.rowCount(), 1)
        self.assertEqual(model.canFetchMore(), False)
        w.setSampleContents(io.BytesIO(DATA2))
        model = view.model()
        self.assertEqual(model.columnCount(), 0)
        self.assertEqual(model.rowCount(), 0)
        self.assertEqual(model.canFetchMore(), False)
        w.setSampleContents(io.BytesIO(DATA4))
        model = view.model()
        self.assertEqual(model.columnCount(), 4)
        self.assertEqual(model.rowCount(), 3)

        types = {
            0: ColumnTypes.Categorical,
            1: ColumnTypes.Numeric,
            2: ColumnTypes.Text,
            3: ColumnTypes.Time,
        }
        w.setColumnTypes(types)
        self.assertEqual(w.columnTypes(), types)
        rs = w.rowStates()
        self.assertEqual(rs, {})
        w.setStateForRow(0, textimport.TablePreview.Header)
        w.setRowStates({0: textimport.TablePreview.Header})
        self.assertEqual(w.rowStates(), {0: textimport.TablePreview.Header})
        w.setStateForRow(1, textimport.TablePreview.Skipped)
        view.grab()

        w.setSampleContents(io.BytesIO(DATA5))
        model = view.model()
        self.assertEqual(model.columnCount(), 1)
        w.setDialect(csv.excel_tab())
        w.setSampleContents(io.BytesIO(DATA5))
        model = view.model()
        self.assertEqual(model.columnCount(), 2)
        self.assertTrue(model.canFetchMore())
        rows = model.rowCount()
        spy = QSignalSpy(model.rowsInserted)
        model.fetchMore()
        self.assertGreater(model.rowCount(), rows)
        self.assertEqual(len(spy), 1)
Example #55
    def asCsvDialect(self):
        """
        Represent dialect as csv.Dialect.
        """
        result = csv.excel()
        result.lineterminator = self.lineDelimiter
        result.delimiter = str(self.itemDelimiter)
        result.quotechar = str(self.quoteChar)
        result.doublequote = (self.escapeChar == self.quoteChar)
        if self.escapeChar is None:
            result.escapechar = None
        else:
            result.escapechar = str(self.escapeChar)
        result.skipinitialspace = self.blanksAroundItemDelimiter

        return result
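The doublequote flag derived above controls how embedded quote characters are written; a quick demonstration with the stock excel dialect (doublequote is True by default):

import csv, io

buf = io.StringIO()
csv.writer(buf, dialect=csv.excel()).writerow(['he said "hi"'])
print(buf.getvalue())   # "he said ""hi"""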
Example #56
    def _make_reader(self, f):
        sep = self.delimiter

        if sep is None or len(sep) == 1:
            sniff_sep = True
            # default dialect
            if self.dialect is None:
                dia = csv.excel()
            elif isinstance(self.dialect, basestring):
                dia = csv.get_dialect(self.dialect)
            else:
                dia = self.dialect

            if sep is not None:
                sniff_sep = False
                dia.delimiter = sep
            # attempt to sniff the delimiter
            if sniff_sep:
                line = f.readline()
                while self.pos in self.skiprows:
                    self.pos += 1
                    line = f.readline()

                line = self._check_comments([line])[0]

                self.pos += 1
                sniffed = csv.Sniffer().sniff(line)
                dia.delimiter = sniffed.delimiter
                if self.encoding is not None:
                    self.buf.extend(list(
                        com.UnicodeReader(StringIO(line),
                                          dialect=dia,
                                          encoding=self.encoding)))
                else:
                    self.buf.extend(list(csv.reader(StringIO(line),
                                                    dialect=dia)))

            if self.encoding is not None:
                reader = com.UnicodeReader(f, dialect=dia,
                                           encoding=self.encoding)
            else:
                reader = csv.reader(f, dialect=dia)
        else:
            reader = (re.split(sep, line.strip()) for line in f)

        self.data = reader
Example #57
    def __init__(self, stream, types, header, sep=",", skipHeader=False):
        self.stream = stream
        self.types = types
        self.skipHeader = skipHeader

        self.explicitHeader = header
        if sep is not None and sys.version_info < (3,):
            sep = asciistr(sep)

        self.dialect = csv.excel()
        if sep is not None: self.dialect.delimiter = sep

        self.fields = header.split(sep)

        if isinstance(self.types, dict) and all(map(callable, self.types.values())):
            self._types = self.types
        else:
            pairs = (self.types.items() if self.types is not None
                     else [(f, "string") for f in self.fields])
            self._types = dict((f, cast[t]) for f, t in pairs)
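cast is referenced but not defined in this snippet; presumably it maps type names to converter callables. A sketch under that assumption (hypothetical table):

cast = {
    'string': str,
    'int': int,
    'float': float,
    'bool': bool,
}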
Example #58
def main(f1, f2, out):
    d = csv.excel()
    d.lineterminator = "\n"
    writer = csv.writer(out, d)
    while True:
        try:
            # [:-2] presumably strips a trailing CRLF before decoding
            l1 = next(f1)[:-2].decode("gb18030", "ignore")
            l2 = next(f2)[:-2].decode("gb18030", "ignore")
        except StopIteration:
            break
        assert len(l1) == len(l2)
        # Split l1 at every position where it differs from l2;
        # the differing characters themselves are dropped.
        base = 0
        row = []
        for i in range(len(l1)):
            if l1[i] != l2[i]:
                row.append(l1[base:i])
                base = i + 1
        row.append(l1[base:])
        writer.writerow(row)
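A hedged driver for main above (hypothetical file names): the inputs must be opened in binary mode so next() yields byte strings, and [:-2] assumes each line ends in CRLF.

import sys

with open('old.txt', 'rb') as f1, open('new.txt', 'rb') as f2:
    main(f1, f2, sys.stdout)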
Example #59
    def test_dialect(self):
        data = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""

        dia = csv.excel()
        dia.quoting = csv.QUOTE_NONE
        df = read_csv(StringIO(data), dialect=dia)

        data = """\
label1,label2,label3
index1,a,c,e
index2,b,d,f
"""
        exp = read_csv(StringIO(data))
        exp.replace('a', '"a', inplace=True)
        assert_frame_equal(df, exp)
Example #60
    def read_file(cls, filename, wrapper=None):
        wrapper = wrapper if wrapper and wrapper != Table else _IDENTITY
        import csv, sys, locale
        for encoding in (lambda: ('us-ascii', None),                 # fast
                         lambda: (detect_encoding(filename), None),  # precise
                         lambda: (locale.getpreferredencoding(False), None),
                         lambda: (sys.getdefaultencoding(), None),   # desperate
                         lambda: ('utf-8', None),                    # ...
                         lambda: ('utf-8', 'ignore')):               # fallback
            encoding, errors = encoding()
            # Clear the error flag for all except the last check, because
            # the error of second-to-last check is stored and shown as warning in owfile
            if errors != 'ignore':
                error = ''
            with cls.open(filename, mode='rt', newline='', encoding=encoding, errors=errors) as file:
                # Sniff the CSV dialect (delimiter, quotes, ...)
                try:
                    dialect = csv.Sniffer().sniff(file.read(1024), cls.DELIMITERS)
                except UnicodeDecodeError as e:
                    error = e
                    continue
                except csv.Error:
                    dialect = csv.excel()
                    dialect.delimiter = cls.DELIMITERS[0]

                file.seek(0)
                dialect.skipinitialspace = True

                try:
                    reader = csv.reader(file, dialect=dialect)
                    data = cls.data_table(reader)
                    if error and isinstance(error, UnicodeDecodeError):
                        pos, endpos = error.args[2], error.args[3]
                        warning = ('Skipped invalid byte(s) in position '
                                   '{}{}').format(pos,
                                                  ('-' + str(endpos)) if (endpos - pos) > 1 else '')
                        warnings.warn(warning)
                    return wrapper(data)
                except Exception as e:
                    error = e
                    continue
        raise ValueError('Cannot parse dataset {}: {}'.format(filename, error))