Example #1
 def _readline(self):
     '''Read a single decoded line from the file.'''
     line = self.file.readline(self.max_line_size)
     if not line:
         return ''
     if not line[-1] == '\n' and len(line) >= self.max_line_size:
         raise csv.Error('Line exceeds maximum size of {}'.format(
             self.max_line_size))
     try:
         return line.decode(self.encoding)
     except UnicodeDecodeError:
         raise csv.Error('Encountered invalid Unicode character')
     except AttributeError:
         return line
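
A minimal self-contained sketch of the same size guard, assuming a binary stream and a hypothetical MAX_LINE_SIZE limit (not the original class):

import csv
import io

MAX_LINE_SIZE = 16  # hypothetical limit used only for this sketch
stream = io.BytesIO(b"short,row\nthis line is far too long for the limit\n")

try:
    while True:
        line = stream.readline(MAX_LINE_SIZE)
        if not line:
            break
        # A full-length read with no trailing newline means the line was truncated.
        if not line.endswith(b"\n") and len(line) >= MAX_LINE_SIZE:
            raise csv.Error("Line exceeds maximum size of {}".format(MAX_LINE_SIZE))
        print(line.decode("utf-8").rstrip("\n"))
except csv.Error as exc:
    print("CSV error:", exc)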
Example #2
    def load(self, stream, stream_columns):
        """
        Function that loads a custom data source
        Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
                              The format of the stream must be a comma-separated file
                              with header containing the columns defined in RG_COLUMNS.
        """
        if not stream_columns:
            stream_columns = RG_COLUMNS

        stream_reader = csv.DictReader(stream, delimiter=',')
        header = stream_reader.fieldnames

        if header != stream_columns:
            raise csv.Error('Input must be a comma-separated file with header containing ' + \
                'the following columns - %s.\nFound header - %s.\nFor more help, visit: ' % (','.join(stream_columns), ','.join(header)) + \
                'https://github.com/thampiman/reverse-geocoder')

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in stream_reader:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)

        return geo_coords, locations
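
A self-contained sketch of the same header check and coordinate collection, reading from an in-memory stream; the column list is illustrative, not the project's actual RG_COLUMNS:

import csv
import io

expected_columns = ["lat", "lon", "name"]  # illustrative stand-in for RG_COLUMNS
stream = io.StringIO("lat,lon,name\n47.37,8.54,Zurich\n")

reader = csv.DictReader(stream, delimiter=",")
if reader.fieldnames != expected_columns:
    raise csv.Error("Expected header {}, found {}".format(expected_columns, reader.fieldnames))

geo_coords = [(row["lat"], row["lon"]) for row in reader]
print(geo_coords)  # [('47.37', '8.54')]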
Example #3
def read_csv(path, is_column_id, delimiter, quotechar, skip_first_row):
    with open(path) as f:
        reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)

        if skip_first_row:
            lines = list(reader)[1:]
        else:
            lines = list(reader)

    if len(lines[0]) == 3 and is_column_id:
        column_id = 0
        column_label = 1
        column_text_a = 2
        column_text_b = -1
    elif len(lines[0]) == 3 and not is_column_id:
        column_id = -1
        column_label = 0
        column_text_a = 1
        column_text_b = 2
    elif len(lines[0]) == 4 and is_column_id:
        column_id = 0
        column_label = 1
        column_text_a = 2
        column_text_b = 3
    elif len(lines[0]) == 2 and not is_column_id:
        column_id = -1
        column_label = 0
        column_text_a = 1
        column_text_b = -1
    else:
        raise csv.Error("The CSV file " + path + " is malformed")

    return lines, [column_id, column_label, column_text_a, column_text_b]
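
A usage sketch, assuming the read_csv function above is in scope; the temporary file contents are illustrative:

import csv
import tempfile

# Build a small 3-column file (label, text_a, text_b) with a header row.
with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False, newline="") as tmp:
    csv.writer(tmp, delimiter=",", quotechar='"').writerows([
        ["label", "text_a", "text_b"],
        ["1", "first sentence", "second sentence"],
    ])
    path = tmp.name

lines, columns = read_csv(path, is_column_id=False, delimiter=",",
                          quotechar='"', skip_first_row=True)
print(lines)    # [['1', 'first sentence', 'second sentence']]
print(columns)  # [-1, 0, 1, 2]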
Example #4
def sniff(sample, delimiters=None):
    """
    This function mimics the Sniffer.sniff() method from the Python CSV 
    function, with one exception: it doesn't change the detected quotechar to 
    default to '"'. We do this because we want to know the detected quote 
    character.

    """
    sniffer = csv.Sniffer()

    quotechar, doublequote, delimiter, skipinitialspace = sniffer._guess_quote_and_delimiter(
        sample, delimiters)

    if not delimiter:
        delimiter, skipinitialspace = sniffer._guess_delimiter(
            sample, delimiters)
    if not delimiter:
        raise csv.Error("Could not determine delimiter")

    class dialect(csv.Dialect):
        _name = "sniffed"
        lineterminator = "\r\n"  # unused
        quoting = csv.QUOTE_MINIMAL

    dialect.doublequote = doublequote
    dialect.delimiter = delimiter
    dialect.quotechar = quotechar  # See above
    dialect.skipinitialspace = skipinitialspace
    dialect.escapechar = '' if dialect.escapechar is None else dialect.escapechar

    return dialect
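
For comparison, a small sketch of the public csv.Sniffer API, which raises csv.Error when it cannot settle on a delimiter:

import csv

good_sample = "name,city\nAda,London\nGrace,Berlin\n"
bad_sample = "no obvious structure here"

dialect = csv.Sniffer().sniff(good_sample, delimiters=",;|\t")
print(dialect.delimiter)   # ','

try:
    csv.Sniffer().sniff(bad_sample, delimiters=",;|\t")
except csv.Error as exc:
    print("Sniffing failed:", exc)   # "Could not determine delimiter"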
Example #5
    def read_from_csv(self, lexicon_csv_path: str, encoding: str='utf-8', **csv_reader_keyword_arguments):
        """
        Append lexicon entries from CSV with columns for lemma, topic and
        rating. If the lemma is missing or starts with '#' the whole row is
        ignored. Leading and trailing white space is ignored. Upper and lower
        case on topics and ratings is ignored.

        :param lexicon_csv_path:
            path to the CSV file to read
        :param encoding:
            encoding of the CSV file
        :param csv_reader_keyword_arguments:
            additional options for the CSV reader, for example
            ``delimiter=','``
        """
        assert lexicon_csv_path is not None

        with open(lexicon_csv_path, encoding=encoding, newline='') as csv_file:
            lexicon_reader = csv.reader(csv_file, **csv_reader_keyword_arguments)
            for row in lexicon_reader:
                row = [item.strip() for item in row]
                row += 3 * ['']  # Ensure we have at least 4 strings
                try:
                    self._append_lexicon_entry_from_row(row)
                except ValueError as error:
                    raise csv.Error(
                        '%s:%d: %s' % (
                            lexicon_csv_path, lexicon_reader.line_num, error))
Example #6
def get_csv(csv_path, dialect):
    """
    Returns csv_data array for provided csv_path 
    """
    try:
        #register dialect provided as parameter
        csv.register_dialect('flatfileed',
                             delimiter=dialect['CSV_DELIMITER'],
                             quoting=dialect['CSV_QUOTING'],
                             doublequote=dialect['CSV_DOUBLEQUOTE'],
                             strict=True)

        csv_file = open(csv_path)
        #csv_reader = csv.reader(csv_file, delimiter=',', doublequote=True)
        csv_reader = csv.reader(csv_file, 'flatfileed')
        csv_data = []
        for row in csv_reader:
            csv_data.append(row)
        csv_file.close()

        if validate_csv(csv_data, dialect) == True:
            return csv_data
        else:
            raise csv.Error
    except FileNotFoundError:
        raise FileNotFoundError('CSV File not provided!')
    except csv.Error:
        raise csv.Error(
            'Wrong format of provided CSV! Check the file or review the configuration')
Example #7
    def writelines(self, rows: t.Union[list, tuple, t.Generator], **kwargs):
        asdict = kwargs.get('asdict')
        first_line = None

        if asdict and not self.has_headers:
            self.headers, first_line = self._detect_headers_before_write(rows)

            if not first_line:
                raise Error(
                    f'Header detection consumed rows=, but first_line holds no data!\n{first_line}'
                )

        with self.writer(**kwargs) as writer:
            # writeheader
            if not self.has_headers and isinstance(writer, csv.DictWriter):
                raise csv.Error(
                    'No headers set - write prohibited! Try to set asdict=True'
                )

            if self.has_headers and isinstance(writer, csv.DictWriter):
                writer.writeheader()

            if isinstance(rows, t.Generator):
                # Header detection already consumed the first line from the rows
                # generator, so write it back out from first_line, right after
                # the header.
                writer.writerow(first_line)

            for line in rows:
                writer.writerow(line)

        if isinstance(self.text, io.StringIO):
            return self.text.getvalue()
Example #8
def list_to_csv(row_list: list, filename: str=''):
    """
    Function to write a list of rows to a CSV file.

    Args:
        row_list (list): A list of lists (the rows).
        filename (str, optional): Name for the CSV file. Defaults to 'report.csv'.

    Raises:
        OSError: If the output file cannot be created.
        csv.Error: If writing the CSV data fails.
    """

    # convert single column list to required format
    if not isinstance(row_list[0], (list, tuple)):
        row_list = [(line,) for line in row_list]

    # write csv file
    try:
        with open(filename if filename else 'report.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file, quoting=csv.QUOTE_ALL, delimiter=',')
            writer.writerows(row_list)
    except OSError as e:
        raise OSError(f'Error creating outputfile: {e}') from e
    except csv.Error as e:
        raise csv.Error(f'Error writing CSV data to file: {e}') from e
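
A brief usage sketch, assuming list_to_csv above is defined and the csv module is imported; the filenames are illustrative:

rows = [["name", "score"], ["Ada", "100"]]
list_to_csv(rows, filename="scores.csv")   # writes fully quoted rows to scores.csv

words = ["alpha", "beta"]
list_to_csv(words)                         # single-column list, falls back to report.csv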
Example #9
def read_base_comp(filename):
    """Read the base compition from a file created by write_base_comp"""
    with open(filename, newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            return row

    raise csv.Error("No rows found in %r" % (filename, ))
Example #10
def _get_updated_record(
    new_record,
    existing_record,
    continue_after_error,
    columns_with_arrays,
):
    record_to_write = copy.deepcopy(existing_record)

    # for any column not in the standard set, either update the existing
    # record with new data, or leave column as data provided
    for column_name in [
            key for key in new_record.keys() if key not in (
                GUID_STANDARD_KEY,
                SIZE_STANDARD_KEY,
                MD5_STANDARD_KEY,
            )
    ]:
        # first handle space-delimited columns
        if column_name in columns_with_arrays:
            if column_name in existing_record:
                # column that has a space-delimited array of values
                record_to_write[column_name] = " ".join(
                    sorted(
                        list(
                            set(new_record[column_name].split(" ") +
                                existing_record[column_name].split(" "))))
                ).strip(" ")
            else:
                record_to_write[column_name] = " ".join(
                    sorted(list(set(
                        new_record[column_name].split(" "))))).strip(" ")
        # handle non-space-delimited columns
        else:
            if not existing_record.get(column_name) or (
                    existing_record.get(column_name)
                    == new_record[column_name]):
                # use new record when nothing in existing record or it's the same data
                record_to_write[column_name] = new_record[column_name]
            elif not new_record[column_name]:
                # persist existing data if no new data
                record_to_write[column_name] = existing_record.get(
                    column_name, "")
            else:
                # old and new have different values, unsure how to merge
                error_msg = (
                    f"NOT merging column {column_name} for "
                    f"existing {existing_record} and new "
                    f"{new_record} because unsure how to merge the values.\nERROR: IGNORING NEW VALUE if "
                    f"forced to continue without error.")
                logging.error(error_msg)

                if not continue_after_error:
                    raise csv.Error(error_msg)

                # if we're here, that means we are just going to ignore new data
                # and add a row with the existing data

    return record_to_write
Example #11
File: app.py Project: Spydernaz/AMS
def submitcontext():

    try:
        updated = False

        username = request.form["username"]
        context = request.form["context"]
        headers = {"content-type": "application/json"}

        if (username == "" or username == None):
            flash([f"There was no username provided in the context request", "Warning"])
            return redirect(url_for('homepage'))

        if username == "easterEggz":
            return redirect("https://thecodinglove.com/when-i-try-to-bypass-a-security-feature")

        contextFile = {}
        filename = "context.txt"

        # Open config file and check for an existing context
        with open(filename, "r") as csvfile:
            print("\nopened file\n")
            reader = csv.reader(csvfile, delimiter='=', escapechar='\\', quoting=csv.QUOTE_NONE)
            print("\ncreated a reader\n")
            for row in reader:
                print("\noread first row\n")

                if len(row) != 2:
                    raise csv.Error("Too many fields on row with contents")
                contextFile[row[0]] = row[1]
        
        contextFile[username] = context

        csv.register_dialect('myDialect', delimiter = '=', escapechar='\\', quoting=csv.QUOTE_NONE)
        
        
        # Update / Insert context for the user
        with open(filename, "w") as csvfile:
            fieldnames = ['property', 'value']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, dialect="myDialect")            
            for k,v in contextFile.items():
                writer.writerow({"property": k, 'value': v})


        # Flash a message to the user
        if (updated):
            flash([f"Successfully updated user {username}'s context to {context}", "Success"])
        else:
            flash([f"Successfully set user {username}'s context to {context}", "Success"])


        return redirect(url_for('homepage'))
    except Exception as e:
        flash(["Was not able to update context", "Error"])
        print(e)
        abort(500)
Example #12
 def parse_csv(self, s, length):
     try:
         l = list(csv.reader([s]))[0]
     except csv.Error as e:
         raise PassParserError('Error parsing CSV') from e
     if len(l) != length:
         raise PassParserError(
             f'Invalid CSV format, {length} columns expected instead of {len(l)}'
         )
     return l
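
The single-line parsing trick used above, as a self-contained sketch; PassParserError belongs to the original project, so plain csv.Error is raised here instead:

import csv

def parse_line(s, length):
    fields = next(csv.reader([s]))  # parse exactly one CSV-formatted line
    if len(fields) != length:
        raise csv.Error(f"Expected {length} columns, got {len(fields)}")
    return fields

print(parse_line('login,"p@ss,word",example.com', 3))
# ['login', 'p@ss,word', 'example.com']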
Example #13
 def load_csv(self, filename):
     with open(filename, newline='') as f:
         reader = csv.reader(f)
         try:
             return [row for row in reader if row]  # skip blank lines
         except csv.Error as e:
             raise csv.Error('file {}, line {}: {}'.format(
                 filename, reader.line_num, e)) from e
         except FileNotFoundError:
             raise
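
A self-contained sketch of the same re-raise pattern; a NUL byte in the data is one easy way to make the reader fail:

import csv
import io

data = io.StringIO("a,b\n1,2\n\x003,4\n")  # the NUL byte makes the csv reader raise
reader = csv.reader(data)
try:
    rows = [row for row in reader if row]
except csv.Error as e:
    # Augment the parser's message with the line number, as load_csv does above.
    print("line {}: {}".format(reader.line_num, e))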
Example #14
 def _process_csv(self):
     '''
     collect information from csv file
     '''
     try:
         ifile = open(self.csv_file, "r")
         self._templates = csv.DictReader(ifile)
         # ifile.close()
     except csv.Error as emsg:
         raise csv.Error("Issues with csv file %s. %s"
                         % (os.path.basename(self.csv_file), emsg))
Example #15
	def read_ac_file(self, ac_file):
		reader = csv.reader(ac_file, delimiter=' ', skipinitialspace=True)
		try:
			for row in reader:
				# See if this is a valid token and pass the file handle and the current line to our function
				if row[0] in self.tokens.keys():
					self.tokens[row[0]](ac_file,row)
				else:
					self.report_error("invalid token: {tok} ({ln})".format(tok=row[0], ln=row))				
		except csv.Error as e:
			self.report_error('AC3D import error, line %d: %s' % (reader.line_num, e))
Example #16
def _error_if_invalid_size_or_guid(record, existing_record,
                                   continue_after_error,
                                   allow_mult_guids_per_hash):
    """Log and raise errors based on cfg if hashes don't match or there's multiple GUIDs"""
    guid = existing_record.get(GUID_STANDARD_KEY)
    new_guid = record.get(GUID_STANDARD_KEY)

    if SIZE_STANDARD_KEY in existing_record:
        size = existing_record[SIZE_STANDARD_KEY]

        if size != record[SIZE_STANDARD_KEY]:
            error_msg = (
                "Found two objects with the same hash but different sizes,"
                f" could not merge. Details: object {existing_record} could not be"
                f" merged with object {record} because {size} !="
                f" {record[SIZE_STANDARD_KEY]}.")
            logging.error(error_msg)

            if not continue_after_error:
                raise csv.Error(error_msg)

    # at this point, the record has the same hash and size as a previous guid
    # so either we're allowing an entry like that, or not
    if GUID_STANDARD_KEY in existing_record:
        if guid and new_guid and guid != new_guid:
            warning_msg = (
                "Found two objects with the same hash but different guids,"
                f" could not merge. Details: object {existing_record} could not be"
                f" merged with object {record} because {guid} !="
                f" {new_guid}.")

            if not allow_mult_guids_per_hash:
                logging.error(warning_msg)
                raise csv.Error(warning_msg)

            info_msg = (
                f"Allowing multiple GUIDs per hash. {new_guid} has same "
                f"hash as {guid}.\n    Details: {record} is a different "
                f"record with same hash as existing guid: {guid}.")
            logging.info(info_msg)
Example #17
 def _sniff(self, size=10000, delimiters=', \t|'):
     '''Detect a header and the dialect within the first size bytes.'''
     self.file.seek(0)
     sample = self.file.read(size)
     try:
         sample = sample.decode(self.encoding)
     except UnicodeDecodeError:
         raise csv.Error('Encountered invalid Unicode character')
     except AttributeError:
         pass
     self.file.seek(0)
     sniffer = csv.Sniffer()
     return sniffer.sniff(sample, delimiters), sniffer.has_header(sample)
Example #18
 def _is_fauxer_file(
     self
 ):  # Does not check the MIME type; checks readability and explicit dialect attributes. These values should match the expected dialect.
     try:
         with open(self.fn, 'r') as file:
             dialect = csv.Sniffer().sniff(file.read(1024))
             if dialect.delimiter != ',':
                 raise csv.Error('Cell delimiter is not "," .')
             file.seek(0)
     except csv.Error as e:
         raise FileExistsError(
             '{} file is not a valid Fauxlizer (.faux) file. {}'.format(
                 self.fn, str(e)))
Example #19
    async def test_raises_permission_denied_error(self):
        file_path = self.create_tempfile()
        os.remove(file_path)
        release_mngr = file_release_manager.CSVFileReleaseManager(
            file_path=file_path,
            save_mode=file_release_manager.CSVSaveMode.APPEND)

        with mock.patch.object(csv.DictWriter, 'writerow') as mock_writerow:
            mock_writerow.side_effect = csv.Error()

            with self.assertRaises(file_release_manager.
                                   FileReleaseManagerPermissionDeniedError):
                await release_mngr._append_value({})
Example #20
def process_row(row, cur):
    """

    process_row handles the tuples produced by the csv reader, and 
    handles all errors from the actual database functions. 
    Error handling is print messages and return to the for loop, so that we can 
    recover from broken records. The goal is to insert as many good vulns
    and/or records as possible, and alert the user to the malformed records.
    
    db.commit() happens after every successful insertVuln()/insertHost(), and every 
    successful insertInstance().
    Row Header:Plugin ID,CVE,CVSS,Risk,Host,Protocol,Port,Name,Synopsis,Description,Solution,See Also,Plugin Output
    Sample Row: 
    
        10881,,,None,192.168.1.5,tcp,22,SSH Protocol Versions Supported,A SSH server is running on the remote host.,
        "This plugin determines the versions of the SSH protocol supported by 
        the remote SSH daemon.",n/a,,"The remote SSH daemon supports the following versions of the
        SSH protocol :-X.XX- X.X SSHv2 host key fingerprint : XX:02:XX:07:54:05:b0:XX:4b:dd:88:XX:43:ae:XX:0a"

    """
    if row[3] == "None" or row[3] == "Risk":  #not a vuln
        return
    pluginID, cve, cvss, risk, hostName, protocol, port, vulnName, vulnDescription, longDescription, solution, url, pluginOutput = row
    textString = "<div id=\"protocol\">Protocol: " + protocol + "</div> \
                 <div id=\"port\">Port: " + port + "</div> \
                 <div id=\"detailed-explanation\">More Details: " + longDescription + "</div> \
                 <div id=\"plugin-output\">Plugin Output: " + pluginOutput + "</div>"
    descString = "<div id=\"description\">DESCRIPTION: " + vulnDescription + "</div>\
                 <div id=\"cvss-score\">CVSS: " + cvss + "</div> \
                 <div id=\"solution\">Solution: " + solution + "</div>"

    if hostName == '' or vulnName == '' or vulnDescription == '':
        raise csv.Error('Incomplete Record.')
    try:
        cur.execute("SELECT host_id FROM host WHERE host_name = %s", hostName)
        hostID = cur.fetchone()
        if hostID == None:
            hostID = insertHost(hostName)
        else:
            hostID = hostID[0]
    except MySQLdb.Error as e:
        try:
            print("Host '%s' Lookup Error [%d]: %s" % (hostName, e.args[0],
                                                       e.args[1]))
            exitCode = 2
            return
        except IndexError:
            print("Host '%s' Lookup Error: %s" % (hostName, str(e)))
            exitCode = 2
            return
Example #21
def read_properties(filename):
    """ Reads a given properties file with each line of the format key=value.  Returns a dictionary containing the pairs.

    Keyword arguments:
        filename -- the name of the file to be read
    """
    result={ }
    with open(filename, "rb") as csvfile:
        reader = csv.reader(csvfile, delimiter='=', escapechar='\\', quoting=csv.QUOTE_NONE)
        for row in reader:
            if len(row) != 2:
                raise csv.Error("Too many fields on row with contents: "+str(row))
            result[row[0]] = row[1] 
    return result
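
A self-contained sketch of the same key=value parsing, reading from an in-memory string; note how the escape character lets a value contain '=':

import csv
import io

text = "host=example.com\npath=C\\=old\\=style\n"
reader = csv.reader(io.StringIO(text), delimiter='=', escapechar='\\',
                    quoting=csv.QUOTE_NONE)
properties = {}
for row in reader:
    if len(row) != 2:
        raise csv.Error("Too many fields on row with contents: " + str(row))
    properties[row[0]] = row[1]
print(properties)  # {'host': 'example.com', 'path': 'C=old=style'}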
Example #22
def loadConfig():
    try:
        with open(configFilename, "rb") as csvfile:
            reader = csv.reader(csvfile,
                                delimiter='=',
                                escapechar='\\',
                                quoting=csv.QUOTE_NONE)
            for row in reader:
                if len(row) != 2:
                    raise csv.Error("Too many fields on row with contents: " +
                                    str(row))
                configOptions[row[0]] = row[1]
    except:
        pass
Example #23
    def post(self, request, *args, **kwargs):
        #pylint:disable=unused-argument,too-many-locals
        plan = kwargs.get('plan')
        response = {'created': [], 'updated': [], 'failed': []}
        uploaded = request.FILES.get('file')
        filed = csv.reader(
            StringIO(uploaded.read().decode('utf-8', 'ignore')
                     ) if uploaded else StringIO())

        for row in filed:
            try:
                if len(row) == 2:
                    full_name, email = row
                elif len(row) == 3:
                    first_name, last_name, email = row
                    full_name = '%s %s' % (first_name, last_name)
                else:
                    raise csv.Error()
            except csv.Error:
                response['failed'].append({
                    'data': {
                        'raw': row
                    },
                    'error': 'Unable to parse row'
                })
            else:
                serializer = CartItemCreateSerializer(
                    data={
                        'plan': plan,
                        'full_name': full_name,
                        'sync_on': email,
                        'email': email
                    })
                if serializer.is_valid():
                    cart_item, created = self.insert_item(
                        request, **serializer.data)
                    if isinstance(cart_item, CartItem):
                        cart_item = serializer.to_representation(cart_item)
                    if created:
                        response['created'].append(cart_item)
                    else:
                        response['updated'].append(cart_item)
                else:
                    response['failed'].append({
                        'data': serializer.data,
                        'error': serializer.errors
                    })

        return Response(response)
Example #24
def write_entities(entities, header, outputfile):
    """
    Reads a set of entities and saves them to a file.

    :param set entities: set of tuples with entities data: identifier, label, name\
                        and other attributes.
    :param list header: list of column names.
    :param str outputfile: path to file to be saved (including filename and extension).
    """
    try:
        df = pd.DataFrame(list(entities), columns=header)
        df.to_csv(path_or_buf=outputfile, sep='\t',
                header=True, index=False, quotechar='"',
                line_terminator='\n', escapechar='\\')
    except csv.Error as err:
        raise csv.Error("Error writing etities to file: {}.\n {}".format(outputfile, err))
Example #25
def write_relationships(relationships, header, outputfile):
    """
    Reads a set of relationships and saves them to a file.

    :param set relationships: set of tuples with relationship data: source node, target node, \
                                relationship type, source and other attributes.
    :param list header: list of column names.
    :param str outputfile: path to file to be saved (including filename and extension).
    """
    try:
        df = pd.DataFrame(list(relationships), columns=header)
        df.to_csv(path_or_buf=outputfile, sep='\t',
                header=True, index=False, quotechar='"',
                line_terminator='\n', escapechar='\\')
    except Exception as err:
        raise csv.Error("Error writing relationships to file: {}.\n {}".format(outputfile, err))
Example #26
def sniff_dialect(sample, sep, skip_dialect, ui):
    t1 = time()
    try:
        if skip_dialect:
            ui.debug('investigate_encoding_and_dialect - skip dialect detect')
            if sep:
                csv.register_dialect('dataset_dialect', csv.excel,
                                     delimiter=sep)
            else:
                csv.register_dialect('dataset_dialect', csv.excel)
            dialect = csv.get_dialect('dataset_dialect')
        else:
            sniffer = csv.Sniffer()
            dialect = sniffer.sniff(sample, delimiters=sep)
            ui.debug('investigate_encoding_and_dialect - seconds to detect '
                     'csv dialect: {}'.format(time() - t1))
    except csv.Error:
        decoded_one = sample
        t2 = time()
        detector = Detector()
        delimiter, resampled = detector.detect(decoded_one)

        if len(delimiter) == 1:
            delimiter = delimiter[0]
            ui.info("Detected delimiter as %s" % delimiter)

            if sep is not None and sep != delimiter:
                delimiter = sep
        else:
            raise csv.Error(
                "The csv module failed to detect the CSV dialect. "
                "Try giving hints with the --delimiter argument, "
                "E.g  --delimiter=','"
            )

        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(resampled, delimiters=delimiter)
        ui.debug('investigate_encoding_and_dialect v2 - seconds to detect '
                 'csv dialect: {}'.format(time() - t2))

    if dialect.escapechar is None:
        csv.register_dialect('dataset_dialect', dialect,
                             delimiter=str(dialect.delimiter),
                             quotechar=str(dialect.quotechar),
                             doublequote=True)
        dialect = csv.get_dialect('dataset_dialect')
    return dialect
Example #27
    def readPropertyFile(self):
        import csv

        file_contents = self.readAll()
        file_lines = file_contents.splitlines()

        result = {}
        reader = csv.reader(file_lines,
                            delimiter=str('='),
                            quoting=csv.QUOTE_NONE)
        for row in reader:
            if len(row) != 2:
                raise csv.Error("Too many fields on row with contents: " +
                                str(row))
            result[row[0].strip()] = row[1].strip().lstrip('"').rstrip('"')

        return result
Example #28
    def parse_row(self, row, lineno):
        """Parse a row of a TD Canada Trust CSV file.

        Args:
          row: A list of field values for the row.
          lineno: The line number where the row appears in the CSV file
        Returns:
          A beansoup.importers.csv.Row object.
        """
        if len(row) != 5:
            raise csvlib.Error(
                'Invalid row; expecting 5 values: {}'.format(row))
        date = datetime.datetime.strptime(row[0], '%m/%d/%Y').date()
        description = row[1]
        amount = -D(row[2]) if row[2] else D(row[3])
        balance = self.account_sign * D(row[4])
        return csv.Row(lineno, date, description, amount, balance)
Example #29
 def check_and_clean_up_row(cls, extended_rows):
     """Check if row length is same as header and standardize null values as empty strings"""
     for row_number, headers, row in extended_rows:
         if len(row) != len(headers):
             raise csv.Error(
                 f'row {row_number} has a different number of data '
                 f'points ({len(row)}) than there are column headers ({len(headers)})'
             )
         cleaned_row = list(
             map(
                 lambda value: reduce(
                     lambda value, null_value: value.replace(
                         null_value, ''),
                     CSV_NULL_VALUES,
                     value,
                 ),
                 row,
             ))
         yield (row_number, headers, cleaned_row)
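
A simplified standalone sketch of the same check; CSV_NULL_VALUES is project-specific, so a small illustrative set is used here:

import csv

CSV_NULL_VALUES = ("NULL", "N/A")  # illustrative stand-in for the project's constant

def check_and_clean_up_rows(extended_rows):
    for row_number, headers, row in extended_rows:
        if len(row) != len(headers):
            raise csv.Error(
                f"row {row_number} has a different number of data "
                f"points ({len(row)}) than there are column headers ({len(headers)})"
            )
        cleaned_row = row
        for null_value in CSV_NULL_VALUES:
            cleaned_row = [value.replace(null_value, "") for value in cleaned_row]
        yield row_number, headers, cleaned_row

rows = [(1, ["name", "city"], ["Ada", "NULL"])]
print(list(check_and_clean_up_rows(rows)))  # [(1, ['name', 'city'], ['Ada', ''])]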
Example #30
def get_rows_from_file(file):
    data = file.read()
    try:
        import chardet
        charset = chardet.detect(data)['encoding']
    except ImportError:
        charset = file.charset
    data = data.decode(charset or 'utf-8')
    # If the file was modified on a Mac, it only contains \r as line breaks
    if '\r' in data and '\n' not in data:
        data = data.replace('\r', '\n')

    # Sniffing line by line is necessary as some banks like to include
    # one-column garbage at the beginning of the file which breaks the sniffer.
    # See also: http://bugs.python.org/issue2078
    last_e = None
    dialect = None
    for line in data.split("\n"):
        line = line.strip()
        if len(line) == 0:
            continue
        try:
            dialect = csv.Sniffer().sniff(line, delimiters=";,.#:")
        except Exception as e:
            last_e = e
        else:
            last_e = None
            break
    if dialect is None:
        raise last_e or csv.Error("No dialect detected")
    reader = csv.reader(io.StringIO(data), dialect)
    rows = []
    for row in reader:
        if rows and len(row) > len(rows[0]):
            # Some banks put metadata above the real data, things like
            # a headline, the bank's name, the user's name, etc.
            # In many cases, we can identify this because these rows
            # have less columns than the rows containing the real data.
            # Therefore, if the number of columns suddenly grows, we start
            # over with parsing.
            rows = []
        rows.append(row)
    return rows