Exemplo n.º 1
0
def parse(filename, filetype=None, encoding="utf-8", fallback_encoding="latin-1"):
    """Load a COUNTER report from disk, working out its format if needed.

    Returns a :class:`CounterReport <CounterReport>` object.

    :param filename: path to the COUNTER report to load and parse.
    :param filetype: format of the file, one of "csv", "tsv", "xlsx".
        When None (the default) the format is detected: a recognised
        file extension wins, otherwise the file's contents are inspected.
    :param encoding: encoding used to decode the file. Defaults to 'utf-8';
        not used for XLSX files (which specify their encoding in their XML).
    :param fallback_encoding: encoding to try if decoding with the primary
        encoding fails. Defaults to 'latin-1', which will accept any bytes
        (possibly producing junk results...). Not used for XLSX files.
    :raises PycounterException: if the given or detected file type is not
        one of "csv", "tsv" or "xlsx".

    """
    if filetype is None:
        # Cheapest signal first: a known extension names the type directly.
        for known_type in ("tsv", "xlsx", "csv"):
            if filename.endswith("." + known_type):
                filetype = known_type
                break
        else:
            # No recognised extension, so sniff the raw bytes instead.
            with open(filename, "rb") as file_obj:
                filetype = guess_type_from_content(file_obj)

    # The two delimited formats share one parser and differ only in the
    # separator; XLSX has its own parser and ignores the encoding arguments.
    if filetype == "tsv":
        delimiter = "\t"
    elif filetype == "xlsx":
        return parse_xlsx(filename)
    elif filetype == "csv":
        delimiter = ","
    else:
        raise PycounterException("Unknown file type %s" % filetype)

    return parse_separated(filename, delimiter, encoding, fallback_encoding)
Exemplo n.º 2
0
def parse(filename, filetype=None, encoding='utf-8',
          fallback_encoding='latin-1'):
    """Parse a COUNTER file, detecting its type first when not supplied.

    Returns a :class:`CounterReport <CounterReport>` object.

    :param filename: path to the COUNTER report to load and parse.
    :param filetype: type of file provided, one of "csv", "tsv", "xlsx".
        If None (the default), the type is taken from the file extension
        when it is a recognised one, and otherwise guessed from the
        file's contents.
    :param encoding: encoding to use to decode the file. Defaults to 'utf-8',
        ignored for XLSX files (which specify their encoding in their XML)
    :param fallback_encoding: alternative encoding to try if the primary
        encoding fails. Defaults to 'latin-1', which will accept any bytes
        (possibly producing junk results...). Ignored for XLSX files.
    :raises PycounterException: if the supplied or detected file type is
        not "csv", "tsv" or "xlsx".

    """
    if filetype is None:
        # First extension that matches the end of the name, if any.
        filetype = next(
            (ext for ext in ('tsv', 'xlsx', 'csv')
             if filename.endswith('.' + ext)),
            None)
        if filetype is None:
            # Extension was no help; look at the file's bytes instead.
            with open(filename, 'rb') as file_obj:
                filetype = guess_type_from_content(file_obj)

    if filetype == 'xlsx':
        return parse_xlsx(filename)

    # Delimited text formats: same parser, different separator.
    for separated_type, delimiter in (('tsv', '\t'), ('csv', ',')):
        if filetype == separated_type:
            return parse_separated(
                filename, delimiter, encoding, fallback_encoding)

    raise PycounterException("Unknown file type %s" % filetype)