Exemplo n.º 1
0
 def upsert_instance(cls,
                     hasAward: 'Award' = None,
                     yearHeld: str = None,
                     hasEditionNumber: int = None,
                     yearScreened: int = None) -> 'AwardCeremony':
     """Return the instance matching the given non-null fields, creating
     it first if no such instance exists.

     Only arguments that are not None take part in both the lookup and
     the creation (filtered through `select_not_null`).

     NOTE(review): get-then-create is not atomic; concurrent callers can
     still insert duplicates. Consider `cls.objects.get_or_create` inside
     a transaction if that matters here.
     """
     kwds = {
         'hasAward': hasAward,
         'yearHeld': yearHeld,
         'hasEditionNumber': hasEditionNumber,
         'yearScreened': yearScreened
     }
     try:
         return cls.objects.get(**select_not_null(kwds))
     except cls.DoesNotExist:
         pass
     # BUG FIX: create through `cls` (as the lookup above does) instead of
     # hard-coding AwardCeremony, so subclasses upsert their own model.
     return cls.objects.create(**select_not_null(kwds))
def read_xsv(
    file: IO,
    dialect: str,
    fieldnames: List[str] = None,
    first_line_is_column_header: bool = True,
    discard: int = None,
    load_at_most: int = None,
) -> Iterable[Dict]:
    """Returns an iterable of dict. Must be iterated while file is still open.

    Args:
        file:
            An open file.
        dialect:
            As used in built-in module `csv`.
        fieldnames:
            Explicit column names; only allowed when
            `first_line_is_column_header` is False.
        first_line_is_column_header:
            If True, parses first line as column headers.
        discard:
            Non-negative integer or None. Initial rows of _data_ to discard.
        load_at_most:
            Non-negative integer or None. Rows of _data_ to load.

    Raises:
        NotImplementedError: If both a column-header line and explicit
            `fieldnames` are requested.

    Notes:
        Use 'excel' dialect for CSV. Use 'excel-tab' for TSV.

    Warnings:
        Must be iterated while file is still open.
    """
    kwargs = {
        'fieldnames': fieldnames,
        'dialect': dialect,
    }

    if not first_line_is_column_header and fieldnames is None:
        # Use 'Column X' as fieldnames like in OpenRefine.
        # BUG FIX: `file.readline(1)` read at most one *character* (so the
        # column count was always 1), and `file.seek(-1)` is an absolute
        # negative seek, which raises OSError. Read the whole first line
        # and rewind to the start so the line is re-read as data.
        first_line = file.readline()
        file.seek(0)
        delimiter = csv.get_dialect(dialect).delimiter
        num_cols = len(first_line.split(delimiter))
        kwargs['fieldnames'] = [f'Column {i + 1}' for i in range(num_cols)]

    if first_line_is_column_header and fieldnames is not None:
        raise NotImplementedError(
            "Changing column names isn't supported for simplicity")

    # Drop None-valued options (equivalent to select_not_null) so
    # DictReader falls back to its own defaults.
    reader = csv.DictReader(
        file, **{key: value for key, value in kwargs.items()
                 if value is not None})

    stop = None
    if load_at_most is not None:
        stop = load_at_most
        if discard is not None:
            stop += discard

    return islice(reader, discard, stop)
def read_xsv_file(
    filename: Union[str, Path],
    dialect: str,
    *,
    encoding: str = None,
    fieldnames: List[str] = None,
    first_line_is_column_header: bool = True,
    discard: int = None,
    load_at_most: int = None,
) -> List[Dict]:
    """Returns a list of dicts. Convenience method for `read_xsv`.

    Args:
        filename:
            The filename to open.
        dialect:
            As used in built-in module `csv`.
        encoding:
            Encoding of the file to open.
        fieldnames:
            Explicit column names, forwarded to `read_xsv`.
        first_line_is_column_header:
            If True, parses first line as column headers.
        discard:
            Non-negative integer or None. Initial rows of _data_ to discard.
        load_at_most:
            Non-negative integer or None. Rows of _data_ to load.

    Notes:
        Use 'excel' dialect for CSV. Use 'excel-tab' for TSV.
    """
    open_options = select_not_null({'encoding': encoding}, 'encoding')
    read_options = select_not_null({
        'fieldnames': fieldnames,
        'first_line_is_column_header': first_line_is_column_header,
        'discard': discard,
        'load_at_most': load_at_most,
    })
    with open(filename, 'r', **open_options) as file:
        # Materialize now: `read_xsv` returns a lazy iterator and the
        # file is closed as soon as this block exits.
        return list(read_xsv(file, dialect, **read_options))
Exemplo n.º 4
0
def write_model(file: Union[IO, str, Path],
                model_name: str,
                data: List[Dict[str, Any]],
                *,
                gen_pk: bool = True,
                pk_cols: List[str] = None):
    """Render `data` as a model module named `model_name` and write it out.

    `file` may be an open IO object or a filename/path; writing is
    delegated to `open_and_write_file`. `gen_pk`/`pk_cols` are forwarded
    to `Table` when not None.
    """
    table_options = select_not_null({'pk_cols': pk_cols, 'gen_pk': gen_pk})
    frame = pd.DataFrame(data)
    model_source = Table(model_name, frame, **table_options).as_python()
    open_and_write_file(file, model_source)
Exemplo n.º 5
0
def open_and_write_file(file: Union[IO, str, Path],
                        s: str,
                        *,
                        mode: str = 'w',
                        buffering: int = None,
                        encoding: str = None,
                        errors: str = None,
                        newline: str = None,
                        closefd: bool = None) -> None:
    """Convenience function to write to file.

    Ensures `s` is written to `file`, which may be an already-open IO
    object or a filename/path.

    Args:
        file: A filename, `Path`, or an opened file (IO object).
        s: The string to write to the file.
        mode, buffering, encoding, errors, newline, closefd:
            Passed through to `open` when `file` is a filename/path
            (None values are omitted). `encoding` defaults to 'utf-8'
            rather than the locale default.

    Returns:
        None

    Raises:
        OSError: When `file` is an IO that doesn't support writing.
        AssertionError: When `file` is neither writable nor a path.
    """
    # BUG FIX: the original wrapped `file.write(s)` itself in
    # `except AttributeError`, so an AttributeError raised *inside* a
    # write() implementation was misread as "not file-like" and fell
    # through to the path branch. Look up the attribute first, then call
    # it outside the handler. (The old `except OSError: raise` was a
    # no-op and is dropped; OSError still propagates.)
    try:
        write = file.write
    except AttributeError:
        pass  # Not file-like; try treating `file` as a path below.
    else:
        write(s)
        return

    if isinstance(file, (str, Path)):
        options = {
            'mode': mode,
            'buffering': buffering,
            'encoding': encoding if encoding is not None else 'utf-8',
            'errors': errors,
            'newline': newline,
            'closefd': closefd,
        }
        with open(file, **select_not_null(options)) as f:
            f.write(s)
            return
    raise AssertionError(
        f'file must be a writable IO object or a path, got {type(file)!r}')
Exemplo n.º 6
0
def parse_tsv(file: IO, chunksize: int = None) -> pd.DataFrame:
    """Parse an open tab-separated-values stream with pandas.

    Args:
        file: An open text file containing TSV data.
        chunksize: If not None, forwarded to `pandas.read_csv`, which
            then returns an iterator of DataFrame chunks of that many
            rows instead of a single DataFrame.

    Returns:
        A DataFrame, or a chunk iterator when `chunksize` is given.
    """
    # BUG FIX: the original built `{chunksize: chunksize}` — the *value*
    # used as the dict key — so any non-None chunksize produced e.g.
    # `**{5: 5}` and raised "keywords must be strings". It only worked by
    # accident for None (select_not_null dropped the entry).
    kwargs = {} if chunksize is None else {'chunksize': chunksize}
    return pd.read_csv(file, dialect='excel-tab', **kwargs)