Пример #1
0
def saveToCassandra(rdd, keyspace=None, table=None, columns=None, row_format=None, keyed=None,
                    write_conf=None, **write_conf_kwargs):
    """
        Save an RDD to a Cassandra table through the JVM-side helper.

        The RDD is expected to contain dicts whose keys map to CQL columns.

        Arguments:
        @param rdd(RDD):
            The RDD to save. Equals self when saveToCassandra is invoked on a monkey patched
            RDD.
        @param keyspace(string):
            The keyspace to save the RDD in. When falsy, the `keyspace` attribute of the rdd
            (if it has one, e.g. a CassandraRDD) is used instead.
        @param table(string):
            The CQL table to save the RDD in. When falsy, the `table` attribute of the rdd
            (if it has one, e.g. a CassandraRDD) is used instead.

        Keyword arguments:
        @param columns(iterable):
            The columns to save, i.e. which keys to take from the dicts in the RDD.
            If None (or empty) is given, None is handed to the helper, meaning all columns
            are stored.
        @param row_format(RowFormat):
            Make explicit how to map the RDD elements into Cassandra rows.
            If None is given the mapping is auto-detected as far as possible.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not arrays of
            length two).
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra.

        Raises:
            ValueError: when neither the argument nor the rdd provides a keyspace
            ("keyspace not set") or a table ("table not set").
    """

    # Explicit arguments win; otherwise fall back to what the RDD itself carries.
    target_keyspace = keyspace if keyspace else getattr(rdd, 'keyspace', None)
    if not target_keyspace:
        raise ValueError("keyspace not set")

    target_table = table if table else getattr(rdd, 'table', None)
    if not target_table:
        raise ValueError("table not set")

    # Merge the WriteConf object and keyword overrides, then ship the settings to the JVM.
    merged_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_write_conf = as_java_object(gateway, merged_conf.settings())

    # The JVM side wants a String[]; a falsy selection is passed on as None.
    java_columns = None
    if columns:
        java_columns = as_java_array(gateway, "String", columns)

    cassandra_helper = helper(rdd.ctx)
    cassandra_helper.saveToCassandra(
        rdd._jrdd,
        target_keyspace,
        target_table,
        java_columns,
        row_format,
        keyed,
        java_write_conf,
    )
Пример #2
0
def deleteFromCassandra(dstream, keyspace=None, table=None, deleteColumns=None,
                        keyColumns=None,
                        row_format=None, keyed=None, write_conf=None,
                        **write_conf_kwargs):
    """Delete data from a Cassandra table, using data from the DStream as
    primary keys. Uses the specified column names.

    Arguments:
        @param dstream(DStream):
            The DStream whose elements identify what to delete. Equals self
            when invoking deleteFromCassandra on a monkey patched DStream.
        @param keyspace(string):
            The keyspace to delete from. Required; a falsy value raises
            ValueError.
        @param table(string):
            The CQL table to delete from. Required; a falsy value raises
            ValueError.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. A falsy value is handed to the
            helper as None (presumably meaning the full row -- confirm against
            the JVM helper).

        @param keyColumns(iterable):
            Presumably the primary key columns selector (optional) -- confirm
            against the JVM helper. A falsy value is handed to the helper as
            None.

        @param row_format(RowFormat):
            Forwarded unchanged to the helper; presumably makes explicit how
            the elements map to Cassandra rows.
        @param keyed(bool):
            Make explicit that the elements are key, value tuples (and not
            arrays of length two).

        @param write_conf(WriteConf):
            A WriteConf object to use when writing to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when writing to Cassandra

    Returns:
        Whatever the helper's deleteFromCassandra call returns.

    Raises:
        ValueError: if keyspace or table is not given.
    """

    # Fail fast in Python with the same messages the RDD variant uses, rather
    # than handing None to the JVM helper.
    if not keyspace:
        raise ValueError("keyspace not set")
    if not table:
        raise ValueError("table not set")

    ctx = dstream._ssc._sc
    gw = ctx._gateway

    # create write config as map
    write_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    write_conf = as_java_object(gw, write_conf.settings())
    # convert the columns to a string array
    deleteColumns = as_java_array(gw, "String",
                                  deleteColumns) if deleteColumns else None
    keyColumns = as_java_array(gw, "String", keyColumns) \
        if keyColumns else None

    return helper(ctx).deleteFromCassandra(dstream._jdstream, keyspace, table,
                                           deleteColumns, keyColumns,
                                           row_format,
                                           keyed, write_conf)
Пример #3
0
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None, keyed=None, write_conf=None,
                    **write_conf_kwargs):
    """
        Save a DStream to a Cassandra table by delegating to the JVM-side helper.

        @param dstream(DStream):
            The stream to save; its `_jdstream` is what gets handed to the helper.
        @param keyspace(string):
            The keyspace to save into.
        @param table(string):
            The CQL table to save into.
        @param columns(iterable):
            The columns to save. A falsy value is passed to the helper as None.
        @param row_format(RowFormat):
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Forwarded unchanged to the helper.
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra.

        Returns whatever the helper's saveToCassandra call returns.
    """
    spark_context = dstream._ssc._sc
    gateway = spark_context._gateway

    # Merge the WriteConf object with keyword overrides and convert the settings for the JVM.
    conf_settings = WriteConf.build(write_conf, **write_conf_kwargs).settings()
    java_write_conf = as_java_object(gateway, conf_settings)

    # The JVM side wants a String[]; a falsy selection is passed on as None.
    java_columns = None
    if columns:
        java_columns = as_java_array(gateway, "String", columns)

    cassandra_helper = helper(spark_context)
    return cassandra_helper.saveToCassandra(
        dstream._jdstream,
        keyspace,
        table,
        java_columns,
        row_format,
        keyed,
        java_write_conf,
    )
Пример #4
0
    def __init__(self, ctx, keyspace, table, row_format=None, read_conf=None, **read_conf_kwargs):
        """
            Set up a table scan RDD over the given keyspace and table.

            All arguments are first handed to the parent initializer. The JVM-side RDD is
            then created through `self._helper.cassandraTable` and stored as `self.crdd`.

            NOTE(review): `self.read_conf` and `self._helper` are not assigned here;
            presumably the parent initializer provides them -- confirm.
        """
        super(CassandraTableScanRDD, self).__init__(
            ctx, keyspace, table, row_format, read_conf, **read_conf_kwargs
        )

        # Start out with no key-by column selection.
        self._key_by = ColumnSelector.none()

        # Convert the read settings for the JVM before creating the JVM-side RDD.
        java_read_conf = as_java_object(ctx._gateway, self.read_conf.settings())
        self.crdd = self._helper.cassandraTable(ctx._jsc, keyspace, table, java_read_conf)
Пример #5
0
    def __init__(self, ctx, keyspace, table, row_format=None, read_conf=None, **read_conf_kwargs):
        """
            Initialize the scan RDD for `keyspace`.`table`.

            Delegates argument handling to the parent initializer, resets the key-by
            selection to none and asks the helper for the JVM-side Cassandra table RDD,
            which is kept in `self.crdd`.

            NOTE(review): relies on `self.read_conf` and `self._helper`, which are
            presumably set up by the parent initializer -- confirm.
        """
        parent = super(CassandraTableScanRDD, self)
        parent.__init__(ctx, keyspace, table, row_format, read_conf, **read_conf_kwargs)

        self._key_by = ColumnSelector.none()

        # The helper expects the read settings as a Java object.
        settings = self.read_conf.settings()
        jvm_read_conf = as_java_object(ctx._gateway, settings)

        table_helper = self._helper
        self.crdd = table_helper.cassandraTable(ctx._jsc, keyspace, table, jvm_read_conf)
Пример #6
0
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None,
                    keyed=None,
                    write_conf=None, **write_conf_kwargs):
    """
        Save a DStream to Cassandra via the JVM-side helper.

        @param dstream(DStream):
            The stream to save; its `_jdstream` is passed to the helper.
        @param keyspace(string):
            The keyspace to save into.
        @param table(string):
            The CQL table to save into.
        @param columns(iterable):
            The columns to save; a falsy value is passed on as None.
        @param row_format(RowFormat):
            Forwarded unchanged to the helper.
        @param keyed(bool):
            Forwarded unchanged to the helper.
        @param write_conf(WriteConf):
            A WriteConf object to use when saving to Cassandra.
        @param **write_conf_kwargs:
            WriteConf parameters to use when saving to Cassandra.

        Returns whatever the helper's saveToCassandra call returns.
    """
    sc = dstream._ssc._sc
    gateway = sc._gateway

    # Build the effective write configuration and convert its settings for the JVM.
    effective_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    jvm_write_conf = as_java_object(gateway, effective_conf.settings())

    # Only build a Java String[] when a column selection was actually given.
    if columns:
        jvm_columns = as_java_array(gateway, "String", columns)
    else:
        jvm_columns = None

    call_args = (dstream._jdstream, keyspace, table, jvm_columns, row_format,
                 keyed, jvm_write_conf)
    return helper(sc).saveToCassandra(*call_args)
Пример #7
0
def deleteFromCassandra(rdd,
                        keyspace=None,
                        table=None,
                        deleteColumns=None,
                        keyColumns=None,
                        row_format=None,
                        keyed=None,
                        write_conf=None,
                        **write_conf_kwargs):
    """
        Delete data from a Cassandra table, using data from the RDD as primary
        keys. Uses the specified column names.

        Arguments:
        @param rdd(RDD):
            The RDD whose elements identify what to delete. Equals self when
            invoking deleteFromCassandra on a monkey patched RDD.
        @param keyspace(string):
            The keyspace to delete from. When falsy, the `keyspace` attribute
            of the rdd (if it has one, e.g. a CassandraRDD) is used instead.
        @param table(string):
            The CQL table to delete from. When falsy, the `table` attribute of
            the rdd (if it has one, e.g. a CassandraRDD) is used instead.

        Keyword arguments:
        @param deleteColumns(iterable):
            The list of column names to delete. A falsy value is handed to the
            helper as None (presumably meaning the full row -- confirm against
            the JVM helper).

        @param keyColumns(iterable):
            Presumably the primary key columns selector (optional) -- confirm
            against the JVM helper. A falsy value is handed to the helper as
            None.

        @param row_format(RowFormat):
            Forwarded unchanged to the helper; presumably makes explicit how
            the RDD elements map to Cassandra rows.
        @param keyed(bool):
            Make explicit that the RDD consists of key, value tuples (and not
            arrays of length two).

        @param write_conf(WriteConf):
            A WriteConf object to use when writing to Cassandra
        @param **write_conf_kwargs:
            WriteConf parameters to use when writing to Cassandra

        Raises:
            ValueError: when neither the argument nor the rdd provides a
            keyspace ("keyspace not set") or a table ("table not set").
    """

    # Explicit arguments win; otherwise fall back to what the RDD carries.
    target_keyspace = keyspace if keyspace else getattr(rdd, 'keyspace', None)
    if not target_keyspace:
        raise ValueError("keyspace not set")

    target_table = table if table else getattr(rdd, 'table', None)
    if not target_table:
        raise ValueError("table not set")

    # Merge the WriteConf object with keyword overrides and convert the
    # settings for the JVM.
    merged_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    gateway = rdd.ctx._gateway
    java_write_conf = as_java_object(gateway, merged_conf.settings())

    # Column selections become Java String[]; falsy selections stay None.
    java_delete_columns = None
    if deleteColumns:
        java_delete_columns = as_java_array(gateway, "String", deleteColumns)

    java_key_columns = None
    if keyColumns:
        java_key_columns = as_java_array(gateway, "String", keyColumns)

    cassandra_helper = helper(rdd.ctx)
    cassandra_helper.deleteFromCassandra(
        rdd._jrdd,
        target_keyspace,
        target_table,
        java_delete_columns,
        java_key_columns,
        row_format,
        keyed,
        java_write_conf,
    )