# Example #1
def joinWithCassandraTable(dstream, keyspace, table, selected_columns=None, join_columns=None):
    """Join each RDD in a DStream with a Cassandra table.

    :param dstream: the DStream to join; equals ``self`` when this function is
        monkey patched onto DStream.
    :param keyspace: name of the Cassandra keyspace to join against.
    :param table: name of the CQL table to join against.
    :param selected_columns: columns to select from the Cassandra table, or
        None to select all of them.
    :param join_columns: columns to join on from the Cassandra table, or None
        for the default.
    :return: a new DStream of pickled joined rows.
    """
    streaming_ctx = dstream._ssc
    spark_ctx = streaming_ctx._sc
    gateway = spark_ctx._gateway

    # Optional column sequences must cross the py4j gateway as java String[];
    # a falsy value is passed through to the JVM side as null.
    if selected_columns:
        selected_columns = as_java_array(gateway, "String", selected_columns)
    else:
        selected_columns = None
    if join_columns:
        join_columns = as_java_array(gateway, "String", join_columns)
    else:
        join_columns = None

    # Chain the JVM-side transformations: join, pickle the rows, then wrap
    # the result back up as a JavaDStream.
    jvm_helper = helper(spark_ctx)
    joined = jvm_helper.joinWithCassandraTable(
        dstream._jdstream, keyspace, table, selected_columns, join_columns)
    joined = jvm_helper.pickleRows(joined)
    joined = jvm_helper.javaDStream(joined)

    return DStream(joined, streaming_ctx, AutoBatchedSerializer(PickleSerializer()))
# Example #2
def saveToCassandra(dstream, keyspace, table, columns=None, write_conf=None, row_format=None):
    """Save each RDD of *dstream* to a Cassandra table.

    :param dstream: the DStream whose RDDs are written out.
    :param keyspace: name of the target Cassandra keyspace.
    :param table: name of the target CQL table.
    :param columns: columns to write, or None for all.
    :param write_conf: write configuration object, or None for defaults.
    :param row_format: how rows should be interpreted, or None for the default.
    """
    # Instantiate the scala-side helper through the context class loader.
    class_loader = dstream._sc._jvm.java.lang.Thread.currentThread().getContextClassLoader()
    jvm_helper = class_loader.loadClass("pyspark_cassandra.PythonHelper").newInstance()

    # Marshal the optional arguments across the py4j gateway: the write
    # config as a java map of its attributes, the columns as a String[].
    gateway = dstream._sc._gateway
    if write_conf:
        write_conf = as_java_object(gateway, write_conf.__dict__)
    else:
        write_conf = None
    if columns:
        columns = as_java_array(gateway, "String", columns)
    else:
        columns = None

    jvm_helper.saveToCassandra(dstream._jdstream, keyspace, table, columns, write_conf, row_format)
# Example #3
def saveToCassandra(dstream, keyspace, table, columns=None, row_format=None, keyed=None, write_conf=None,
                    **write_conf_kwargs):
    """Save each RDD of *dstream* to a Cassandra table.

    :param dstream: the DStream whose RDDs are written out.
    :param keyspace: name of the target Cassandra keyspace.
    :param table: name of the target CQL table.
    :param columns: columns to write, or None for all.
    :param row_format: how rows should be interpreted, or None for the default.
    :param keyed: whether the rows are (key, value) pairs, or None for the default.
    :param write_conf: base write configuration, merged with **write_conf_kwargs.
    :param write_conf_kwargs: individual write-configuration overrides.
    """
    spark_ctx = dstream._ssc._sc
    gateway = spark_ctx._gateway

    # Merge the explicit config with any keyword overrides, then hand the
    # resulting settings to the JVM as a plain map.
    merged_conf = WriteConf.build(write_conf, **write_conf_kwargs)
    java_conf = as_java_object(gateway, merged_conf.settings())

    # Columns, when given, cross the py4j gateway as a java String[].
    java_columns = as_java_array(gateway, "String", columns) if columns else None

    return helper(spark_ctx).saveToCassandra(
        dstream._jdstream, keyspace, table, java_columns, row_format, keyed, java_conf)
# Example #4
def saveToCassandra(dstream, keyspace, table, columns=None, write_conf=None, row_format=None):
    """Save each RDD of *dstream* to a Cassandra table.

    :param dstream: the DStream whose RDDs are written out.
    :param keyspace: name of the target Cassandra keyspace.
    :param table: name of the target CQL table.
    :param columns: columns to write, or None for all.
    :param write_conf: write configuration object, or None for defaults.
    :param row_format: how rows should be interpreted, or None for the default.
    """
    # Load and instantiate the scala-side helper via the context class loader.
    thread = dstream._sc._jvm.java.lang.Thread.currentThread()
    jvm_helper = (thread.getContextClassLoader()
                  .loadClass("pyspark_cassandra.PythonHelper")
                  .newInstance())

    gateway = dstream._sc._gateway
    # The write config crosses the py4j gateway as a java map of its
    # attributes; the columns as a java String[]. Falsy values become null.
    java_conf = as_java_object(gateway, write_conf.__dict__) if write_conf else None
    java_columns = as_java_array(gateway, "String", columns) if columns else None

    jvm_helper.saveToCassandra(dstream._jdstream, keyspace, table, java_columns,
                               java_conf, row_format)