# Interactive/debugging script for the TensorFrames Python bindings:
# blocking a column, grouped aggregation, and a simple placeholder graph.
# NOTE(review): relies on a pre-existing `sqlContext` (e.g. a pyspark shell
# global) that is not defined in this chunk -- confirm the execution context.
import tensorframes as tfs
import tensorflow as tf
from pyspark.sql import Row
from pyspark.sql.functions import *
from pyspark.sql.types import DoubleType, IntegerType, LongType, FloatType
from tensorframes.core import _java_api

# Force creation of the JVM-side API object and route its logging to the driver.
japi = _java_api()
_java_api().initialize_logging()

# Smoke test: fetch the content of column 'x' as one block.
# NOTE(review): `x / 3` is integer division under Python 2 but true division
# under Python 3, which changes the generated 'key' strings -- confirm which
# interpreter this script targets.
data = [Row(x=float(x), key=str(x / 3)) for x in range(1, 6)]
df = sqlContext.createDataFrame(data)
tfs.block(df, "x")

# Grouped aggregation: per-key sum of 'x', computed by a TensorFlow
# reduce_sum over the leading (row/block) dimension.
data = [Row(x=float(x), key=str(x / 3)) for x in range(1, 6)]
df = sqlContext.createDataFrame(data)
gb = df.groupBy("key")
with tf.Graph().as_default() as g:
    # Placeholder-like input fed with the blocked content of column 'x'.
    x_input = tfs.block(df, "x", tf_name="x_input")
    x = tf.reduce_sum(x_input, [0], name='x')
    df2 = tfs.aggregate(x, gb)

# Row-wise map: add 3 to every value of column 'x'.
data = [Row(x=float(x)) for x in range(5)]
df = sqlContext.createDataFrame(data)
with tf.Graph().as_default() as g:
    # The placeholder that corresponds to column 'x'
    x = tf.placeholder(tf.double, shape=[None], name="x")
    # The output that adds 3 to x
    z = tf.add(x, 3, name='z')
    # The resulting dataframe
    # NOTE(review): the statement that builds the resulting DataFrame appears
    # to be truncated at the end of this chunk.
def setUp(self):
    """Create the per-test SQLContext and TensorFrames Java API handle.

    Side effects: initializes JVM-side logging through the TensorFrames API.
    ``TestCore.sc`` is assumed to be a SparkContext created by the test
    harness outside this chunk -- TODO confirm.
    """
    self.sql = SQLContext(TestCore.sc)
    self.api = _java_api()
    self.api.initialize_logging()
    # Fixed: was a Python 2 `print "setup"` statement; use the print()
    # function for Python 3 compatibility, matching the sibling setUp
    # variant in this file.
    print("setup")
def setUp(self):
    """Prepare a fresh SQLContext and TensorFrames API handle for each test.

    Also switches on JVM-side logging via the TensorFrames API.
    """
    # SQL entry point backed by the harness-provided SparkContext.
    self.sql = SQLContext(TestCore.sc)
    # Keep a local alias so the logging call reads naturally.
    api = _java_api()
    self.api = api
    api.initialize_logging()
    print("setup")
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

# Interactive/debugging script for TensorFrames grouped computations.
# NOTE(review): relies on a pre-existing `sqlContext` (e.g. a pyspark shell
# global) that is not defined in this chunk -- confirm the execution context.
import tensorframes as tfs
import tensorflow as tf
from pyspark.sql import Row
from pyspark.sql.functions import *
from pyspark.sql.types import DoubleType, IntegerType, LongType, FloatType
from tensorframes.core import _java_api

# Force creation of the JVM-side API object and route its logging to the driver.
japi = _java_api()
_java_api().initialize_logging()

# The input data: each row holds a length-2 vector 'x' and a string 'key'.
data = [Row(x=[float(x), float(2 * x)], key=str(x % 2)) for x in range(1, 6)]
df = sqlContext.createDataFrame(data)
# analyze() inspects the data so that tensor shape metadata is attached to
# the columns before building the graph.
df = tfs.analyze(sqlContext.createDataFrame(data))

# The geometric mean:
# NOTE(review): the graph below computes element-wise inverses plus a count,
# which are the ingredients of a *harmonic* mean rather than a geometric
# one -- confirm the intended statistic.
# TODO(tjh) make a test out of this, it found some bugs
# - non numeric columns (string)
# - unused columns
# - output that has a child
col_name = "x"
col_key = "key"
with tf.Graph().as_default() as g:
    # Block of column 'x' (one row of the block per DataFrame row).
    x = tfs.block(df, col_name)
    # Element-wise 1/x; tf.inv and tf.to_double are TF 1.x-era APIs
    # (later renamed tf.reciprocal / tf.cast).
    invs = tf.inv(tf.to_double(x), name="invs")
    # Append per-row 'invs' and an all-ones 'count' column to the DataFrame.
    df2 = tfs.map_blocks([invs, tf.ones_like(invs, name="count")], df)
# The geometric mean
gb = df2.select(col_key, "invs", "count").groupBy("key")