Example #1
0
    def test_session_create_data_frame_from_pandas_data_frame(self):
        """createDataFrame on a pandas DataFrame with default (integer)
        column labels infers string column names "0", "1" and types
        LongType/StringType, preserving all rows."""
        try:
            # Pandas is an optional dependency
            # pylint: disable=import-outside-toplevel
            import pandas as pd
        except ImportError as e:
            raise ImportError("pandas is not importable") from e

        pdf = pd.DataFrame([(1, "one"), (2, "two"), (3, "three")])

        df = self.spark.createDataFrame(pdf)

        self.assertEqual(df.count(), 3)
        self.assertListEqual(df.collect(), [
            Row(**{
                "0": 1,
                "1": 'one'
            }),
            Row(**{
                "0": 2,
                "1": 'two'
            }),
            # Fixed: the key here was "2", which contradicted the other
            # rows and the two-column ("0", "1") schema asserted below.
            Row(**{
                "0": 3,
                "1": 'three'
            })
        ])
        self.assertEqual(
            df.schema,
            StructType([
                StructField("0", LongType(), True),
                StructField("1", StringType(), True)
            ]))
Example #2
0
 def test_session_create_data_frame_from_list_with_schema(self):
     """A list of one-element tuples plus an explicit schema yields a
     single-row DataFrame whose map-typed value is preserved."""
     expected_schema = StructType([
         StructField("map", MapType(StringType(), IntegerType()), True),
     ])

     df = self.spark.createDataFrame([({'a': 1},)], schema=expected_schema)

     self.assertEqual(df.count(), 1)
     self.assertListEqual(df.collect(), [Row(map={'a': 1})])
     self.assertEqual(df.schema, expected_schema)
Example #3
0
 def test_session_create_data_frame_from_list(self):
     """Without an explicit schema, column names default to _1, _2 and
     types are inferred (LongType for ints, StringType for strings)."""
     data = [(1, "one"), (2, "two"), (3, "three")]

     df = self.spark.createDataFrame(data)

     self.assertEqual(df.count(), 3)
     self.assertListEqual(
         df.collect(),
         [Row(_1=number, _2=word) for number, word in data])
     self.assertEqual(
         df.schema,
         StructType([
             StructField("_1", LongType(), True),
             StructField("_2", StringType(), True),
         ]))
Example #4
0
        def create_counts_row(col1Item, rows):
            """Build one crosstab result row: slot 0 holds the (cleaned)
            col1 value, the remaining slots hold per-col2-value counts.

            NOTE(review): closes over `column_size`, `distinct_col2` and
            `clean_element` from the enclosing scope (not visible here) —
            presumably the number of distinct col2 values, a value->index
            mapping, and a normalization helper; confirm in context.
            """
            # One slot for the col1 label plus one per distinct col2 value.
            # Slots left as None were never assigned by parse_row below.
            counts_row = [None] * (column_size + 1)

            def parse_row(row):
                # row[1] is the col2 value, row[2] its count; index is
                # shifted by 1 because slot 0 is reserved for the col1 value.
                column_index = distinct_col2[clean_element(row[1])]
                counts_row[int(column_index + 1)] = int(row[2])

            # parse_row mutates counts_row in place for each grouped row.
            rows.foreach(parse_row)
            # the value of col1 is the first value, the rest are the counts
            counts_row[0] = clean_element(col1Item)
            return Row(counts_row)
Example #5
0
 def test_cast_to_struct(self):
     """Casting a struct of numeric strings to a struct with IntegerType
     fields converts each numeric string to an int and leaves the
     StringType field untouched."""
     string_schema = StructType(fields=[
         StructField("character", StringType()),
         StructField("day", StringType()),
         StructField("month", StringType()),
         StructField("year", StringType()),
     ])
     typed_schema = StructType(fields=[
         StructField("character", StringType()),
         StructField("day", IntegerType()),
         StructField("month", IntegerType()),
         StructField("year", IntegerType()),
     ])
     source_row = Row(character='Alice', day='28', month='8', year='2019')

     result = cast_to_struct(source_row,
                             from_type=string_schema,
                             to_type=typed_schema,
                             options=BASE_OPTIONS)

     self.assertEqual(
         result,
         Row(character='Alice', day=28, month=8, year=2019))
Example #6
0
 def test_session_range(self):
     """spark.range(n) yields rows id=0..n-1, both through collect()
     and through toLocalIterator()."""
     df = self.spark.range(3)
     expected_rows = [Row(id=i) for i in range(3)]

     self.assertEqual(df.count(), 3)
     self.assertListEqual(df.collect(), expected_rows)
     self.assertEqual(list(df.toLocalIterator()), expected_rows)
Example #7
0
def test_column_stat_helper():
    """
    Expected quantile values come from use of org.apache.spark.sql.catalyst.util.QuantileSummaries
    """
    schema = StructType([StructField("value", IntegerType())])
    helper = ColumnStatHelper(col("value"))

    # Feed the integers 1..100000 through the helper, one row at a time.
    for value in range(1, 100001):
        helper.merge(Row(value=value), schema)
    helper.finalize()

    assert helper.count == 100000
    assert helper.min == 1
    assert helper.max == 100000
    assert helper.mean == 50000.5
    # sample standard deviation
    assert helper.stddev == 28867.65779668774

    expected_quantiles = {0: 1, 0.25: 24998, 0.5: 50000, 0.75: 74993, 1: 100000}
    for quantile, expected in expected_quantiles.items():
        assert helper.get_quantile(quantile) == expected
Example #8
0
 def test_cast_row_to_string(self):
     """Casting a row containing a nested map (with a null value), a
     null long, a boolean and a double to a string produces Spark's
     bracketed text representation."""
     nested_map_type = MapType(
         StringType(),
         MapType(StringType(), LongType(), True),
         True)
     row_schema = StructType([
         StructField("a", nested_map_type, True),
         StructField("b", LongType(), True),
         StructField("c", BooleanType(), True),
         StructField("d", DoubleType(), True),
     ])
     # OrderedDict keeps "value" before "b" so the rendered map order
     # is deterministic.
     input_row = Row(
         a=collections.OrderedDict([("value", None), ("b", {"c": 7})]),
         b=None,
         c=True,
         d=5.2)

     rendered = cast_to_string(input_row, row_schema, options=BASE_OPTIONS)

     self.assertEqual(rendered, "[[value ->, b -> [c -> 7]],, true, 5.2]")