def test_append_string():
    """append_string on a string column should serialize to Java without error."""
    s = Schema()
    s.add_string_column('str1')
    process = TransformProcess(s)
    process.append_string('str1', 'xxx')
    process.to_java()
def test_remove_white_spaces():
    """remove_white_spaces on a string column should serialize to Java without error."""
    s = Schema()
    s.add_string_column('str1')
    process = TransformProcess(s)
    process.remove_white_spaces('str1')
    process.to_java()
def test_replace_empty():
    """replace_empty_string on a string column should serialize to Java without error."""
    s = Schema()
    s.add_string_column('str1')
    process = TransformProcess(s)
    process.replace_empty_string('str1', 'xx')
    process.to_java()
def test_lower():
    """lower on a string column should serialize to Java without error."""
    s = Schema()
    s.add_string_column('str1')
    process = TransformProcess(s)
    process.lower('str1')
    process.to_java()
def test_cat_to_int():
    """categorical_to_integer must retype the column to 'integer' in the final schema."""
    s = Schema()
    s.add_categorical_column('cat', ['A', 'B', 'C'])
    process = TransformProcess(s)
    process.categorical_to_integer('cat')
    assert process.final_schema.get_column_type('cat') == 'integer'
    process.to_java()
def test_rename():
    """rename_column must replace the old column name with the new one."""
    s = Schema()
    s.add_string_column('str1')
    process = TransformProcess(s)
    process.rename_column('str1', 'str2')
    # Old name gone, new name present in the resulting schema.
    assert 'str1' not in process.final_schema.columns
    assert 'str2' in process.final_schema.columns
    process.to_java()
def test_remove():
    """remove_column must drop the named column, leaving only 'str2'.

    Bug fix: under Python 3, ``dict.keys()`` returns a view object that
    never compares equal to a list, so the original assertion
    ``columns.keys() == ['str2']`` could not hold. Compare a list of the
    keys instead (also preserves key order for the check).
    """
    schema = Schema()
    schema.add_string_column('str1')
    schema.add_string_column('str2')
    tp = TransformProcess(schema)
    tp.remove_column('str1')
    assert list(tp.final_schema.columns.keys()) == ['str2']
    tp.to_java()
def test_concat():
    """concat of two string columns must add the new combined column."""
    s = Schema()
    s.add_string_column('str1')
    s.add_string_column('str2')
    process = TransformProcess(s)
    process.concat(['str1', 'str2'], 'str3')
    assert 'str3' in process.final_schema.columns
    process.to_java()
def test_reduce_4():
    """Each string-compatible reduction op should serialize to Java without error."""
    for op in ('first', 'last', 'append', 'prepend', 'count', 'count_unique'):
        s = Schema()
        s.add_string_column('col1')
        s.add_string_column('col2')
        process = TransformProcess(s)
        process.reduce('col1', op)
        process.to_java()
def test_str_to_time():
    """string_to_time must retype the parsed column to 'DateTime'."""
    s = Schema()
    s.add_string_column('str1')
    s.add_string_column('str2')
    process = TransformProcess(s)
    process.string_to_time('str1')
    assert process.final_schema.get_column_type('str1') == 'DateTime'
    process.to_java()
def test_derive_col_from_time():
    """Deriving an hour field from a DateTime column must add the new column."""
    s = Schema()
    s.add_string_column('str1')
    s.add_string_column('str2')
    process = TransformProcess(s)
    # Column must be a DateTime before a field can be derived from it.
    process.string_to_time('str1')
    process.derive_column_from_time('str1', 'hour', 'hour_of_day')
    assert 'hour' in process.final_schema.columns
    process.to_java()
def test_reduce_1():
    """Each numeric reduction op should serialize to Java without error."""
    for op in ('sum', 'mean', 'std', 'var', 'prod'):
        s = Schema()
        s.add_string_column('name')
        s.add_double_column('amount')
        s.add_integer_column('hours')
        process = TransformProcess(s)
        process.reduce('name', op)
        process.to_java()
# Download the iris dataset if it is not already present, going through a
# temp file so a partial download never masquerades as the real file.
temp_filename = filename + '_temp'
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
if not os.path.isfile(filename):
    # Remove any stale partial download before fetching again.
    if os.path.isfile(temp_filename):
        os.remove(temp_filename)
    download_file(url, temp_filename)
    os.rename(temp_filename, filename)

# We use pyspark to filter empty lines
sc = pyspark.SparkContext(master='local[*]', appName='iris')
# Bug fix: read the file we just ensured exists (`filename`) instead of the
# hard-coded literal 'iris.data' — the two diverged if `filename` differed.
data = sc.textFile(filename)
filtered_data = data.filter(lambda d: len(d) > 0)

# Define Input Schema
input_schema = Schema()
input_schema.add_double_column('Sepal length')
input_schema.add_double_column('Sepal width')
input_schema.add_double_column('Petal length')
input_schema.add_double_column('Petal width')
input_schema.add_categorical_column(
    "Species", ["Iris-setosa", "Iris-versicolor", "Iris-virginica"])

# Define Transform Process: one-hot encode the species label
tp = TransformProcess(input_schema)
tp.one_hot("Species")

# Do the transformation on spark and convert to numpy
output = tp(filtered_data)
np_array = np.array([[float(i) for i in x.split(',')] for x in output])
# Features are every column except the trailing 3 one-hot label columns.
x = np_array[:, :-3]
def test_schema():
    """Exercise every supported column type and serialize the schema to Java."""
    s = Schema()
    s.add_string_column('str1')
    s.add_string_column('str2')
    s.add_integer_column('int1')
    s.add_integer_column('int2')
    s.add_double_column('dbl1')
    s.add_double_column('dbl2')
    s.add_float_column('flt1')
    s.add_float_column('flt2')
    s.add_categorical_column('cat1', ['A', 'B', 'C'])
    s.add_categorical_column('cat2', ['A', 'B', 'C'])
    s.to_java()
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # # SPDX-License-Identifier: Apache-2.0 ################################################################################ # Basic example from pydatavec import Schema, TransformProcess from pydatavec import NotInSet, LessThan # Let's define the schema of the data that we want to import # The order in which columns are defined here should match the order in which they appear in the input data input_schema = Schema() input_schema.add_string_column("DateTimeString") input_schema.add_string_column("CustomerID") input_schema.add_string_column("MerchantID") input_schema.add_integer_column("NumItemsInTransaction") input_schema.add_categorical_column("MerchantCountryCode", ["USA", "CAN", "FR", "MX"]) # Some columns have restrictions on the allowable values, that we consider valid: input_schema.add_double_column( "TransactionAmountUSD", 0.0, None, False, False) # $0.0 or more, no maximum limit, no NaN and no Infinite values
# License for the specific language governing permissions and limitations # under the License. # # SPDX-License-Identifier: Apache-2.0 ################################################################################ ''' In this simple example: We'll show how to combine multiple independent records by key. Specifically, assume we have data like "person,country_visited,entry_time" and we want to know how many times each person has entered each country. ''' from pydatavec import Schema, TransformProcess # Define the input schema schema = Schema() schema.add_string_column('person') schema.add_categorical_column('country_visited', ['USA', 'Japan', 'China', 'India']) schema.add_string_column('entry_time') # Define the operations we want to do tp = TransformProcess(schema) # Parse date-time # Format for parsing times is as per http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html tp.string_to_time('entry_time', 'YYYY/MM/dd') # Take the "country_visited" column and expand it to a one-hot representation