    def _get_criteria(self):
        # TODO: only has criteria titles and no IDs
        # Peer Assess Pro currently has a single rubric; minimal variety is planned later
        return (etl.fromcolumns([
            self._get_imported_criteria_list(),
            self._get_imported_criteria_list()
        ]).rename(0, 'id').rename(1, 'title'))
    def _get_answers(self):
        # TODO: verify whether assessee and assessor IDs are swapped here;
        # it is unclear whether assessment_result holds peer feedback
        assessment_results = (etl.fromcsv(
            f'{self._dirc}/assessment_result.csv',
            delimiter=';').listoflists())[1:]
        # Create a separate row for each criterion: the Peer Assess Pro
        # export has one column per criterion.
        table = []
        for row in assessment_results:
            crit_names = self._get_imported_criteria_list()
            for i, answers in enumerate(zip(row[3:], crit_names)):
                comment, criteria = answers
                table.append([int(row[0]) * len(crit_names) + i] + row[1:3] +
                             [comment, criteria])

        assessments = (
            etl.fromcsv(f'{self._dirc}/assessment.csv', delimiter=';')
            .cut('id', 'assessor_id')
            .rename('id', 'assessee_artifact_id')
            .rename('assessor_id', 'assessee_actor_id'))  # TODO: double-check this mapping

        return (etl.fromcolumns(np.array(table).T.tolist())
                .rename(0, 'id')
                .rename(1, 'assessee_artifact_id')
                # TODO: double-check; 'assessee' may need to be 'assessor' here
                .rename(2, 'assessor_actor_id')
                .rename(3, 'comment')
                .rename(4, 'criterion_id')
                .leftjoin(assessments, key='assessee_artifact_id')
                .convert('assessee_artifact_id', lambda r: None))
Example #3
def test_preprocess():
    header = ['SUBJECT', 'NAME']
    data = [['2', '1'], ['Steve', 'Bob']]
    table = etl.fromcolumns(data, header)

    table = util.preprocess(table, 'SUBJECT')
    result = list(table.data())
    assert result == [(1, 'Bob', result[0][2]), (2, 'Steve', result[1][2])]
Example #4
    def from_columns(cls, cols, header=None):
        """
        Create a ``parsons table`` from a list of lists organized as columns

        `Args:`
            cols: list
                A list of lists organized as columns
            header: list
                List of column names. If not specified, will use dummy column names
        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """

        return cls(petl.fromcolumns(cols, header=header))
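A minimal usage sketch (not part of the original snippet), assuming this classmethod lives on the Parsons `Table` class:

from parsons import Table

cols = [[1, 2, 3], ['a', 'b', 'c']]
tbl = Table.from_columns(cols, header=['id', 'letter'])
# tbl is a Parsons Table with columns 'id' and 'letter' and three rows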
Example #5
def fetch_collection(filepath):
    addr = f'{settings.SWAPI_HOST}/api/people'

    # write the header row first; each page of results is appended below
    etl.tocsv([FIELDS_WE_NEED], filepath)

    while addr:
        response = requests.get(addr).json()
        addr = response['next']

        table_columns = [[item[column_name] for item in response['results']]
                         for column_name in FIELDS_WE_GET]

        table = (etl.fromcolumns(table_columns, header=FIELDS_WE_GET).convert(
            'homeworld', resolve_homeworld).addfield(
                'date', lambda rec: rec['edited'].split('T')[0]).cutout(
                    'created').cutout('edited'))

        etl.appendcsv(table, filepath)

    return response['count']
Example #6
# coding:utf8
import petl as etl

# print(c)

cols1 = [[0, 1, 2], ['a', 'b', 'c']]
tb1 = etl.fromcolumns(cols1)
print(tb1)

# add "missing"
cols2 = [[0, 1, 2, 3], ['a', 'b', 'c']]
tb2 = etl.fromcolumns(cols2, missing='NA')
print(tb2)

# petl.io.json.fromdicts()
dicts = [{
    'foo': 'a',
    'bar': 1
}, {
    'foo': 'b',
    'bar': 2
}, {
    'foo': 'c',
    'bar': 3
}, {
    'foo': 'd'
}]
# fill missing values with the integer 4
tb3 = etl.fromdicts(dicts, header=['foo', 'bar'], missing=4)
print(tb3)
Example #7
import psycopg2
import petl as etl
import pandas as pd
import numpy as np
from datetime import datetime

dn = 'defect'
du = 'postgres'
dp = 1
dh = 'localhost'
dbp = 5432

cs = "dbname=%s user=%s password=%s host=%s port=%s" % (dn, du, dp, dh, dbp)
connection = psycopg2.connect(cs)

# build a two-column petl table; each cell holds a list of random floats
a = np.random.rand(100000, 200).tolist()
b = np.random.rand(100000, 10).tolist()
tb = etl.fromcolumns([a, b])

# pandas -> petl table -> db
df = pd.DataFrame(columns=['id', 'features', 'model_id', 'regdate'])
for i in range(1, 10000):
    df.loc[i] = [i, np.random.rand(10).tolist(), 'test1', datetime.now()]

pddf = etl.fromdataframe(df)
etl.todb(pddf, connection, 'defect_features', 'public')

# select query via a server-side (named) cursor
mkcursor = lambda: connection.cursor(name='arbitrary')
table = etl.fromdb(mkcursor, 'select * from public.defect_features')
table
Example #8
import pickle

import petl as etl
import csv

cols = [[0, 1, 2], ['a', 'b', 'c']]
table1 = etl.fromcolumns(cols)
print(table1)

###########################CSV Reading###############################

table2 = etl.fromcsv('AAPL.csv')

print(table2['Date'])
print(table2)

etl.tocsv(table1, 'example.csv')  # writing to a CSV file

##########################Reading from Pickle files####################
"""" 
what is pickle?
Pickling is a way to convert a python object (list, dict, etc.) into a character stream.
The idea is that this character stream contains all the information necessary to reconstruct
the object in another python script.
"""
#Creating a pickle file

a = ['test value', 'test value 2', 'test value 3']

file_Name = "testfile"
# open the file for writing
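# (sketch, not part of the original snippet, which is truncated here: the usual pickle round trip)
fileObject = open(file_Name, 'wb')
pickle.dump(a, fileObject)  # serialize the list to the file
fileObject.close()

# open the file for reading and reconstruct the object
fileObject = open(file_Name, 'rb')
b = pickle.load(fileObject)
fileObject.close()
print(b)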
Example #9
File: stcfor.py  Project: hsinkai/crowa
        glob.glob(
            os.path.join(os.path.dirname(sys.argv[1]),
                         'ocm3_[thz]*' + inputstr + '_H*'))) == 12:
    timelen = 96
    home = os.path.expanduser("~")
    today = parser.parse(inputstr + '00+0000').astimezone(tz.tzlocal())

    dataset = {}
    for i in ('temp', 'hvel', 'zcor'):
        dataset[i] = netCDF4.MFDataset([
            os.path.join(head, 'ocm3_{0}_{1}_H{2}.nc'.format(i, inputstr, j))
            for j in ('-23_00', '01_24', '25_48', '49_72')
        ])

    find = [-1, -5, -10]
    depth = petl.fromcolumns([find], ['depth'])

    points = petl.fromcsv(os.path.join(home, 'var', 'Points')).convert({
        'lat': float,
        'lon': float,
        'ncpos': int
    })
    pPoints = [int(x) for x in open(os.path.join(home, 'var', 'PengHu'))]
    nodelist = points.values('ncpos')

    zcorlist = dataset['zcor']['zcor'][:].take(nodelist, 2)
    templist = dataset['temp']['temp'][:].take(nodelist, 2)
    ulist = dataset['hvel']['u'][:].take(nodelist, 2)
Example #10
def table_container_from(dataset):
    return etl.fromcolumns(dataset.columns, dataset.header)
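A minimal usage sketch (not from the original); `Dataset` here is a hypothetical container whose `columns` attribute is a list of column lists and whose `header` is the list of column names:

from collections import namedtuple
import petl as etl

Dataset = namedtuple('Dataset', ['columns', 'header'])  # hypothetical stand-in

dataset = Dataset(columns=[[1, 2, 3], ['a', 'b', 'c']], header=['id', 'letter'])
table = table_container_from(dataset)
print(table)  # three rows with columns 'id' and 'letter'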
Example #11
# initialize list of lists for data storage
BOCDates = []
BOCRates = []

# check response status and process BOC JSON object
if (BOCResponse.status_code == 200):
    BOCRaw = json.loads(BOCResponse.text)

    # extract observation data into column arrays
    for row in BOCRaw['observations']:
        BOCDates.append(datetime.datetime.strptime(row['d'], '%Y-%m-%d'))
        BOCRates.append(decimal.Decimal(row['FXUSDCAD']['v']))

    # create petl table from column arrays and rename the columns
    exchangeRates = petl.fromcolumns([BOCDates, BOCRates],
                                     header=['date', 'rate'])

    # print (exchangeRates)

    # load expense document
    try:
        expenses = petl.io.xlsx.fromxlsx('Expenses.xlsx', sheet='Github')
    except Exception as e:
        print('could not open Expenses.xlsx: ' + str(e))
        sys.exit()

    # join tables
    expenses = petl.outerjoin(exchangeRates, expenses, key='date')

    # fill down missing values
    expenses = petl.filldown(expenses, 'rate')
Example #13
import psycopg2 as pg
import pyodbc
import json
import petl as etl

server = 'localhost'
database = 'BikeStores'
username = '******'
password = '******'
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + server + ';DATABASE=' + database + ';UID=' + username + ';PWD=' + password)
mkcursor = lambda: connection.cursor()
table = etl.fromdb(mkcursor, 'select * from production.brands')
table
cursor = connection.cursor()

categories = [['yuchan','raum','hyejin']]

table = etl.fromcolumns(categories)
table = etl.rename(table,'f0','category_name')
res = etl.appenddb(table,connection,'categories','production')
res

#df = pd.DataFrame(columns=['id','list','dict'])

#for i in range(5):
#    id = i
#    a = np.array([k for k in range(i+10)]).tobytes()
#    b = json.dumps({'ab':[1,2,3],'cd':[4,5,6]})
#    df.loc[i] = (i,a,b)


#from sqlalchemy import create_engine
#df