Пример #1
0
def basic_set(csvStr, csvStr2):
    """Load two CSV files, preview each one, and return the first frame.

    Args:
        csvStr: Passed to ``pd.read_csv`` with default options.
        csvStr2: Passed to ``pd.read_csv`` with the
            'tpep_pickup_datetime' and 'tpep_dropoff_datetime' columns
            parsed as dates.

    Returns:
        The DataFrame loaded from ``csvStr``. The second frame and both
        ``head()`` previews are discarded.
    """
    datetime_columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']

    plain_frame = pd.read_csv(csvStr)
    dated_frame = pd.read_csv(csvStr2, parse_dates=datetime_columns)

    # Exercise head() on both frames; the previews themselves are unused.
    plain_frame.head()
    dated_frame.head()

    return plain_frame
Пример #2
0
def test_from_csv_delimiter():
    """Check that a '|'-delimited CSV reads the same via ``pd`` and pandas.

    The fixture is created with ``delimiter='|'``, so both readers are
    given ``sep='|'``; without it each would parse every row as a single
    column and the delimiter handling would never be exercised.

    ``setup_csv_file`` / ``teardown_csv_file`` are helpers defined
    elsewhere; presumably they create and remove ``TEST_CSV_FILENAME``.
    """
    setup_csv_file(SMALL_ROW_SIZE, delimiter='|')

    try:
        pandas_df = pandas.read_csv(TEST_CSV_FILENAME, sep='|')
        ray_df = pd.read_csv(TEST_CSV_FILENAME, sep='|')

        assert ray_df_equals_pandas(ray_df, pandas_df)
    finally:
        # Always clean up, even when the read or the comparison fails, so
        # a failing run does not leave the fixture behind for later tests.
        teardown_csv_file()
Пример #3
0
import ray.dataframe as pd
#import pandas as pd

# Smoke test of time-series operations on a DataFrame loaded through `pd`:
# set_index, to_datetime on the index, then resample and rolling on the
# passenger_count column.
PICKUP_COLUMN = 'tpep_pickup_datetime'


def _show_head(message, frame):
    """Print ``message`` and then the first three rows of ``frame``."""
    print(message)
    print(frame.head(3))


print('############ 12: Test Ray TimeSeries #############')

ray_df = pd.read_csv("yellow_tripdata_2015-01-01.csv")
_show_head('    Read_CSV finished. Result:', ray_df)

# Move the pickup column into the index, then convert that index to
# datetimes so the resample step below has a datetime index to work on.
ray_df = ray_df.set_index(PICKUP_COLUMN)
_show_head('    set_index finished. Result:', ray_df)

ray_df.index = pd.to_datetime(ray_df.index)
_show_head('    to_datetime finished. Result:', ray_df)

# Resample passenger_count with the '1d' rule and take the mean.
daily_resampler = ray_df.passenger_count.resample('1d')
print('    resample finished')

daily_mean = daily_resampler.mean()
print('    mean of resample finished. Result:')
print(daily_mean)

# Rolling window of 10 rows; the window object itself is printed, and its
# mean is computed but not displayed.
rolling_window = ray_df.passenger_count.rolling(10)
print('    rolling aggregation finished. Result:')
print(rolling_window)

result = rolling_window.mean()
Пример #4
0
import ray.dataframe as pd
#import pandas as pd

# Smoke test: convert a single column of a DataFrame loaded through `pd`
# to datetimes and assign it back in place.
PICKUP_COLUMN = 'tpep_pickup_datetime'

print('############ 3: Test Ray Convert [Column] toDateTime #############')

ray_df = pd.read_csv("yellow_1of3.csv")

print('    Read_CSV finished. Result:')
print(ray_df.head(3))

# Convert first, then overwrite the original column with the result.
pickup_times = pd.to_datetime(ray_df[PICKUP_COLUMN])
ray_df[PICKUP_COLUMN] = pickup_times
print(ray_df.head(3))

print('    to_datetime(df[column]) finished. Result above.')
Пример #5
0
import ray.dataframe as pd
#import pandas as pd

# Smoke test of merge + groupby on DataFrames built through `pd`: the trip
# data is joined to a small payment lookup table and grouped by the
# payment name.
DATETIME_COLUMNS = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']
PAYMENT_NAMES = [
    'Credit Card', 'Cash', 'No Charge', 'Dispute', 'Unknown', 'Voided trip'
]


def _show_head(message, obj):
    """Print ``message`` and then ``obj.head(3)``."""
    print(message)
    print(obj.head(3))


print('############ 10: Test Ray Merge #############')

ray_df = pd.read_csv(
    "yellow_tripdata_2015-01-01.csv", parse_dates=DATETIME_COLUMNS)
_show_head('    Read_CSV finished. Result:', ray_df)

# Lookup table: payment codes 1..6 paired with their display names.
ray_payments = pd.DataFrame({
    'num': list(range(1, len(PAYMENT_NAMES) + 1)),
    'payment_name': PAYMENT_NAMES,
})
print('    new DataFrame finished. Result:')
print(ray_payments)

# Join each trip's payment_type code to the lookup table's num column.
ray_df2 = ray_df.merge(ray_payments, left_on="payment_type", right_on="num")
_show_head('    merge finished. Result:', ray_df2)

ray2_groupby = ray_df2.groupby(ray_df2.payment_name)
_show_head('    groupby on merge finished. Result:', ray2_groupby)

# Selecting a column from the groupby object is not supported by Ray, so
# the aggregation below stays disabled:
# result = ray2_groupby.tip_amount.mean()
# print('    mean on groupby finished. Result:')
Пример #6
0
'''
a = pd.DataFrame([1,2,3])
print(a[0:10])

dates = ts.trade_cal()
test =dates['calendarDate'].tolist()

d= {"date":test}
open_dates = dates.loc[dates['isOpen']==1].reset_index()
test.reverse()
d['date'].reverse()
end = open_dates.loc[open_dates.loc[open_dates['calendarDate']=='2014-03-26'].index +10, 'calendarDate']
print(end.values[0])
'''
def _format_trade_date(raw):
    """Stringify ``raw`` and join its [0:4], [4:6] and [6:9] slices with '-'.

    Presumably ``raw`` is a YYYYMMDD value, giving 'YYYY-MM-DD' -- confirm
    against the CSV contents.
    """
    text = str(raw)
    return text[0:4] + "-" + text[4:6] + "-" + text[6:9]


# `a` and `b` bracket the load-and-reshape work with clock() readings.
a = clock()
k_data = pd.read_csv(
    '/home/wxl/p/StockBrain/backend/data/ashareeodprices.csv',
    low_memory=False)

# Keep only the rows for one security, then work on a full slice of them.
stock = k_data.query('S_INFO_WINDCODE == "600031.SH"')
stocks = stock[:]

# Reformat TRADE_DT and make it the index.
# (A meta=('TRADE_DT', 'str') argument to apply was left disabled here.)
stocks['TRADE_DT'] = stocks['TRADE_DT'].apply(_format_trade_date)
stocks.set_index('TRADE_DT', inplace=True)
b = clock()
#print('finish reading')
#pre_k_data = dd.from_pandas(pre_k_data, npartitions = nCores)
#pre_k_data['TRADE_DT'] = pre_k_data.map_partitions(lambda df: df['TRADE_DT'].apply(lambda x: str(x)[0:4]+"-"+str(x)[4:6]+"-"+str(x)[6:9]) )
#k_data['TRADE_DT'] = k_data['TRADE_DT'].apply(lambda x: str(x)[0:4]+"-"+str(x)[4:6]+"-"+str(x)[6:9])
#print('finish design')
#pre_k_data.compute(get=get)

#stock = k_data.loc[k_data['S_INFO_WINDCODE'] == '600031.SH']
#print(stock.index)