예제 #1
0
import pytz
import random

import weather

# Driver script: run weather.process_one_day once for each day from
# 12 to 25 November 2012, writing into a per-day output folder.
month = 11
days = range(12,26)
day = 12

# NOTE(review): these two paths are computed but never read in this script.
flight_data_path = os.path.join(os.environ["DataPath"], "GEFlight", "RawPublicLeaderboard")
output_path = os.path.join(os.environ["DataPath"], "GEFlight", "Release 2", "PublicLeaderboardTrainDays")

for day in days:
    # Each window runs from 01:00 (UTC offset -08) on `day` to 01:00 on the
    # following calendar day.
    month_text = str(month)
    start_stamp = "2012-" + month_text + "-" + str(day) + " 01:00:00.00-08"
    end_stamp = "2012-" + month_text + "-" + str(day+1) + " 01:00:00.00-08"
    day_beginning = parser.parse(start_stamp)
    day_end = parser.parse(end_stamp)
    # Single-argument print(...) prints the same text under Python 2's
    # print statement and Python 3's print function.
    print(day_beginning)
    print(day_end)

    # Per-day output folder, created on demand.
    data_out = "C:\\Users\\david\\Dropbox\\GEFlight\\InitialTrainingSet_rev1\\2012_" + month_text + "_" + str(day) + "\\"
    print(data_out)
    already_there = os.path.exists(data_out)
    if not already_there:
        os.makedirs(data_out)

    weather.process_one_day(
        "C:\\Users\\david\\Dropbox\\GE\\nov26 data\\",
        data_out,
        day_beginning,
        day_end,
        "train"
    )

#cutoff_beginning_range = parser.parse("2012-" + str(month) + "-" + str(day) + " 09:00:00.00-09")
#random_range_num_hours = 12

#hours_to_add = random.uniform(0, random_range_num_hours)

#import create_sample_test_set
import weather
from dateutil import parser
import pandas
import os

# Driver script: for every row of the sample test set's days.csv, run
# weather.process_one_day in "test" mode, reading from the matching
# InitialTrainingSet_rev1 folder and writing to the SampleTestSet folder.

# Backslashes are escaped explicitly: the unescaped form (e.g. "\Users") only
# worked by accident in Python 2 byte strings and is a SyntaxError in Python 3.
# The runtime values are unchanged.
test_folder = "C:\\Users\\david\\Dropbox\\GEFlight\\Release 1\\SampleTestSet\\"

days = pandas.read_csv(test_folder + "days.csv")

# Debug preview of the row labelled 1.  DataFrame.ix was deprecated and later
# removed from pandas; with the default integer index produced by read_csv,
# .loc[1] selects the same row that .ix[1] did.
day = days.loc[1]
print(day)
print(day['day_beginning'])
release_path = "C:\\Users\\david\\Dropbox\\GEFlight\\Release 1\\"

for _, day in days.iterrows():
    print("DAY IS " + str(day))
    # The selected cutoff is used both as the end of the window and as the
    # explicit cutoff_time keyword, so parse it only once.
    cutoff_time = parser.parse(day['selected_cutoff_time'])
    weather.process_one_day(
        os.path.join(release_path, "InitialTrainingSet_rev1", day['folder_name']),
        os.path.join(release_path, "SampleTestSet\\", day['folder_name']),
        parser.parse(day['day_beginning']),
        cutoff_time,
        "test",
        cutoff_time=cutoff_time
    )
import os
import pandas
from datetime import datetime, timedelta
from dateutil import parser, tz
from geflight.transform import utilities
import pytz
import random

import weather

# Driver script: build one training-day folder for each of 14 consecutive
# daily cutoffs, the first at 2012-11-26 20:00 UTC.
data_root = os.environ["DataPath"]
raw_data_path = os.path.join(data_root, "GEFlight", "RawPublicLeaderboard")
output_path = os.path.join(data_root, "GEFlight", "Release 2", "PublicLeaderboardTrainDays")

start_day = datetime(2012, 11, 26, 20, 0, tzinfo=tz.tzutc())
# The first cutoff is start_day itself, followed by the next 13 days.
cutoff_times = [start_day] + [start_day + timedelta(days=offset) for offset in range(1, 14)]

for cutoff in cutoff_times:
    print(cutoff)
    # NOTE(review): -9 is presumably a UTC offset in hours -- confirm against
    # geflight.transform.utilities.
    day_output_path = os.path.join(output_path, utilities.get_day_str(cutoff, -9))
    day_beginning, day_end = utilities.get_day_boundaries(cutoff, -9)

    # Create the per-day output folder on first use.
    os.path.exists(day_output_path) or os.makedirs(day_output_path)
    weather.process_one_day(
        raw_data_path,
        day_output_path,
        day_beginning,
        day_end,
        "train"
    )
import weather
from dateutil import parser
import pandas
import os

# Driver script: for every row of the public leaderboard set's days.csv, run
# weather.process_one_day in "test" mode, reading from the matching
# PublicLeaderboardTrainDays folder and writing to the PublicLeaderboardSet
# folder.
release_path = os.path.join(os.environ["DataPath"], "GEFlight", "Release 2")


days = pandas.read_csv(os.path.join(release_path, "PublicLeaderboardSet", "days.csv"))

# Debug preview of the row labelled 1.  DataFrame.ix was deprecated and later
# removed from pandas; with the default integer index produced by read_csv,
# .loc[1] selects the same row that .ix[1] did.
day = days.loc[1]
print(day)
print(day['day_beginning'])

for _, day in days.iterrows():
    print("DAY IS " + str(day))
    # The selected cutoff is used both as the end of the window and as the
    # explicit cutoff_time keyword, so parse it only once.
    cutoff_time = parser.parse(day['selected_cutoff_time'])
    weather.process_one_day(
        os.path.join(release_path, "PublicLeaderboardTrainDays", day['folder_name']),
        os.path.join(release_path, "PublicLeaderboardSet", day['folder_name']),
        parser.parse(day['day_beginning']),
        cutoff_time,
        "test",
        cutoff_time=cutoff_time
    )