Example #1
def predict():
    now = round(time.time())
    now_str = time.strftime(Common.REPORT_FILE_NAME_FORMAT,
                            time.localtime(now))
    Common.create_folder("report")
    with open(FILE_NAME_FORMAT.format(now_str), "w") as f:
        text = "Number,Time,Pred Available Bike Stands,Pred Bikes,Actual Time,Actual Available Bike Stands,Actual Bikes\n"

        for station in STATIONS:
            text += str(station) + "," + str(now) + ","
            url = f"http://localhost:4502/api/search?station={station}&minutes={TIME_DELAY}"
            response = requests.get(url)
            json_data = response.json()
            status = int(json_data["status"])
            print(f"Getting URL {url} responses {status} => {json_data}")

            if (status == 200):
                available_bike_stands = json_data["data"][
                    "available_bike_stands"]
                text += str(available_bike_stands) + ","
                available_bikes = json_data["data"]["available_bikes"]
                text += str(available_bikes) + "\n"
                time_arr.append(now_str)
            else:
                print(
                    f"An error occurred while calling the prediction API - {json_data['status']}"
                )
                text += "\n"
            #time.sleep(20)

        f.write(text)
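
The snippet above relies on several names defined outside the fragment (STATIONS, TIME_DELAY, FILE_NAME_FORMAT, time_arr) as well as the time/requests imports and the project's Common helper. A minimal sketch of that assumed setup, with placeholder values chosen purely for illustration:

import time
import requests
from common import Common

# hypothetical placeholders -- the real project defines its own values
STATIONS = [5, 66, 79, 100]                     # station numbers to query
TIME_DELAY = 30                                 # prediction horizon in minutes
FILE_NAME_FORMAT = "report/prediction-{}.csv"   # report file path template
time_arr = []                                   # timestamps of successful calls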
Example #2
def count_check_out(diff):
    # a negative diff is counted as bikes checked out
    if (diff < 0):
        return abs(diff)
    return 0
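
# --- illustrative sketch, not part of the original snippet ------------------
# count_check_out treats a negative diff as bikes checked out; a symmetric
# helper (assumed here, taking "diff" to be the change in available bikes
# between two readings) would count returns:
def count_check_in(diff):
    if (diff > 0):
        return diff
    return 0
# e.g. pd.Series([10, 8, 8, 11]).diff().fillna(0) gives 0, -2, 0, 3, so
# .apply(count_check_out) yields 0, 2, 0, 0 while .apply(count_check_in)
# yields 0, 0, 0, 3 (assumes pandas imported as pd).
# ----------------------------------------------------------------------------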


start = time.time()

#########################################################
##################### READ RAW DATA #####################
#########################################################
# get the current working directory
working_dir = os.getcwd()

Common.create_folder(f"{working_dir}/temp")
Common.create_folder(Common.CLEAN_DATA_DIR)

# change to the new-data directory to fetch the JSON files
data_dir = os.path.join(working_dir, "new-data")
os.chdir(data_dir)
print(f"Changed current directory to {data_dir}")

# count the JSON files in that directory
files = fnmatch.filter(os.listdir(data_dir), '*.json')
n_files = len(files)
print(f"Total number of JSON files: {n_files}")

JSON_FILE_NAME_PATTERN = r"([A-Z]{1}[a-z]{2})-([0-2][0-9]|3[0-1])-(0[0-9]|1[0-2])-\d{4}"  # raw string so \d is not treated as a string escape
file_dictionaries = {}  # 424 items
for f in files:
Example #3
        "number": "Number",
        "name": "Name",
        "address": "Address",
        "date": "Date",
        "time": "Time",
        "weekday": "Weekday",
        "bike_stands": "Bike Stands",
        "diff": "Diff",
        "available_bike_stands": "Available Stands",
        "check_in": "Check In",
        "check_out": "Check Out"
    })

###############################################################
############### SAVE PREPROCESSING DATA TO FILE ###############
###############################################################
print("Saving data to CSV file")
# create the directory for the prepared data
Common.create_folder(Common.CLEAN_DATA_DIR)
# delete the db_all_data.csv file if it already exists
Common.delete_file(Common.CLEAN_DATA_FILE_FULL_PATH)
# save the prepared data for later use
Common.save_csv(df, Common.CLEAN_DATA_FILE_FULL_PATH)

# print result out
#print(df)
#print(df[["Address","Date","Time","Bike Stands","Available Stands","Check In","Check Out"]])

end = time.time()
print("Done preparation after {} seconds".format((end - start)))
Example #4
import pandas as pd
import time
from common import Common
import sys
import math
import fnmatch
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import joblib    # for saving and loading models (sklearn.externals.joblib has been removed from scikit-learn)
from sklearn import preprocessing   # label encoder
from sklearn.metrics import mean_squared_error      # calculate MSE

start = time.time()

Common.create_folder(Common.EVALUATION_PLOTS_DIR)
Common.create_folder(Common.UNSEEN_PREDICTING_PLOTS_DIR)

def setBikeStands(number):
    # map a station number to its known total number of bike stands
    if (number == 79):
        total_stands = 27
    elif (number == 5):
        total_stands = 40
    elif (number == 100):
        total_stands = 25
    elif (number == 66):
        total_stands = 40
    else:
        total_stands = 23
    return total_stands
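
The if/elif chain above hard-codes the total stand count for a handful of stations, with 23 as the fallback. An equivalent lookup-table version, shown only as a design alternative using the same numbers:

# same mapping as setBikeStands, expressed as a dictionary lookup
TOTAL_STANDS = {79: 27, 5: 40, 100: 25, 66: 40}

def set_bike_stands(number):
    return TOTAL_STANDS.get(number, 23)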
Example #5
    Description:
        Plotting the distribution of activity throughout the week
        Finding the 10 busiest and 10 least busy stations
'''

import os
import numpy as np
import pandas as pd
import calendar
import time
from common import Common
import matplotlib.pyplot as plt

start = time.time()

Common.create_folder(Common.PLOTS_DIR)

# get the relative path of the prepared data file
rel_path = os.path.relpath(Common.CLEAN_DATA_FILE_FULL_PATH)

# read CSV files using Pandas
df = pd.read_csv(rel_path, delimiter=",", parse_dates=["Date"])

# count how many records exist for each date; a date with very few rows (< 10) suggests the data is partly missing
#print(df.groupby([df["Date"].dt.date])["Date"].count())

# inspecting that output shows that July 2016 has very few records
#print(df[df["Date"].dt.month == 7].reset_index(drop=True))

top_check_ins = pd.DataFrame(
    df.groupby(
Example #6
import pandas as pd
import time
from common import Common
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import math
from sklearn import preprocessing  # label encoder
from sklearn import ensemble  # library of Gradient Boosting
from sklearn.model_selection import train_test_split  # split data into training and testing sets
from sklearn.metrics import mean_squared_error  # calculate MSE
import joblib  # for saving and loading models (sklearn.externals.joblib has been removed from scikit-learn)
import sys

start = time.time()

Common.create_folder(Common.PREDICTING_PLOTS_DIR)

# get clusters dataframe
clusters = Common.get_dataframe_from_file(Common.CLUSTERED_DATA_FILE_FULL_PATH,
                                          True)

# get all data dataframe
all_df = Common.get_dataframe_from_file(Common.CLEAN_DATA_FILE_FULL_PATH, True)
all_df = all_df[(all_df["Date"] >= "2016-10-14")
                & (all_df["Date"] <= "2017-10-14")].reset_index(drop=True)

# left merge these two dataframes together based on Number and Time
merged_df = pd.merge(all_df,
                     clusters[["Number", "Time", "Cluster"]],
                     on=["Number", "Time"],
                     how="left")
Example #7
import os
import numpy as np
import pandas as pd
import time
from common import Common
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import datetime as dt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import sys

start = time.time()

Common.create_folder(Common.CLUSTERING_PLOTS_DIR)


def fill_na(df):
    # iterate through rows
    for i, row in df.iterrows():
        # iterate through columns of the current row
        for j, column in row.items():
            # if this is the first row and the cell is NaN, fill it with the next non-NaN value in the column
            if (i == 0 and np.isnan(df.loc[i, j])):
                k = i + 1
                # iterate to find the next non-N/A value
                while (np.isnan(df.loc[k, j])):
                    k = k + 1
                df.at[i, j] = df.at[k, j]
            elif (
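
The fill_na helper is cut off here. From the visible part, it replaces a NaN in the first row with the next valid value in the same column and presumably fills later NaNs from an earlier value. A compact sketch of that idea using pandas built-ins, assuming that interpretation:

def fill_na_builtin(df):
    # ffill propagates the previous valid value downwards; bfill then fills a
    # leading NaN by borrowing the next valid value in the same column
    return df.ffill().bfill()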