import readFromDatabase as rfd import matplotlib.pyplot as plt import seaborn as sns import numpy as np ageData = rfd.readFrom('visitor', 'age') genderData = rfd.readFrom('visitor', 'gender') carsData = rfd.readFrom('visitor', 'cars') childrenData = rfd.readFrom('visitor', 'children') marriedData = rfd.readFrom('visitor', 'married') socialData = rfd.readFrom('visitor', 'social') workingData = rfd.readFrom('visitor', 'working') total = ageData.groupby('Year').sum().reset_index()['Count'] Data = [ ageData, genderData, carsData, childrenData, marriedData, socialData, workingData ] names = ['age', 'gender', 'cars', 'children', 'married', 'social', 'working'] for i in range(len(Data)): tem = Data[i].groupby(['Year', 'Attribute']).sum().reset_index() tem = tem.pivot(index='Year', columns='Attribute', values='Count').reset_index().drop(columns=['Year']) tem['total'] = total #tem.corr().to_csv('correlation/'+names[i]+'_cor.csv') #tem.corr(method='spearman').to_csv('correlation/'+names[i]+'_spearman.csv') listPearson = np.array([[ 0.383, 0.52, 0.143, 0.815, 0.168, 0.408, 0.911, 0.207, 0.946, -0.083, 0.914, 0.745, 0.893, 0.201, 0.679, 0.803, 0.424, -0.505, 0.954, -0.236, 0.497 ], [
"""Line chart of yearly visitor counts split by marital status.

Reads the 'married' visitor data, sums 'Count' per year and attribute, draws
one line per attribute value and saves the chart as MarriedOverYears.png in
the 'figures' directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'married')

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Raw attribute values and the text shown for each of them in the legend.
married = ['married', 'not married']
legend_labels = ['Married', 'Not married']

# Draw one line per attribute value. The label is attached to the line
# itself so the legend cannot drift out of step with the plotting order.
for attribute, label in zip(married, legend_labels):
    data = overAll[overAll['Attribute'] == attribute]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=label)

plt.legend(fontsize=15)
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.title('Number of Married Visitors for Day Visit in Scotland',
          fontsize=15,
          weight='bold')
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'MarriedOverYears.png'))
plt.show()
"""Line chart of yearly visitor counts split by whether visitors have children.

Reads the 'children' visitor data, sums 'Count' per year and attribute, draws
one line per attribute value and saves the chart as ChildrenOverYears.png in
the 'figures' directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'children')

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Raw attribute values and the text shown for each of them in the legend.
children = ['yes', 'no']
legend_labels = ['Yes', 'No']

# Draw one line per attribute value. The label is attached to the line
# itself so the legend cannot drift out of step with the plotting order.
for attribute, label in zip(children, legend_labels):
    data = overAll[overAll['Attribute'] == attribute]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=label)

plt.legend(fontsize=15)
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.title('Number of Visitors who Have Children for Day Visit in Scotland',
          fontsize=15,
          weight='bold')
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'ChildrenOverYears.png'))
plt.show()
"""Line chart of yearly visitor counts split by age band.

Reads the 'age' visitor data, sums 'Count' per year and attribute, draws one
line per age band and saves the chart as AgeOverYears.png in the 'figures'
directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'age')

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Age bands to plot; the raw value doubles as the legend text.
age = ['16-24', '25-34', '35-44', '45-54', '55-64', '65+']

# Draw one line per age band. The label is attached to the line itself so
# the legend cannot drift out of step with the plotting order.
for band in age:
    data = overAll[overAll['Attribute'] == band]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=band)

plt.legend(fontsize=12)
plt.ylabel('Count(Million)', fontsize=12, weight='bold')
plt.xlabel('Years', fontsize=12, weight='bold')
plt.xlim(2011, 2019)
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')
plt.title('Number of Visitors from Different Age for Day Visit in Scotland',
          fontsize=15,
          weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'AgeOverYears.png'))
plt.show()
"""Line chart of yearly visitor counts split by social grade.

Reads the 'social' visitor data, sums 'Count' per year and attribute, draws
one line per social grade and saves the chart as SocialOverYears.png in the
'figures' directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'social')

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Raw attribute values and the text shown for each of them in the legend.
social = ['ab', 'c1', 'c2', 'de']
legend_labels = ['AB', 'C1', 'C2', 'DE']

# Draw one line per social grade. The label is attached to the line itself
# so the legend cannot drift out of step with the plotting order.
for grade, label in zip(social, legend_labels):
    data = overAll[overAll['Attribute'] == grade]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=label)

plt.legend(fontsize=15)
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.title('Number of Visitors from Social Grade for Day Visit in Scotland',
          fontsize=15,
          weight='bold')
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'SocialOverYears.png'))
plt.show()
# For every visitor characteristic (age, gender, ...) and every value of that
# characteristic, fit a one-variable linear regression of the yearly total
# visitor count on the yearly count of that value, and print the fitted line.
# NOTE(review): the visible script stops right after `predictions` is
# computed; `r2_score`, `sm`, `predictions` and the figure opened per
# characteristic are not used in the part shown here - presumably the rest of
# the script (plotting / scoring) follows; confirm against the full file.
import readFromDatabase as rfd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm

# 'Attribute' values to analyse for each visitor characteristic.
age=['16-24', '25-34', '35-44', '45-54', '55-64', '65+']
cars=['access to car (1+)', 'no access to car (0)']
children=['yes', 'no']
gender=['male', 'female']
married=['married', 'not married']
social=['ab', 'c1', 'c2', 'de']
working=['employed/self-employed (full or part time)', 'in full or part time education', 'unemployed/not working']

# Yearly total: the 'Count' column of the age data summed per year.
ageData=rfd.readFrom('visitor', 'age')
total=ageData.groupby('Year').sum().reset_index()['Count']

# `attributes` and `names` are parallel lists: attributes[i] holds the values
# belonging to the characteristic names[i].
attributes=[age, gender, cars, children, married, social, working]
names=['age', 'gender', 'cars', 'children', 'married', 'social', 'working']

for i in range(len(names)):
    # Load one characteristic and sum its counts per (Year, Attribute).
    data = rfd.readFrom('visitor', names[i])
    tem = data.groupby(['Year', 'Attribute']).sum().reset_index()
    plt.figure(figsize=(8, 8))
    for item in attributes[i]:
        # Predictor: yearly counts of this attribute value, as a column vector.
        x = tem[tem['Attribute'] == item]['Count'].values.reshape(-1, 1)
        # Response: yearly totals, as a column vector.
        # NOTE(review): x and y are paired by position, so this assumes every
        # attribute value has exactly one row per year covered by `total` -
        # confirm against the data.
        y = total.values.reshape(-1, 1)
        reg = LinearRegression()
        reg.fit(x, y)
        print(item)
        # y is 2-D, hence the [0] / [0][0] indexing of intercept_ and coef_.
        print("The linear model is: y = {:.5} + {:.5}x".format(reg.intercept_[0], reg.coef_[0][0]))
        predictions = reg.predict(x)
"""Line chart of yearly visitor counts split by gender.

Reads the 'gender' visitor data, sums 'Count' per year and attribute, draws
one line per gender and saves the chart as GenderOverYears.png in the
'figures' directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'gender')

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Raw attribute values and the text shown for each of them in the legend.
gender = ['male', 'female']
legend_labels = ['Male', 'Female']

# Draw one line per gender. The label is attached to the line itself so the
# legend cannot drift out of step with the plotting order.
for attribute, label in zip(gender, legend_labels):
    data = overAll[overAll['Attribute'] == attribute]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=label)

plt.legend(fontsize=15)
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.title(
    'Number of Visitors from Different Genders for Day Visit in Scotland',
    fontsize=15,
    weight='bold')
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'GenderOverYears.png'))
plt.show()
"""Line chart of yearly visitor counts split by access to a car.

Reads the 'cars' visitor data, sums 'Count' per year and attribute, draws one
line per attribute value and saves the chart as CarOverYears.png in the
'figures' directory next to the current working directory.
"""
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'cars')

# Raw attribute values and the text shown for each of them in the legend.
cars = ['access to car (1+)', 'no access to car (0)']
legend_labels = ['Access to car (1+)', 'No access to car (0)']

plt.figure(figsize=(8, 8))

# Sum 'Count' for every (Year, Attribute) pair. Only 'Count' is plotted
# below, so it is the only column aggregated; this avoids summing (or
# failing on) any unrelated columns the table may carry.
overAll = visitorData.groupby(['Year', 'Attribute'])['Count'].sum().reset_index()

# Draw one line per attribute value. The label is attached to the line
# itself so the legend cannot drift out of step with the plotting order.
for attribute, label in zip(cars, legend_labels):
    data = overAll[overAll['Attribute'] == attribute]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3, label=label)

plt.legend(fontsize=15)
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.title('Number of Visitors Using Cars in Scotland',
          fontsize=15,
          weight='bold')
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')

# Save the chart as a 'PNG' file in <parent of cwd>/figures.
figures_dir = os.path.join(os.path.dirname(os.getcwd()), 'figures')
plt.savefig(os.path.join(figures_dir, 'CarOverYears.png'))
plt.show()
# Line chart of yearly visitor counts split by working status: reads the
# 'working' visitor data, sums it per year and attribute, and draws one line
# per working status.
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os

# Read data from database
visitorData = rfd.readFrom('visitor', 'working')

# Open the figure the chart is drawn on.
plt.figure(figsize=(8, 8))

# Group the rows by 'Year' and 'Attribute', sum them within each group and
# turn the result back into a flat dataframe.
overAll = visitorData.groupby(['Year', 'Attribute']).sum().reset_index()

# 'Attribute' values to plot, in plotting order.
working = [
    'employed/self-employed (full or part time)',
    'in full or part time education', 'unemployed/not working'
]

# Draw one line per working status.
for x in working:
    data = overAll[overAll['Attribute'] == x]
    plt.plot(data['Year'], data['Count'], marker='o', linewidth=3)

# Legend entries are matched to the lines by position, so this list must stay
# in the same order as `working`.
plt.legend([
    'Employed/self-employed (full or part time)',
    'In full or part time education', 'Unemployed/not working'
],
           fontsize=12)

# Axis labels, x-axis range, tick styling and title.
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')
plt.title('Number of Visitors from Different Working Status',
          fontsize=15,
          weight='bold')

# Save the chart as a 'PNG' file.
# NOTE(review): the save/show calls are not visible in this chunk - the
# script appears to continue past this point; confirm against the full file.
elif input in food: return "go for food" elif input in entertainment: return "go for entertainment" elif input in leisure: return "go for leisure activities" elif input in special_event: return "for special event" elif input in healthCentre: return "go for health centre" else: return "other activities" attributes=['visit friends or family', 'go for food', 'go for leisure activities',] #Read data from database purposeData = rfd.readFrom('action', 'activity') #List used to classify visitors and draw the figures. age=['16-24', '25-34', '35-44', '45-54', '55-64', '65+'] cars=['access to car (1+)', 'no access to car (0)'] children=['yes', 'no'] gender=['male', 'female'] married=['married', 'not married'] social=['ab', 'c1', 'c2', 'de'] working=['employed/self-employed (full or part time)', 'in full or part time education', 'unemployed/not working'] columns=[age, cars, children, gender, social, working, married] visitorType=['age', 'cars', 'children', 'gender', 'social', 'working', 'married'] purposeData['Action'] = purposeData['Action'].apply(lambda x:classifyPurpose(x)) #Group the based on action as the detailed 15 actions are classified into 8 actions. purposeData=purposeData.groupby(['Year', 'Action', 'Visitor', 'Attribute']).sum().reset_index()
plt.xticks(fontsize=12, weight='bold') plt.yticks(fontsize=12, weight='bold') ax2 = f.add_subplot(2, 1, 2) plot_pacf(ts,ax=ax2,lags=lags) plt.subplots_adjust(hspace=0.5) plt.title('Partial Autocorrelation', fontsize=15, weight='bold') plt.xticks(fontsize=12, weight='bold') plt.yticks(fontsize=12, weight='bold') plt.show() #Generate the date for index date=[] for i in range(2011, 2020): for j in range(1, 13): date.append(str(i)+'-'+str(j)) #Read the data from database and set the date as index. timeData=rfd.readFrom('visitor', 'age').groupby(['Year', 'Month']).sum().reset_index() timeData['date']=date timeData=timeData.set_index(pd.to_datetime(timeData['date'], format='%Y-%m')) timeData=timeData.drop(columns=['Year', 'Month', 'date']) #To reduce the data fluctuation range, make logarithm operation tsLog=np.log(timeData) #Make decomposition to original data decomposition=seasonal_decompose(tsLog) trend=decomposition.trend seasonal=decomposition.seasonal residual=decomposition.resid #Draw the figures about the trend, periodicity and residual of the data. plt.figure(figsize=(8, 8)) plt.subplot(3, 1, 1) trend.plot(color='red', fontsize=15, linewidth=2) plt.title('The Trend of Data', fontsize=15, weight='bold')