-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
112 lines (76 loc) · 3.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/python3
#Each set of lines should be ran in the main.pynb file using jupiter notebook.
#In this way you can view the visualizations
#
#
#
#
#
#
#
#imports from path to find any datasets inside of your data folder
from pathlib import Path
data_dir_path = Path('data')
file_names = [x.name for x in data_dir_path.glob('*') if x.is_file()]
#prints the sizes of both datasets
import os
print('---Size of each dataset---\n')
print('Super Bowl History:')
print(os.path.getsize('data/datasets_superbowl.csv'))
print('\nSuper Bowl Ads:')
print(os.path.getsize('data/datasets_superbowl-ads.csv'))
#prints the amount of rows that are in each dataset
from utils import line_count
print("---Number of rows from each dataset---\n")
print("Super Bowl History:")
line_num = line_count('data/datasets_superbowl.csv')
print(line_num)
print("\nSuper Bowl Ads:")
line_num = line_count('data/datasets_superbowl-ads.csv')
print(line_num)
#prints all columns and one row from each dataset
from utils import head
print("All columns and one row from each dataset\n")
print("Super Bowl History:")
print(head('data/datasets_superbowl.csv', 2))
print("\nSuper Bowl Ads:")
print(head('data/datasets_superbowl-ads.csv', 2))
#-------------------------------------------------------#
# There are 10 columns in the Super Bowl History CSV:
# Date: date, Date of Super Bowl
# SB: string, Super Bowl name
# Winner: sting, Winning team
# Winner Pts: int, Points scored by winning team
# Loser: string, Losing team
# Loser Pts: int, Points scored by losing team
# MVP: string, Most Valuable Player award recepient
# Stadium: string, Stadium name of Super Bowl
# City: string, City of Super Bowl
# State: string, State of Super Bowl
# There are 4 columns in the Super Bowl Ads CSV:
# Year: string, Year of Super Bowl
# Product/Type: string, Indusrty of ad
# Product/Title: string, Name of product
# Plot/Notes: string, Description of commercial
#-----------------------------------------------------#
# There are two CSV files that are in this project. The datasets are structured by rows
# with headers as the attributes. Each have very different attritbutes, but the main and only attribute
# that is similar in both datasets is the Date in the History and the Year in the Ads. Since there is
# only 1 Super Bowl in a year, it will help comine and merge the two datasets together and analyze
# the data better and in more detail.
# After combining the two datasets, the dataset will have 13 columns. And the same number
# of columns. Another thing to do is to change all of the Product/Type into categories, they’re are
# similiraties like Soft Drink and Beer or Web hosting and Wireless. These will be assigned to Beverages
# and Technology, respectively.
#-----------------------------------------------------#
###NEED TO PARSE DATA BC SOME HAVE 5 FIELDS INSTEAD OF THE DEFAULT FIELD 4
#Read into textfile by readline method
#read line into text file
#splitting by , into a list
# check if the the line is greater than 4
#if it is then modify
import pandas as pd
df = pd.read_csv('test-data/datasets_superbowl-ads.txt')
import pandas as pd
ads = pd.read_csv('data/datasets_superbowl-ads.csv')
history = pd.read_csv('data/datasets_superbowl.csv')