-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
132 lines (110 loc) · 5.76 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import sys, os
import pandas as pd
import datetime
# Tell Django which settings module to load. setdefault() only sets the
# variable when DJANGO_SETTINGS_MODULE is not already in the environment,
# so a value exported by the calling shell takes precedence.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "project.settings")
# django.setup() loads the settings and populates the app registry. It has to
# run before any model module is imported, which is why the model imports
# that follow are placed after this call instead of at the top of the file.
import django
django.setup()
from reviews.models import Review, Item
from django.contrib.auth.models import User
from django.contrib.auth.hashers import make_password
# ==============================================================================
# EXTRACT_REVIEW_FROM_ROW
# ==============================================================================
def extract_review_from_row(review_row):
    """Create a Review from one row of the reviews csv and save it.

    Args:
        review_row: a mapping-like row (e.g. a pandas Series) that provides
            the keys "id", "username", "item_id", "rating" and "comment".

    Raises:
        Item.DoesNotExist: if no Item with the row's "item_id" exists, so
            items have to be loaded before reviews.

    The publication date is not read from the row; it is set to the time at
    which the row is imported.
    """
    # Local import keeps this function self-contained. timezone.now() returns
    # an aware datetime when USE_TZ is enabled and a naive local datetime
    # otherwise, so it avoids Django's naive-datetime warning without
    # changing behaviour for projects that run with USE_TZ = False.
    from django.utils import timezone

    review = Review(
        id=review_row["id"],
        # The username value is stored as-is; no User lookup is performed.
        author=review_row["username"],
        item=Item.objects.get(id=review_row["item_id"]),
        rating=review_row["rating"],
        review=review_row["comment"],
        pub_date=timezone.now(),
    )
    review.save()
# ==============================================================================
# EXTRACT_USER_FROM_ROW
# ==============================================================================
def extract_user_from_row(user_row):
    """Create a Django auth User from one row of the users csv and save it.

    Only the "name" key of the row is read (it becomes the username). The
    row's "id" is deliberately ignored so the database assigns the primary
    key itself.
    """
    username = user_row["name"]
    # Every imported user gets the same temporary password, stored hashed.
    temporary_password = make_password("password")
    new_user = User(username=username, password=temporary_password)
    new_user.save()
# ==============================================================================
# EXTRACT_ITEM_FROM_ROW
# ==============================================================================
def extract_item_from_row(item_row):
    """Create an Item from one row of the items csv and save it.

    The row must provide the keys "name" and "id"; the id from the file is
    used as the Item's id rather than letting the database assign one.
    """
    field_values = {
        "name": item_row["name"],
        "id": item_row["id"],
    }
    Item(**field_values).save()
def _read_csv(path):
    """Load the csv file at *path* into a pandas DataFrame.

    Raises:
        RuntimeError: if pandas cannot read the file. The underlying error is
            chained as ``__cause__`` so the real reason is not lost.
    """
    try:
        return pd.read_csv(path)
    except Exception as err:
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        raise RuntimeError('Could not open the file "{}"\n'.format(path)
                           + 'please make sure it exists') from err


def _select_processing_function(option, columns):
    """Return the row handler for *option* after validating *columns*.

    Raises:
        ValueError: if *option* is not a known option, or if *columns* lacks
            one of the columns required for that option.
    """
    # (accepted spellings, required columns, row handler, name used in error)
    # NOTE(review): "id" is required for users although
    # extract_user_from_row does not read it -- kept to preserve validation.
    handlers = (
        (("review", "reviews"),
         {"id", "username", "item_id", "rating", "comment"},
         extract_review_from_row,
         "reviews"),
        (("items", "item"), {"id", "name"}, extract_item_from_row, "items"),
        (("users", "user"), {"id", "name"}, extract_user_from_row, "users"),
    )
    for names, necessary_columns, processing_function, label in handlers:
        if option not in names:
            continue
        if not necessary_columns.issubset(columns):
            raise ValueError(
                "The data in file does not match the expected columns for "
                + label)
        return processing_function
    raise ValueError("Incorrect Option. Legal options are 'reviews', 'items' or 'users'")


def main(argv):
    """Load a csv file into the django database.

    Args:
        argv: the full argument vector, i.e. ``[script, option, filepath]``.
            ``option`` selects which Model to update and ``filepath`` is the
            csv file to read.

    Raises:
        ValueError: on a wrong number of arguments, an unknown option, or a
            csv file that lacks the columns required for the option.
        RuntimeError: if the csv file cannot be read.
    """
    # --------------------------------------------------------------------------
    # Check that the script has received two arguments
    # (the option of which Model to update and the csv file path)
    # --------------------------------------------------------------------------
    if len(argv) != 3:
        message = (
            "\n"
            "Incorrect use of arguments. Correct usage is as follows\n"
            "python load_data.py option filepath\n"
            "Where option must be one of: reviews, items or users\n"
            "And filepath must be the file path to the relevant csv file based on\n"
            "the option selected.\n"
        )
        raise ValueError(message)

    option = str(argv[1]).strip().lower()
    file_path = str(argv[2]).strip()

    # Get data from the file (read before the option is validated, so an
    # unreadable file is reported first).
    print("Reading file " + file_path)
    data = _read_csv(file_path)
    print(data.head(10))  # Print the first 10 rows of data

    # Determine the relevant processing function based on option selected
    processing_function = _select_processing_function(option, data.columns)

    # For each row, add that information to the django database. An explicit
    # loop is used instead of DataFrame.apply because the handlers are called
    # only for their side effects and must run exactly once per row.
    for _, row in data.iterrows():
        processing_function(row)
    print("Done processing the data in {}".format(file_path))


# the entry point for the script, called by the shell
if __name__ == "__main__":
    main(sys.argv)