-
Notifications
You must be signed in to change notification settings - Fork 1
/
yelp_restaurant_pull_driver.py
executable file
·83 lines (48 loc) · 2.91 KB
/
yelp_restaurant_pull_driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/Users/rileymatthews/anaconda/bin/python
# -*- coding: utf-8 -*-
import argparse
from yelp_api_machinery import YelpApiPhoneInterfacer, YelpApiFirstPassCoordinator, \
YelpApiAddressInterfacer, YelpApiSecondPassCoordinator
from table_builders import YelpRestaurantsTableBuilder, YelpCategoriesTableBuilder, \
YelpNeighborhoodsTableBuilder
from extract_matchers import ByPhoneExtractMatcher, ByAddressExtractMatcher
def build_argparser():
    """Build the command-line parser for the Yelp restaurant pull driver.

    Options:
        -n / --n_pull           required; number of restaurants to pull.
        -o / --offset           required; offset from which to start the pull.
        -r / --report_interval  optional; report interval (default 250).
        -c / --create_table     flag; set when new tables should be created
                                (default False).
        -s / --search_limit     optional; results to pull per address search
                                (default 15).

    Every numeric option is declared with ``type=int`` so a value typed on
    the command line has the same type as its default, and a non-integer
    value is rejected by argparse with a usage error instead of surfacing
    later as a ``ValueError`` or a string where a number was expected.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--n_pull', type=int, required=True,
                        help='Number of restuarants to pull.')
    parser.add_argument('-o', '--offset', type=int, required=True,
                        help='Offset from which to start pull.')
    parser.add_argument('-r', '--report_interval', type=int, required=False,
                        help='Report interval.')
    parser.add_argument('-c', '--create_table', dest='create_table',
                        action='store_true',
                        help='Whether to new create a table.')
    parser.add_argument('-s', '--search_limit', type=int, required=False,
                        help='Number of results to pull per address search.')
    # The defaults are keyed on the real destinations. The previous
    # `feature=False` entry matched no option (nothing defines or reads
    # `feature`), so it is replaced by the default it was standing in for.
    parser.set_defaults(create_table=False, report_interval=250,
                        search_limit=15)
    return parser
def main():
    """Run the two-pass Yelp restaurant pull described by the CLI arguments.

    Pass 1 reads ``n_pull`` extracts starting at ``offset`` through the
    phone interfacer, matches them with ``ByPhoneExtractMatcher`` and adds
    the matches to the restaurants, categories and neighborhoods tables
    (creating the tables first when ``--create_table`` is given).

    Pass 2 repeats the read over the same ``offset`` / ``n_pull`` window
    through the address interfacer, matches with ``ByAddressExtractMatcher``
    and adds those matches to the same three tables.
    """
    args = build_argparser().parse_args()

    # Cast every numeric option explicitly: depending on how the parser is
    # configured, values given on the command line may arrive as strings.
    n_pull = int(args.n_pull)
    offset = int(args.offset)
    report_interval = int(args.report_interval) if args.report_interval else 250
    create_table = args.create_table
    # Previously the raw CLI value was forwarded here, so `-s 20` handed the
    # interfacer the string '20' while the default was the integer 15.
    limit = int(args.search_limit) if args.search_limit else 15

    restaurants_tb = YelpRestaurantsTableBuilder()
    categories_tb = YelpCategoriesTableBuilder()
    neighborhoods_tb = YelpNeighborhoodsTableBuilder()
    table_builders = (restaurants_tb, categories_tb, neighborhoods_tb)

    # first pass
    api_phone_interfacer = YelpApiPhoneInterfacer(report_interval=report_interval)
    first_coordinator = YelpApiFirstPassCoordinator(
        api_interfacer=api_phone_interfacer, start_read=offset)
    extracts = first_coordinator.read_next_n(n=n_pull)
    matched_extracts = ByPhoneExtractMatcher().match_all(extracts)

    # Tables are created only after the first read and match have succeeded,
    # in the same order as the inserts below.
    if create_table:
        for builder in table_builders:
            builder.create_table()

    for builder in table_builders:
        builder.add_records(matched_extracts)

    # second pass
    # NOTE(review): sort=1 is forwarded unchanged; its meaning is defined by
    # YelpApiAddressInterfacer and is not visible from this script.
    api_address_interfacer = YelpApiAddressInterfacer(
        limit=limit, sort=1, report_interval=report_interval)
    second_coordinator = YelpApiSecondPassCoordinator(
        api_interfacer=api_address_interfacer, start_read=offset)
    extracts = second_coordinator.read_next_n(n=n_pull)
    matched_extracts = ByAddressExtractMatcher().match_all(extracts)

    for builder in table_builders:
        builder.add_records(matched_extracts)


if __name__ == '__main__':
    main()