def fetchDetails(placeID):
    """Research one place with every provider that is new or outdated for it.

    Looks up the place's entry in ``statusTable``, selects the providers from
    ``config`` that still need to be queried, resolves their provider IDs via
    ``proxwalk``, researches the place and pushes the merged status through
    ``db()``. Always returns ``None``; failures are logged instead of raised.
    """
    current = statusTable[placeID]

    def needsUpdate(provider):
        # A provider qualifies when it was never recorded for this place, or
        # when config holds a greater version and the recorded value is not
        # the NOT_FOUND marker (searched places are skipped).
        if provider not in current:
            return True
        return (config[provider] > current[provider]
                and current[provider] != Status.NOT_FOUND.value)

    # TODO: Gracefully handle if TripAdvisor-mapper runs out of API calls (25k)
    pending = [provider for provider in config if needsUpdate(provider)]
    if not pending:
        return

    try:
        providerIDs = proxwalk.getAndCacheProviderIDs(
            placeID, pending, current["identifiers"])
    except Exception as err:
        log.error("Error fetching or caching provider id: {}".format(err))
        return

    researched = request_handler.researchPlace(placeID, providerIDs)

    # Write updated sources to /status
    freshStatus = makeNewStatusTable(config, researched, providerIDs, pending)
    try:
        current.update(freshStatus)
        db().child(venuesTable, "status", placeID).update(current)
    except Exception as err:
        log.error("Error accessing status table for {}: {}".format(
            placeID, err))

    log.info("{} done: {}".format(placeID, str(researched)))
示例#2
0
def _guessYelpId(placeName, lat, lon):
    """Guess the Yelp business id for a place name near the given coordinates.

    The result is cached under ``cache/<md5 of placeName>`` in ``eventsTable``.
    On a cache miss Yelp is searched by coordinates, the business closest to
    ``(lat, lon)`` is picked, researched via ``researchVenue`` and cached.

    Returns:
        The business id, or ``None`` when the search yields no business that
        carries coordinates.
    """
    # hashlib needs bytes: encode text names so hashing works on Python 3
    # (byte strings are passed through untouched).
    nameBytes = placeName if isinstance(placeName, bytes) else placeName.encode("utf-8")
    safePlaceId = hashlib.md5(nameBytes).hexdigest()
    cachedId = db.child(eventsTable).child("cache/" + safePlaceId).get().val()
    if cachedId:
        return cachedId

    opts = {
        # 'term': placeName, # Yelp does a bad job with term searching
        'limit': 20,
        #'radius_filter': 1000,
        #'sort_by': 'distance',
        'sort': 1,
    }
    r = yelpClient.search_by_coordinates(lat, lon, **opts)

    # Only businesses exposing coordinates can be ranked by distance. Building
    # a list (not a lazy filter) lets us detect the "none usable" case instead
    # of letting min() raise ValueError on an empty sequence.
    businessesWithCoords = [
        b for b in r.businesses
        if b.location is not None and b.location.coordinate is not None
    ]
    if not businessesWithCoords:
        log.info("Can't find %s" % placeName)
        return None

    location = (lat, lon)
    biz = min(
        businessesWithCoords,
        key=lambda b: geo.distance(
            location,
            (b.location.coordinate.latitude, b.location.coordinate.longitude)),
    )
    log.debug("%s --> %s" % (placeName, biz.name))
    researchVenue(biz)

    # Add bizId to cache
    record = {"cache/" + safePlaceId: str(biz.id)}
    db.child(eventsTable).update(record)

    return biz.id
示例#3
0
    def _print(self, result) -> bool:
        """Log the availability outcome for ``result`` and report it as a bool.

        Args:
            result: sized collection of availability hits; each element is
                interpolated into its own success message.

        Returns:
            False when ``result`` is empty (an info message is logged),
            True otherwise.
        """
        if len(result) == 0:
            info(f"[{self.store_name}] {self.product_name} not available.")
            return False

        # Report every hit; returning from inside the loop would stop after
        # the first element and hide the remaining ones.
        for elem in result:
            success(f"[{self.store_name}] {self.product_name} Available {elem}!")
        return True
示例#4
0
def searchLocation(lat, lng, radius):
    """Look up the venues around a point and persist them.

    Venues come from ``search.getVenuesFromIndex`` and are handed to
    ``writeYelpRecords``; the number of venues is logged afterwards.
    """
    venues = search.getVenuesFromIndex(lat, lng, radius)
    log.debug("Writing venues...")
    writeYelpRecords(venues)
    venueCount = len(venues)
    log.info("Wrote %d venues" % venueCount)
示例#5
0
def searchLocationWithErrorRecovery(lat, lng, radius=None):
    """Run ``searchLocation`` and keep unexpected failures from propagating.

    ``KeyboardInterrupt`` logs a farewell and exits the process with status 1;
    any other ``Exception`` is logged with its traceback and swallowed.
    """
    # Import before the try block: an import placed inside only one handler
    # makes ``log`` a function-local name, leaving it unbound in the
    # KeyboardInterrupt branch (UnboundLocalError on ``log.info``).
    from app.util import log

    try:
        searchLocation(lat, lng, radius=radius)
    except KeyboardInterrupt:
        log.info("GOODBYE")
        sys.exit(1)
    except Exception:
        log.exception("Unknown exception")
示例#6
0
def getVenuesFromIndex(lat, lon, radius):
    """Return the de-duplicated venues around a point that carry coordinates.

    Venues are fetched through ``_getVenuesFromIndex`` using
    ``YELP_SORT_ORDER``. Only the first venue per ``id`` is considered, and it
    is kept only when it has a location with a coordinate.
    """
    venues = _getVenuesFromIndex(lat, lon, radius, YELP_SORT_ORDER)
    seen = set()
    unique = []
    for biz in venues:
        if biz.id in seen:
            continue
        # Mark the id before the location check so that a venue without
        # coordinates still suppresses its later duplicates.
        seen.add(biz.id)
        location = biz.location
        if location is not None and location.coordinate is not None:
            unique.append(biz)
    log.info("Found %d unique venues with locations" % len(unique))
    return unique
def expandPlaces(config, center, radius_km):
    """
    Expands cached venue details by fetching additional sources
    Config is of the form:
        { <provider>: <version> }
    where version is the newest version status

    The place IDs to expand are those returned by
    ``geo.get_place_ids_in_radius`` for ``center`` / ``radius_km``; each one is
    processed by ``fetchDetails`` on a pool of 8 worker threads.
    """
    statusTable = db().child(venuesTable).child("status").get().val()

    # Fetch placeIDs to expand
    location_table = db().child(locationsTable).get().val()
    placeIDs = geo.get_place_ids_in_radius(center, radius_km, location_table)

    log.info("{} places found".format(len(placeIDs)))

    def fetchDetails(placeID):
        """Research one place with every provider that is new or outdated."""
        placeStatus = statusTable[placeID]
        # Get a list of (src, version) pairs that could be updated, skip searched places
        # TODO: Gracefully handle if TripAdvisor-mapper runs out of API calls (25k)
        newProviders = [
            src for src in config if src not in placeStatus or (
                config[src] > placeStatus[src]
                and placeStatus[src] != Status.NOT_FOUND.value)
        ]
        if not newProviders:
            return

        try:
            placeProviderIDs = proxwalk.getAndCacheProviderIDs(
                placeID, newProviders, placeStatus["identifiers"])
        except Exception as e:
            log.error("Error fetching or caching provider id: {}".format(e))
            return

        updatedProviders = request_handler.researchPlace(
            placeID, placeProviderIDs)

        # Write updated sources to /status
        newStatus = makeNewStatusTable(config, updatedProviders,
                                       placeProviderIDs, newProviders)

        try:
            placeStatus.update(newStatus)
            db().child(venuesTable, "status", placeID).update(placeStatus)
        except Exception as e:
            log.error("Error accessing status table for {}: {}".format(
                placeID, e))

        log.info("{} done: {}".format(placeID, str(updatedProviders)))

    pool = ThreadPool(8)
    try:
        pool.map(fetchDetails, placeIDs)
    finally:
        # Release the worker threads even when a task raised.
        pool.close()
        pool.join()

    log.info("Finished crawling other sources")
示例#8
0
    def check_availability(self) -> bool:
        """Check availability within the zip code and online.

        Returns False immediately (after an info message) when
        ``self.cfg.best_buy_api_key`` is not set. Otherwise both lookups are
        performed, each preceded by a one second pause, and their results are
        reported through ``self._print``.

        Returns:
            True when either lookup was reported as available.
        """
        if not self.cfg.best_buy_api_key:
            info(f"[{self.store_name}] api key missing. Skipping...")
            return False

        sleep(1)
        found_nearby = self._print(
            self.available_within_zip(int(self.product_id)))
        sleep(1)
        found_online = self._print(
            self.available_online(int(self.product_id)))
        return bool(found_nearby or found_online)
示例#9
0
 def check_availability(self) -> bool:
     """Check every configured zip code and report whether any has stock.

     Each zip code in ``self.cfg.target_zip_codes`` is passed to
     ``self.check_specific`` and the outcome is logged per zip code. A
     ``KeyError`` raised while handling a zip code is reported via ``fail``
     and the loop moves on to the next one.

     Returns:
         True when at least one zip code reported a positive amount.
     """
     found_any = False
     for zip_code in self.cfg.target_zip_codes:
         try:
             amount, location = self.check_specific(zip_code)
             if not amount > 0:
                 info(
                     f"[{self.store_name}] {self.product_name} not available in {zip_code}"
                 )
                 continue
             found_any = True
             success(
                 f"[{self.store_name}] {self.product_name} {amount} units found {location}"
             )
         except KeyError:
             fail(f"[{self.store_name}] {self.product_name} could not find "
                  f"something in the list for {self.store_name}")
     return found_any
示例#10
0
def findSearchRecord(center, radius=1000):
    """Find a still-valid cached search record close to ``center``.

    The geohash ranges produced by ``geo.geohashQueries(center, radius)`` are
    queried against ``searchesTable``. Records older than
    ``searchCacheExpiry`` are removed along the way.

    Returns:
        The first record whose stored location ``record["l"]`` lies within
        ``searchCacheRadius`` of ``center``, or ``None`` when there is none.
    """
    import app.geofire as geo
    import time
    queries = geo.geohashQueries(center, radius)
    now = time.time()

    for query in queries:
        results = db.child(searchesTable).order_by_key().start_at(query[0]).end_at(query[1]).get()
        # each() may hand back None instead of an empty list when a range has
        # no matches (Pyrebase-style responses do); treat that as "no records".
        for result in results.each() or []:
            record = result.val()
            if record.get("time", 0) + searchCacheExpiry < now:
                # Expired entry: drop it from the cache and keep looking.
                db.child(searchesTable).child(result.key()).remove()
                continue
            # double check that we're within distance
            circleDistance = geo.distance(center, record["l"]) * 1000
            # 1000 m in 1 km (geo.distance is in km, searchCacheRadius is in m)
            if circleDistance < searchCacheRadius:
                return record
            log.info("Circle distance is " + str(circleDistance))
    return None
示例#11
0
def getGcalEventObj(event):
    """Build an event record for a calendar event, if its venue can be found.

    Events lacking a ``dateTime`` in ``start`` or ``end``, or lacking a
    ``location``, yield ``None`` right away. Otherwise the location is
    resolved through ``search._getAddressIdentifiers`` and ``_guessYelpId``.

    Returns:
        The value of ``representation.eventRecord(...)`` on success, or
        ``None`` (after logging) when no venue could be matched or an error
        occurred.
    """
    timed = ("dateTime" in event["start"]) and ("dateTime" in event["end"])
    if not (timed and "location" in event):
        return None

    rawLocation = event["location"]
    # Results are unused below, but the call and the unpacking are kept.
    name, address = events.getNameAndAddress(rawLocation)
    identifiers = search._getAddressIdentifiers(rawLocation)
    if identifiers:
        try:
            coords = identifiers['location']
            # Unused, yet a missing 'name'/'zipcode' key still aborts via the
            # except branch below.
            placeName = '%s, %s' % (identifiers['name'], identifiers['zipcode'])
            lat, lng = coords['lat'], coords['lng']
            yelpId = _guessYelpId(rawLocation, lat, lng)
            if yelpId:
                if "description" in event:
                    optUrl = event["description"]
                else:
                    optUrl = None
                return representation.eventRecord(
                    yelpId, lat, lng, event['summary'],
                    event['start']['dateTime'], event['end']['dateTime'],
                    optUrl)
        except Exception:
            log.exception("getGcalEventObj")
    log.info("Unable to find corresponding location for %s" % rawLocation)
示例#12
0
    def check_for_inventory(self, content):
        """Parse a product page and report whether the item can be bought.

        Args:
            content: HTML of the product page, as accepted by
                ``html.fromstring``.

        Returns:
            False when a sold-out marker is found or the primary buy button
            does not read "add to cart"; True when it does.
        """
        doc = html.fromstring(content)

        def text_at(xpath):
            # Joined, stripped text of every node matched by ``xpath``
            # ("" when nothing matches).
            return "".join(doc.xpath(xpath)).strip()

        def matches(text, phrase):
            # Keeps the existing "text is a fragment of phrase" rule, but
            # rejects empty text: "" is a substring of every phrase and used
            # to count as a match (including a false "Available!").
            return bool(text) and text in phrase

        try:
            message = text_at(
                '//div[@id ="ProductBuy"]//span[contains(@class, "btn-message")]//text()'
            )
            if matches(message, "Sold Out"):
                info(f"[{self.store_name}] {self.product_name} not available")
                return False

            flags = text_at('//div[contains(@class, "flags-body")]//text()')
            if matches(flags, "CURRENTLY SOLD OUT"):
                info(f"[{self.store_name}] {self.product_name} not available")
                return False
        except Exception:
            # Best effort: a failing sold-out probe must not abort the check.
            # (Narrowed from a bare except so Ctrl-C / SystemExit still work.)
            time.sleep(1)

        button = text_at(
            '//div[@id ="ProductBuy"]//button[contains(@class, "btn-primary")]//text()'
        )
        if matches(button.lower(), "add to cart"):
            success(f"[{self.store_name}] {self.product_name} Available!")
            return True

        info(f"[{self.store_name}] {self.product_name} not available")
        return False
示例#13
0
def crawlPoints(search_center, search_radius):
    """Search around one point, or only list what would be found on a dry run.

    With ``dryRun`` set, the venues returned by ``search.getVenuesFromIndex``
    are logged by id together with their count and nothing else happens.
    Otherwise ``searchLocation`` is invoked for the point.
    """
    lat, lng = search_center

    if not dryRun:
        log.info("starting: %.8f, %.8f -------------------" % search_center)
        searchLocation(lat, lng, search_radius)
        return

    log.info("Dry run - center: (%.8f, %.8f) radius: %d meters" %
             (lat, lng, search_radius))
    venues = search.getVenuesFromIndex(lat, lng, search_radius)
    for venue in venues:
        log.info(venue.id)
    log.info("%d unique results found." % len(venues))
示例#14
0
def searchLocation(lat, lng, radius, maxNum):
    """Research the venues and events around a point unless recently searched.

    When ``findSearchRecord`` returns a record for the point, nothing is done.
    Otherwise a new search record is written, the venues from
    ``search.getVenuesFromIndex`` and the events from
    ``events.fetchEventsFromLocation`` are researched on a pool of 5 worker
    threads, and the venue results are logged as JSON.
    """
    # Fetch locations
    searchRecord = findSearchRecord((lat, lng), searchCacheRadius)
    if searchRecord is not None:
        log.debug("searchRecord: %s" % searchRecord)
        return
    writeSearchRecord(lat, lng)

    yelpVenues = search.getVenuesFromIndex(lat, lng, radius, maxNum)

    pool = ThreadPool(5)
    try:
        res = pool.map(researchVenue, yelpVenues)

        # Fetch events from Eventful
        eventListings = events.fetchEventsFromLocation(lat, lng)
        pool.map(researchEvent, eventListings)
    finally:
        # Shut the workers down even when one of the map calls raised.
        pool.close()
        pool.join()

    import json
    log.info("Found %d: %s" % (len(res), json.dumps(res)))
示例#15
0
def searchLocation(lat, lng, radius=None):
    """Research the venues and events around a point unless recently searched.

    When ``findSearchRecord`` returns a record for the point, nothing is done.
    Otherwise a new search record is written, all result pages of
    ``search._getVenuesFromIndex`` are collected, and the venues plus the
    events from ``events.fetchEventsFromLocation`` are researched on a pool of
    5 worker threads. ``radius`` defaults to ``venueSearchRadius``.
    """
    # Fetch locations
    searchRecord = findSearchRecord((lat, lng), searchCacheRadius)
    if searchRecord is not None:
        log.debug("searchRecord: %s" % searchRecord)
        return
    writeSearchRecord(lat, lng)

    if radius is None:
        radius = venueSearchRadius

    total = 1
    offset = 0
    yelpVenues = []
    while offset < total:
        locality = search._getVenuesFromIndex(lat,
                                              lng,
                                              offset=offset,
                                              radius=radius)
        total = locality.total
        if not locality.businesses:
            # An empty page cannot advance the offset; stop here instead of
            # requesting the same page forever when ``total`` overstates what
            # can actually be retrieved.
            break
        yelpVenues += locality.businesses
        offset = len(yelpVenues)

    pool = ThreadPool(5)
    try:
        res = pool.map(researchVenue, yelpVenues)

        # Fetch events from Eventful
        eventListings = events.fetchEventsFromLocation(lat, lng)
        pool.map(researchEvent, eventListings)
    finally:
        # Shut the workers down even when one of the map calls raised.
        pool.close()
        pool.join()

    import json
    log.info("Finished: " + json.dumps(res))
示例#16
0
    def check_availability(self) -> bool:
        """Fetch the product page and report whether it can be added to cart.

        Returns False without issuing a request when ``self.product_link`` is
        None, and False without logging when ``self.check_inventory`` yields a
        falsy value for the response body.

        Returns:
            True when the inventory text is contained in "add to cart".
        """
        if self.product_link is None:
            return False

        user_agent = (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36"
        )
        request_headers = {
            "Accept": "application/json",
            "Referer": "https://www.walmart.com/",
            "User-Agent": user_agent,
        }
        response = requests.get(url=self.product_link, headers=request_headers)

        ans = self.check_inventory(response.content)
        if not ans:
            return False

        purchasable = ans in "add to cart"
        if purchasable:
            success(f"[{self.store_name}] {self.product_name} Available!")
        else:
            info(f"[{self.store_name}] {self.product_name} not available")
        return purchasable
示例#17
0
    def check_availability(self) -> bool:
        """Fetch the product page and inspect its add-to-cart button text.

        Returns False without issuing a request when ``self.product_link`` is
        None, and False without logging when the response status code is
        above 300.

        Returns:
            True when the lower-cased button text is contained in
            "add to cart" and not in "not available"; False otherwise.
        """
        if self.product_link is None:
            return False

        request_headers = {
            "Referer": "https://www.gamestop.com/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "authority": "9300303.fls.doubleclick.net",
            "scheme": "https",
            "sec-fetch-dest": "iframe",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "cross-site",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36",
        }
        page = requests.get(url=self.product_link, headers=request_headers)
        if page.status_code > 300:
            return False

        button_xpath = (
            '//div[contains(@class, "primary-details-row")]//'
            'button[contains(@class, "add-to-cart")]//text()'
        )
        fragments = html.fromstring(page.content).xpath(button_xpath)
        # No matching node gives the text "none", which matches neither phrase.
        label = str("".join(fragments).strip() if fragments else None).lower()

        in_stock = (label not in "Not Available".lower()
                    and label in "Add to cart".lower())
        if in_stock:
            success(f"[{self.store_name}] {self.product_name} Available!")
        else:
            info(f"[{self.store_name}] {self.product_name} not available.")
        return in_stock
示例#18
0
def crawlPoints(grid, search_radius, max_venue_per_search):
    """Search every point of ``grid`` and log summary statistics.

    On a dry run (``dryRun`` set) the points are only printed. Otherwise each
    point is searched through ``searchLocationWithErrorRecovery`` with
    ``search_radius`` and ``max_venue_per_search``.

    Args:
        grid: sized collection of ``(lat, lng)`` pairs.
        search_radius: radius passed to each search; logged divided by 1000
            as km.
        max_venue_per_search: venue cap passed to each search and used for
            the summary figures.
    """
    # Decide once so the import and the per-point branch cannot disagree.
    live = not dryRun
    if live:
        #from app.queue.enqueue import searchLocation
        from app.request_handler import searchLocationWithErrorRecovery as searchLocation

    for center in grid:
        lat, lng = center
        # Now actually do the search.
        if live:
            log.info("starting: %.8f, %.8f -------------------" % center)
            searchLocation(lat, lng, search_radius, max_venue_per_search)
        else:
            print("%.8f, %.8f" % center)

    count = len(grid)
    # Report with the cap actually handed to the searches rather than a
    # module-level setting that may differ from it.
    log.info("Number of points: %d" % (count))
    log.info("Number of Yelp searches: %d" % (count * max_venue_per_search / 20))
    log.info("Distance between points is %.2f km" % (grid_size_m / 1000))
    log.info("Search radius is %.2f km" % (search_radius / 1000))
    log.info("Maximum number of venues: %d" % (count * max_venue_per_search))