def fetchDetails(placeID):
    """Research the outstanding sources of one place and store the new status.

    Reads the place's entry from ``statusTable``, selects the sources in
    ``config`` that still need work, resolves their provider IDs through
    ``proxwalk``, runs ``request_handler.researchPlace`` and finally merges
    the result of ``makeNewStatusTable`` into the entry, which is written to
    the ``(venuesTable, "status", placeID)`` child.

    :param placeID: key of the place in ``statusTable``.
    :return: None. Returns early when no source needs work or when the
        provider IDs cannot be fetched or cached. The final "done" message
        is logged even if writing the status failed.
    """
    placeStatus = statusTable[placeID]

    # A source is pending when the place has no entry for it yet, or when the
    # configured version is greater than the stored one and the stored value
    # is not Status.NOT_FOUND.value.
    # TODO: Gracefully handle if TripAdvisor-mapper runs out of API calls (25k)
    pending = []
    for source in config:
        if source in placeStatus:
            outdated = config[source] > placeStatus[source]
            if not (outdated
                    and placeStatus[source] != Status.NOT_FOUND.value):
                continue
        pending.append(source)

    if not pending:
        # Nothing to research for this place.
        return

    try:
        providerIDs = proxwalk.getAndCacheProviderIDs(
            placeID, pending, placeStatus["identifiers"])
    except Exception as err:
        log.error("Error fetching or caching provider id: {}".format(err))
        return

    researched = request_handler.researchPlace(placeID, providerIDs)

    # Write updated sources to /status
    freshStatus = makeNewStatusTable(config, researched, providerIDs, pending)

    try:
        placeStatus.update(freshStatus)
        db().child(venuesTable, "status", placeID).update(placeStatus)
    except Exception as err:
        log.error("Error accessing status table for {}: {}".format(
            placeID, err))

    log.info("{} done: {}".format(placeID, str(researched)))
示例#2
0
def getVenueIdentifiers(yelpID):
    """Collect the identifiers known for a Yelp business via Factual crosswalk.

    The mapping always carries the Yelp ID, ``CROSSWALK_CACHE_VERSION`` and
    the Yelp URL. When crosswalk resolves the URL to a Factual ID, that ID is
    added and every crosswalk entry for it is stored under its namespace
    (with the ``factual_id`` and ``namespace`` fields removed).

    :param yelpID: Yelp business ID, interpolated into the Yelp URL.
    :return: tuple ``(mapping, ok)``. ``ok`` is True when the lookups ran
        without raising (including when crosswalk has no match) and False
        when an exception was logged; ``mapping`` then holds whatever was
        gathered before the failure.
    """
    yelpURL = "https://yelp.com/biz/%s" % yelpID
    mapping = {
        "id": yelpID,
        "version": CROSSWALK_CACHE_VERSION,
        "yelp": {"url": yelpURL},
    }

    try:
        matches = factualClient.crosswalk().filters({"url": yelpURL}).data()
        if len(matches) == 0:
            # No crosswalk row for this URL: the bare mapping is the answer.
            return mapping, True

        factualID = matches[0]["factual_id"]
        mapping.update({"factualID": factualID, "factual": {"id": factualID}})

        entries = factualClient.crosswalk().filters(
            {"factual_id": factualID}).data()
        for entry in entries:
            namespace = entry["namespace"]
            # Drop the two lookup fields; the remainder is stored as-is.
            del entry["factual_id"], entry["namespace"]
            mapping[namespace] = entry
    except APIException:
        log.error("Factual API failed again")
    except Exception:
        log.exception("Factual problem " + yelpID)
    else:
        return mapping, True

    return mapping, False
示例#3
0
def writeVenueProviderRecord(yelpID, details):
    """Update a venue record and write each of its providers to the database.

    ``representation.updateRecord`` is called with ``details`` expanded as
    keyword arguments; every entry of the returned record's ``"providers"``
    mapping is then written with its own ``update`` call under
    ``(venuesTable, "details", yelpID, "providers")``.

    :param yelpID: ID of the venue being written.
    :param details: mapping passed as keyword arguments to ``updateRecord``.
    :return: None. Any exception is logged and swallowed.
    """
    try:
        venue = representation.updateRecord(yelpID, **details)
        # Snapshot the items before issuing the per-provider writes.
        providerItems = list(venue["providers"].items())
        for name, payload in providerItems:
            target = db().child(venuesTable, "details", yelpID, "providers")
            target.update({name: payload})
    except Exception as err:
        log.error("Error writing record: {}\n{}".format(details, err))
示例#4
0
def readCachedVenueIterableDetails(place_ids):
    """Retrieves the cache objects matching the given place IDs.

    This method retrieves the whole cache/ child when making a call so call sparingly.
    We do this because it's slow to make network requests for each child individually.
    To pull down less data, you can use `readCachedVenueDetails`.

    Errors are logged and swallowed: the objects collected before the failure are returned.

    :param place_ids: Iterable of place_ids
    :return: a list of cache objects. If a place_id is not in the cache, it will be dropped from the results.
    """
    out = []
    # What the error log should name. It only becomes a place ID once the loop
    # has started, so a failure of the fetch itself no longer reads an unbound
    # loop variable inside the exception handler.
    failed_on = "<cache fetch>"
    try:
        cache = db().child(cacheTable).get().val()
        if cache is None:
            # NOTE(review): presumably val() yields None when the node holds
            # no data -- confirm. Treated as an empty cache so that every ID
            # is simply dropped.
            cache = {}
        for place_id in place_ids:
            failed_on = place_id
            if place_id in cache:
                out.append(cache[place_id])
    except Exception:
        # format() instead of "+" so that a non-string ID cannot raise here.
        log.error("Error fetching cached venue details for {}".format(failed_on))

    return out
示例#5
0
def getVenueIdentifiers(yelpID):
    """Collect the identifiers known for a Yelp business via Factual crosswalk.

    The mapping always carries the Yelp ID, ``CROSSWALK_CACHE_VERSION`` and
    the Yelp URL. When ``deployment`` is ``"production/"`` the
    ``"crosswalk-us"`` table is queried, otherwise ``factualClient.crosswalk()``.
    If the Yelp URL resolves to a Factual ID, that ID is added and every
    crosswalk entry for it is stored under its namespace (with the
    ``factual_id`` and ``namespace`` fields removed).

    :param yelpID: Yelp business ID, interpolated into the Yelp URL.
    :return: tuple ``(mapping, ok)``. ``ok`` is True when the lookups ran
        without raising (including when crosswalk has no match) and False
        when an exception was logged; ``mapping`` then holds whatever was
        gathered before the failure.
    """
    yelpURL = "https://yelp.com/biz/%s" % yelpID
    mapping = {
        "id": yelpID,
        "version": CROSSWALK_CACHE_VERSION,
        "yelp": {
            "url": yelpURL
        }
    }
    try:
        if deployment == "production/":
            crosswalk = factualClient.table("crosswalk-us")
        else:
            crosswalk = factualClient.crosswalk()

        obj = crosswalk.filters({"url": yelpURL}).data()

        if len(obj) == 0:
            # No crosswalk row for this URL: the bare mapping is the answer.
            log.debug("Crosswalk empty for Yelp -> Factual " + yelpID)
            return mapping, True

        factualID = obj[0]["factual_id"]
        mapping["factualID"] = factualID
        mapping["factual"] = {"id": factualID}

        idList = crosswalk.filters({"factual_id": factualID}).data()

        if len(idList) == 0:
            # warning() rather than the deprecated warn() alias.
            log.warning("Crosswalk empty for Factual -> * " + yelpID + " " +
                        factualID)
        for idObj in idList:
            namespace = idObj["namespace"]
            # Drop the two lookup fields; the remainder is stored as-is.
            del idObj["factual_id"]
            del idObj["namespace"]
            mapping[namespace] = idObj
        return mapping, True
    except APIException:
        log.error("Factual API failed again")
    except Exception:
        log.exception("Factual problem " + yelpID)
    return mapping, False
示例#6
0
def readCachedVenueDetails(key):
    """Fetch the cached details stored for a single key.

    Reads the ``"cache/" + key`` child of ``venuesTable``.

    :param key: cache key, concatenated onto the ``"cache/"`` path.
    :return: the value read from the database, or None when the read raised
        (the failure is logged).
    """
    # NOTE(review): other functions in this file call ``db()``; here ``db`` is
    # used as an object -- confirm which form is current.
    try:
        return db.child(venuesTable).child("cache/" + key).get().val()
    except Exception:
        log.error("Error fetching cached venue details for " + key)
    return None