Пример #1
0
  def dump_hotel(cls, name, action='normal'):
    """Dump the hotels of one prefecture from the search index to a file.

    Pages through ``LocationHotelEs`` for documents whose
    ``prefecture_ascii`` term equals ``name`` (documents with the
    love-hotel genre are excluded), adds a ``latte_url`` to every place for
    which ``LatteHotelEs.get_place_by_name`` returns at least one hit, and
    hands each place to the writer together with the output filename. An
    already existing output file is removed before dumping starts.

    Args:
      name: value matched against ``prefecture_ascii``; also used to build
        the output filename through ``cls.get_filename_by_name``.
      action: one of
        'normal' (default) - dump every matching place and normalise its
          ``kind`` field before writing with ``cls.save_to_csv``;
        'recover' - only places whose ``recovered`` term is "1", written to
          the ``<name>_recover`` file with ``cls.save_to_csv``;
        'production' - only places whose ``version`` term is 10, written to
          the ``<name>_production`` file with ``cls.save_for_production``.
    """
    from ghost_spider.elastic import LocationHotelEs, LatteHotelEs
    from ghost_spider import progressbar

    must = [{"term": {"prefecture_ascii": name}}]
    # Love hotels are never exported, whatever the action is.
    must_not = [{"term": {"genre": u'ラブホテル'}}]
    query = {"query": {"bool": {"must": must, "must_not": must_not}}}

    if action == 'recover':
      must.append({"term": {"recovered": "1"}})
      filename = cls.get_filename_by_name(u'%s_recover' % name)
    elif action == 'production':
      must.append({"term": {"version": 10}})
      filename = cls.get_filename_by_name(u'%s_production' % name)
    else:
      filename = cls.get_filename_by_name(name)

    # Start from a clean file so a re-run does not mix in a previous dump.
    if os.path.exists(filename):
      os.remove(filename)

    if action == u'production':
      save_data_to_file = cls.save_for_production
    else:
      save_data_to_file = cls.save_to_csv

    replace_hotel = LocationHotelSelectors.REPLACE_HOTEL
    progress = None
    page = 1
    limit = 100
    sort = [{"area.untouched": "asc"}]

    print("=" * 100)
    while True:
      places, total = LocationHotelEs.pager(query=query, page=page, size=limit, sort=sort)
      page += 1
      if not places:
        break

      # The bar needs the total hit count, which is only known once the
      # first page has come back.
      if progress is None:
        print("Dumping data for %s (%s)" % (name, total))
        progress = progressbar.AnimatedProgressBar(end=total, width=100)
      # NOTE(review): the result of the addition is discarded, so this relies
      # on AnimatedProgressBar.__add__ advancing the bar in place - confirm
      # against ghost_spider.progressbar.
      progress + limit
      progress.show_progress()

      for place in places:
        result = LatteHotelEs.get_place_by_name(place.get('name'))
        if result["hits"]["total"] > 0:
          place["latte_url"] = result["hits"]["hits"][0]["_source"]["url"]

        if action == 'normal':
          hotel_kind = u'ホテル'
          kind = place.get('kind')
          if kind and kind in replace_hotel:
            hotel_kind = kind
          else:
            # A place without a genre keeps the default kind instead of
            # aborting the whole dump with a KeyError.
            for genre in place.get('genre') or ():
              if genre in replace_hotel:
                hotel_kind = replace_hotel[genre]
                break
          place['kind'] = hotel_kind
        save_data_to_file(filename, place)
    print(" ")