Пример #1
0
def load(events, metadata=None, pipeline_options=None):
  """Build the query 3 pipeline: join filtered auctions with filtered persons.

  The events are re-windowed into the global window with a repeating
  count-based trigger in discarding mode. Auctions whose ``category`` equals
  10 and persons whose ``state`` is 'OR', 'ID' or 'CA' are co-grouped and
  handed to ``JoinFn``; each joined element is then reshaped into a dict.

  Args:
    events: the input collection the transforms are applied to.
    metadata: mapping read for 'max_auction_waiting_time'. It defaults to
      None, but ``.get`` is called on it unconditionally, so callers must
      supply a mapping.
    pipeline_options: accepted but not read by this function.

  Returns:
    The collection produced by the 'query3_output' step: dicts keyed by
    ``ResultNames.NAME``, ``ResultNames.CITY``, ``ResultNames.STATE`` and
    ``ResultNames.AUCTION_ID``.
  """
  pane_size = 30
  # Fire a pane every `pane_size` elements, over and over; DISCARDING means
  # each pane carries only the elements that arrived since the previous one.
  pane_trigger = trigger.Repeatedly(trigger.AfterCount(pane_size))
  global_events = events | beam.WindowInto(
      window.GlobalWindows(),
      trigger=pane_trigger,
      accumulation_mode=trigger.AccumulationMode.DISCARDING)

  # Auction side of the join (presumably keyed by seller id -- see
  # AuctionBySellerFn).
  auctions = global_events | nexmark_query_util.JustAuctions()
  auctions = auctions | 'query3_filter_category' >> beam.Filter(
      lambda auction: auction.category == 10)
  auctions = auctions | 'query3_key_by_seller' >> beam.ParDo(
      nexmark_query_util.AuctionBySellerFn())

  # Person side of the join (presumably keyed by person id -- see
  # PersonByIdFn).
  persons = global_events | nexmark_query_util.JustPerson()
  persons = persons | 'query3_filter_region' >> beam.Filter(
      lambda p: p.state in ['OR', 'ID', 'CA'])
  persons = persons | 'query3_key_by_person_id' >> beam.ParDo(
      nexmark_query_util.PersonByIdFn())

  tagged_inputs = {
      nexmark_query_util.AUCTION_TAG: auctions,
      nexmark_query_util.PERSON_TAG: persons,
  }
  grouped = tagged_inputs | beam.CoGroupByKey()

  max_wait = metadata.get('max_auction_waiting_time')
  joined = grouped | 'query3_join' >> beam.ParDo(JoinFn(max_wait))

  # Each joined element is indexed as a pair: item 0 supplies the auction id,
  # item 1 supplies the person's name, city and state.
  return joined | 'query3_output' >> beam.Map(
      lambda pair: {
          ResultNames.NAME: pair[1].name,
          ResultNames.CITY: pair[1].city,
          ResultNames.STATE: pair[1].state,
          ResultNames.AUCTION_ID: pair[0].id
      })
Пример #2
0
  def expand(self, pcoll):
    """Window the input, then co-group auctions and bids and join them.

    The input is windowed with ``self.auction_or_bid_windowFn``. Two branches
    are derived from the windowed events -- one through ``JustAuctions`` and
    ``AuctionByIdFn``, one through ``JustBids`` and ``BidByAuctionIdFn`` --
    and are co-grouped under ``AUCTION_TAG`` / ``BID_TAG`` before being
    processed by ``JoinAuctionBidFn``.

    Args:
      pcoll: the input collection of events.

    Returns:
      The output of the final ``ParDo(JoinAuctionBidFn())`` step.
    """
    windowed = pcoll | beam.WindowInto(self.auction_or_bid_windowFn)

    # Auction branch.
    auctions = windowed | nexmark_query_util.JustAuctions()
    auctions = auctions | 'auction_by_id' >> beam.ParDo(
        nexmark_query_util.AuctionByIdFn())

    # Bid branch.
    bids = windowed | nexmark_query_util.JustBids()
    bids = bids | 'bid_by_auction' >> beam.ParDo(
        nexmark_query_util.BidByAuctionIdFn())

    tagged_inputs = {
        nexmark_query_util.AUCTION_TAG: auctions,
        nexmark_query_util.BID_TAG: bids
    }
    grouped = tagged_inputs | beam.CoGroupByKey()
    return grouped | beam.ParDo(JoinAuctionBidFn())
Пример #3
0
def load(events, metadata=None):
  """Build the query 8 pipeline: join persons and auctions per fixed window.

  Persons and auctions are each extracted from ``events``, placed into fixed
  windows sized by ``metadata['window_size_sec']``, run through
  ``PersonByIdFn`` / ``AuctionBySellerFn`` respectively, co-grouped, and then
  processed by ``JoinPersonAuctionFn``.

  Args:
    events: the input collection the transforms are applied to.
    metadata: mapping read for 'window_size_sec'. It defaults to None, but
      ``.get`` is called on it unconditionally, so callers must supply a
      mapping.

  Returns:
    The collection produced by the 'query8_join' step.
  """
  # Person branch: extract, window, then key (presumably by person id).
  persons = events | nexmark_query_util.JustPerson()
  person_window = window.FixedWindows(metadata.get('window_size_sec'))
  persons = persons | 'query8_window_person' >> beam.WindowInto(person_window)
  persons = persons | 'query8_person_by_id' >> beam.ParDo(
      nexmark_query_util.PersonByIdFn())

  # Auction branch: extract, window, then key (presumably by seller id).
  auctions = events | nexmark_query_util.JustAuctions()
  auction_window = window.FixedWindows(metadata.get('window_size_sec'))
  auctions = (
      auctions | 'query8_window_auction' >> beam.WindowInto(auction_window))
  auctions = auctions | 'query8_auction_by_seller' >> beam.ParDo(
      nexmark_query_util.AuctionBySellerFn())

  tagged_inputs = {
      nexmark_query_util.PERSON_TAG: persons,
      nexmark_query_util.AUCTION_TAG: auctions
  }
  grouped = tagged_inputs | beam.CoGroupByKey()
  return grouped | 'query8_join' >> beam.ParDo(JoinPersonAuctionFn())