def test_update_beatmapset_status_nom_qual_dq(test_database):
    """Inserts a nominate, a qualify, and a disqualify event for the same beatmapset, then checks that
    the most recent status is "pending" without nominators, whereas the status stored at the time of
    the qualify is "qualified" with both nominators."""
    mapper = User(3, name="test")
    first_nominator = User(1, name="someone")
    second_nominator = User(2, name="sometwo")
    beatmapset = Beatmapset(1, creator=mapper, allow_api=False)

    history = [
        Event("nominate", from_string("2020-01-01 00:00:00"), beatmapset, user=first_nominator),
        Event("qualify", from_string("2020-01-01 00:01:00"), beatmapset, user=second_nominator),
        Event("disqualify", from_string("2020-01-02 00:00:00"), beatmapset, user=first_nominator),
    ]
    # Events are inserted in the same order as they are listed above.
    for event in history:
        test_database.insert_event(event)

    latest_status = test_database.retrieve_beatmapset_status(
        "beatmapset_id=%s ORDER BY time DESC", (beatmapset.id, ))
    assert latest_status.status == "pending"
    assert latest_status.nominators == []

    status_at_qualify = test_database.retrieve_beatmapset_status(
        "beatmapset_id=%s AND time=%s",
        (beatmapset.id, from_string("2020-01-01 00:01:00")))
    assert status_at_qualify.status == "qualified"
    assert status_at_qualify.nominators == [first_nominator, second_nominator]
async def test_insert_retrieve_multiple_events(test_database):
    """Inserts two events on the same beatmapset and expects `retrieve_events` to yield them
    back in insertion order."""
    now = datetime.utcnow()
    author = User(1, name="test")
    beatmapset = Beatmapset(1, creator=author, allow_api=False)
    discussion = Discussion(
        1, beatmapset=beatmapset, user=author, content="testing", tab="tab", difficulty="diff")

    inserted = [
        Event(_type="test", time=now, beatmapset=beatmapset, discussion=discussion, user=author),
        Event(_type="123", time=now, beatmapset=beatmapset, discussion=discussion, user=author),
    ]
    for event in inserted:
        test_database.insert_event(event)

    retrieved_events = test_database.retrieve_events(
        where="beatmapset_id=%s", where_values=(beatmapset.id, ))
    # Pull one event at a time from the asynchronous generator and compare against what was inserted.
    for expected in inserted:
        assert await anext(retrieved_events, None) == expected
async def test_insert_retrieve_multiple_events(test_database):
    """Inserts two events sharing a fully specified beatmapset and expects `retrieve_events`
    to yield both, first inserted first."""
    timestamp_now = datetime.utcnow()
    poster = User(1, name="test")
    beatmapset = Beatmapset(
        1, artist="123", title="456", creator=poster,
        modes=["osu", "taiko"], genre="genre", language="language")
    discussion = Discussion(
        1, beatmapset=beatmapset, user=poster, content="testing", tab="tab", difficulty="diff")

    first_event = Event(
        _type="test", time=timestamp_now, beatmapset=beatmapset, discussion=discussion, user=poster)
    second_event = Event(
        _type="123", time=timestamp_now, beatmapset=beatmapset, discussion=discussion, user=poster)
    test_database.insert_event(first_event)
    test_database.insert_event(second_event)

    event_stream = test_database.retrieve_events(
        where="beatmapset_id=%s", where_values=(beatmapset.id, ))
    first_retrieved = await anext(event_stream, None)
    assert first_retrieved == first_event
    second_retrieved = await anext(event_stream, None)
    assert second_retrieved == second_event
async def populate_from_discussion(event: Event) -> None:
    """Completes the given event with information from its beatmapset discussion json
    (e.g. missing discussion info and additional details like who did votes).
    If no discussion json could be obtained, the event is marked for deletion instead."""
    page_json = get_discussions_json(event.beatmapset)
    if page_json is not None:
        event.discussion = get_complete_discussion_info(
            event.discussion, event.beatmapset, page_json)
        await __populate_additional_details(event, page_json)
        return

    # This happens if the beatmapset was deleted in between us scraping it and populating it.
    event.marked_for_deletion = True
async def test_insert_retrieve_discussion_and_replies(test_database):
    """Inserts a problem and two replies on the same discussion, and expects each of them to be
    retrievable individually by type and user.

    `retrieve_event` is awaited here, as it is everywhere else it is used in this file; without the
    `await`, the assertions would be checking coroutine objects, which are always truthy."""
    # NOTE(review): relies on the same async test setup as the other `async def` tests in this file.
    time = datetime.utcnow()
    author = User(1, name="one")
    replier = User(2, name="two")
    beatmapset = Beatmapset(
        1, artist="123", title="456", creator=replier,
        modes=["osu", "taiko"], genre="genre", language="language")
    discussion = Discussion(
        1, beatmapset=beatmapset, user=author, content="ping", tab="tab", difficulty="diff")

    problem = Event(
        _type="problem", time=time, beatmapset=beatmapset, discussion=discussion,
        user=author, content="ping")
    reply1 = Event(
        _type="reply", time=time, beatmapset=beatmapset, discussion=discussion,
        user=replier, content="pong")
    reply2 = Event(
        _type="reply", time=time, beatmapset=beatmapset, discussion=discussion,
        user=author, content="miss")

    test_database.insert_event(problem)
    test_database.insert_event(reply1)
    test_database.insert_event(reply2)

    retrieved_problem = await test_database.retrieve_event(
        where="type=%s", where_values=("problem", ))
    retrieved_reply1 = await test_database.retrieve_event(
        where="type=%s AND user_id=%s", where_values=("reply", replier.id))
    retrieved_reply2 = await test_database.retrieve_event(
        where="type=%s AND user_id=%s", where_values=("reply", author.id))

    assert retrieved_problem
    assert retrieved_reply1
    assert retrieved_reply2
async def __populate_additional_details(
        event: Event, discussions_json: object, db_name: str = SCRAPER_DB_NAME) -> None:
    """Fills in details on the given event which are derived from the beatmapset
    discussion json (e.g. who voted). Marks the event for deletion if its discussion
    is incomplete and its context cannot be completed."""
    discussion = event.discussion
    if discussion and not (discussion.user and discussion.content):
        context_completed = __complete_discussion_context(discussion, db_name=db_name)
        if not context_completed:
            # After being deleted, many properties of discussions are inaccessible without referring to cached information.
            # Without cached information, we skip the event, since its context is no longer visible to the public anyway.
            event.marked_for_deletion = True
            return

    if event.type in (types.NOMINATE, ):
        # Nominate/qualify content should reflect recent praise/hype/note content.
        event.content = get_nomination_comment(event, discussions_json)

    # Event content should reflect discussion content, unless the discussion has been deleted.
    if event.type in (types.DISQUALIFY, types.RESET) and event.discussion:
        event.content = event.discussion.content

    if not discussions_json:
        # Mapset has no discussion, probably associated with some legacy thread (e.g. ranked before modding v2).
        return

    beatmapset_json = discussions_json["beatmapset"]
    for page_event in get_map_page_event_jsons(event.beatmapset, discussions_json):
        # Two events of the same type happening within the same second is very unlikely,
        # so this'll work as identification (we have no access to actual event ids on scraping side, so can't use that).
        same_time = event.time == from_string(page_event["created_at"])
        same_type = event.type == page_event["type"]
        if not (same_time and same_type):
            continue

        if event.type in (types.RESOLVE, types.REOPEN):
            # Event user should be whoever resolved or reopened, rather than the discussion author.
            event.user = discussion_parser.parse_discussion_post_author(
                page_event["comment"]["beatmap_discussion_post_id"], beatmapset_json)

        if event.type in (types.KUDOSU_GAIN, types.KUDOSU_LOSS):
            # Event user should be whoever gave or removed the kudosu, not the discussion author.
            event.user = discussion_parser.parse_user(
                page_event["comment"]["new_vote"]["user_id"], beatmapset_json)
async def test_insert_retrieve_event_digit_properties(test_database):
    """Stores an event whose textual fields consist solely of digits and verifies that
    they come back as the same strings."""
    author = User(1, "497")
    beatmapset = Beatmapset(3, artist="5", title="2", creator=author, allow_api=False)
    discussion = Discussion(2, beatmapset, author, content="8", tab="tab", difficulty="diff")
    stored = Event(
        _type="test", time=datetime.utcnow(), user=author,
        beatmapset=beatmapset, discussion=discussion, content="4")
    test_database.insert_event(stored)

    fetched = await test_database.retrieve_event(
        where="type=%s", where_values=("test", ))

    # Ensures the database field retrieval retains the `str` type, rather than reinterpreting as `int`.
    assert fetched.content == "4"
    assert fetched.user.name == "497"
    assert fetched.beatmapset.artist == "5"
    assert fetched.beatmapset.title == "2"
    assert fetched.discussion.content == "8"
async def test_insert_retrieve_event(test_database):
    """Inserts one event with a beatmapset, discussion and user, and expects the retrieved
    event to equal it, both field by field and as a whole."""
    now = datetime.utcnow()
    author = User(1, name="test")
    beatmapset = Beatmapset(
        1, artist="123", title="456", creator=author,
        modes=["osu", "taiko"], genre="genre", language="language")
    discussion = Discussion(
        1, beatmapset=beatmapset, user=author, content="testing", tab="tab", difficulty="diff")
    stored = Event(
        _type="test", time=now, beatmapset=beatmapset, discussion=discussion, user=author)
    test_database.insert_event(stored)

    fetched = await test_database.retrieve_event("type=%s", ("test", ))

    # Compare the individual fields first, so that a mismatch points at the offending field.
    for field in ("type", "time", "beatmapset", "discussion", "user", "content"):
        assert getattr(fetched, field) == getattr(stored, field)
    assert fetched == stored
async def test_insert_retrieve_event(test_database):
    """Inserts one event built from objects created with `allow_api=False`, and expects
    the retrieved event to equal it, both field by field and as a whole."""
    created_at = datetime.utcnow()
    poster = User(1, allow_api=False)
    beatmapset = Beatmapset(1, creator=poster, allow_api=False)
    discussion = Discussion(
        1, beatmapset=beatmapset, user=poster, content="testing", tab="tab", difficulty="diff")
    expected = Event(
        _type="test", time=created_at, beatmapset=beatmapset, discussion=discussion, user=poster)
    test_database.insert_event(expected)

    actual = await test_database.retrieve_event("type=%s", ("test", ))

    assert actual.type == expected.type
    assert actual.time == expected.time
    assert actual.beatmapset == expected.beatmapset
    assert actual.discussion == expected.discussion
    assert actual.user == expected.user
    assert actual.content == expected.content
    assert actual == expected
def parse_event_json(self, event_json: object, user_jsons: object = None) -> Event:
    """Builds an Event from the given discussion event json object, or returns None if the json is
    missing or a DeletedContextError is raised while parsing (e.g. deleted beatmaps).
    The user name is taken from whatever `__lookup_user_json` returns for the user id; according to
    the original documentation, that is the discussion page's user json if supplied, else the api."""
    if not event_json:
        # Seems to occur when the respective beatmapset has been deleted. However, it's there when
        # viewing the page source manually for some reason, regardless of login status.
        log_err("WARNING | An event is missing; the beatmapset was probably deleted.")
        return None

    try:
        # Scrape object data
        _type = event_json["message_type"]
        time = timestamp.from_string(event_json["created_at"])
        beatmapset_id = event_json["beatmapset_id"]
        starting_post_discussion_id = event_json["starting_post"]["beatmap_discussion_id"]
        user_id = event_json["user_id"]

        # The user name is either provided by a user json from the discussion page, or queried through the api.
        user_json = self.__lookup_user_json(user_id, user_jsons)
        user_name = user_json["username"] if user_json else None

        content = event_json["starting_post"]["message"]

        difficulty = None
        if "beatmap" in event_json and "version" in event_json["beatmap"]:
            difficulty = event_json["beatmap"]["version"]

        # Timestamped posts belong on the timeline, difficulty-specific ones
        # in that difficulty's general tab, and anything else in the general tab for all difficulties.
        if event_json["timestamp"] is not None:
            tab = "timeline"
        else:
            tab = "general" if difficulty else "generalAll"

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None
        if user_id is not None:
            user = User(user_id, user_name)

        # TODO: This portion is missing handling for replies, see the other method.
        # Still unclear which message_type replies use; will need to find out if/when replies get json formats.
        discussion = None
        if starting_post_discussion_id is not None:
            discussion = Discussion(
                starting_post_discussion_id, beatmapset, user, content, tab, difficulty)
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type, time=time, beatmapset=beatmapset,
        discussion=discussion, user=user, content=content)
async def test_insert_retrieve_event_group_change_hybrid(test_database):
    """Inserts two "add" events for the same user and group id but with different modes, and
    expects retrieval ordered by time descending to return the more recent one."""
    older = Event(
        _type="add", time=from_string("2020-01-01 00:00:00"),
        user=User(2, name="sometwo"), group=Usergroup(7, mode="osu"))
    newer = Event(
        _type="add", time=datetime.utcnow(),
        user=User(2, name="sometwo"), group=Usergroup(7, mode="taiko"))
    for stored in (older, newer):
        test_database.insert_event(stored)

    fetched = await test_database.retrieve_event("type=%s", ("add", ), order_by="time DESC")

    assert fetched.type == newer.type
    assert fetched.time == newer.time
    assert fetched.group == newer.group
    assert fetched.group.mode == newer.group.mode
    assert fetched.user == newer.user
    assert fetched == newer
async def test_insert_retrieve_small_event(test_database):
    """Inserts an event which only has a type and a time, and expects the retrieved event
    to equal it, both field by field and as a whole."""
    minimal = Event(_type="test", time=datetime.utcnow())
    test_database.insert_event(minimal)

    fetched = await test_database.retrieve_event(
        where="type=%s", where_values=("test", ))

    # Fields that were never supplied should compare equal as well.
    for field in ("type", "time", "beatmapset", "discussion", "user", "content"):
        assert getattr(fetched, field) == getattr(minimal, field)
    assert fetched == minimal
async def test_insert_retrieve_event_group_change(test_database):
    """Inserts an "add" event carrying a user and a usergroup with a mode, and expects
    the retrieved event to equal it, including the group's mode."""
    member = User(2, name="sometwo")
    usergroup = Usergroup(7, mode="osu")
    stored = Event(_type="add", time=datetime.utcnow(), user=member, group=usergroup)
    test_database.insert_event(stored)

    fetched = await test_database.retrieve_event("type=%s", ("add", ))

    assert fetched.type == stored.type
    assert fetched.time == stored.time
    assert fetched.group == stored.group
    assert fetched.group.mode == stored.group.mode
    assert fetched.user == stored.user
    assert fetched == stored
def parse_post_json(post_json: object) -> Event:
    """Converts a single news post json object into a "news" event (for multiple, see `parse_post_jsons`).
    The event's user is only set if the author, which is created from the stripped author name, has an id."""
    author = User(name=post_json["author"].strip())
    published_at = from_string(post_json["published_at"])

    newspost = NewsPost(
        _id=post_json["id"],
        title=post_json["title"],
        preview=post_json["preview"],
        author=author,
        slug=post_json["slug"],
        image_url=complete_image_url(post_json["first_image"]))

    event_user = author if author.id else None
    return Event(
        _type="news",
        time=published_at,
        newspost=newspost,
        user=event_user,
        content=post_json["preview"])
def test_update_beatmapset_status_nom(test_database):
    """Inserts a single nominate event and expects the stored beatmapset status to be
    "nominated" with the nominating user as its only nominator."""
    mapper = User(3, name="test")
    nominator = User(1, name="someone")
    beatmapset = Beatmapset(1, creator=mapper, allow_api=False)
    nomination = Event(
        "nominate", from_string("2020-01-01 00:00:00"), beatmapset, user=nominator)

    # Should call `update_beatmapset_status`.
    test_database.insert_event(nomination)

    status = test_database.retrieve_beatmapset_status(
        "beatmapset_id=%s", (beatmapset.id, ))
    assert status.status == "nominated"
    assert status.nominators == [nominator]
def parse_event(self, event: Tag) -> Event:
    """Builds an Event from the given event html Tag object, or returns None if a
    DeletedContextError is raised while parsing (e.g. deleted beatmaps)."""
    try:
        # Scrape object data
        _type = self.parse_event_type(event)
        time = self.parse_event_time(event)
        link = self.parse_event_link(event)
        beatmapset_id = self.parse_id_from_beatmapset_link(link)
        discussion_id = self.parse_id_from_discussion_link(link)
        user_id = self.parse_event_author_id(event)
        user_name = self.parse_event_author_name(event)
        content = self.parse_discussion_message(event)

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None
        if user_id is not None:
            user = User(user_id, user_name)

        has_discussion = discussion_id is not None
        if _type == "reply":
            # Replies should look up the discussion they are posted on.
            discussion = Discussion(discussion_id, beatmapset) if has_discussion else None
        else:
            # Tab and difficulty are parsed regardless of whether a discussion id was found.
            tab = self.parse_discussion_tab(event)
            difficulty = self.parse_discussion_diff(event)
            discussion = None
            if has_discussion:
                discussion = Discussion(
                    discussion_id, beatmapset, user, content, tab, difficulty)
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type, time=time, beatmapset=beatmapset,
        discussion=discussion, user=user, content=content)
async def test_insert_retrieve_event_cached(cached_database):
    """Inserts 100 events, retrieves them twice with the same query, and expects the second
    retrieval to yield the same events in less time than the first.

    The retrieved events are collected while iterating and compared as lists. Calling
    `anext(..., None)` on the generators after the `async for` loops have exhausted them
    would only ever compare `None` with `None`."""
    time = datetime.utcnow()
    user = User(1, name="test")
    beatmapset = Beatmapset(
        1, artist="123", title="456", creator=user,
        modes=["osu", "taiko"], genre="genre", language="language")
    discussion = Discussion(
        1, beatmapset=beatmapset, user=user, content="testing", tab="tab", difficulty="diff")

    for i in range(100):
        event = Event(
            _type=f"{i}", time=time, beatmapset=beatmapset, discussion=discussion, user=user)
        cached_database.insert_event(event)

    # First retrieval; timed as the uncached baseline.
    events_uncached = []
    start_time = datetime.utcnow()
    retrieved_events_uncached = cached_database.retrieve_events(
        where="beatmapset_id=%s", where_values=(beatmapset.id, ))
    async for event in retrieved_events_uncached:
        assert event.beatmapset == beatmapset
        events_uncached.append(event)
    delta_time_uncached = datetime.utcnow() - start_time

    # Second retrieval with the identical query; timed for comparison with the first.
    events_cached = []
    start_time = datetime.utcnow()
    retrieved_events_cached = cached_database.retrieve_events(
        where="beatmapset_id=%s", where_values=(beatmapset.id, ))
    async for event in retrieved_events_cached:
        assert event.beatmapset == beatmapset
        events_cached.append(event)
    delta_time_cached = datetime.utcnow() - start_time

    # Guard against both retrievals being empty, which would make the comparison meaningless.
    assert events_uncached
    assert events_uncached == events_cached
    assert delta_time_uncached > delta_time_cached
def test_insert_retrieve_obv_sev_event(test_database):
    """Inserts a SEV event with content "2/0" and expects the retrieved obviousness and
    severity of its discussion to be 2 and 0 respectively."""
    author = User(1, name="test")
    beatmapset = Beatmapset(1, creator=author, allow_api=False)
    discussion = Discussion(
        1, beatmapset=beatmapset, user=author, content="testing", tab="tab", difficulty="diff")
    sev_event = Event(
        _type=types.SEV, time=from_string("2020-01-01 00:00:00"),
        beatmapset=beatmapset, discussion=discussion, content="2/0")
    test_database.insert_obv_sev_event(sev_event)

    obviousness, severity = test_database.retrieve_obv_sev(discussion_id=1)

    assert obviousness == 2
    assert severity == 0
async def test_insert_retrieve_event_with_newspost(test_database):
    """Inserts a "news" event carrying a news post, and expects the retrieved event to equal it,
    both field by field and as a whole."""
    now = datetime.utcnow()
    writer = User(1, name="test")
    newspost = NewsPost(
        _id=3, title="title", preview="preview", author=writer,
        slug="slug", image_url="image_url")
    stored = Event(_type="news", time=now, newspost=newspost, user=writer)
    test_database.insert_event(stored)

    fetched = await test_database.retrieve_event("type=%s", ("news", ))

    for field in ("type", "time", "newspost", "user", "content"):
        assert getattr(fetched, field) == getattr(stored, field)
    assert fetched == stored
async def retrieve_events(
        self, where: str, where_values: tuple = None,
        extensive: bool = False) -> Generator[Event, None, None]:
    """Asynchronously yields every event in the database matching the given WHERE clause.
    Optionally retrieve extensively so that more can be queried (e.g. user name, beatmap creator/artist/title)."""
    if extensive:
        rows = self.__fetch_events_extensive(where, where_values)
    else:
        rows = self.__fetch_events(where, where_values)

    for row in (rows or []):
        # Return control back to the event loop, granting other tasks a window to start/resume.
        await asyncio.sleep(0)

        _type = row[0]
        time = row[1]

        # Columns 2-7 hold ids (and the group mode); related objects are only looked up for truthy ids.
        beatmapset = None
        if row[2]:
            beatmapset = self.retrieve_beatmapset("id=%s", (row[2], ))

        discussion = None
        if row[3]:
            discussion = self.retrieve_discussion("id=%s", (row[3], ))

        user = None
        if row[4]:
            user = self.retrieve_user("id=%s", (row[4], ))

        group = None
        if row[5]:
            group = Usergroup(row[5], mode=row[6] or None)

        newspost = None
        if row[7]:
            newspost = self.retrieve_newspost("id=%s", (row[7], ))

        content = row[8]

        yield Event(
            _type, time, beatmapset, discussion, user, group, newspost, content=content)
def test_event_int_content():
    """An event constructed with integer content should expose that content as a string."""
    integer_content = 4
    event = Event(_type="test", time=datetime.utcnow(), content=integer_content)

    assert event.content == "4"
async def populate_from_bnsite(event: Event) -> None:
    """Fills in the content and group mode of group removal/addition events using the bnsite API.
    Events of any other type are left untouched."""
    if event.type not in (types.REMOVE, types.ADD):
        return

    # Group removal content should reflect the bnsite removal reason (e.g. Kicked/Resigned)
    event.content = get_group_bnsite_comment(event)
    event.group.mode = get_group_bnsite_mode(event)
def parse_event_json(self, event_json: object, user_jsons: object = None) -> Event:
    """Builds an Event from the given event json object, or returns None if the json is missing
    or a DeletedContextError is raised while parsing (e.g. deleted beatmaps).
    The user name is taken from whatever `__lookup_user_json` returns for the user id; according to
    the original documentation, that is the supplied json-users if available, else the api."""
    if not event_json:
        # Seems to occur when the respective beatmapset has been deleted.
        log_err("WARNING | An event is missing; the beatmapset was probably deleted.")
        return None

    try:
        # Scrape object data
        _type = event_json["type"]
        time = timestamp.from_string(event_json["created_at"])

        if not ("beatmapset" in event_json and event_json["beatmapset"]):
            raise DeletedContextError(
                "No beatmapset was found in this event. It was likely deleted.")
        beatmapset_id = event_json["beatmapset"]["id"]

        discussion_id = None
        if "discussion" in event_json and event_json["discussion"]:
            discussion_id = event_json["discussion"]["id"]

        user_id = None
        if "user_id" in event_json:
            user_id = event_json["user_id"]

        user_json = self.__lookup_user_json(user_id, user_jsons)
        user_name = user_json["username"] if user_json else None

        content = None
        if _type in (types.LANGUAGE_EDIT, types.GENRE_EDIT):
            # Language/genre edits always have "old" and "new" fields, which no other type has.
            comment = event_json["comment"]
            old = comment["old"]
            new = comment["new"]
            content = f"{old} -> {new}"
        if _type in (types.UNLOVE, ):
            # E.g. "Mapper has asked for it to be removed from Loved".
            content = event_json["comment"]["reason"]

        # Reconstruct objects
        beatmapset = Beatmapset(beatmapset_id)
        user = None
        if user_id is not None:
            user = User(user_id, user_name)
        discussion = None
        if discussion_id is not None:
            discussion = Discussion(discussion_id, beatmapset)
    except DeletedContextError as err:
        log_err(err)
        return None

    return Event(
        _type=_type, time=time, beatmapset=beatmapset,
        discussion=discussion, user=user, content=content)
async def retrieve_events(
        self, where: str, where_values: tuple = None, group_by: str = None,
        order_by: str = None, limit: int = None, extensive: bool = False) -> Generator[Event, None, None]:
    """Returns an asynchronous generator of all events from the database matching the given WHERE clause.
    Optionally retrieve extensively so that more can be queried (e.g. user name, beatmap creator/artist/title).

    `order_by` and `limit` are forwarded to the underlying fetch. `group_by` is accepted,
    but is not referenced anywhere in this method.

    Rows for which a DeletedContextError is raised while reconstructing the event are skipped.
    For events with a beatmapset, the beatmapset's status is set to the latest status stored
    before the event's time (or, for SEV events with a matching disqualify/nomination_reset on
    the same discussion, before the time of that reset event)."""
    if not extensive:
        fetched_rows = self.__fetch_events(where, where_values, order_by, limit)
    else:
        fetched_rows = self.__fetch_events_extensive(
            where, where_values, order_by, limit)

    # `fetched_rows` may be falsy (e.g. None), in which case nothing is yielded.
    for row in (fetched_rows or []):
        await asyncio.sleep(0)  # Return control back to the event loop, granting other tasks a window to start/resume.

        # Treat deleted beatmapsets/discussions as if not stored.
        with suppress(DeletedContextError):
            _type = row[0]
            time = row[1]
            # Related objects are only looked up when their id column is truthy.
            beatmapset = self.retrieve_beatmapset(
                "id=%s", (row[2], )) if row[2] else None
            discussion = self.retrieve_discussion(
                "id=%s", (row[3], ), beatmapset=beatmapset) if row[3] else None
            user = self.retrieve_user("id=%s", (row[4], )) if row[4] else None
            group = Usergroup(row[5], mode=row[6] if row[6] else None) if row[5] else None
            newspost = self.retrieve_newspost(
                "id=%s", (row[7], )) if row[7] else None
            content = row[8]

            if beatmapset:
                status_time = time
                if _type == types.SEV:
                    # NOTE(review): `discussion` is None whenever row[3] is falsy, in which case
                    # `discussion.id` raises AttributeError — presumably SEV events always have a discussion; confirm.
                    reset_event = await self.retrieve_event(
                        where="(type=%s OR type=%s) AND discussion_id=%s",
                        where_values=("disqualify", "nomination_reset", discussion.id))
                    if reset_event:
                        # Use the time of the reset rather than the time of the SEV event itself.
                        status_time = reset_event.time

                # Dependent on when the event happened, hence why this is here and not in `retrieve_beatmapset`.
                beatmapset.status = self.retrieve_beatmapset_status(
                    where="beatmapset_id=%s AND time < %s",
                    where_values=(beatmapset.id, status_time),
                    order_by="time DESC")

            yield Event(_type, time, beatmapset, discussion, user, group, newspost, content=content)