def crawl_friends(session, twitter_api, crawl_user, crawl_user_id=None, when=None):
    """Insert every not-yet-known friend of a user and link them with "follows".

    Iterates the IDs returned by ``twitter_api.GetFriendIDs``. An ID that
    already has an ``Account`` row (matched on its external id) is skipped
    entirely. Any other ID is fetched with ``GetUser``, stored through
    ``insert_user``, printed, and linked by an ``AccountRelationship`` with
    ``left=crawl_user``, ``right=<new account>`` and ``rel="follows"``.

    :param session: database session used for the lookups and inserts.
    :param twitter_api: client exposing ``GetFriendIDs`` and ``GetUser``.
    :param crawl_user: object placed on the ``left`` side of each relationship.
    :param crawl_user_id: ID passed to ``GetFriendIDs``; falls back to
        ``crawl_user.id`` when falsy.
    :param when: timestamp stored on each relationship; defaults to ``now()``.
    """
    crawl_user_id = crawl_user_id or crawl_user.id
    when = now() if when is None else when

    for friend_id in twitter_api.GetFriendIDs(user_id=crawl_user_id):
        try:
            already_known = (
                session.query(Account)
                .filter_by(external_id=twitter_external_user_id(friend_id))
                .first()
            )
            # NOTE(review): known accounts are skipped before the relationship
            # is recorded, so no "follows" row is written for them -- confirm
            # that this is intended.
            if already_known:
                continue

            hydrated = twitter_api.GetUser(user_id=friend_id)
            friend_account = insert_user(session, hydrated)
            print(friend_account)
            get_or_create(session, AccountRelationship,
                          left=crawl_user,
                          right=friend_account,
                          rel="follows",
                          when=when)
        except twitter.error.TwitterError as err:
            # Best effort: report the failing ID and move on to the next one.
            print(friend_id, err)
def helper(session, external_id, persona=None, when=None):
    """Find or create the account for ``external_id`` and attach a persona.

    The account is looked up by the service produced by ``service_ctor`` and
    the external id produced by ``external_id_fn``; a new ``schema.Account``
    is added to the session when no row matches. A ``schema.Name`` carrying
    the raw ``external_id`` is then ensured for the account, the session is
    committed and the account is refreshed.

    Persona handling:

    * account has a persona and ``persona`` is given -> ``merge_left`` is
      called with ``persona`` and the account's persona;
    * account has a persona and ``persona`` is omitted -> the existing persona
      is kept (previously it was overwritten with a brand-new empty
      ``schema.Persona``, detaching the account from its identity);
    * account has no persona -> it receives ``persona`` or a fresh
      ``schema.Persona``.

    :param session: database session.
    :param external_id: service-side identifier, also stored as a name.
    :param persona: optional persona to associate with the account.
    :param when: timestamp stored on the account; defaults to ``now()``.
    :returns: the refreshed account.
    """
    when = when or now()
    _svc = service_ctor(session)
    _extid = external_id_fn(external_id)

    account = session.query(schema.Account)\
        .filter_by(service=_svc, external_id=_extid)\
        .first()
    if not account:
        account = schema.Account(service=_svc, external_id=_extid)
        session.add(account)

    # ``when`` has already been defaulted above, so it is always set here.
    account.when = when

    if account.persona and persona:
        # Imported lazily, exactly where the original imported it.
        from bbdb.personas import merge_left
        merge_left(session, persona, account.persona)
    elif not account.persona:
        account.persona = persona or schema.Persona()
    # Otherwise the account already has a persona and none was supplied:
    # leave the existing persona untouched.

    schema.get_or_create(session, schema.Name, name=external_id, account=account)
    session.commit()
    session.refresh(account)
    return account
def insert_display_name(session, user: User, handle=None, when=None):
    """Store ``user.name`` as a ``Name`` attached to an account handle.

    :param session: database session.
    :param user: user object providing ``name`` and ``id``.
    :param handle: account to attach the name to; when falsy, one is obtained
        via ``get_or_create`` keyed on the user's external id.
    :param when: timestamp stored on the name; defaults to ``now()``.
    :returns: the ``Name`` row, or ``None`` when ``user.name`` is empty.
    """
    if not user.name:
        return None

    external_id = twitter_external_user_id(user.id)
    if not handle:
        handle = get_or_create(session, Account, external_id=external_id)

    record = get_or_create(session, Name, name=user.name, account=handle)
    record.when = when or now()
    session.add(record)
    return record
def insert_screen_name(session, user: User, handle=None, when=None):
    """Store ``"@" + user.screen_name`` as a ``Name`` attached to a handle.

    :param session: database session.
    :param user: user object providing ``screen_name`` and ``id``.
    :param handle: account to attach the name to; when falsy, one is obtained
        via ``get_or_create`` keyed on the user's external id.
    :param when: timestamp stored on the name; defaults to ``now()``.
    :returns: the ``Name`` row, or ``None`` when ``user.screen_name`` is empty.
    """
    if not user.screen_name:
        return None

    external_id = twitter_external_user_id(user.id)
    if not handle:
        handle = get_or_create(session, Account, external_id=external_id)

    at_name = "@" + user.screen_name
    record = get_or_create(session, Name, name=at_name, account=handle)
    record.when = when or now()
    session.add(record)
    return record
def helper(session):
    """Return the service row for the enclosing ``name``, creating it if needed.

    The service is keyed on the lower-cased ``name``. The original spelling is
    recorded under ``more["pretty_name"]`` unless a pretty name is already
    present, and a ``schema.ServiceURL`` is ensured for every entry of
    ``urls`` (passed through ``normalize_url`` when ``normalize`` is truthy).

    ``name``, ``urls`` and ``normalize`` are taken from the enclosing scope.

    :param session: database session.
    :returns: the service row.
    """
    key = name.lower()

    service = session.query(schema.Service)\
        .filter(schema.Service.name == key)\
        .first()
    if not service:
        service = schema.get_or_create(session, schema.Service, name=key)

    if not service.more:
        # No metadata yet: start it off with the pretty name.
        service.more = {"pretty_name": name}
    elif "pretty_name" not in service.more:
        service.more["pretty_name"] = name

    for url in urls:
        stored_url = normalize_url(url) if normalize else url
        schema.get_or_create(session, schema.ServiceURL,
                             service=service,
                             url=stored_url)

    return service
def crawl_followers(session, twitter_api, crawl_user, crawl_user_id=None, when=None):
    """Insert every not-yet-named follower of a user and link them with "follows".

    Iterates the IDs returned by ``twitter_api.GetFollowerIDs``. An ID whose
    ``Account`` row exists and already has names is reported and skipped. Any
    other ID is fetched with ``GetUser``, stored through ``insert_user``,
    printed, and linked by an ``AccountRelationship`` with
    ``left=<new account>``, ``right=crawl_user`` and ``rel="follows"``.

    :param session: database session used for the lookups and inserts.
    :param twitter_api: client exposing ``GetFollowerIDs`` and ``GetUser``.
    :param crawl_user: object placed on the ``right`` side of each relationship.
    :param crawl_user_id: ID passed to ``GetFollowerIDs``; falls back to
        ``crawl_user.id`` when falsy.
    :param when: timestamp stored on each relationship; defaults to ``now()``.
    """
    crawl_user_id = crawl_user_id or crawl_user.id
    when = now() if when is None else when

    for follower_id in twitter_api.GetFollowerIDs(user_id=crawl_user_id):
        try:
            extid = twitter_external_user_id(follower_id)
            known = session.query(Account).filter_by(external_id=extid).first()

            # NOTE(review): known accounts are skipped before the relationship
            # is recorded, so no "follows" row is written for them -- confirm
            # that this is intended.
            if known and known.names:
                aliases = ", ".join(entry.name for entry in known.names)
                print("Already know of user", follower_id, "AKA", aliases)
                continue

            # Hydrate the one user explicitly
            hydrated = twitter_api.GetUser(user_id=follower_id)
            follower_account = insert_user(session, hydrated)
            print(follower_account)
            get_or_create(session, AccountRelationship,
                          left=follower_account,
                          right=crawl_user,
                          rel="follows",
                          when=when)
        except twitter.error.TwitterError as err:
            # Best effort: report the failing ID and move on to the next one.
            print(follower_id, err)
def insert_name(session, persona, name):
    """Attach ``name`` to ``persona`` through the persona's null-service account.

    Names hang off accounts, so the persona's account on the service returned
    by ``null_service`` is looked up first. It is created and committed when
    missing, and the name is then ensured on that account.

    :param session: database session.
    :param persona: persona that should carry the name.
    :param name: the name to record.
    :returns: the result of ``get_or_create`` for the ``schema.Name`` row.
    """
    placeholder_service = null_service(session)

    carrier = session.query(schema.Account)\
        .filter_by(service=placeholder_service, persona=persona)\
        .first()

    if not carrier:
        carrier = schema.Account(
            service=placeholder_service,
            external_id=_nullsvc_fk(persona.id),
            persona=persona,
        )
        session.add(carrier)
        session.commit()

    return get_or_create(session, schema.Name, name=name, account=carrier)
# Crawl script body: record the followers and friends of one Twitter user.
session = factory()

# With exactly one command-line argument, treat it as a screen name and
# resolve it to a user id; otherwise use the id of the user returned by
# VerifyCredentials().
if len(sys.argv) == 2:
    user_id = twitter_api.GetUser(screen_name=sys.argv[1]).id
else:
    user_id = twitter_api.VerifyCredentials().id

try:
    # One timestamp is shared by every row written during this run.
    when = arrow.utcnow()

    # Followers: each returned user follows ``user_id``.
    for user in twitter_api.GetFollowers(user_id=user_id):
        print(twitter.insert_user(session, user))
        schema.get_or_create(session, schema.TwitterFollows,
                             follows_id=user_id,
                             follower_id=user.id,
                             when=when)

    # Friends: ``user_id`` follows each returned user.
    for user in twitter_api.GetFriends(user_id=user_id):
        print(twitter.insert_user(session, user))
        schema.get_or_create(session, schema.TwitterFollows,
                             follower_id=user_id,
                             follows_id=user.id,
                             when=when)
finally:
    # Runs whether or not the crawl raised.
    # NOTE(review): only flush() is called here, not commit() -- presumably
    # the helpers above commit their own work; confirm.
    session.flush()
    session.close()
def insert_user(session, kb_user, persona=None, when=None, twitter_api=None):
    """Insert a Keybase user and link every identity they have proved.

    The Keybase account is created or found through ``_insert_user`` and the
    user's ``username`` is recorded as a name on it. Each entry of
    ``kb_user.proofs`` is then handled by type:

    * ``"generic_web_site"`` -- ignored for now.
    * ``"twitter"`` -- an existing Twitter account whose name is
      ``@<nametag>`` is merged into the Keybase persona; if none exists and a
      ``twitter_api`` client was supplied, the user is fetched and inserted
      under the Keybase persona; otherwise a warning is printed.
    * anything else -- a service named after the proof type, its URL and an
      account with external id ``"<proof_type>:<nametag>"`` are ensured, and
      that account is attached to (or merged into) the Keybase persona.

    :param session: database session.
    :param kb_user: Keybase user exposing ``id``, ``username`` and ``proofs``.
    :param persona: optional persona forwarded to ``_insert_user``.
    :param when: optional timestamp forwarded to ``_insert_user``.
    :param twitter_api: optional Twitter client used to hydrate proved Twitter
        identities that are not in the database yet.
    :returns: the Keybase account.
    """
    kb_account = _insert_user(session, kb_user.id, persona=persona, when=when)

    schema.get_or_create(session, schema.Name,
                         name=kb_user.username,
                         account=kb_account)

    for proof in kb_user.proofs:
        if proof.proof_type == "generic_web_site":
            # FIXME: do something with this.
            continue

        elif proof.proof_type == "twitter":
            # FIXME: Try to find (or create) a Twitter user.
            #
            # It happens to be safe just to search by @-handle since we drive
            # keybase from Twitter for now. But that may not be safe in the
            # future. Really this should push to a Twitter user ingesting
            # queue or something somewhere.
            proved_service = insert_twitter(session)
            twitter_account = session.query(schema.Account)\
                .filter_by(service=proved_service)\
                .join(schema.Name)\
                .filter(schema.Name.name == "@{}".format(proof.nametag))\
                .first()

            if not twitter_account and twitter_api:
                twitter_insert_user(session,
                                    twitter_api.GetUser(screen_name=proof.nametag),
                                    persona=kb_account.persona)
            elif twitter_account:
                merge_left(session, kb_account.persona, twitter_account.persona)
            else:
                print("[WARN] Unable to link proved Twitter identity @{}".format(proof.nametag))
                continue

        else:
            # We make a bunch of assumptions about other services...
            proved_service = schema.get_or_create(session, schema.Service,
                                                  name=proof.proof_type)

            # Insert the service's URL
            schema.get_or_create(session, schema.ServiceURL,
                                 service=proved_service,
                                 url=normalize_url(proof.service_url))

            external_id = ("%s:%s" % (proof.proof_type, proof.nametag))
            proved_account = session.query(schema.Account)\
                .filter_by(service=proved_service, external_id=external_id)\
                .first()

            if not proved_account:
                proved_account = schema.Account(service=proved_service,
                                                external_id=external_id,
                                                persona=kb_account.persona)
            elif proved_account.persona_id is not None:
                merge_left(session, kb_account.persona, proved_account.persona)
            else:
                # Attach the Keybase account's persona rather than reading
                # ``persona.id``: the ``persona`` argument defaults to None,
                # which made this branch raise AttributeError. This matches
                # what the new-account branch above already does.
                proved_account.persona = kb_account.persona

            session.add(proved_account)

            nametag = schema.get_or_create(session, schema.Name,
                                           name=proof.nametag,
                                           account=proved_account)
            # NOTE(review): this stores the raw ``persona`` argument, which may
            # be None -- confirm whether ``kb_account.persona`` is meant here.
            nametag.persona = persona
            session.add(nametag)

        session.commit()
        print("User", kb_account, "proved for service", proved_service)

    return kb_account
def insert_tweet(session, twitter_api, tweet):
    """Insert a tweet (status using the old API terminology) into the backing datastore.

    This means inserting the original poster, inserting the service, inserting
    the post and inserting the post distribution.

    WARNING: this function does NOT recursively insert replied to tweets, or
    quoted tweets. It's expected that some other system handles walking the
    tree of tweets to deal with all that. This is, ultimately, to work around
    the garbage Twitter rate limits.

    If a post with the tweet's external id already exists (for example a dummy
    created for a reply or quote target), that row is filled in and returned.
    Otherwise a new post is created together with a ``"to"`` distribution per
    mentioned user and ``"reply-to"`` / ``"quotes"`` relationships where the
    tweet carries those ids.

    :param session: database session.
    :param twitter_api: unused here; kept so the signature stays the same.
    :param tweet: status object exposing ``user``, ``id``, ``created_at``,
        ``user_mentions``, ``in_reply_to_status_id``, ``quoted_status_id``
        and ``AsDict()``.
    :returns: the post row, or ``None`` when the poster could not be inserted
        as an ``Account``.
    """
    _tw = insert_twitter(session)

    try:
        poster = tweet.user
        if not isinstance(poster, User):
            poster = User.NewFromJsonDict(poster)
        poster = insert_user(session, poster)
        # NOTE(review): this check disappears under ``python -O``; it is kept
        # as an assert so the logged message below stays unchanged.
        assert isinstance(poster, Account)
    except AssertionError as e:
        print("Encountered exception", repr(e), traceback.format_exc(), "Processing tweet", tweet)
        return None

    # One key for both the duplicate lookup and the insert. The original
    # looked up by ``tweet.id`` but inserted by ``tweet.id_str``; ``tweet.id``
    # is also what the dummy rows for reply/quote targets are keyed on.
    external_id = twitter_external_tweet_id(tweet.id)
    posted_at = aget(datetime.strptime(tweet.created_at, _tw_datetime_pattern))

    dupe = session.query(Post)\
        .filter_by(external_id=external_id)\
        .first()

    # There's a dummy record in place, flesh it out. We're in a monoid here.
    if dupe:
        dupe.poster = poster
        dupe.when = posted_at
        dupe.text = _get_tweet_text(tweet)
        dupe.more = tweet.AsDict()
        session.add(dupe)
        session.commit()
        return dupe

    # We're inserting a new tweet here...
    post = Post(service=_tw,
                text=_get_tweet_text(tweet),
                external_id=external_id,
                poster=poster,
                when=posted_at,
                more=tweet.AsDict())
    session.add(post)

    # ``user_mentions`` may be None when the status carries no entities.
    for user in tweet.user_mentions or ():
        get_or_create(session, PostDistribution,
                      post=post,
                      recipient=insert_user(session, User(id=user.id)),
                      rel="to")

    if tweet.in_reply_to_status_id:
        get_or_create(session, PostRelationship,
                      left=post,
                      right=_tweet_or_dummy(session, tweet.in_reply_to_status_id),
                      rel="reply-to")

    if tweet.quoted_status_id:
        get_or_create(session, PostRelationship,
                      left=post,
                      right=_tweet_or_dummy(session, tweet.quoted_status_id),
                      rel="quotes")

    session.commit()
    return post
def _tweet_or_dummy(session, external_id):
    """Return the ``Post`` for a tweet id via ``get_or_create``.

    :param session: database session.
    :param external_id: raw tweet id; converted with
        ``twitter_external_tweet_id`` before the lookup.
    :returns: the result of ``get_or_create`` for a ``Post`` keyed on the
        converted id and the service returned by ``insert_twitter``.
    """
    post_key = twitter_external_tweet_id(external_id)
    twitter_service = insert_twitter(session)
    return get_or_create(session, Post,
                         external_id=post_key,
                         service=twitter_service)