def index(self):
    '''
    Return one page of labels, sorted by name.

    **Example Response**

    .. sourcecode:: json

        {
            "labels": [
                {
                    "id": 1,
                    "name": "gender",
                    "url": "https://quickpin/api/label/1"
                },
                ...
            ],
            "total_count": 2
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list labels: a list of label objects
    :>json int labels[n].id: unique identifier for label
    :>json str labels[n].name: the label name
    :>json str labels[n].url: URL endpoint for retrieving more data
        about this label
    :>json int total_count: count of all labels, not just those on the
        current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)
    all_labels = g.db.query(Label)

    # Count before limit/offset are applied so the total spans every page.
    total_count = all_labels.count()

    first_row = (page - 1) * results_per_page
    page_of_labels = all_labels.order_by(Label.name.asc())
    page_of_labels = page_of_labels.limit(results_per_page)
    page_of_labels = page_of_labels.offset(first_row)

    def _serialize(label):
        # Attach the API endpoint URL to the label's own fields.
        entry = label.as_dict()
        entry['url'] = url_for('LabelView:get', id_=label.id)
        return entry

    return jsonify(
        labels=[_serialize(label) for label in page_of_labels],
        total_count=total_count
    )
def index(self):
    """
    Return a paginated array of labels in ascending name order.

    **Example Response**

    .. sourcecode:: json

        {
            "labels": [
                {
                    "id": 1,
                    "name": "gender",
                    "url": "https://quickpin/api/label/1"
                },
                ...
            ],
            "total_count": 2
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list labels: a list of label objects
    :>json int labels[n].id: unique identifier for label
    :>json str labels[n].name: the label name
    :>json str labels[n].url: URL endpoint for retrieving more data
        about this label
    :>json int total_count: count of all labels, not just those on the
        current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    """

    page, results_per_page = get_paging_arguments(request.args)

    base_query = g.db.query(Label)
    # The total is taken from the unpaged query on purpose.
    total_count = base_query.count()

    skip = (page - 1) * results_per_page
    paged_query = base_query.order_by(Label.name.asc()) \
                            .limit(results_per_page) \
                            .offset(skip)

    labels = []
    for row in paged_query:
        item = row.as_dict()
        item['url'] = url_for('LabelView:get', id_=row.id)
        labels.append(item)

    return jsonify(labels=labels, total_count=total_count)
def index(self):
    '''
    Return one page of categories, sorted by name.

    **Example Response**

    .. sourcecode:: json

        {
            "categories": [
                {
                    "id": 1,
                    "name": "gender",
                    "sites": [
                        {
                            "category": "books",
                            "id": 2,
                            "name": "aNobil",
                            "search_text": "- aNobii</title>",
                            "status_code": 200,
                            "url": "http://www.anobii.com/%s/books"
                        },
                        ...
                    ]
                },
                ...
            ],
            "total_count": 2
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list categories: a list of category objects
    :>json int categories[n].id: unique identifier for category
    :>json str categories[n].name: the category name
    :>json str categories[n].url-for: API endpoint URL for this category
    :>json list categories[n].sites: list of sites associated with this
        category
    :>json str categories[n].sites[n].category: the site category
    :>json int categories[n].sites[n].id: the unique id for site
    :>json str categories[n].sites[n].name: the site name
    :>json str categories[n].sites[n].search_text: string search pattern
    :>json int categories[n].sites[n].status_code: server response code
        for site
    :>json str categories[n].sites[n].url: the site url
    :>json int total_count: count of all categories, not just those on
        the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    category_query = g.db.query(Category)
    # Count the unpaged query so the total covers every page.
    total_count = category_query.count()

    first_row = (page - 1) * results_per_page
    category_query = category_query.order_by(Category.name.asc())
    category_query = category_query.limit(results_per_page)
    category_query = category_query.offset(first_row)

    def _serialize(category):
        entry = category.as_dict()
        # NOTE(review): sibling views publish this under 'url'; the
        # 'url-for' key is kept as-is because clients may depend on it.
        entry['url-for'] = url_for('CategoryView:get', id_=category.id)
        return entry

    categories = [_serialize(category) for category in category_query]

    return jsonify(categories=categories, total_count=total_count)
def index(self):
    '''
    Return one page of the current user's result archives, newest first.

    Only archives whose ``user_id`` matches the authenticated user are
    returned. Each archive is annotated with the name of its category;
    archives whose category id matches no known category are reported
    as "All sites".

    **Example Response**

    .. sourcecode:: json

        {
            "archives": [
                {
                    "id": 1,
                    "job_id": "2298d96a-653d-42f2-b6d3-73ff337d51ce",
                    "user_id": 1,
                    "username": "bob",
                    "date": "",
                    "site_count": 166,
                    "found_count": 65,
                    "not_found_count": 101,
                    "error_count": 9,
                    "zip_file": "bob-2298d96a-653d-42f2.zip",
                    "category_id": 2,
                    "category_name": "Business"
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)
    :query username: only return archives with exactly this username

    :>header Content-Type: application/json
    :>json list archives: a list of result archive objects
    :>json str archives[n].job_id: the job_id of this archive
    :>json int archives[n].id: the unique id of this archive
    :>json int archives[n].user_id: the user_id of the owner of this
        archive
    :>json str archives[n].username: the archive username
    :>json str archives[n].date: the archive creation date
    :>json int archives[n].site_count: the number of site results in
        this archive
    :>json int archives[n].found_count: the number of sites in this
        archive with a username match
    :>json int archives[n].not_found_count: the number of sites in this
        archive with no username match
    :>json int archives[n].error_count: the number of sites in this
        archive that raised an error while searching for username
    :>json str archives[n].zip_file: the zip file location for this
        archive
    :>json int archives[n].category_id: category ID of the archive
    :>json str archives[n].category_name: category name of the archive,
        or "All sites" when the category ID is not a known category
    :>json int total_count: count of all matching archives, not just
        those on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)
    username = request.args.get('username', '')

    archive_query = g.db.query(Archive) \
                        .filter(Archive.user_id == g.user.id)

    # An empty or missing username means "no username filter".
    if username:
        archive_query = archive_query.filter(Archive.username == username)

    # Count after filtering but before paging.
    total_count = archive_query.count()

    first_row = (page - 1) * results_per_page
    archive_query = archive_query.order_by(Archive.date.desc())
    archive_query = archive_query.limit(results_per_page)
    archive_query = archive_query.offset(first_row)

    # Map category id -> name once, instead of one lookup per archive.
    category_names = {
        category.id: category.name
        for category in g.db.query(Category).all()
    }

    archives = []
    for archive in archive_query:
        entry = archive.as_dict()
        entry['category_name'] = category_names.get(
            archive.category_id, 'All sites'
        )
        archives.append(entry)

    return jsonify(archives=archives, total_count=total_count)
def index(self):
    '''
    Return one page of profiles, most recently updated first.

    Note that this only returns full profiles, not "stub" profiles. If
    user A in QuickPin has a friend/follower user B but user B is not in
    QuickPin, then a "stub" profile is created for user B.

    **Example Response**

    .. sourcecode:: json

        {
            "profiles": [
                {
                    "avatar_url": "https://quickpin/api/file/5",
                    "avatar_thumb_url": "https://quickpin/api/file/6",
                    "description": "A human being.",
                    "follower_count": 12490,
                    "friend_count": 294,
                    "id": 5,
                    "is_stub": false,
                    "is_interesting": false,
                    "join_date": "2010-01-30T18:21:35",
                    "last_update": "2015-08-18T10:51:16",
                    "location": "Washington, DC",
                    "name": "John Q. Doe",
                    "post_count": 230,
                    "private": false,
                    "site": "twitter",
                    "time_zone": "Central Time (US & Canada)",
                    "upstream_id": "123456",
                    "url": "https://quickpin/api/profile/5",
                    "username": "johndoe"
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)
    :query site: name of site to filter by
    :query interesting: filter on the "interesting" flag: "yes", "no",
        or "unset" (flag is null); any other value is ignored
    :query label: comma-separated label names; each name is lowercased
        and a profile must carry every listed label to match

    :>header Content-Type: application/json
    :>json list profiles: a list of profile objects
    :>json str profiles[n].avatar_url: a URL to the user's current
        avatar image (a default image if the profile has no avatar)
    :>json str profiles[n].avatar_thumb_url: a URL to a 32x32px
        thumbnail of the user's current avatar image (a default
        thumbnail if the profile has no avatar)
    :>json str profiles[n].description: profile description
    :>json int profiles[n].follower_count: number of followers
    :>json int profiles[n].friend_count: number of friends (a.k.a.
        followees)
    :>json int profiles[n].id: unique identifier for profile
    :>json bool profiles[n].is_stub: indicates that this is a stub
        profile, e.g. related to another profile but has not been fully
        imported (for this particular endpoint, is_stub will always be
        false)
    :>json bool profiles[n].is_interesting: indicates whether this
        profile has been tagged as interesting. The value can be null.
    :>json str profiles[n].join_date: the date this profile joined its
        social network (ISO-8601)
    :>json str profiles[n].last_update: the last time that information
        about this profile was retrieved from the social media site
        (ISO-8601)
    :>json str profiles[n].location: geographic location provided by the
        user, as free text
    :>json str profiles[n].name: the full name provided by this user
    :>json int profiles[n].post_count: the number of posts made by this
        profile
    :>json bool profiles[n].private: true if this is a private account
        (i.e. not world-readable)
    :>json str profiles[n].site: machine-readable site name that this
        profile belongs to
    :>json str profiles[n].site_name: human-readable site name that this
        profile belongs to
    :>json str profiles[n].time_zone: the user's provided time zone as
        free text
    :>json str profiles[n].upstream_id: the user ID assigned by the
        social site
    :>json str profiles[n].url: URL endpoint for retrieving more data
        about this profile
    :>json str profiles[n].username: the current username for this
        profile
    :>json int total_count: count of all profile objects, not just those
        on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)
    current_avatar_id = self._current_avatar_subquery()

    # `== False` is deliberate: the comparison builds a SQL expression,
    # so it must not be rewritten as `is False` / `not ...`.
    query = g.db.query(Profile, Avatar)
    query = query.outerjoin(Avatar, Avatar.id == current_avatar_id)
    query = query.filter(Profile.is_stub == False)  # noqa: E712

    # Optional filters.
    site = request.args.get('site', None)
    is_interesting = request.args.get('interesting', None)
    labels = request.args.get('label', None)

    if site is not None:
        query = query.filter(Profile.site == site)

    # Accepted values for ?interesting=...; anything else is ignored.
    interesting_values = {'yes': True, 'no': False, 'unset': None}
    if is_interesting in interesting_values:
        wanted = interesting_values[is_interesting]
        query = query.filter(Profile.is_interesting == wanted)

    if labels is not None:
        # One filter per label, so the profile must have all of them.
        for label_name in labels.split(','):
            has_label = Profile.labels.any(
                Label.name == label_name.lower()
            )
            query = query.filter(has_label)

    # Count after filtering but before paging.
    total_count = query.count()

    first_row = (page - 1) * results_per_page
    query = query.order_by(Profile.last_update.desc())
    query = query.limit(results_per_page)
    query = query.offset(first_row)

    profiles = []
    for profile, avatar in query:
        entry = profile.as_dict()
        entry['url'] = url_for('ProfileView:get', id_=profile.id)

        if avatar is None:
            # No avatar on record: fall back to the bundled images.
            entry['avatar_url'] = url_for(
                'static', filename='img/default_user.png'
            )
            entry['avatar_thumb_url'] = url_for(
                'static', filename='img/default_user_thumb.png'
            )
        else:
            entry['avatar_url'] = url_for(
                'FileView:get', id_=avatar.file.id
            )
            entry['avatar_thumb_url'] = url_for(
                'FileView:get', id_=avatar.thumb_file.id
            )

        profiles.append(entry)

    return jsonify(profiles=profiles, total_count=total_count)
def get_relations(self, id_, reltype):
    '''
    Return one page of profiles that are related to the specified
    profile by `reltype`, either "friends" or "followers".

    Results are ordered by ``is_stub`` and then by username.

    **Example Response**

    .. sourcecode:: json

        {
            "relations": [
                {
                    "avatar_thumb_url": "https://quickpin/api/file/1",
                    "id": 3,
                    "url": "https://quickpin/api/profile/3",
                    "username": "rustlang"
                },
                {
                    "avatar_thumb_url": "https://quickpin/api/file/2",
                    "id": 4,
                    "url": "https://quickpin/api/profile/4",
                    "username": "ORGANICBUTCHER"
                },
                ...
            ],
            "site_name": "twitter",
            "total_count": 2,
            "username": "johndoe"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list relations: Array of related profiles.
    :>json str relations[n].avatar_thumb_url: a URL to a thumbnail of
        the user's current avatar (a default thumbnail if none)
    :>json int relations[n].id: Unique identifier for relation's
        profile.
    :>json str relations[n].url: The URL to fetch this relation's
        profile.
    :>json str relations[n].username: This relation's username.
    :>json str site_name: Site name of the requested profile.
    :>json int total_count: Total count of all related profiles, not
        just those on the current page.
    :>json str username: Username of the requested profile.

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    :status 404: user does not exist, or `reltype` is not a valid
        relation type
    '''

    page, results_per_page = get_paging_arguments(request.args)
    current_avatar_id = self._current_avatar_subquery()

    profile = g.db.query(Profile).filter(Profile.id == id_).first()
    if profile is None:
        raise NotFound('No profile with id={}.'.format(id_))

    # For each relation type: (column joined to the related profile,
    # column that must equal the requested profile's id).
    link = profile_join_self.c
    columns_by_reltype = {
        'friends': (link.friend_id, link.follower_id),
        'followers': (link.follower_id, link.friend_id),
    }
    columns = columns_by_reltype.get(reltype)
    if columns is None:
        raise NotFound('Invalid relation type "{}".'.format(reltype))

    related_column, owner_column = columns
    join_cond = (related_column == Profile.id)
    filter_cond = (owner_column == id_)

    relationship_query = g.db.query(Profile, Avatar)
    relationship_query = relationship_query.outerjoin(
        Avatar, Avatar.id == current_avatar_id
    )
    relationship_query = relationship_query.join(
        profile_join_self, join_cond
    )
    relationship_query = relationship_query.filter(filter_cond)

    # Count before paging is applied.
    total_count = relationship_query.count()

    first_row = (page - 1) * results_per_page
    relationship_query = relationship_query \
        .order_by(Profile.is_stub, Profile.username) \
        .limit(results_per_page) \
        .offset(first_row)

    relations = []
    for related, avatar in relationship_query:
        if avatar is None:
            # No avatar on record: fall back to the bundled thumbnail.
            thumb_url = url_for(
                'static', filename='img/default_user_thumb.png'
            )
        else:
            thumb_url = url_for(
                'FileView:get', id_=avatar.thumb_file.id
            )

        relations.append({
            'avatar_thumb_url': thumb_url,
            'id': related.id,
            'url': url_for('ProfileView:get', id_=related.id),
            'username': related.username,
        })

    return jsonify(
        site_name=profile.site_name(),
        relations=relations,
        total_count=total_count,
        username=profile.username
    )
def get_posts(self, id_):
    '''
    Return one page of posts by this profile, newest first.

    **Example Response**

    .. sourcecode:: json

        {
            "posts": [
                {
                    "content": "If your #Tor relay is stolen or you lose control of it, please report it so we can blacklist it: https://t.co/imVnrh1FbD @TorProject",
                    "id": 4,
                    "language": "en",
                    "last_update": "2015-08-19T18:17:07",
                    "location": [
                        null,
                        null
                    ],
                    "upstream_created": "2014-11-07T16:24:05",
                    "upstream_id": "530878388605423616"
                },
                ...
            ],
            "site_name": "twitter",
            "total_count": 1,
            "username": "johndoe"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list posts: Array of post objects.
    :>json str posts[n].content: Text content of the post.
    :>json int posts[n].id: Unique identifier for post.
    :>json str posts[n].language: Language of post, e.g. 'en'.
    :>json str posts[n].last_update: The date and time that this record
        was updated from the social media site.
    :>json list posts[n].location: 2-element array of longitude and
        latitude.
    :>json str posts[n].upstream_created: The date this was posted.
    :>json str posts[n].upstream_id: The unique identifier assigned by
        the social media site.
    :>json object posts[n].attachment: The post's first attachment
        (``mime``, ``name``, ``url``); only present when the post has
        at least one attachment.
    :>json str username: Username of the requested profile
    :>json str site_name: Site name associated with the requested
        profile
    :>json int total_count: Total count of all posts by this profile,
        not just those displayed on this page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    :status 404: user does not exist
    '''

    page, results_per_page = get_paging_arguments(request.args)

    profile = g.db.query(Profile).filter(Profile.id == id_).first()
    if profile is None:
        raise NotFound('No profile exists for id={}.'.format(id_))

    post_query = g.db.query(Post).filter(Post.author_id == id_)
    # Count before paging is applied.
    total_count = post_query.count()

    first_row = (page - 1) * results_per_page
    post_query = post_query.order_by(Post.upstream_created.desc())
    post_query = post_query.limit(results_per_page)
    post_query = post_query.offset(first_row)

    def _serialize(post):
        entry = {
            'content': post.content,
            'id': post.id,
            'language': post.language,
            'last_update': isodate(post.last_update),
            'location': (post.longitude, post.latitude),
            'upstream_created': isodate(post.upstream_created),
            'upstream_id': post.upstream_id,
        }

        # Only the first attachment is exposed.
        if len(post.attachments) > 0:
            first = post.attachments[0]
            entry['attachment'] = {
                'mime': first.mime,
                'name': first.name,
                'url': url_for('FileView:get', id_=first.id)
            }

        return entry

    posts = [_serialize(post) for post in post_query]

    return jsonify(
        posts=posts,
        site_name=profile.site_name(),
        total_count=total_count,
        username=profile.username
    )
def query(self):
    '''
    Run a search query.

    The response contains search hits that may be of different types,
    e.g. post hits, site hits, user hits, etc. The caller should look at
    the 'type' field of each hit to determine how to handle it.

    Rather than list all of the possible return values in the
    documentation below, refer to the example that shows complete
    examples for every type of document that can be returned by this
    API.

    Note that all text returned by the search engine is **plain text**,
    meaning no inline markup.

    The following are highlighted:

    * Profile Description
    * Profile Name
    * Site Name

    Highlighted fields are returned in this format:

    .. sourcecode:: json

        {
            "highlighted": [false, true, false],
            "text": ["the ", "quick", " brown fox"]
        }

    The text is returned as an array of strings. The original text can
    be constructed simply by joining this array together. A parallel
    array of booleans is also included. For each index `i`, the text in
    `text[i]` should be highlighted if and only if `highlighted[i]` is
    `true`. In the example above, the word "quick" should be highlighted
    by the client.

    Facets can be selected by encoding a list of facet field/value pairs
    as a list delimited by null bytes and passing it in the `facets`
    query parameter. Note that the current implementation only supports
    one value per facet field, although we intend to support multiple
    facets per field in the future. (If you do specify multiple values,
    then the behavior is undefined.)

    **Example Response**

    .. sourcecode:: json

        {
            "facets": {
                "join_date_tdt": [
                    ["2014-03-01T00:00:00Z", 51],
                    ["2014-04-01T00:00:00Z", 1],
                    ["2014-05-01T00:00:00Z", 10],
                    ["2014-06-01T00:00:00Z", 7],
                    ["2014-08-01T00:00:00Z", 4],
                    ...
                ],
                "site_name_txt_en": [
                    ["twitter", 181],
                    ["instagram", 90],
                    ...
                ],
                "username_s": [
                    ["johndoe", 46],
                    ["janedoe", 30],
                    ["maurice.moss", 17],
                    ["jen.barber", 15],
                    ...
                ],
                ...
            },
            "results": [
                {
                    "description": {
                        "highlighted": [false, true, false],
                        "text": ["My ", "unique", " description"]
                    },
                    "follower_count": 70,
                    "friend_count": 213,
                    "id": "Profile:1",
                    "post_count": 1653,
                    "site": {
                        "highlighted": [false],
                        "text": ["twitter"]
                    },
                    "username": {
                        "highlighted": [false],
                        "text": ["mehaase"]
                    }
                },
                ...
            ],
            "total_count": 65
        }

    :<header Content-Type: application/json
    :query facets: a list of facet field names and values, delimited by
        null bytes (optional)
    :query page: the page number to display (default: 1)
    :query sort: a field name to sort by, optionally prefixed with a "-"
        to indicate descending sort, e.g. "post_date" sorts ascending by
        the post date, while "-username" sorts descending by username
    :query query: search query
    :query rpp: the number of results per page (default: 10)
    :query type: type of document to match, e.g. Profile, Post, etc.
        (optional)

    :>header Content-Type: application/json
    :>json dict facets: dictionary of facet names and facet
        values/counts
    :>json list results: array of search hits, each of which has a
        'type' key that indicates what fields it will contain
    :>json int total_count: total number of documents that match the
        query, not just those included in the response

    :status 200: ok
    '''

    args = request.args
    query = args.get('query')
    type_ = args.get('type')
    sort = args.get('sort')
    facet_args = args.get('facets')
    page, results_per_page = get_paging_arguments(args)
    start_row = (page - 1) * results_per_page

    # One formatter per document type, keyed by the doc's `type_s` field.
    formatters = {
        'Post': self._format_post,
        'Profile': self._format_profile,
    }

    highlight_fields = [
        'content_txt_en',
        'description_txt_en',
        'location_txt_en',
        'name_txt_en',
        'site_name_txt_en',
        'username_s',
    ]

    # The same token marks both the start and the end of a highlight.
    highlight_options = {
        'snippets': 1,
        'simple.pre': SearchView.HIGHLIGHT_TOKEN,
        'simple.post': SearchView.HIGHLIGHT_TOKEN,
    }

    # These are user-friendly(er) names for the cryptic field names. Solr
    # allows a single alias to refer to multiple fields, so the fields
    # are specified as a list.
    aliases = {
        'description': ['description_txt_en'],
        'location': ['location_txt_en'],
        'name': ['name_txt_en', 'username_s'],
        'post': ['content_txt_en'],
        'site': ['site_name_txt_en'],
    }

    # Boost fields. E.g. a match to a username ranks a result higher
    # than a match to the user's description.
    boosts = {
        'name_txt_en': 3,
        'username_s': 3,
        'description_txt_en': 2,
        'location_txt_en': 2,
        'content_txt_en': 1,
        'site_name_txt_en': 1,
        'time_zone_txt_en': 1,
    }

    search = g.solr.query(DismaxString(query))
    search = search.alt_parser('edismax', f=aliases, qf=boosts)
    search = search.highlight(highlight_fields, **highlight_options)
    search = search.paginate(start=start_row, rows=results_per_page)
    search = self._add_facets(search, facet_args)

    # Optional restrictions, applied only when the caller supplied them.
    if type_ is not None:
        search = search.filter(type_s=type_)

    if sort is not None:
        search = search.sort_by(sort)

    response = search.execute()
    highlights = response.highlighting

    results = [
        formatters[doc['type_s']](doc, highlights)
        for doc in response
    ]

    facets = {}

    # Field facets: sort the entries case-insensitively by first element.
    for field, entries in response.facet_counts.facet_fields.items():
        facets[field] = sorted(entries, key=lambda f: f[0].lower())

    # Range facets: turn the counts into (value, count) pairs sorted by
    # value.
    for field, ranges in response.facet_counts.facet_ranges.items():
        pairs = dict(ranges['counts']).items()
        facets[field] = sorted(pairs, key=lambda f: f[0])

    return jsonify(
        results=results,
        facets=facets,
        total_count=response.result.numFound
    )
def index(self):
    '''
    Return one page of profile notes, sorted by category.

    **Example Response**

    .. sourcecode:: json

        {
            "notes": [
                {
                    "id": 1,
                    "category": "user annotation",
                    "body": "This is an interesting) profile.",
                    "profile_id": 1,
                    "created_at": "2015-12-15T10:41:55.792492",
                    "url": "https://quickpin/api/note/1"
                },
                ...
            ],
            "total_count": 1
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)
    :query profile_id: profile id to filter by

    :>header Content-Type: application/json
    :>json list notes: list of profile note objects
    :>json int notes[n].id: unique identifier for the note
    :>json str notes[n].category: the user-defined category of this note
    :>json str notes[n].body: the note
    :>json int notes[n].profile_id: the unique id of the profile this
        note belongs to
    :>json str notes[n].created_at: the iso-formatted creation time of
        the note
    :>json str notes[n].url: API endpoint URL for this note object
    :>json int total_count: count of all matching notes, not just those
        on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    note_query = g.db.query(ProfileNote)

    # Optional filter: restrict to a single profile's notes.
    profile_id = request.args.get('profile_id', None)
    if profile_id is not None:
        note_query = note_query.filter(
            ProfileNote.profile_id == profile_id
        )

    # Count after filtering but before limit/offset narrow the result.
    total_count = note_query.count()

    first_row = (page - 1) * results_per_page
    note_query = note_query.order_by(ProfileNote.category.asc())
    note_query = note_query.limit(results_per_page)
    note_query = note_query.offset(first_row)

    def _serialize(note):
        # Attach the API endpoint URL to the note's own fields.
        entry = note.as_dict()
        entry['url'] = url_for('ProfileNoteView:get', id_=note.id)
        return entry

    notes = [_serialize(note) for note in note_query]

    return jsonify(notes=notes, total_count=total_count)
def get_posts(self, id_):
    '''
    Return a paginated array of posts by this profile, ordered from the
    most recently posted to the oldest.

    **Example Response**

    .. sourcecode:: json

        {
            "posts": [
                {
                    "content": "If your #Tor relay is stolen or you lose control of it, please report it so we can blacklist it: https://t.co/imVnrh1FbD @TorProject",
                    "id": 4,
                    "language": "en",
                    "last_update": "2015-08-19T18:17:07",
                    "location": [
                        null,
                        null
                    ],
                    "upstream_created": "2014-11-07T16:24:05",
                    "upstream_id": "530878388605423616"
                },
                ...
            ],
            "site_name": "twitter",
            "total_count": 1,
            "username": "johndoe"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list posts: List of post objects.
    :>json str posts[n].content: Text content of the post.
    :>json int posts[n].id: Unique identifier for post.
    :>json str posts[n].language: Language of post, e.g. 'en'.
    :>json str posts[n].last_update: The date and time that this record
        was updated from the social media site.
    :>json list posts[n].location: 2-element array of longitude and
        latitude.
    :>json str posts[n].upstream_created: The date this was posted.
    :>json str posts[n].upstream_id: The unique identifier assigned by
        the social media site.
    :>json object posts[n].attachment: The post's first attachment
        (``mime``, ``name``, ``url``); only present when the post has
        at least one attachment.
    :>json str username: Username of the requested profile
    :>json str site_name: Site name associated with the requested
        profile
    :>json int total_count: Total count of all posts by this profile,
        not just those displayed on this page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    :status 404: user does not exist
    '''

    page, results_per_page = get_paging_arguments(request.args)

    profile = g.db.query(Profile).filter(Profile.id == id_).first()
    if profile is None:
        raise NotFound('No profile exists for id={}.'.format(id_))

    authored = g.db.query(Post).filter(Post.author_id == id_)
    # Count the unpaged query so the total covers every page.
    total_count = authored.count()

    skip = (page - 1) * results_per_page
    current_page = authored.order_by(Post.upstream_created.desc()) \
                           .limit(results_per_page) \
                           .offset(skip)

    posts = []
    for post in current_page:
        item = {
            'content': post.content,
            'id': post.id,
            'language': post.language,
            'last_update': isodate(post.last_update),
            'location': (post.longitude, post.latitude),
            'upstream_created': isodate(post.upstream_created),
            'upstream_id': post.upstream_id,
        }

        # Only the first attachment is exposed.
        if len(post.attachments) > 0:
            attachment = post.attachments[0]
            item['attachment'] = {
                'mime': attachment.mime,
                'name': attachment.name,
                'url': url_for('FileView:get', id_=attachment.id)
            }

        posts.append(item)

    return jsonify(
        posts=posts,
        site_name=profile.site_name(),
        total_count=total_count,
        username=profile.username
    )
def get_notes(self, id_):
    '''
    Return one page of the notes attached to this profile.

    **Example Response**

    .. sourcecode:: json

        {
            "notes": [
                {
                    "id": 1,
                    "category": "user annotation",
                    "body": "This is an interesting) profile.",
                    "created_at": "2015-12-15T10:41:55.792492",
                    "url": "https://quickpin/api/note/1"
                },
                ...
            ],
            "total_count": 1,
            "username": "hyperiongray",
            "site_name": "twitter"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list notes: list of profile note objects
    :>json int notes[n].id: unique identifier for the note
    :>json str notes[n].category: the user-defined category of this note
    :>json str notes[n].body: the note
    :>json str notes[n].created_at: the iso-formatted creation time of
        the note
    :>json str notes[n].url: API endpoint URL for this note object
    :>json int total_count: the total number of notes for this profile,
        not just those on the current page
    :>json str username: the username of this profile
    :>json str site_name: the name of the social site the profile
        belongs to

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    :status 404: user does not exist
    '''

    # Parse paging arguments
    page, results_per_page = get_paging_arguments(request.args)

    profile = g.db.query(Profile).filter(Profile.id == id_).first()

    if profile is None:
        raise NotFound('No profile exists for id={}.'.format(id_))

    profile_notes = profile.notes

    # The total is the size of the full collection, not of the page.
    total_count = len(profile_notes)

    # Paginate first, so only notes on the requested page are
    # serialized and given a URL.
    start = (page - 1) * results_per_page
    end = start + results_per_page

    notes = list()

    for note in profile_notes[start:end]:
        data = note.as_dict()
        data['url'] = url_for('ProfileNoteView:get', id_=note.id)
        notes.append(data)

    return jsonify(
        notes=notes,
        total_count=total_count,
        site_name=profile.site_name(),
        username=profile.username,
    )
def get_by_username(self, username):
    '''
    Return latest results for `username`.

    Only results owned by the authenticated user are considered. The
    query is ordered by site name and then by creation time (newest
    first) and made distinct on site name.

    **Example Response**

    .. sourcecode:: json

        {
            "results": [
                {
                    "id": 1,
                    "tracker_id": "2298d96a-653d-42f2-b6d3-73ff337d51ce",
                    "user_id": 1,
                    "site_id": 100,
                    "site_name": "Acme",
                    "site_url": "https://www.acme.com/%s",
                    "username": "bob",
                    "created_at": "2017-01-30T16:22:19.826841",
                    "status": "Found",
                    "html": "<html><head><title....",
                    "number": "5",
                    "total": "166",
                    "image_file_id": "1234",
                    "error": ""
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list results: a list of result objects
    :>json int results[n].id: the unique id of this result
    :>json str results[n].tracker_id: the tracker_id of this result
    :>json int results[n].user_id: the ID of the owner of this result
    :>json int results[n].site_id: the ID of the result site
    :>json str results[n].site_name: the site name of this result
    :>json str results[n].site_url: the site URL of this result
    :>json str results[n].username: the username for this result
    :>json str results[n].created_at: the UTC time of the result in
        ISO-8601 format string
    :>json str results[n].status: result status (Found, Not Found,
        Error)
    :>json str results[n].image_file_id: the file ID of the result
        screenshot
    :>json str results[n].error: result error message
    :>json int total_count: count of all matching results, not just
        those on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    result_query = g.db.query(Result)
    result_query = result_query.filter(Result.username == username)
    result_query = result_query.filter(Result.user_id == g.user.id)
    # Newest result first within each site; the distinct-on-site_name
    # clause then collapses the rows per site.
    result_query = result_query.order_by(
        Result.site_name, Result.created_at.desc()
    )
    result_query = result_query.distinct(Result.site_name)

    # Count before paging is applied.
    total_count = result_query.count()

    first_row = (page - 1) * results_per_page
    result_query = result_query.limit(results_per_page)
    result_query = result_query.offset(first_row)

    results = [result.as_dict() for result in result_query]

    return jsonify(results=results, total_count=total_count)
def failed_tasks(self):
    '''
    Get data about failed tasks.

    **Example Response**

    .. sourcecode:: json

        {
            "failed": [
                {
                    "description": "Doing important stuff...",
                    "exception": "Traceback (most recent call...",
                    "function": "worker.scrape.check_site(1)",
                    "id": "dea6bd20-4f8e-44d2-bee1-b5db78eb4cc8",
                    "type": "posts",
                    "original_queue": "scrape"
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list failed: list of failed tasks
    :>json str failed[n]["description"]: description of the task
        (optional)
    :>json str failed[n]["exception"]: stack trace of the exception, or
        "Unknown error" if the job recorded none
    :>json str failed[n]["function"]: the function call that was
        originally queued (null if the job cannot be unpickled)
    :>json str failed[n]["id"]: unique identifier
    :>json str failed[n]["type"]: the type of task performed
    :>json str failed[n]["original_queue"]: the queue that this task was
        initially placed on before it failed
    :>json int total_count: total number of failed tasks

    :status 200: ok
    :status 401: authentication required
    :status 403: you must be an administrator
    '''

    page, results_per_page = get_paging_arguments(request.args)
    tasks = list()

    with rq.Connection(g.redis):
        for failed_task in rq.get_failed_queue().jobs:
            # Bind these before the try block so the UnpickleError handler
            # never sees an unbound name or a value left over from the
            # previous job.
            desc = None
            type_ = None

            # Fall back to a placeholder when the job recorded no
            # traceback, so clients always get a string.
            exception_info = failed_task.exc_info
            if exception_info is None:
                exception_info = 'Unknown error'

            try:
                desc = failed_task.meta.get('description')
                type_ = failed_task.meta.get('type')

                tasks.append({
                    'description': desc,
                    'function': failed_task.get_call_string(),
                    'exception': exception_info,
                    'id': failed_task.id,
                    'type': type_,
                    'original_queue': failed_task.origin,
                })
            except UnpickleError:
                # The job payload could not be deserialized; report what
                # is still available without the call string.
                tasks.append({
                    'description': 'Error: this job cannot be unpickled.',
                    'function': None,
                    'exception': exception_info,
                    'id': failed_task.id,
                    'type': type_,
                    'original_queue': failed_task.origin,
                })

    # Every job contributes exactly one entry, so the list length is the
    # total number of failed tasks.
    total_count = len(tasks)

    start = (page - 1) * results_per_page
    end = start + results_per_page

    return jsonify(failed=tasks[start:end], total_count=total_count)
def index(self):
    '''
    Return an array of data about application users.

    **Example Response**

    .. sourcecode:: json

        {
            "total_count": 2,
            "users": [
                {
                    "agency": "Department Of Justice",
                    "created": "2015-05-05T14:30:09.676268",
                    "email": "*****@*****.**",
                    "id": 2029,
                    "is_admin": true,
                    "location": "Washington, DC",
                    "modified": "2015-05-05T14:30:09.676294",
                    "name": "Lt. John Doe",
                    "phone": "+12025551234",
                    "thumb": "iVBORw0KGgo…",
                    "url": "https://quickpin/api/user/2029"
                },
                ...
            ]
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json int total_count: the total number of application users (not
        just the ones on the current page)
    :>json list users: list of users
    :>json str users[n].agency: the name of the organization/agency that
        this person is affiliated with (default: null)
    :>json str users[n].created: record creation timestamp in ISO-8601
        format
    :>json str users[n].email: e-mail address
    :>json bool users[n].is_admin: true if this user has admin
        privileges, false otherwise
    :>json str users[n].location: location name, e.g. city or state
        (default: null)
    :>json str users[n].modified: record modification timestamp in
        ISO-8601 format
    :>json str users[n].name: user's full name, optionally including
        title or other salutation information (default: null)
    :>json str users[n].phone: phone number
    :>json str users[n].thumb: PNG thumbnail for this user, base64
        encoded
    :>json str users[n].url: url to view data about this user

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    # Count all users, independent of the page being requested.
    total_count = g.db.query(func.count(User.id)).scalar()

    first_row = (page - 1) * results_per_page
    page_query = g.db.query(User)
    page_query = page_query.order_by(User.email)
    page_query = page_query.limit(results_per_page).offset(first_row)

    users = list()

    for user in page_query:
        users.append(self._user_dict(user))

    return jsonify(users=users, total_count=total_count)
def query(self):
    '''
    Run a search query.

    The response contains search hits that may be of different types, e.g.
    post hits, site hits, user hits, etc. The caller should look at the
    'type' field of each hit to determine how to handle it.

    Rather than list all of the possible return values in the
    documentation below, refer to the example that shows complete examples
    for every type of document that can be returned by this API.

    Note that all text returned by the search engine is **plain text**,
    meaning no inline markup.

    The following are highlighted:

    * Profile Description
    * Profile Name
    * Site Name

    Highlighted fields are returned in this format:

    .. sourcecode:: json

        {
            highlighted: [false, true, false],
            text: ["the ", "quick", " brown fox"]
        }

    The text is returned as an array of strings. The original text can be
    constructed simply by joining this array together. A parallel array of
    booleans is also included. For each index `i`, the text in `text[i]`
    should be highlighted if and only if `highlighted[i]` is `true`. In
    the example above, the word "quick" should be highlighted by the
    client.

    Facets can be selected by encoding a list of facet field/value pairs
    as a list delimited by null bytes and passing it in the `facets` query
    parameter. Note that the current implementation only supports one
    value per facet field, although we intend to support multiple facets
    per field in the future. (If you do specify multiple values, then the
    behavior is undefined.)

    **Example Response**

    .. sourcecode:: json

        {
            "facets": {
                "join_date_tdt": [
                    ["2014-03-01T00:00:00Z", 51],
                    ["2014-04-01T00:00:00Z", 1],
                    ["2014-05-01T00:00:00Z", 10],
                    ["2014-06-01T00:00:00Z", 7],
                    ["2014-08-01T00:00:00Z", 4],
                    ...
                ],
                "site_name_txt_en": [
                    ["twitter", 181],
                    ["instagram", 90],
                    ...
                ],
                "username_s": [
                    ["johndoe", 46],
                    ["janedoe", 30],
                    ["maurice.moss", 17],
                    ["jen.barber", 15],
                    ...
                ],
                ...
            },
            "results": [
                {
                    "description": {
                        "highlighted": [false, true, false],
                        "text": ["My ", "unique", " description"]
                    },
                    "follower_count": 70,
                    "friend_count": 213,
                    "id": "Profile:1",
                    "post_count": 1653,
                    "site": {
                        "highlighted": [false],
                        "text": ["twitter"]
                    },
                    "username": {
                        "highlighted": [false],
                        "text": ["mehaase"]
                    }
                },
                ...
            ],
            "total_count": 65
        }

    :<header Content-Type: application/json
    :query facets: a list of facet field names and values, delimited by
        null bytes (optional)
    :query page: the page number to display (default: 1)
    :query sort: a field name to sort by, optionally prefixed with a "-"
        to indicate descending sort, e.g. "post_date" sorts ascending by
        the post date, while "-username" sorts descending by username
    :query query: search query
    :query rpp: the number of results per page (default: 10)
    :query type: type of document to match, e.g. Profile, Post, etc.
        (optional)

    :>header Content-Type: application/json
    :>json dict facets: dictionary of facet names and facet values/counts
    :>json list results: array of search hits, each of which has a 'type'
        key that indicates what fields it will contain
    :>json int total_count: total number of documents that match the
        query, not just those included in the response

    :status 200: ok
    '''

    # One formatter per document type, keyed by the document's `type_s`.
    formatters = {
        'Post': self._format_post,
        'Profile': self._format_profile,
    }

    args = request.args
    query = args.get('query')
    type_ = args.get('type')
    sort = args.get('sort')
    facet_args = args.get('facets')
    page, results_per_page = get_paging_arguments(args)
    start_row = (page - 1) * results_per_page

    highlight_fields = [
        'content_txt_en',
        'description_txt_en',
        'location_txt_en',
        'name_txt_en',
        'site_name_txt_en',
        'username_s',
    ]

    # The same token marks both the start and the end of a highlight.
    highlight_options = {
        'snippets': 1,
        'simple.pre': SearchView.HIGHLIGHT_TOKEN,
        'simple.post': SearchView.HIGHLIGHT_TOKEN,
    }

    # These are user-friendly(er) names for the cryptic field names. Solr
    # allows a single alias to refer to multiple fields, so the fields are
    # specified as a list.
    aliases = {
        'description': ['description_txt_en'],
        'location': ['location_txt_en'],
        'name': ['name_txt_en', 'username_s'],
        'post': ['content_txt_en'],
        'site': ['site_name_txt_en'],
        'upstream_id': ['upstream_id_s'],
        'stub': ['is_stub_b'],
    }

    # Boost fields. E.g. a match to a username ranks a result higher
    # than a match to the user's description.
    boosts = {
        'upstream_id_s': 4,
        'name_txt_en': 3,
        'username_s': 3,
        'description_txt_en': 2,
        'location_txt_en': 2,
        'content_txt_en': 1,
        'site_name_txt_en': 1,
        'time_zone_txt_en': 1,
    }

    # Build the search step by step: parser, highlighting, paging, then
    # the optional facets, type filter and sort order.
    search = g.solr.query(DismaxString(query))
    search = search.alt_parser('edismax', f=aliases, qf=boosts)
    search = search.highlight(highlight_fields, **highlight_options)
    search = search.paginate(start=start_row, rows=results_per_page)
    search = self._add_facets(search, facet_args)

    if type_ is not None:
        search = search.filter(type_s=type_)

    if sort is not None:
        search = search.sort_by(sort)

    response = search.execute()
    highlights = response.highlighting

    results = [
        formatters[doc['type_s']](doc, highlights)
        for doc in response
    ]

    facets = dict()
    facet_counts = response.facet_counts

    # Field facets are sorted case-insensitively by facet value.
    for field, field_facets in facet_counts.facet_fields.items():
        facets[field] = sorted(
            field_facets,
            key=lambda facet: facet[0].lower()
        )

    # Range facets are sorted by their range key.
    for field, field_facets in facet_counts.facet_ranges.items():
        range_counts = dict(field_facets['counts'])
        facets[field] = sorted(
            range_counts.items(),
            key=lambda pair: pair[0]
        )

    return jsonify(
        results=results,
        facets=facets,
        total_count=response.result.numFound
    )
def index(self):
    '''
    Return an array of data about application users. For admin only.

    **Example Response**

    .. sourcecode:: json

        {
            "total_count": 2,
            "users": [
                {
                    "agency": "Department Of Justice",
                    "created": "2015-05-05T14:30:09.676268",
                    "email": "*****@*****.**",
                    "id": 2029,
                    "is_admin": true,
                    "location": "Washington, DC",
                    "modified": "2015-05-05T14:30:09.676294",
                    "name": "Lt. John Doe",
                    "phone": "+12025551234",
                    "thumb": "iVBORw0KGgo…",
                    "credits": 200,
                    "url": "https://quickpin/api/user/2029"
                },
                ...
            ]
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json int total_count: the total number of application users (not
        just the ones on the current page)
    :>json list users: list of users
    :>json str users[n].agency: the name of the organization/agency that
        this person is affiliated with (default: null)
    :>json str users[n].created: record creation timestamp in ISO-8601
        format
    :>json str users[n].email: e-mail address
    :>json bool users[n].is_admin: true if this user has admin
        privileges, false otherwise
    :>json str users[n].location: location name, e.g. city or state
        (default: null)
    :>json str users[n].modified: record modification timestamp in
        ISO-8601 format
    :>json str users[n].name: user's full name, optionally including
        title or other salutation information (default: null)
    :>json str users[n].phone: phone number
    :>json str users[n].thumb: PNG thumbnail for this user, base64
        encoded
    :>json int users[n].credits: the user credits
    :>json str users[n].url: url to view data about this user

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    # The total is counted over all users, not just the requested page.
    total_count = g.db.query(func.count(User.id)).scalar()

    skip = (page - 1) * results_per_page
    users_by_email = g.db.query(User).order_by(User.email)
    current_page = users_by_email.limit(results_per_page).offset(skip)

    return jsonify(
        users=list(map(self._user_dict, current_page)),
        total_count=total_count
    )
def index(self):
    '''
    Return an array of data about sites.

    **Example Response**

    .. sourcecode:: json

        {
            "sites": [
                {
                    "id": 1,
                    "name": "Blinklist",
                    "search_text": "BlinkList Page.</title>",
                    "status_code": 200,
                    "url": "https://app.blinklist.com/users/%s",
                    "test_username_pos": "john",
                    "test_username_neg": "dPGMFrf72SaS",
                    "test_status": "f",
                    "tested_at": "2016-01-01T00:00:00.000000+00:00",
                    "headers": {"referer": "http://www.google.com"},
                    "censor_images": false,
                    "wait_time": 5,
                    "use_proxy": false
                },
                ...
            ],
            "total_count": 5,
            "total_valid_count": 5,
            "total_invalid_count": 0,
            "total_tested_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list sites: a list of site objects
    :>json int sites[n].id: the unique id of this site
    :>json str sites[n].name: the name of this site
    :>json str sites[n].search_text: the text pattern that should exist
        in the body or headers of a successful search result page
    :>json str sites[n].status_code: the server response code that should
        be returned with a successful search result
    :>json str sites[n].url: the url of this site where username search
        can be performed
    :>json str sites[n].test_status: results of username test
    :>json str sites[n].tested_at: timestamp of last test
    :>json str sites[n].test_username_pos: the username that exists on
        the site (used for testing)
    :>json str sites[n].test_username_neg: the username that does not
        exist on the site (used for testing)
    :>json array sites[n].headers: the custom headers
    :>json bool sites[n].censor_images: whether to censor images from
        this profile
    :>json int sites[n].wait_time: time (in seconds) to wait for updates
        after page is loaded
    :>json bool sites[n].use_proxy: whether to proxy requests for this
        profile URL
    :>json int total_count: number of sites, not just those on the
        current page
    :>json int total_valid_count: number of sites whose `valid` flag is
        true
    :>json int total_invalid_count: number of sites whose `valid` flag is
        false
    :>json int total_tested_count: number of sites with a `tested_at`
        timestamp

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)
    all_sites = g.db.query(Site)

    # Summary counts are taken over every site, before paging is applied.
    # The comparisons against True/False/None build SQL expressions, so
    # they must not be rewritten with `is`.
    counts = {
        'total_count': all_sites.count(),
        'total_valid_count':
            all_sites.filter(Site.valid == True).count(),  # noqa
        'total_invalid_count':
            all_sites.filter(Site.valid == False).count(),  # noqa
        'total_tested_count':
            all_sites.filter(Site.tested_at != None).count(),  # noqa
    }

    first_row = (page - 1) * results_per_page
    page_query = all_sites.order_by(Site.name.asc()) \
                          .limit(results_per_page) \
                          .offset(first_row)

    sites = [site.as_dict() for site in page_query]

    return jsonify(
        sites=sites,
        total_count=counts['total_count'],
        total_valid_count=counts['total_valid_count'],
        total_invalid_count=counts['total_invalid_count'],
        total_tested_count=counts['total_tested_count'],
    )
def get_relations(self, id_, reltype):
    '''
    Return an array of profiles that are related to the specified profile
    by `reltype`, either "friends" or "followers".

    **Example Response**

    .. sourcecode:: json

        {
            "relations": [
                {
                    "avatar_thumb_url": "https://quickpin/api/file/1",
                    "id": 3,
                    "url": "https://quickpin/api/profile/3",
                    "username": "******"
                },
                {
                    "avatar_thumb_url": "https://quickpin/api/file/2",
                    "id": 4,
                    "url": "https://quickpin/api/profile/4",
                    "username": "******"
                },
                ...
            ],
            "site_name": "...",
            "total_count": 2,
            "username": "******"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list relations: list of related profiles.
    :>json str relations[n].avatar_thumb_url: a URL to a thumbnail of the
        user's current avatar
    :>json int relations[n].id: Unique identifier for relation's profile.
    :>json str relations[n].url: The URL to fetch this relation's profile.
    :>json str relations[n].username: This relation's username.
    :>json site_name: the value of `site_name()` for the specified
        profile
    :>json int total_count: Total count of all related profiles, not just
        those on the current page.
    :>json username: the username of the specified profile

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    :status 404: user does not exist
    '''

    page, results_per_page = get_paging_arguments(request.args)

    profile = g.db.query(Profile).filter(Profile.id == id_).first()

    if profile is None:
        raise NotFound('No profile with id={}.'.format(id_))

    if reltype not in ('friends', 'followers'):
        raise NotFound('Invalid relation type "{}".'.format(reltype))

    # Pick which side of the self-join is the related profile and which
    # side is the profile that was asked about.
    link = profile_join_self.c

    if reltype == 'friends':
        join_cond = (link.friend_id == Profile.id)
        filter_cond = (link.follower_id == id_)
    else:
        join_cond = (link.follower_id == Profile.id)
        filter_cond = (link.friend_id == id_)

    related = g.db.query(Profile, Avatar)
    related = related.outerjoin(Profile.current_avatar)
    related = related.join(profile_join_self, join_cond)
    related = related.filter(filter_cond)

    # Count before ordering and paging so the total covers all relations.
    total_count = related.count()

    first_row = (page - 1) * results_per_page
    page_query = related.order_by(Profile.is_stub, Profile.username) \
                        .limit(results_per_page) \
                        .offset(first_row)

    relations = list()

    for relation, avatar in page_query:
        # Profiles without a current avatar get the static default
        # thumbnail.
        if avatar is None:
            thumb_url = url_for(
                'static',
                filename='img/default_user_thumb.png'
            )
        else:
            thumb_url = url_for('FileView:get', id_=avatar.thumb_file.id)

        relations.append({
            'avatar_thumb_url': thumb_url,
            'id': relation.id,
            'url': url_for('ProfileView:get', id_=relation.id),
            'username': relation.username,
        })

    return jsonify(
        site_name=profile.site_name(),
        relations=relations,
        total_count=total_count,
        username=profile.username
    )
def get_by_tracker_id(self, tracker_id):
    '''
    Return results identified by `tracker_id`.

    **Example Response**

    .. sourcecode:: json

        {
            "results": [
                {
                    "id": 1,
                    "tracker_id": "2298d96a-653d-42f2-b6d3-73ff337d51ce",
                    "site_name": "Acme",
                    "site_url": "https://www.acme.com/%s",
                    "status": "Found",
                    "number": "5",
                    "total": "166",
                    "image_file_id": "1234",
                    "error": ""
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)

    :>header Content-Type: application/json
    :>json list results: a list of result objects
    :>json int results[n].id: the unique id of this result
    :>json str results[n].tracker_id: the tracker_id of this result
    :>json str results[n].site_name: the site name of this result
    :>json str results[n].site_url: the site URL of this result
    :>json str results[n].status: result status
        (Found, Not Found, Error)
    :>json str results[n].image_file_id: the file ID of the result
        screenshot
    :>json str results[n].error: result error message
    :>json int total_count: number of matching results, not just those
        on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    # NOTE(review): this lookup is not restricted to the requesting
    # user's results; confirm that is intentional.
    query = g.db.query(Result).filter(Result.tracker_id == tracker_id)

    # Count before paging so the total covers every page.
    total_count = query.count()

    # LIMIT/OFFSET without ORDER BY gives no guarantee about which rows
    # land on which page, so order by the primary key to keep pages
    # stable across requests.
    query = query.order_by(Result.id) \
                 .limit(results_per_page) \
                 .offset((page - 1) * results_per_page)

    results = [result.as_dict() for result in query]

    return jsonify(results=results, total_count=total_count)
def index(self):
    '''
    Return an array of data about profiles.

    **Example Response**

    .. sourcecode:: json

        {
            "profiles": [
                {
                    "avatar_url": "https://quickpin/api/file/5",
                    "avatar_thumb_url": "https://quickpin/api/file/6",
                    "description": "A human being.",
                    "follower_count": 12490,
                    "friend_count": 294,
                    "id": 5,
                    "is_stub": false,
                    "is_interesting": false,
                    "join_date": "2010-01-30T18:21:35",
                    "last_update": "2015-08-18T10:51:16",
                    "location": "Washington, DC",
                    "name": "John Q. Doe",
                    "post_count": 230,
                    "private": false,
                    "score": "-2.0621606863",
                    "site": "twitter",
                    "time_zone": "Central Time (US & Canada)",
                    "upstream_id": "123456",
                    "url": "https://quickpin/api/profile/5",
                    "username": "******"
                },
                ...
            ],
            "total_count": 5
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :query page: the page number to display (default: 1)
    :query rpp: the number of results per page (default: 10)
    :query interesting: filter by whether profile is set as interesting
        ("yes", "no" or "unset")
    :query label: comma separated list of labels to filter by
    :query site: name of site to filter by
    :query stub: filter by whether profile is stub ("1" or "0")

    :>header Content-Type: application/json
    :>json list profiles: a list of profile objects
    :>json str profiles[n].avatar_url: a URL to the user's current avatar
        image
    :>json str profiles[n].avatar_thumb_url: a URL to a 32x32px thumbnail
        of the user's current avatar image
    :>json str profiles[n].description: profile description
    :>json int profiles[n].follower_count: number of followers
    :>json int profiles[n].friend_count: number of friends (a.k.a.
        followees)
    :>json int profiles[n].id: unique identifier for profile
    :>json bool profiles[n].is_stub: indicates that this is a stub
        profile, e.g. related to another profile but has not been fully
        imported
    :>json bool profiles[n].is_interesting: indicates whether this
        profile has been tagged as interesting. The value can be null.
    :>json str profiles[n].join_date: the date this profile joined its
        social network (ISO-8601)
    :>json str profiles[n].last_update: the last time that information
        about this profile was retrieved from the social media site
        (ISO-8601)
    :>json str profiles[n].location: geographic location provided by the
        user, as free text
    :>json str profiles[n].name: the full name provided by this user
    :>json int profiles[n].post_count: the number of posts made by this
        profile
    :>json bool profiles[n].private: true if this is a private account
        (i.e. not world-readable)
    :>json str profiles[n].score: user-defined score for this profile
    :>json str profiles[n].site: machine-readable site name that this
        profile belongs to
    :>json str profiles[n].site_name: human-readable site name that this
        profile belongs to
    :>json str profiles[n].time_zone: the user's provided time zone as
        free text
    :>json str profiles[n].upstream_id: the user ID assigned by the
        social site
    :>json str profiles[n].url: URL endpoint for retrieving more data
        about this profile
    :>json str profiles[n].username: the current username for this
        profile
    :>json int total_count: count of all profile objects, not just those
        on the current page

    :status 200: ok
    :status 400: invalid argument[s]
    :status 401: authentication required
    '''

    page, results_per_page = get_paging_arguments(request.args)

    allowed_sort_fields = {
        'score': Profile.score,
        'updated': Profile.last_update,
        'added': Profile.id
    }
    sort_arguments = get_sort_arguments(request.args,
                                        '-added',
                                        allowed_sort_fields)

    query = g.db.query(Profile, Avatar).outerjoin(Profile.current_avatar)

    # Parse filter arguments.
    args = request.args
    is_stub = args.get('stub', None)
    site = args.get('site', None)
    is_interesting = args.get('interesting', None)
    labels = args.get('label', None)

    # Map the accepted argument values onto SQL conditions; any other
    # value (including a missing argument) applies no filter. The
    # comparisons against True/False/None build SQL expressions, so they
    # must not be rewritten with `is`.
    stub_conditions = {
        '1': Profile.is_stub == True,  # noqa
        '0': Profile.is_stub == False,  # noqa
    }
    interesting_conditions = {
        'yes': Profile.is_interesting == True,  # noqa
        'no': Profile.is_interesting == False,  # noqa
        'unset': Profile.is_interesting == None,  # noqa
    }

    if site is not None:
        query = query.filter(Profile.site == site)

    stub_condition = stub_conditions.get(is_stub)
    if stub_condition is not None:
        query = query.filter(stub_condition)

    interesting_condition = interesting_conditions.get(is_interesting)
    if interesting_condition is not None:
        query = query.filter(interesting_condition)

    if labels is not None:
        # A profile must carry every requested label; names are matched
        # in lower case.
        for label_name in labels.split(','):
            has_label = Profile.labels.any(
                Label.name == label_name.lower()
            )
            query = query.filter(has_label)

    # Count after filtering but before sorting and paging.
    total_count = query.count()

    for sort_argument in sort_arguments:
        query = query.order_by(sort_argument)

    first_row = (page - 1) * results_per_page
    query = query.limit(results_per_page).offset(first_row)

    profiles = list()

    for profile, avatar in query:
        data = profile.as_dict()
        data['url'] = url_for('ProfileView:get', id_=profile.id)

        # Profiles without a current avatar fall back to the static
        # default images.
        if avatar is None:
            data['avatar_url'] = url_for(
                'static',
                filename='img/default_user.png'
            )
            data['avatar_thumb_url'] = url_for(
                'static',
                filename='img/default_user_thumb.png'
            )
        else:
            data['avatar_url'] = url_for(
                'FileView:get',
                id_=avatar.file.id
            )
            data['avatar_thumb_url'] = url_for(
                'FileView:get',
                id_=avatar.thumb_file.id
            )

        profiles.append(data)

    return jsonify(
        profiles=profiles,
        total_count=total_count
    )