def create_collection(args):
    if args.profile:
        rs = Client(profile=args.profile)
    else:
        rs = Client()

    with open(args.schema, 'r') as f:
        schema = yaml.full_load(f)
    field_types = schema['field_types']
    clustering_fields = schema.get('clustering_key')

    column_names, column_types = get_columns(field_types)
    format_params = rs.Source.csv_params(
        separator=args.separator,
        encoding='UTF-8',
        first_line_as_column_names=False,
        column_names=column_names,
        column_types=column_types
    )
    sources = [
        rs.Source.s3(
            bucket=args.s3_bucket,
            prefix=args.s3_prefix,
            format_params=format_params
        )
    ]

    if clustering_fields:
        clustering_key = [
            rs.ClusteringKey.clusteringField(field_name=cf, cluster_type='AUTO')
            for cf in clustering_fields
        ]
        rs.Collection.create(
            name=args.collection,
            workspace=args.workspace,
            sources=sources,
            clustering_key=clustering_key
        )
    else:
        rs.Collection.create(
            name=args.collection,
            workspace=args.workspace,
            sources=sources,
        )

    print(
        'Successfully created collection {}.{}'.format(
            args.workspace, args.collection
        )
    )
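# A minimal sketch of command-line wiring for create_collection above. The
# flag names mirror the attributes the function reads (profile, schema,
# separator, s3_bucket, s3_prefix, workspace, collection); they are
# assumptions, not the original script's actual interface.
import argparse


def parse_create_args():
    parser = argparse.ArgumentParser(
        description='Create a Rockset collection from a CSV-formatted S3 source')
    parser.add_argument('--profile', default=None,
                        help='profile in the Rockset credentials file')
    parser.add_argument('--schema', required=True,
                        help='YAML file with field_types and an optional clustering_key')
    parser.add_argument('--separator', default=',', help='CSV field separator')
    parser.add_argument('--s3-bucket', dest='s3_bucket', required=True)
    parser.add_argument('--s3-prefix', dest='s3_prefix', default='')
    parser.add_argument('--workspace', default='commons')
    parser.add_argument('--collection', required=True)
    return parser.parse_args()


if __name__ == '__main__':
    create_collection(parse_create_args())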
def rockset_querymaker(query):
    # connect to Rockset
    rs = Client(api_key=api_key)
    print("query is", query)
    if query not in queries:
        print("Error: unknown query {!r}".format(query))
        return
    # timeit expects a callable (or statement string) to run, not an
    # already-computed result; time a single execution of the query
    elapsed = timeit.timeit(lambda: rs.sql(Q(queries[query])), number=1)
    print(query, 1000 * elapsed)
    return query, elapsed * 1000
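# A hedged usage sketch for rockset_querymaker. `queries` and `api_key` are
# assumed module-level globals; the dict contents and env var name here are
# illustrative stand-ins, not the original script's values.
import os

api_key = os.environ['ROCKSET_API_KEY']
queries = {
    'count_events': 'SELECT COUNT(*) FROM "commons"."events"',
}

name, elapsed_ms = rockset_querymaker('count_events')
print('{} ran in {:.1f} ms'.format(name, elapsed_ms))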
def do_backfill(n):
    # Import here to avoid requiring these dependencies in lambda
    from rockset import Client, Q, F, ParamDict
    from concurrent.futures import ThreadPoolExecutor, wait

    # query rockset for failed GHA job ids
    client = Client(
        api_key=ROCKSET_API_KEY,
        api_server="https://api.rs2.usw2.rockset.com",
    )
    qlambda = client.QueryLambda.retrieve(
        "unclassified",
        version="d39e66c0ed0aa238",
        workspace="commons",
    )
    params = ParamDict()
    results = qlambda.execute(parameters=params).results
    # q = (
    #     Q("GitHub-Actions.workflow_job")
    #     .where(F["conclusion"] == "failure")
    #     .highest(n, F["_event_time"])
    #     .select(F["id"])
    # )
    # results = client.sql(q)

    ids = [result["id"] for result in results]
    with ThreadPoolExecutor() as executor:
        futures = []
        for id in ids:
            futures.append(executor.submit(send_lambda_request, id))
        wait(futures)
    logger.info("done!")
def get_avg_pm10_results():
    rs = Client(api_key=ROCKSET_API_KEY,
                api_server='https://api.rs2.usw2.rockset.com')

    # retrieve Query Lambda
    qlambda = rs.QueryLambda.retrieve(
        'YOUR QUERY LAMBDA NAME',
        version=4,
        workspace='commons')

    params = ParamDict()
    results = qlambda.execute(parameters=params)
    return results.results
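# A hedged usage sketch: iterate over the Query Lambda rows returned above and
# print them. The 'avg_pm10' column name is an assumption based on the
# function's name, not a field confirmed by the original.
for row in get_avg_pm10_results():
    print(row.get('avg_pm10'), row)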
def run(args):
    if args.profile:
        rs = Client(profile=args.profile)
    else:
        rs = Client()

    queries = []
    for f in os.listdir(args.query_dir):
        if not f.endswith('.sql'):
            continue
        query_id = os.path.splitext(f)[0]
        with open(os.path.join(args.query_dir, f), 'r') as fh:
            query_str = fh.read()
        queries.append((query_id, query_str))
    queries = sorted(queries)

    print('Found {} queries to run. Will run each {} times and take the median'
          ' runtime.'.format(len(queries), args.runs))
    print('=' * 70)

    for query_id, query_str in queries:
        times = []
        rows = None
        error = False
        for _ in range(args.runs):
            start = time.time()
            try:
                resp = rs.sql(Q(query_str))
                if rows is None:
                    rows = len(resp.results())
                else:
                    assert rows == len(resp.results())
                times.append(1000 * (time.time() - start))
            except Exception as e:
                print('Query {} produced an error: {}'.format(
                    query_id, str(e)))
                error = True
                break
        if not error:
            print('Query {} produced {:>3d} rows in {:>5.0f} ms'.format(
                query_id, rows, statistics.median(times)))
import json
import math
import os
from collections import defaultdict

import pandas as pd
from rockset import Client, ParamDict

ROCKSET_API_KEY = os.environ.get("ROCKSET_API_KEY")
if ROCKSET_API_KEY is None:
    raise RuntimeError("ROCKSET_API_KEY not set")

with open("rockset/prodVersions.json") as f:
    prod_versions = json.load(f)

client = Client(
    api_key=ROCKSET_API_KEY,
    api_server="https://api.rs2.usw2.rockset.com",
)
qlambda = client.QueryLambda.retrieve(
    "correlation_matrix",
    version=prod_versions["metrics"]["correlation_matrix"],
    workspace="metrics",
)
params = ParamDict()
results = qlambda.execute(parameters=params)

pivot = defaultdict(dict)
# Results look like (is_green, head_sha, name)
# Turn results into a nested dict of head_sha => name => is_green
for result in results.results:
    pivot[result["head_sha"]][result["name"]] = result["is_green"]
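# A hedged sketch of the step the otherwise-unused pandas import above
# suggests comes next: turning the head_sha => name => is_green mapping into
# a DataFrame. The orientation (rows = head_sha, columns = job name) is an
# assumption, not shown in the original excerpt.
df = pd.DataFrame.from_dict(pivot, orient="index")
print(df.head())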
from os import getenv
from json import loads

import flask
from rockset import Client, Q

app = flask.Flask(__name__, template_folder='js')
rs = Client(api_key=getenv('RS2_TOKEN'),
            api_server='api.rs2.usw2.rockset.com')


def redirect(link):
    return '<script>window.location.replace("{}")</script>'.format(link)


selects = {
    'treasure': 'drops',
    'monsters': 'drops',
    'materials': 'cooking_effect, hearts_recovered',
    'equipment': 'attack, defense'
}
creatures_selects = {
    'food': 'hearts_recovered, cooking_effect',
    'others': 'drops'
}


def creatures_category():
    others = list(
        rs.sql(
            Q('select id, name, description, {} from "botw-api".creatures where cooking_effect is null'
              .format(creatures_selects['others']))))
    # symmetric query for food creatures (those with a cooking_effect); the
    # predicate is inferred from the `others` query above
    foods = list(
        rs.sql(
            Q('select id, name, description, {} from "botw-api".creatures where cooking_effect is not null'
              .format(creatures_selects['food']))))
from typing import Any, Dict
from datetime import datetime, timedelta
import os

from gitutils import _check_output
from rockset import Client, ParamDict  # type: ignore[import]

rs = Client(api_key=os.getenv("ROCKSET_API_KEY", None))
qlambda = rs.QueryLambda.retrieve(
    'commit_jobs_batch_query',
    version='15aba20837ae9d75',
    workspace='commons')


def parse_args() -> Any:
    from argparse import ArgumentParser
    parser = ArgumentParser("Print latest commits")
    parser.add_argument(
        "--minutes",
        type=int,
        default=30,
        help="duration in minutes of last commits")
    return parser.parse_args()


def print_latest_commits(minutes: int = 30) -> None:
    current_time = datetime.now()
    time_since = current_time - timedelta(minutes=minutes)
    timestamp_since = datetime.timestamp(time_since)
    commits = _check_output(
        [
            "git",
            "rev-list",
from rockset import Client, Q, F

rs = Client()  # requires an active profile in rockset credentials file

aws_integration = rs.Integration.retrieve('aws_key_haneesh')
sources = [
    rs.Source.kinesis(
        stream_name="twitter-stream",
        integration=aws_integration)
]
twitter_kinesis_demo = rs.Collection.create(
    "twitter-kinesis-demo", sources=sources)
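# A hedged follow-up sketch: once the Kinesis-backed collection is ingesting,
# it can be queried with plain SQL, mirroring the rs.sql(Q(...)) pattern used
# elsewhere in these examples. The limit is illustrative.
sample = list(rs.sql(Q('select * from "twitter-kinesis-demo" limit 10')))
print('sampled {} documents'.format(len(sample)))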
import os

from rockset import Client, Q, F

collectionName = 'GadhaBotUsers'
rs = Client(api_key=os.environ.get('ROCKSET_SECRET'),
            api_server="api.rs2.usw2.rockset.com")
collection = rs.Collection.retrieve(collectionName)


def store(userid, city):
    print('City store query')
    # write to Rockset's _id field, stringified, so the lookup in get()
    # (which compares F['_id'] against str(userid)) can find the document
    docs = {'_id': str(userid), 'city': city}
    collection.add_docs([docs])
    return 'City stored. Type !weather to see weather in your city.'


def get(userid):
    struserid = str(userid)
    res = rs.sql(
        Q(collectionName).where(F['_id'] == struserid).select(F['city']))
    cityobj = list(res)[0]
    city = cityobj['city']
    return city
import json
import os
from os import path

from dotenv import load_dotenv
from rockset import Client, Q

CURRENT_DIR = path.dirname(os.path.abspath(__file__))
load_dotenv(dotenv_path=os.path.join(CURRENT_DIR, 'config.env'), override=True)

client = Client(api_server=os.environ.get('ROCKSET_APISERVER'),
                api_key=os.environ.get('ROCKSET_APIKEY'))

TOP_CONTRIBUTORS_QUERY = '''
WITH multi_contributor_repos AS (
    SELECT
        gh.repo.name AS repo_name
    FROM
        "github" gh
    WHERE
        type = 'CommitCommentEvent'
    GROUP BY
        gh.repo.name
    HAVING
        COUNT(DISTINCT gh.actor.display_login) > 10
)
SELECT
    gh.actor.display_login AS Contributor,
    COUNT(gh.actor.display_login) AS Commits
FROM
    "github" gh
WHERE
    type = 'CommitCommentEvent'
    AND gh.repo.name IN (SELECT * FROM multi_contributor_repos)
GROUP BY
    gh.actor.display_login
ORDER BY
    Commits DESC
LIMIT 10;
'''
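# A hedged sketch: TOP_CONTRIBUTORS_QUERY is defined above but never executed
# in this excerpt; running it through the same client would look like this.
for row in client.sql(Q(TOP_CONTRIBUTORS_QUERY)):
    print(row['Contributor'], row['Commits'])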
import json

from rockset import Client, Q
from credentials import API_KEY

rs = Client(api_key=API_KEY, api_server='https://api.rs2.usw2.rockset.com')


def lambda_handler(event, context):
    if 'queryStringParameters' in event:
        if 'interval' in event["queryStringParameters"]:
            interval = event["queryStringParameters"]["interval"]
            res = rs.sql(Q(f'''
-- unnest tweets with stock ticker symbols from the past 1 day
WITH stock_tweets AS
    (SELECT t.user.name, t.text, upper(sym.text) AS ticker
     FROM "twitter-firehose" AS t,
          unnest(t.entities.symbols) AS sym
     WHERE t.entities.symbols[1] is not null
       AND t._event_time > current_timestamp() - INTERVAL {interval}),

-- aggregate stock ticker symbol tweet occurrences
top_stock_tweets AS
    (SELECT ticker, count(*) AS tweet_count
     FROM stock_tweets
     GROUP BY ticker),

-- join stock ticker symbol in tweets with NASDAQ company list data
stock_info_with_tweets AS
    (SELECT top_stock_tweets.ticker, top_stock_tweets.tweet_count,
            tickers.Name, tickers.Industry, tickers.MarketCap
     FROM top_stock_tweets
     JOIN tickers ON top_stock_tweets.ticker = tickers.Symbol)
"""
NOTE: This example uses IPs as document IDs, so they cannot be `None`
"""
import flask
from flask_geomapper import flask_geomapper
from apscheduler.schedulers.background import BackgroundScheduler
from rockset import Client, Q
from os import getenv

app = flask.Flask(__name__)
fg = flask_geomapper(app, debug=True)

token = getenv("RS2_TOKEN")  # or set token to a string with your API key
rs = Client(token, "https://api.rs2.usw2.rockset.com")  # configure server based off your location (this one is us west)
collection_name = "flask-locations"  # configure based off your collection name and workspace (if not in "commons")
collection = rs.Collection.retrieve(collection_name)

previous_locations = list(rs.sql(Q(f"select * from \"{collection_name}\"")))  # retrieve previous locations from database
if previous_locations != []:
    fg.add_locations(previous_locations, ip_key="_id")  # if there are any items in the database, add them to flask-geomapper


def add_docs():
    collection.add_docs(fg.shape_to_docs(ip_key="_id"))


scheduler = BackgroundScheduler(daemon=True)  # init scheduler
# schedule the add_docs wrapper so fg.shape_to_docs(...) is re-evaluated on
# every interval rather than once at scheduling time
scheduler.add_job(func=add_docs, trigger="interval", seconds=10)
"""
^^^ Add documents to collection every ten seconds.
import multiprocessing
import time

from rockset import Client, Q, F, P, ParamDict

rs = Client()


def audio_Stage1(process_name, tasks, results):
    print('[%s] evaluation routine starts' % process_name)

    qlambda = rs.QueryLambda.retrieveByVersion(
        'all_audios_stage1',
        version='c728db03ee4e3c50',
        # version='3064713fedc5e63a',  # version2
        # version='80cb8e8a16cafb5c',  # for recall_ads collections
        workspace='commons')
    params = ParamDict()

    overlap = 5000
    while True:
        task_present = tasks.get()
        if not isinstance(task_present, tuple):
            print('[%s] evaluation routine quits' % process_name)
            # Indicate finished
            results.put(-1)
            break
from os import getenv

import flask
from requests import get
from rockset import Client, ParamDict, Q, F

rs = Client(api_key=getenv('ROCKSET_SECRET'),
            api_server='api.rs2.usw2.rockset.com')
app = flask.Flask(__name__)


@app.route('/', methods=['GET'])
def null():
    return (
        '<script>location.replace("http://github.com/gadhagod/Heroku-Status-Badges")</script>'
    )


@app.route('/<app_name>', methods=['GET'])
def running(app_name):
    code = get('https://{}.herokuapp.com/'.format(app_name)).status_code
    if 500 <= code <= 599:
        return flask.send_file('badges/down.png', mimetype='image/png')
    elif code == 404:
        return flask.send_file('badges/not_found.png', mimetype='image/png')
    else:
        return flask.send_file('badges/running.png', mimetype='image/png')
import flask
from os import getenv
from rockset import Client, Q
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from requests import get

app = flask.Flask(__name__)
rs = Client(api_key=getenv("RS2_TOKEN"),
            api_server="api.rs2.usw2.rockset.com")
collection = rs.Collection.retrieve("schoology-extension-downloads")
limiter = Limiter(
    app,
    key_func=get_remote_address,
    default_limits=["30 per day"]
)


@app.route("/")
def main():
    latest_zip = get("https://api.github.com/repos/Harker-Hackers/schoology-extension/releases/latest").json()["zipball_url"]
    return flask.redirect(latest_zip)


@app.route("/dashboard")
def dashboard():
    if flask.request.args.get("token") == getenv("DASHBOARD_TOKEN"):
        return {"data": list(rs.sql(Q("select _event_time from \"schoology-extension-downloads\"")))}
    else:
        return main()


@app.after_request
def log(response):
    if response._status_code == 302:
"""Create a dashboard on Rockset Data using Dash (https://plot.ly/products/dash/)""" import dash import dash_core_components as dcc import dash_html_components as html import plotly.graph_objs as go from rockset import Client, Q from config import ROCKSET_API_SERVER, ROCKSET_API_KEY from constants import * rs = Client(api_key=ROCKSET_API_KEY, api_server=ROCKSET_API_SERVER) def get_data(query, x_label, y_label): """ Execute query on Rockset Args: query (str): Rockset compatible SQL Query x_label (str): Values of this column will be mapped on x-axis of the graph y_label (str): Values of this column will be mapped on x-axis of the graph Returns: dict """ result = rs.sql(Q(query)) result[0] return { 'x': [record[x_label] for record in result], 'y': [record[y_label] for record in result]