def test_detect_submodule_in_deployment(self):
    """Deploy ``TestModel`` with ``dry_run=True`` and expect 8 bundled modules.

    The second element returned by ``Yhat.deploy`` is the bundle; its
    ``'modules'`` entry must contain exactly eight items.
    """
    client = Yhat("greg", "test", "http://api.yhathq.com/")
    _ignored, deployed_bundle = client.deploy(
        "TestModel",
        TestModel,
        globals(),
        sure=True,
        dry_run=True,
    )
    module_count = len(deployed_bundle['modules'])
    self.assertEqual(module_count, 8)
def home():
    """Handle the recommender home page.

    On POST, forward ``user``, ``products`` and ``n`` from the JSON request
    body to the "Final_Recommender" model and return the prediction as an
    ``application/json`` response. For any other method, render
    ``home.html`` with the URLs of its static assets.
    """
    if request.method != 'POST':
        # Page rendering only: resolve the stylesheet and scripts that are
        # handed to the home.html template.
        asset_urls = {
            'css_url': url_for('static', filename='css/main.css'),
            'jquery_url': url_for('static', filename='js/jquery-1.11.1.js'),
            'products_url': url_for('static', filename='js/products.js'),
            'highlight_url': url_for('static', filename='js/highlight.js'),
            'main_url': url_for('static', filename='js/main.js'),
        }
        return render_template('home.html', **asset_urls)

    # NOTE(review): the account name and API key are embedded in this view.
    client = Yhat("*****@*****.**", "b36b987283a83e5e4d2814af6ef0eda9", "http://cloud.yhathq.com/")
    payload = {
        "user": request.json['user'],
        "products": request.json['products'],
        "n": request.json['n'],
    }
    # The prediction is serialized as-is into the JSON response body.
    prediction = client.predict("Final_Recommender", payload)
    return Response(json.dumps(prediction), mimetype='application/json')
def index():
    """Serve the beer recommender page (GET) or a prediction (POST).

    POST requests send ``beers`` and ``n`` from the JSON body to the
    "BeerRec" model and return the result as ``application/json``. Any
    other method renders ``index.html`` with its static asset URLs.
    """
    if request.method == 'POST':
        client = Yhat("USERNAME", "APIKEY", "http://cloud.yhathq.com/")
        payload = {"beers": request.json['beers'], "n": request.json['n']}
        prediction = client.predict("BeerRec", payload)
        return Response(json.dumps(prediction), mimetype='application/json')

    # static files
    stylesheet = url_for('static', filename='css/main.css')
    jquery = url_for('static', filename='js/jquery-1.10.2.min.js')
    beers = url_for('static', filename='js/beers.js')
    highlight = url_for('static', filename='js/code.js')
    main_script = url_for('static', filename='js/main.js')
    return render_template(
        'index.html',
        css_url=stylesheet,
        jquery_url=jquery,
        beers_url=beers,
        js_url=main_script,
        highlight_url=highlight,
    )
def index():
    """Render the beer recommender page, or answer a POST with a prediction.

    A POST forwards ``beers`` and ``n`` from the JSON request body to the
    "BeerRec" model and returns the prediction as ``application/json``.
    Every other method renders ``index.html``.
    """
    if request.method != 'POST':
        # static files referenced by the index.html template
        template_context = {
            'css_url': url_for('static', filename='css/main.css'),
            'jquery_url': url_for('static', filename='js/jquery-1.10.2.min.js'),
            'beers_url': url_for('static', filename='js/beers.js'),
            'highlight_url': url_for('static', filename='js/code.js'),
            'js_url': url_for('static', filename='js/main.js'),
        }
        return render_template('index.html', **template_context)

    client = Yhat("USERNAME", "APIKEY", "http://cloud.yhathq.com/")
    prediction = client.predict(
        "BeerRec",
        {"beers": request.json['beers'], "n": request.json['n']},
    )
    return Response(json.dumps(prediction), mimetype='application/json')
import os

from yhat import Yhat, YhatModel, preprocess


class HelloWorld(YhatModel):
    """Model that greets the caller named in the request."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Return ``{"greeting": "Hello <name>!"}`` built from ``data['name']``."""
        return {"greeting": "Hello %s!" % data['name']}


# Connection settings come from the environment; a missing variable raises
# KeyError before anything is deployed.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("HelloWorld", HelloWorld, globals(), sure=True)
def test_deployment(self):
    """Deploy ``HelloWorld`` with ``dry_run=True``.

    The final assertion is unconditional, so this test only fails when the
    deploy call (or unpacking its two-element result) raises.
    """
    client = Yhat("foo", "bar", "http://api.yhathq.com/")
    _first, _bundle = client.deploy(
        "HelloWorld",
        HelloWorld,
        globals(),
        dry_run=True,
    )
    self.assertTrue(True)
training_val = RFmodel.score(transform_dummies(X_train, False), y_train)
testing_val = RFmodel.score(transform_dummies(X_test, False), y_test)
# Print each score under its own label: the training score comes from
# (X_train, y_train) and the testing score from (X_test, y_test).
print("training: %s" % training_val)
print("testing:  %s" % testing_val)

############ DEPLOYMENT ######################

from yhat import Yhat, YhatModel, preprocess


class TravisModel(YhatModel):
    """Yhat model wrapping the fitted ``RFmodel``."""

    def fit_val(self):
        """Return ``RFmodel``'s score on the held-out test split."""
        testing_val = RFmodel.score(transform_dummies(X_test, False), y_test)
        return testing_val

    def execute(self, data):
        """Transform ``data`` with ``transform_dummies`` and return predictions as a list."""
        data = transform_dummies(data, False)
        output = RFmodel.predict(data)
        return output.tolist()


########## DEPLOY SET #####################

if __name__ == '__main__':
    # Credentials and endpoint are read from the environment; a missing
    # variable raises KeyError.
    yh = Yhat(
        os.environ['YHAT_USERNAME'],
        os.environ['YHAT_APIKEY'],
        os.environ['YHAT_URL'],
    )
    yh.deploy("TravisModel", TravisModel, globals(), True)
import os

from yhat import Yhat, YhatModel, preprocess
from foo.foo import print_foo
from module import function_in_same_dir


class HelloWorld(YhatModel):
    """Greeting model that also exercises helpers imported from other modules."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Greet ``data['name']``, call ``print_foo`` on it and return the result dict."""
        name = data['name']
        message = "Hello %s!" % name
        print_foo(name)
        nine = function_in_same_dir()
        return {"greeting": message, "nine": nine}


# Connection settings are required environment variables.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("HelloWorldPkg", HelloWorld, globals(), sure=True, verbose=1)
features = df.columns[df.columns != "MEDVALUE"]
target = "MEDVALUE"

# Split the frame into the response column and the remaining predictors.
y = df[target]
X = df.drop(target, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit a linear regression and report r2_score on the held-out split.
clf = linear_model.LinearRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(r2_score(y_test, y_pred))

from yhat import Yhat, YhatModel, preprocess, df_to_json


class HousePred(YhatModel):
    """Yhat model that serves predictions from the fitted ``clf``."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Return a one-column frame, ``predicted_price``, for the rows in ``data``."""
        predicted = clf.predict(data[features])
        return pd.DataFrame(data={'predicted_price': predicted})


yh = Yhat("YHAT_USERNAME", "YHAT_APIKEY", "http://cloud.yhathq.com/")
yh.deploy("HouseValuePredictor", HousePred, globals())

# Print the first row of df serialized by df_to_json.
print(df_to_json(df[:1]))
# create and train a classifier
nbayes = MultinomialNB(fit_prior=False)
nbayes.fit(train_twitter_tfidf, train.liked_content.tolist())

# prep the test data, then create a confusion matrix to examine the results
test_twitter_tfidf = vec.transform(test.text)
preds = nbayes.predict(test_twitter_tfidf)
print(pd.crosstab(test.liked_content, preds))

from yhat import Yhat, YhatModel, preprocess


class TwitterRanker(YhatModel):
    """Yhat model that classifies a single tweet with ``nbayes``."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Classify ``data['tweet_content']``.

        Returns a dict with ``pred`` (the predicted label) and ``prob``,
        where ``ham`` is the first class probability rounded to 4 places
        and ``spam`` is one minus that rounded value.
        """
        vectorized = vec.transform([data['tweet_content']])
        label = nbayes.predict(vectorized)
        probabilities = nbayes.predict_proba(vectorized)
        ham = round(probabilities[0][0], 4)
        return {"pred": label[0], "prob": {"ham": ham, "spam": 1 - ham}}


yh = Yhat("YOUR_USERNAME", "YOUR_APIKEY", "http://cloud.yhathq.com/")
yh.deploy("twitterRanker", TwitterRanker, globals())
{"name": "x", "na_filler": 0}, {"name": "z", "na_filler": fill_z} ] class MyOtherClass: def hello(self, x): return "hello: %s" % str(x) REQS = open("reqs.txt").read() ### <DEPLOYMENT START> ### # @preprocess(in_type=dict, out_type=pd.DataFrame, null_handler=features) class MyModel(YhatModel): REQUIREMENTS=REQS @preprocess(out_type=pd.DataFrame) def execute(self, data): return predict(data) # "push" to server would be here data = {"x": 1, "z": None} if __name__ == '__main__': creds = credentials.read() yh = Yhat(creds['username'], creds['apikey'], "http://localhost:3000/") yh.deploy("mynewmodel", MyModel, globals())
# Fit a support vector classifier on the iris data set.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.DataFrame(iris.target, columns=["flower_types"])

clf = SVC()
clf.fit(X, y["flower_types"])


class MySVC(YhatModel):
    """Yhat model that maps ``clf`` predictions to species names."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Return the species name for every row of ``data`` as a list."""
        class_indices = clf.predict(pd.DataFrame(data))
        species = ['setosa', 'versicolor', 'virginica']
        names = []
        for class_index in class_indices:
            names.append(species[class_index])
        return names


# Connection settings are required environment variables.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("SupportVectorClassifier", MySVC, globals(), sure=True)
data = pd.DataFrame(data) data = data[features] prob = glm.predict_proba(data)[0][1] if prob > 0.3: decline_code = "Credit score too low" else: decline_code = "" odds = glm.predict_log_proba(data)[0][1] score = calculate_score(odds) output = { "prob_default": [prob], "decline_code": [decline_code], "score": [score] } return output df_term[features].head() test = { "last_fico_range_low": 705, "last_fico_range_high": 732, "home_ownership": "MORTGAGE" } LoanModel().execute(test) yh = Yhat("austin", os.environ.get("SCIENCEOPS_API_KEY"), "https://sandbox.c.yhat.com/") yh.deploy("LendingClub", LoanModel, globals(), True)
# Train the classifier that the deployed model will call.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.DataFrame(iris.target, columns=["flower_types"])

clf = SVC()
clf.fit(X, y["flower_types"])


class MySVC(YhatModel):
    """Serve species-name predictions from the fitted ``clf``."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Predict on ``data`` and translate each class index to its species name."""
        predicted = clf.predict(pd.DataFrame(data))
        species = ['setosa', 'versicolor', 'virginica']
        return [species[class_index] for class_index in predicted]


# Read the connection settings; each one must be set in the environment.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("SupportVectorClassifier", MySVC, globals(), sure=True)
features = [{"name": "x", "na_filler": 0}, {"name": "z", "na_filler": fill_z}]


class MyOtherClass:
    def hello(self, x):
        """Return ``x`` rendered as ``"hello: <x>"``."""
        return "hello: %s" % str(x)


def _read_text(path):
    """Return the full contents of *path*, closing the file afterwards."""
    with open(path) as handle:
        return handle.read()


# The requirements text is read through a helper so the file handle is
# closed instead of being left to the garbage collector.
REQS = _read_text("reqs.txt")

### <DEPLOYMENT START> ###
# @preprocess(in_type=dict, out_type=pd.DataFrame, null_handler=features)
class MyModel(YhatModel):
    """Yhat model that delegates to the module-level ``predict``."""

    REQUIREMENTS = REQS

    @preprocess(out_type=pd.DataFrame)
    def execute(self, data):
        """Return ``predict(data)``."""
        return predict(data)

# "push" to server would be here

data = {"x": 1, "z": None}

if __name__ == '__main__':
    creds = credentials.read()
    yh = Yhat(creds['username'], creds['apikey'], "http://localhost:3000/")
    yh.deploy("mynewmodel", MyModel, globals())
features = df.columns[df.columns != "MEDVALUE"]
target = "MEDVALUE"

# Response column and predictors, then a train/test split of both.
y = df[target]
X = df.drop(target, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y)

clf = linear_model.LinearRegression()
clf.fit(X_train, y_train)

# Report r2_score of the fitted model on the test split.
y_pred = clf.predict(X_test)
print(r2_score(y_test, y_pred))

from yhat import Yhat, YhatModel, preprocess, df_to_json


class HousePred(YhatModel):
    """Serve house-price predictions from the fitted ``clf``."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Predict on the ``features`` columns of ``data``.

        Returns a DataFrame with a single ``predicted_price`` column.
        """
        prices = clf.predict(data[features])
        predictions = pd.DataFrame(data={'predicted_price': prices})
        return predictions


yh = Yhat("YHAT_USERNAME", "YHAT_APIKEY", "http://cloud.yhathq.com/")
yh.deploy("HouseValuePredictor", HousePred, globals())
print(df_to_json(df[:1]))
import time
from yhat import Yhat

# cd ~/repos/yhat/demos/heroku-demos/demo-lending-club/model
df = pd.read_csv("./model/LoanStats3a.csv", skiprows=1)
df_head = df.head()


def is_poor_coverage(row):
    """Return True when nulls / non-nulls in ``row`` is below 0.8.

    NOTE(review): rows for which this returns True are the ones KEPT by the
    filters below, and the ratio divides by ``row.count()`` (non-null
    values) rather than the row length -- confirm both are intended.
    """
    null_count = float(row.isnull().sum())
    return null_count / row.count() < 0.8


df_head[df_head.apply(is_poor_coverage, axis=1)]

# Keep only the rows that pass the coverage check, then restrict to loans
# whose issue year (text before the first "-") is earlier than 2012.
df = df[df.apply(is_poor_coverage, axis=1)]
df['year_issued'] = df.issue_d.apply(lambda issued: int(issued.split("-")[0]))
df_term = df[df.year_issued < 2012]

features = ['last_fico_range_low', 'last_fico_range_high', 'home_ownership']

# NOTE(review): the account name and API key are embedded in this script.
yh = Yhat("demo-master", "3b0160e10f6d7a94a2528b11b1c9bca1", "https://sandbox.c.yhat.com/")

for i, row in df_term[features][:500].iterrows():
    # Some models require vectorized data, others don't. This one is sent
    # the vectorized form, i.e. every value wrapped in a one-element list:
    # {'is_rent': [True], 'last_fico_range_low': [785], 'last_fico_range_high': [789]}
    row = dict((column, [value]) for column, value in row.to_dict().items())
    print(yh.predict("LendingClub", row))
    time.sleep(.05)
{ 's1':1, 's2':1, 's3':1, 's4':1, 's5':1, 'w1':1, 'w2':1, 'w3':1, 'w4':1, 'k1':1, 'k2':1, 'k3':1, 'k4':1, 'k5':1, 'k6':1, 'k7':1, 'k8':1, 'k9':1, 'k10':1, 'k11':1, 'k12':1, 'k13':1, 'k14':1, 'k15':1 }


def _read_quoted_csv(path):
    """Load *path* into a DataFrame and close the file handle afterwards."""
    with open(path, 'r') as handle:
        return pd.read_csv(handle, quotechar='"')


test_data = _read_quoted_csv('data/test.csv')
sub_data = _read_quoted_csv('data/sampleSubmission.csv')

# The submission template must list the same ids, in the same order, as the
# test set, because predictions are assigned column-wise below.
if not np.alltrue(test_data['id'] == sub_data['id']):
    raise Exception("IDs do not match")

yh = Yhat(username, apikey)

# Every column after the first one is a variable to predict. This name is
# the one the loop below iterates over.
variables = sub_data.columns[1:]
raw_tweets = test_data['tweet'].tolist()

for variable in variables:
    model_version = best_model[variable]
    model_name = "TweetClassifier_%s" % (variable, )
    results_from_server = yh.raw_predict(model_name, model_version, raw_tweets)
    pred = results_from_server['prediction']['scores']
    sub_data[variable] = pred

try:
    # The context manager closes (and flushes) the output file even when
    # to_csv raises.
    with open(sub_file, 'w') as out_file:
        sub_data.to_csv(out_file, index=False)
except IOError:
    sys.stderr.write("IO error: could not write data to file")
#!/usr/bin/env python
from flask import Flask, request, render_template, url_for, Response, json
from yhat import Yhat
import os

app = Flask(__name__)

# One shared client; credentials and URL come from the environment and are
# None when a variable is unset (os.environ.get).
yh = Yhat(os.environ.get("YHAT_USERNAME"), os.environ.get("YHAT_APIKEY"), os.environ.get("YHAT_URL"))


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the page on GET; on POST return a "BeerRecommender" prediction.

    Any exception raised while predicting is printed and reported to the
    client as ``{"error": <message>}`` with the JSON mimetype.
    """
    if request.method != 'POST':
        # static files
        context = {
            'css_url': url_for('static', filename='css/main.css'),
            'jquery_url': url_for('static', filename='js/jquery-1.10.2.min.js'),
            'beers_url': url_for('static', filename='js/beers.js'),
            'highlight_url': url_for('static', filename='js/code.js'),
            'js_url': url_for('static', filename='js/main.js'),
        }
        return render_template('index.html', **context)

    try:
        payload = {"beers": request.json['beers'], "n": request.json['n']}
        prediction = yh.predict("BeerRecommender", payload)
        return Response(json.dumps(prediction), mimetype='application/json')
    except Exception as err:
        print(err)
        return Response(json.dumps({"error": str(err)}), mimetype='application/json')
import os
import subprocess

from yhat import Yhat, YhatModel, preprocess


class HelloWorld(YhatModel):
    """Greeting model whose class body requires the ``tree`` executable."""

    # ensure the environment has "tree": this runs when the class body is
    # executed and raises if the command cannot be run successfully.
    subprocess.check_output(["tree"])

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Return ``{"greeting": "Hello <name>!"}`` for ``data['name']``."""
        return {"greeting": "Hello %s!" % data['name']}


# Connection settings are required environment variables.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("PyAptGet", HelloWorld, globals(), sure=True, packages=["tree"])
from yhat import Yhat, YhatModel, preprocess
import os

# Connection settings are required environment variables (KeyError if unset).
USERNAME = os.environ["USERNAME"]
APIKEY = os.environ["APIKEY"]
URL = os.environ["URL"]


class HelloWorld(YhatModel):
    """Model that greets the caller named in the request."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Return ``{"greeting": "Hello <name>!"}``, with the name passed through ``str``."""
        name = str(data['name'])
        return {"greeting": "Hello " + name + "!"}


yh = Yhat(USERNAME, APIKEY, URL)
yh.deploy("Gitmodel", HelloWorld, globals(), True)
class CurrencyPortfolio(YhatModel):
    """Yhat model that allocates a portfolio by solving a quadratic programme."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Solve the programme for ``data['risk_aversion']``.

        Returns a dict with the input ``risk_aversion``, the ``investments``
        (solution entries above 10e-5, multiplied by 100 and keyed by
        ``countries``), the ``expected_return`` and the ``variance``.
        """
        risk_aversion = data['risk_aversion']
        covariance = returns_cov.as_matrix()

        P = matrix(risk_aversion * covariance)
        q = matrix(-exp_returns['mean'].as_matrix())
        size = len(q)

        # G gets -1.0 at every (size + 1)-th element, i.e. on its diagonal.
        # Presumably, with h = 0, this keeps every weight non-negative and
        # A, b make the weights sum to one -- confirm against solvers.qp.
        G = matrix(0.0, (size, size))
        G[::size + 1] = -1.0
        h = matrix(0.0, (size, 1))
        A = matrix(1.0, (1, size))
        b = matrix(1.0)

        weights = solvers.qp(P, q, G, h, A, b)['x']

        expected_return = exp_returns['mean'].dot(weights)[0]
        variance = sum(weights * covariance.dot(weights))[0]

        # Ignore values that appear to have converged to 0.
        investments = dict(
            (countries[position], amount * 100)
            for position, amount in enumerate(weights)
            if amount > 10e-5
        )

        return {
            'risk_aversion': risk_aversion,
            'investments': investments,
            'expected_return': expected_return,
            'variance': variance,
        }


yh = Yhat('USERNAME', 'APIKEY', 'http://cloud.yhathq.com/')
yh.deploy('CurrencyPortfolio', CurrencyPortfolio, globals())
product - a product id (integer) """ p = dists[products].apply(lambda row: np.sum(row), axis=1) p = p.order(ascending=False) return p.index[p.index.isin(products) == False] get_sims(["Sierra Nevada Pale Ale", "120 Minute IPA", "Stone Ruination IPA"]) from yhat import Yhat, YhatModel, preprocess class BeerRecommender(YhatModel): @preprocess(in_type=dict, out_type=dict) def execute(self, data): beers = data.get("beers") suggested_beers = get_sims(beers) result = [] for beer in suggested_beers: result.append({"beer": beer}) return result yh = Yhat("YOUR_USERNAME", "YOUR_APIKEY", "http://cloud.yhathq.com/") if raw_input("Deploy? (y/N)") == "y": print yh.deploy("BeerRecommender", BeerRecommender, globals()) print yh.predict("BeerRecommender", {"beers": ["Sierra Nevada Pale Ale", "120 Minute IPA", "Stone Ruination IPA"]})
def is_poor_coverage(row):
    """Return True when nulls / non-nulls in ``row`` is below 0.8.

    NOTE(review): rows for which this returns True are the ones KEPT by the
    filters below, and the ratio divides by ``row.count()`` (non-null
    values) rather than the row length -- confirm both are intended.
    """
    missing = float(row.isnull().sum())
    ratio = missing / row.count()
    return ratio < 0.8


df_head[df_head.apply(is_poor_coverage, axis=1)]

# Drop the rows that fail the coverage check, derive the issue year from the
# text before the first "-", and keep loans issued before 2012.
df = df[df.apply(is_poor_coverage, axis=1)]
df['year_issued'] = df.issue_d.apply(lambda issued: int(issued.split("-")[0]))
df_term = df[df.year_issued < 2012]

features = ['last_fico_range_low', 'last_fico_range_high', 'home_ownership']

# NOTE(review): the account name and API key are embedded in this script.
yh = Yhat("demo-master", "3b0160e10f6d7a94a2528b11b1c9bca1", "https://sandbox.c.yhat.com/")

for i, row in df_term[features][:500].iterrows():
    # Some models require vectorized data, others don't. The vectorized form
    # is used here: each value is wrapped in a one-element list, e.g.
    # {'is_rent': [True], 'last_fico_range_low': [785], 'last_fico_range_high': [789]}
    row = dict((name, [value]) for name, value in row.to_dict().items())
    print(yh.predict("LendingClub", row))
    time.sleep(.05)
class ChurnModel(YhatModel):
    """Yhat model that ranks customers by expected loss from churn."""

    # Type casts incoming data as a dataframe
    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Score ``data`` and return the customers sorted by expected loss.

        The result holds ``Area Code``, ``Phone``, ``customer_worth`` (sum
        of the four charge columns), ``churn_prob`` and ``expected_loss``,
        sorted by ``expected_loss`` in descending order. The yes/no columns
        of ``data`` are converted to booleans in place.
        """
        charge_columns = ['Day Charge', 'Eve Charge', 'Night Charge', 'Intl Charge']

        # Collect customer meta data
        response = data[['Area Code', 'Phone']]
        response['customer_worth'] = data[charge_columns].sum(axis=1)

        # Convert yes no columns to bool
        data[yes_no_cols] = data[yes_no_cols] == 'yes'

        # Create feature space
        X = scaler.transform(data[features].as_matrix().astype(float))

        # Make prediction; column 1 of predict_proba is used as the churn
        # probability.
        probabilities = clf.predict_proba(X)
        response['churn_prob'] = probabilities[:, 1]

        # Calculate expected loss by churn
        response['expected_loss'] = response['churn_prob'] * response['customer_worth']

        # Return response DataFrame
        return response.sort('expected_loss', ascending=False)


yh = Yhat("e[at]yhathq.com", " MY APIKEY ", "http://cloud.yhathq.com/")

print("Deploying model")
response = yh.deploy("PythonChurnModel", ChurnModel, globals())
print(json.dumps(response, indent=2))
return p[0:n_recs] get_sims(["Sierra Nevada Pale Ale", "60 Minute IPA"]) from yhat import Yhat, YhatModel, preprocess class BeerRecommender(YhatModel): REQUIREMENTS=['numpy==1.11.3', 'pandas==0.19.2', 'scikit-learn==0.18.1'] def execute(self, data): beers = data.get("beers") n_recs = data.get("n_recs") prob = data.get("prob") unique = data.get("unique") suggested_beers = get_sims(beers, n_recs, prob, unique) result = suggested_beers.to_dict(orient='records') return result model = BeerRecommender() model.execute({'beers':["Sierra Nevada Pale Ale"],'n_recs':10}) yh = Yhat("colin", "ce796d278f4840e30e763413d8b4baa4", "http://do-sb-dev-master.x.yhat.com/") print yh.deploy("BeerRecommender", BeerRecommender, globals(), autodetect=False, sure=True) # print yh.predict("BeerRecommender", {"beers": ["Sierra Nevada Pale Ale", # "120 Minute IPA", "Stone Ruination IPA"]})
import base64 def transform(self, data): image_string = data["image_string"] STANDARD_SIZE = (50, 50) f = StringIO(base64.decodestring(image_string)) img = Image.open(f) img = img.getdata() img = img.resize(STANDARD_SIZE) img = map(list, img) img = np.array(img) s = img.shape[0] * img.shape[1] img_wide = img.reshape(1, s) return img_wide[0] def predict(self, img): x = self.pca.transform([img]) x = self.std_scaler.transform(x) results = {"label": self.clf.predict(x)[0]} probs = {"prob_" + str(i) : prob for i, prob in enumerate(self.clf.predict_proba(x)[0])} results['probs'] = probs return results digit_model = DigitModel(clf=clf, std_scaler=std_scaler, pca=pca) yh = Yhat("YOUR USERNAME", "YOUR APIKEY", "http://cloud.yhathq.com/") yh.deploy("digitRecognizer", digit_model)
data = data[features] prob = glm.predict_proba(data)[0][1] if prob > 0.3: decline_code = "Credit score too low" else: decline_code = "" odds = glm.predict_log_proba(data)[0][1] score = calculate_score(odds) output = { "prob_default": [prob], "decline_code": [decline_code], "score": [score] } return output df_term[features].head() test = { "last_fico_range_low": 705, "last_fico_range_high": 732, "home_ownership": "MORTGAGE" } LoanModel().execute(test) yh = Yhat("colin", "d325fc5bcb83fc197ee01edb58b4b396", "https://sandbox.c.yhat.com/") yh.deploy("LendingClub", LoanModel, globals(), True)
def parse_tweet(tweet):
    """Wrap named entities found in ``tweet`` in pseudo-XML tags.

    Subtrees whose node is "PERSON" are wrapped in ``<PERSON>`` tags and
    those whose node is "GPE" in ``<PLACE>`` tags; each replacement is
    applied to every occurrence of the entity text in the tweet.
    """
    tag_for_node = {"PERSON": "PERSON", "GPE": "PLACE"}
    chunked = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(tweet)))
    for subtree in chunked.subtrees():
        tag = tag_for_node.get(subtree.node)
        if tag is None:
            # Not an entity type that gets tagged.
            continue
        entity = " ".join(leaf[0] for leaf in subtree.leaves())
        tweet = tweet.replace(entity, "<" + tag + ">" + entity + "</" + tag + ">")
    return tweet


class Tagger(YhatModel):
    """Yhat model that adds a tagged copy of the tweet text to the request."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, raw):
        """Store ``parse_tweet(raw['text'])`` under ``raw['tagged']`` and return ``raw``."""
        raw['tagged'] = parse_tweet(raw['text'])
        return raw


tg = Tagger()
yh = Yhat("greg", "mykey", YHAT_URL)
print(yh.deploy("NamedEntityTagger" + str(i), Tagger, globals()))
def convert_prob_to_score(p): """ takes a probability and converts it to a score Example: convert_prob_to_score(0.1) > 340 """ odds = (1 - p) / p return np.log(odds) * (40 / np.log(2)) + 340 ##Deploying to Yhat from yhat import BaseModel, Yhat yh = Yhat("greg", "abcd1234") class LoanModel(BaseModel): def transform(self, newdata): df = pd.DataFrame(newdata) # handle nulls here # df['monthly_income'] = self.income_imputer.predict(df[[]]) df['number_of_dependents'] = df['number_of_dependents'].fillna(0) return df def predict(self, df): data = df[self.features] result = {} p = self.clf.predict_proba(data) p = p[::, 1]
return self.dv.transform(doc) def predict(self, x): """ Evaluate model on array delegates to LinearRegression self.lr returns a dict (will be json encoded) suppling "predictedPrice", "suspectedOutlier", "x", "threshold" where "x" is the input vector and "threshold" is determined whether or not a listing is a suspected outlier. """ doc = self.dv.inverse_transform(x)[0] predicted = self.lr.predict(x)[0] err = abs(predicted - doc["price"]) return { "predictedPrice": predicted, "x": doc, "suspectedOutlier": 1 if (err > self.threshold) else 0, "threshold": self.threshold, } pm = PricingModel(dv=dv, lr=LR, threshold=np.percentile(trainingErrs, 95)) print pm.execute(testing.T.to_dict()[0]) if raw_input("Deploy? (y/N): ").lower() == "y": username = "******" apikey = "abcd1234" yh = Yhat(username, apikey, "http://cloud.yhathq.com/") print yh.deploy(model_name, fitted_model)
pred[np.where(pred < 0.0)] = 0.0 return {"scores" : pred} train_data = pd.read_csv(open('data/train.csv','r'),quotechar='"') raw_tweets = train_data['tweet'].tolist() sanity_raw = raw_tweets[:100] sentiments = train_data.columns[4:].tolist() vectorizer = CountVectorizer(tokenizer=nltk.word_tokenize, stop_words='english', max_features=3000, binary=True, ngram_range=(1,1)) yh = Yhat(username,apikey) X_train = vectorizer.fit_transform(raw_tweets) for sentiment in sentiments: print "Processing '%s'" % sentiment clf = SVR() y_train = train_data[sentiment].tolist() print "Training classifier" clf.fit(X_train,y_train) tweet_clf = TweetClassifier(clf=clf,vectorizer=vectorizer) model_name = "TweetClassifier_%s" % (sentiment,) print "Uploading to yhat"
red_upper = np.array([50, 56, 200], dtype = "uint8") mask = cv2.inRange(image, red_lower, red_upper) output = cv2.bitwise_and(image, image, mask = mask) output_gray = rgb2gray(output) total_red = np.sum(output_gray) y, x = ndimage.center_of_mass(output_gray) data = { "x": x, "y": y, "xmax": output_gray.shape[1], "ymax": output_gray.shape[0], "total_red": total_red, "time": time.time() } return data from yhat import Yhat, YhatModel class DroneModel(YhatModel): REQUIREMENTS = [ "opencv" ] def execute(self, data): return get_coords(data['image64']) yh = Yhat(username, apikey, url) yh.deploy("DroneModel", DroneModel, globals(), True)
from yhat import Yhat, BaseModel


def hello():
    """Return a fixed greeting; passed to the model below as a UDF."""
    return "HEY AUSTIN!"


class MyModel(BaseModel):
    """Minimal model used to exercise ``Yhat.upload``."""

    def require(self):
        """Do nothing."""
        return None

    def transform(self, data):
        """Ignore ``data`` and return the constant "something"."""
        return "something"

    def predict(self, data):
        """Return ``data`` multiplied by 10."""
        return data * 10


mm = MyModel(clf=range(10), udfs=[hello])
yh = Yhat("greg", "abcd1234")
yh.upload("functest", mm)
import os

from yhat import Yhat, YhatModel, preprocess


class HelloWorld(YhatModel):
    """Greeting model that carries a version taken from the environment."""

    # Read once, when the class body runs; KeyError if MODEL_VERSION is unset.
    version = os.environ["MODEL_VERSION"]

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Greet ``data['name']`` and print the current MODEL_VERSION."""
        message = "Hello %s!" % data['name']
        print(os.environ["MODEL_VERSION"])
        return {"greeting": message}


# Connection settings are required environment variables.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("HelloWorldVer", HelloWorld, globals(), sure=True)
import json import pickle import time # raw_data = [[ 0., 0., 5., 13., 9., 1., 0., 0.], # [ 0., 0., 13., 15., 10., 15., 5., 0.], # [ 0., 3., 15., 2., 0., 11., 8., 0.], # [ 0., 4., 12., 0., 0., 8., 8., 0.], # [ 0., 5., 8., 0., 0., 9., 8., 0.], # [ 0., 4., 11., 0., 1., 12., 7., 0.], # [ 0., 2., 14., 5., 10., 12., 0., 0.], # [ 0., 0., 6., 13., 10., 0., 0., 0.]] # yh = Yhat("greg", "fCVZiLJhS95cnxOrsp5e2VSkk0GfypZqeRCntTD1nHA", "http://localhost:5000/") yh = Yhat("clotheshorse", "gwAaXlkkIyasM2ue7iwjUmuoUKCodSZjobNU9a5WmKc", "http://166.78.26.170/") # yh = Yhat("greg", "fCVZiLJhS95cnxOrsp5e2VSkk0GfypZqeRCntTD1nHA", "http://54.235.251.150/") # # pp.pprint(skd.show_models()) # # print "*"*80 # s = time.time() # pp.pprint(yh.raw_predict('gregsTree_v11', [2, 3, 2, 2])) # print time.time() - s # # print "*"*80 # # pp.pprint(skd.predict('digits', raw_data)) # # print "*"*80 class DecisionTreePML(BaseModel): def transform(self, rawData): pair = [5, 3]
self.hprint2('<div id="lineCanvas" style="overflow: auto; position:relative;height:300px;width:400px;"></div>') self.hprint2('<script type="text/javascript">') self.hprint2('var g = new line_graph();') for i in range(len(parcoh)): self.hprint2("g.add('%d', %f);"%(i+1, parcoh[i]*100)) self.hprint2('g.render("lineCanvas", "Paragraphs");') self.hprint2('</script>') # display text annotation/highlight met(d, is_local=is_local, num_label=max(int(good), int(bad)), label_sent=True) self.fout.write("<hr>") self.fout.write("<label><h2>Cohesion Highlighter</h2></label>") if int(good)>0: self.fout.write('<span class="bold red">Red: Cohesive </span>') if int(bad)>0: self.fout.write('<span class="yellow-background">Yellow: Not Cohesive</span>') self.hprint2('<div style="width:600px;"><p align="left">') d.print_html(self.fout, int(good), int(bad)) self.hprint2('</p></div>') # End Computing and OUtput output = self.fout.getvalue() self.fout.close() return { "html_output": output } #StickyTextYhat().run() yh = Yhat("*****@*****.**", "ff7bb725be9e4a32af286f464b316a23", "http://umsi.yhathq.com/") yh.deploy ("StickyText", StickyTextYhat, globals())
"k10": 1, "k11": 1, "k12": 1, "k13": 1, "k14": 1, "k15": 1, } test_data = pd.read_csv(open("data/test.csv", "r"), quotechar='"') sub_data = pd.read_csv(open("data/sampleSubmission.csv", "r"), quotechar='"') if not np.alltrue(test_data["id"] == sub_data["id"]): raise Exception("IDs do not match") yh = Yhat(username, apikey) variabless = sub_data.columns[1:] raw_tweets = test_data["tweet"].tolist() for variable in variables: model_version = best_model[variable] model_name = "TweetClassifier_%s" % (variable,) results_from_server = yh.raw_predict(model_name, model_version, raw_tweets) pred = results_from_server["prediction"]["scores"] sub_data[variable] = pred try: sub_data.to_csv(open(sub_file, "w"), index=False) except IOError: sys.stderr.write("IO error: could not write data to file")
def execute(self, data): P = matrix(data['risk_aversion'] * returns_cov.as_matrix()) q = matrix(-exp_returns['mean'].as_matrix()) G = matrix(0.0, (len(q), len(q))) G[::len(q) + 1] = -1.0 h = matrix(0.0, (len(q), 1)) A = matrix(1.0, (1, len(q))) b = matrix(1.0) solution = solvers.qp(P, q, G, h, A, b) expected_return = exp_returns['mean'].dot(solution['x'])[0] variance = sum(solution['x'] * returns_cov.as_matrix().dot(solution['x']))[0] investments = {} for i, amount in enumerate(solution['x']): # Ignore values that appear to have converged to 0. if amount > 10e-5: investments[countries[i]] = amount * 100 return { 'risk_aversion': data['risk_aversion'], 'investments': investments, 'expected_return': expected_return, 'variance': variance } yh = Yhat('USERNAME', 'APIKEY', 'http://cloud.yhathq.com/') yh.deploy('CurrencyPortfolio', CurrencyPortfolio, globals())
from yhat import Yhat, BaseModel

# UDF handed to the model below through its ``udfs`` argument.
def hello():
    return "HEY AUSTIN!"

# Minimal model whose extracted source is printed at the end of this script.
# The definitions are left untouched because that printed source presumably
# reflects their exact text -- verify against Yhat._extract_source.
class MyModel(BaseModel):
    def require(self):
        pass
    def transform(self, data):
        return "something"
    def predict(self, data):
        return data * 10

mm = MyModel(clf=range(10), udfs=[hello])
yh = Yhat("greg", "abcd1234")
# Print the source that the client extracts for the "model" object.
print yh._extract_source("model", mm)
import os

from yhat import Yhat, YhatModel, preprocess
from foo.foo import print_foo
from module import function_in_same_dir


class HelloWorld(YhatModel):
    """Greeting model that depends on a package and a same-directory module."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Greet ``data['name']``.

        Calls ``print_foo`` with the name and returns a dict holding the
        greeting plus the result of ``function_in_same_dir()`` under
        ``"nine"``.
        """
        caller = data['name']
        response = {"greeting": "Hello %s!" % caller}
        print_foo(caller)
        response["nine"] = function_in_same_dir()
        return response


# Connection settings are required environment variables.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]
print("%s:%s:%s" % (username, apikey, endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("HelloWorldPkg", HelloWorld, globals(), sure=True, verbose=1)
""" p = dists[products].apply(lambda row: np.sum(row), axis=1) p = p.order(ascending=False) return p.index[p.index.isin(products)==False] class BeerRecommender(YhatModel): @preprocess(in_type=dict, out_type=dict) def execute(self, data): # handle uft8 beer names beers = [beer.encode('utf8') for beer in data.get("beers", [])] suggested_beers = get_sims(beers) result = [] for beer in suggested_beers: result.append({"beer": beer}) return result username = os.environ["USERNAME"] apikey = os.environ["APIKEY"] endpoint = os.environ["OPS_ENDPOINT"] print "%s:%s:%s" % (username, apikey, endpoint,) yh = Yhat( username, apikey, endpoint ) yh.deploy("BeerRecommender", BeerRecommender, globals(), sure=True)
""" YAHOO """ yahoo_data = aData yahoo_data.sort(columns = 'user_id', ascending = True, inplace = True) # no pass by value # rating-based CF recommendations data = {'user': [15], 'products':[123764, 71142], 'n':10} aGraphlab_Model = Graphlab_Recommender(dataset = yahoo_data) print aGraphlab_Model.predict(data) """ USA TODAY """ # rating-based CF recommendations usaToday_data = aData param = {'user_id':'Reviewer', 'product_id':'Id', 'ratings': 'Rating'} data = {'user': ['Edna Gundersen'], 'products':[123901], 'n':10} aGraphlab_Model = Graphlab_Recommender(dataset = usaToday_data, needed_param = param) print aGraphlab_Model.predict(data) # textual analytics + CF method param = {'comment': 'Brief', 'ratings': 'Rating', 'user_id':'Reviewer', 'product_id':'Id'} model, ratings_data = rec.sentiment_analysis_regress(usaToday_data, param) ratings_data = ratings_data.sort(columns = 'user_id') ratings_data['user_id'] = ratings_data['user_id'].fillna('anonymous') print ratings_data aGraphlab_Model = Graphlab_Recommender(dataset = ratings_data) data = {'user': ['Edna Gundersen'], 'products':[123901], 'n':10} print aGraphlab_Model.predict(data) ''' # deployment yh = Yhat("*****@*****.**", "b36b987283a83e5e4d2814af6ef0eda9", "http://cloud.yhathq.com/") yh.deploy("Final_Recommender", Final_Recommender, globals())
class ChurnModel(YhatModel):
    """Yhat model that ranks customers by expected loss from churn."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Score ``data`` and return the customers sorted by expected loss.

        Returns a DataFrame with ``customer_worth`` (sum of the four charge
        columns), ``churn_prob`` and ``expected_loss``, sorted by
        ``expected_loss`` in descending order. The yes/no columns of
        ``data`` are converted to booleans in place.
        """
        response = pd.DataFrame(data)
        charges = ['day_charge', 'eve_charge', 'night_charge', 'intl_charge']
        response['customer_worth'] = data[charges].sum(axis=1)

        # Convert yes no columns to bool
        data[yes_no_cols] = data[yes_no_cols] == 'yes'

        # Create feature space
        X = data[features].as_matrix().astype(float)
        X = scaler.transform(X)

        # Make prediction; column 1 of predict_proba is used as the churn
        # probability.
        churn_prob = clf.predict_proba(X)
        response['churn_prob'] = churn_prob[:, 1]

        # Calculate expected loss by churn
        response['expected_loss'] = response['churn_prob'] * response['customer_worth']
        response = response.sort('expected_loss', ascending=False)
        response = response[['customer_worth', 'churn_prob', 'expected_loss']]

        # Return response DataFrame
        return response


# Prompt for the username and the API key separately, so that no
# credentials are stored in the script.
yh = Yhat(raw_input("Yhat username: "), raw_input("Yhat apikey: "), "http://sandbox.yhathq.com/")

print("Deploying model")
response = yh.deploy("PythonChurnModel", ChurnModel, globals())

print(df_to_json(churn_df[:1]))
@preprocess(in_type=pd.DataFrame,out_type=pd.DataFrame) def execute(self,data): # Collect customer meta data response = data[['Area Code','Phone']] charges = ['Day Charge','Eve Charge','Night Charge','Intl Charge'] response['customer_worth'] = data[charges].sum(axis=1) # Convert yes no columns to bool data[yes_no_cols] = data[yes_no_cols] == 'yes' # Create feature space X = data[features].as_matrix().astype(float) X = scaler.transform(X) # Make prediction churn_prob = clf.predict_proba(X) response['churn_prob'] = churn_prob[:,1] # Calculate expected loss by churn response['expected_loss'] = response['churn_prob'] * response['customer_worth'] response = response.sort('expected_loss',ascending=False) # Return response DataFrame return response yh = Yhat( "e[at]yhathq.com", " MY APIKEY ", "http://cloud.yhathq.com/" ) print "Deploying model" response = yh.deploy("PythonChurnModel",ChurnModel,globals()) print json.dumps(response,indent=2)
{"name": "x", "na_filler": 0}, {"name": "z", "na_filler": fill_z} ] class MyOtherClass: def hello(self, x): return "hello: %s" % str(x) REQS = open("reqs.txt").read() ### <DEPLOYMENT START> ### # @preprocess(in_type=dict, out_type=pd.DataFrame, null_handler=features) class MyModel(YhatModel): REQUIREMENTS=REQS @preprocess(out_type=pd.DataFrame) def execute(self, data): return predict(data) # "push" to server would be here data = {"x": 1, "z": None} if __name__ == '__main__': creds = credentials.read() yh = Yhat(creds['username'], creds['apikey']) yh.deploy_to_file("mynewmodel", MyModel, globals())
from yhat import YhatModel, Yhat, preprocess # from first import hello as h2 import first as f2 from first import Support from another.testfile import bye def goodbye(y): bye() print y, "goodbye!" class Example(YhatModel): @preprocess(in_type=dict, out_type=dict) def execute(self, data): goodbye(x) return Support().hello(10) # return h2(data) from first import x yh = Yhat("greg", "fCVZiLJhS95cnxOrsp5e2VSkk0GfypZqeRCntTD1nHA", "http://api.yhathq.com/") yh.deploy_to_file("Example", Example, globals())
import os

from yhat import Yhat, YhatModel, preprocess


class HelloWorld(YhatModel):
    """Minimal model that greets the caller by name."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Build a greeting for ``data['name']``.

        Args:
            data: dict with a ``'name'`` entry.

        Returns:
            dict with a single ``'greeting'`` key.

        Raises:
            KeyError: if ``data`` has no ``'name'`` entry.
        """
        me = data['name']
        greeting = "Hello " + str(me) + "!"
        return {"greeting": greeting}


# Credentials are read from the environment so that no secret is committed
# with the script; a missing variable raises KeyError naming that variable.
yh = Yhat(os.environ["YHAT_USERNAME"], os.environ["YHAT_APIKEY"],
          "http://umsi.yhathq.com/")
yh.deploy("HelloWorld", HelloWorld, globals())
get_sims(["Sierra Nevada Pale Ale", "120 Minute IPA", "Coors Light"])
# Index([u'Samuel Adams Boston Lager', u'Sierra Nevada Celebration Ale', u'90 Minute IPA', u'Arrogant Bastard Ale', u'Stone IPA (India Pale Ale)', u'60 Minute IPA', u'HopDevil Ale', u'Stone Ruination IPA', u'Sierra Nevada Bigfoot Barleywine Style Ale', u'Storm King Stout', u'Samuel Adams Winter Lager', u'Samuel Adams Summer Ale', u'Prima Pils', u'Anchor Steam Beer', u'Old Rasputin Russian Imperial Stout', u'Samuel Adams Octoberfest', ...], dtype='object')

from yhat import Yhat, YhatModel, preprocess


class BeerRecommender(YhatModel):
    """Wrap ``get_sims`` so it can be served as a model."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Return one ``{"beer": <name>}`` record per suggestion.

        ``data.get("beers")`` is handed to ``get_sims`` as-is, so a request
        without a ``"beers"`` entry passes ``None`` through.
        """
        return [{"beer": name} for name in get_sims(data.get("beers"))]


# Local smoke test of the model before it is deployed.
BeerRecommender().execute({
    "beers": ["Sierra Nevada Pale Ale", "120 Minute IPA",
              "Stone Ruination IPA"]
})

yh = Yhat("USERNAME", "APIKEY", "http://cloud.yhathq.com")
yh.deploy("BeerRecommender", BeerRecommender, globals())

# Same request as the smoke test, this time against the deployed model.
yh.predict("BeerRecommender", {
    "beers": ["Sierra Nevada Pale Ale", "120 Minute IPA",
              "Stone Ruination IPA"]
})
import os

from yhat import Yhat, YhatModel
from pricing import Pricing


class MarketingSearchAPI(YhatModel):
    """Serve predictions from the module-level ``Pricing`` instance ``p``."""

    REQUIREMENTS = ["pandas==0.15.2", "numpy"]

    def execute(self, data):
        """Return ``p.predict(data)`` unchanged."""
        return p.predict(data)


p = Pricing()

# Deployment target and credentials are supplied through the environment;
# a missing variable raises KeyError naming that variable.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]

# Echo the target for the run log, keeping the username:apikey:endpoint
# layout but masking the key so the secret never reaches stdout.
print("%s:%s:%s" % (username, "********", endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("RelayRidesPricing", MarketingSearchAPI, globals(), sure=True)
beer = raw_data['beer'] weights = raw_data.get("weights", [1, 1, 1, 1]) # normalize the weights so they sum to 1.0 weights = [float(w) / sum(weights) for w in weights] print "making recs for: " + beer return (beer, weights) def predict(self, data): beer, weights = data results = [] for beer_cmp in self.beers: if beer!=beer_cmp: dist = calc_distance(self.simple_distances, beer, beer_cmp, weights) results.append((beer, beer_cmp, dist)) dists = sorted(results, key=lambda x: x[2]) # return dists return normalize_dists(dists) yh = Yhat({USERNAME}, {APIKEY}) myBeerModel = BeerRec(simple_distances=simple_distances, beers=beers, udfs=[calc_distance, normalize_dists]) if raw_input("Deploy? (y/N)")=="y": print yh.deploy("BeerRec", myBeerModel) print yh.predict("BeerRec", None, {"beer": "Coors Light"})
# 1 4.9 3.0 1.4
# 2 4.7 3.2 1.3

# Regression target: the fourth iris feature column.
y = pd.DataFrame(iris.data[:, 3:4], columns=iris.feature_names[3:4])
# petal width (cm)
# 0 0.2
# 1 0.2
# 2 0.2

regr = linear_model.LinearRegression()
regr.fit(X, y)


class LinReg(YhatModel):
    """Serve predictions from the module-level fitted regression ``regr``."""

    @preprocess(in_type=pd.DataFrame, out_type=pd.DataFrame)
    def execute(self, data):
        """Return ``regr.predict`` applied to ``data`` wrapped in a DataFrame."""
        return regr.predict(pd.DataFrame(data))


# Deployment target and credentials are supplied through the environment;
# a missing variable raises KeyError naming that variable.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]

# Echo the target for the run log, keeping the username:apikey:endpoint
# layout but masking the key so the secret never reaches stdout.
print("%s:%s:%s" % (username, "********", endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("LinearRegression", LinReg, globals(), sure=True)
# Per-column fill values for missing inputs; only referenced by the
# commented-out decorator below.
features = [
    {"name": "x", "na_filler": 0},
    {"name": "z", "na_filler": fill_z},
]


class MyOtherClass:
    """Helper class that is not used by ``MyModel`` in this script."""

    def hello(self, x):
        """Return ``"hello: "`` followed by ``str(x)``."""
        return "hello: %s" % str(x)


# Read the requirements through a context manager so the file handle is
# closed deterministically instead of being left to garbage collection.
with open("reqs.txt") as _reqs_file:
    REQS = _reqs_file.read()
# Drop the temporary name so it is not part of the globals() passed to deploy.
del _reqs_file

### <DEPLOYMENT START> ###
# @preprocess(in_type=dict, out_type=pd.DataFrame, null_handler=features)
class MyModel(YhatModel):
    """Model that delegates to the module-level ``predict`` function."""

    REQUIREMENTS = REQS

    @preprocess(out_type=pd.DataFrame)
    def execute(self, data):
        """Return ``predict(data)`` unchanged."""
        return predict(data)


# "push" to server would be here
data = {"x": 1, "z": None}

if __name__ == '__main__':
    creds = credentials.read()
    yh = Yhat(creds['username'], creds['apikey'])
    yh.deploy_to_file("mynewmodel", MyModel, globals())
#!/usr/bin/env python
"""List the account's models, then request one prediction from CKModel."""
import os

from yhat import Yhat

# Credentials are read from the environment so that no API key is stored in
# the script; a missing variable raises KeyError naming that variable.
yh = Yhat(os.environ["YHAT_USERNAME"], os.environ["YHAT_APIKEY"])

# Print every model registered for this account, one per line.
checkoo_models = yh.show_models()
for model in checkoo_models['models']:
    print(model)

# Sample case sent to the model; every value is passed as a string.
newcase = {
    'loc': 'BeiJing',
    'major': 'Computer Science/Engineering',
    'vtype': 'F1',
    'ventry': 'New',
    'byear': '2013',
    'bmonth': '7',
    'bday': '20',
}

# Model version to query.
checkoo_version = 14
print(yh.predict('CKModel', checkoo_version, newcase))
import os

from yhat import Yhat, YhatModel, preprocess


class HelloWorld(YhatModel):
    """Minimal model that greets the caller by name."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Build a greeting for ``data['name']``.

        Args:
            data: dict with a ``'name'`` entry.

        Returns:
            dict with a single ``'greeting'`` key.

        Raises:
            KeyError: if ``data`` has no ``'name'`` entry.
        """
        me = data['name']
        # Wrap the value in a 1-tuple so a tuple-valued name is formatted
        # as a whole instead of being unpacked as format arguments.
        greeting = "Hello %s!" % (me,)
        return {"greeting": greeting}


# Deployment target and credentials are supplied through the environment;
# a missing variable raises KeyError naming that variable.
username = os.environ["USERNAME"]
apikey = os.environ["APIKEY"]
endpoint = os.environ["OPS_ENDPOINT"]

# Echo the target for the run log, keeping the username:apikey:endpoint
# layout but masking the key so the secret never reaches stdout.
print("%s:%s:%s" % (username, "********", endpoint))

yh = Yhat(username, apikey, endpoint)
yh.deploy("IndentedModel", HelloWorld, globals(), sure=True)
def transform(self, data): image_string = data["image_string"] STANDARD_SIZE = (50, 50) f = StringIO(base64.decodestring(image_string)) img = Image.open(f) img = img.getdata() img = img.resize(STANDARD_SIZE) img = map(list, img) img = np.array(img) s = img.shape[0] * img.shape[1] img_wide = img.reshape(1, s) return img_wide[0] def predict(self, img): x = self.pca.transform([img]) x = self.std_scaler.transform(x) results = {"label": self.clf.predict(x)[0]} probs = { "prob_" + str(i): prob for i, prob in enumerate(self.clf.predict_proba(x)[0]) } results['probs'] = probs return results digit_model = DigitModel(clf=clf, std_scaler=std_scaler, pca=pca) yh = Yhat("YOUR USERNAME", "YOUR APIKEY", "http://cloud.yhathq.com/") yh.deploy("digitRecognizer", digit_model)
import os

from yhat import Yhat, YhatModel, preprocess

# Module-level value that the model reads at prediction time.
x = range(10)


class HelloWorld(YhatModel):
    """Greets the caller and echoes the module-level ``x``."""

    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        """Build a greeting for ``data['name']``.

        Args:
            data: dict with a ``'name'`` entry.

        Returns:
            dict with ``'greeting'`` and ``'x'`` (the module-level ``x``).

        Raises:
            KeyError: if ``data`` has no ``'name'`` entry.
        """
        print(x[:10])
        me = data['name']
        greeting = "Hello " + str(me) + "!"
        return {"greeting": greeting, "x": x}


# Credentials are read from the environment so that no API key is stored in
# the script; a missing variable raises KeyError naming that variable.
yh = Yhat(os.environ["YHAT_USERNAME"], os.environ["YHAT_APIKEY"],
          "http://localhost:8080/")
yh.deploy("NewZippedModel", HelloWorld, globals())
#!/usr/bin/env python from flask import Flask, request, render_template, url_for, Response, json from yhat import Yhat from uuid import uuid4 import numpy as np from bandits import EpsilonGreedy app = Flask(__name__) yh = Yhat("__username__", "__apikey__", "http://cloud.yhathq.com/") arms = ["EuclideanBeerRec", "CosineBeerRec", "CorrelationBeerRec"] eg = EpsilonGreedy(3) ids = {} @app.route('/', methods=['GET', 'POST']) def index(): if request.method == 'POST': arm = eg.choose_arm() arm_name = arms[arm] u_id = str(uuid4()) pred = yh.predict(arm_name, {"beers": request.json['beers']}) ids[u_id] = {'arm': arm, 'arm_name': arm_name} return Response(json.dumps({ 'result': pred['result'], 'uid': u_id }), mimetype='application/json') else: