def one_hot_form_input(brand, item_type, title, est_price):
    """Build the one-hot feature vector for an item described on the form.

    The template vector is the first element of the pair stored in the
    joblib file at ``reg_model_path``. It is then filled in as follows:

    * ``'cost'`` is set to ``est_price``;
    * the entry labelled ``brand`` is set to 1 when that label exists in the
      vector's index, otherwise the ``'other brand'`` entry is set to 1;
    * the entry labelled ``item_type`` is set to 1 when that label exists in
      the index (unknown item types are ignored);
    * every adjective from the ``item_adjectives`` table (second column of
      each row) that occurs anywhere in ``title``, ignoring case, has its
      entry set to 1.

    Args:
        brand: Brand name chosen on the form.
        item_type: Item category chosen on the form.
        title: Free text that is searched for known adjectives.
        est_price: Estimated retail price, stored under ``'cost'``.

    Returns:
        The filled-in vector (a pandas Series, per the original docstring).

    Note:
        Per the original author's comment, the regression model was trained
        on the log of rent values, so a prediction made from this vector has
        to be exponentiated by the caller.
    """
    X_sample, _ = joblib.load(reg_model_path)
    one_hot_array = X_sample
    one_hot_array['cost'] = est_price

    # Brands the vector has no entry for are folded into the catch-all entry.
    if brand in one_hot_array.index.values:
        one_hot_array[brand] = 1
    else:
        one_hot_array['other brand'] = 1

    if item_type in one_hot_array.index.values:
        one_hot_array[item_type] = 1

    adjectives_query = "SELECT * FROM item_adjectives;"
    adjectives = [row[1] for row in engine.execute(adjectives_query).fetchall()]
    for adj in adjectives:
        # Match the adjective as literal text: without escaping, a regex
        # metacharacter stored in the table (e.g. "+", "(") would either
        # raise re.error or match unintended text.
        # NOTE(review): assumes the table holds plain words, not patterns.
        pattern = re.escape('{}'.format(adj))
        if re.search(pattern, title, re.IGNORECASE):
            try:
                one_hot_array[adj] = 1
            except TypeError:
                # Best effort: skip an adjective that cannot be used as a label.
                pass
    return one_hot_array
def find_by_type_and_brand(item_type, brand):
    """Fetch items of one type and brand together with their rental records.

    Items are LEFT JOINed to ``rental_items`` and ``rentals``, so an item
    with no rentals still yields one row (with NULL rental columns) and an
    item with several rentals yields one row per rental.

    Args:
        item_type: Value compared against the ``item_type`` column.
        brand: Value compared against the ``brand`` column.

    Returns:
        The result of ``fetchall()`` on the executed query.

    NOTE(review): both values are spliced into the SQL text after doubling
    single quotes; bound parameters would be the safer way to pass them.
    """
    quoted_type = item_type.replace("'", "''")
    quoted_brand = brand.replace("'", "''")

    sql_template = (
        "SELECT I.id, I.brand, I.item_type, I.cost, I.sku, "
        "I.rent_per_week, I.created_at, I.title, I.description, "
        "I.year_purchased, I.rent_per_week, "
        "R.rental_date, R.return_date, "
        "RI.item_price, RI.refunded, RI.fit_return, "
        "RI.created_at, "
        "RI.updated_at "
        "FROM items I "
        "LEFT JOIN rental_items RI ON I.id = RI.item_id "
        "LEFT JOIN rentals R ON R.id = RI.rental_id "
        "WHERE item_type='{}' and brand='{}';"
    )
    result = engine.execute(sql_template.format(quoted_type, quoted_brand))
    return result.fetchall()
from wombat.engine import ml_model from wombat.models import engine, dbsession, Item from wtforms import Form, StringField, SubmitField, SelectField, FloatField, validators item_types = engine.execute('SELECT DISTINCT item_type,\ count(item_type) FROM items GROUP BY item_type ORDER BY \ count(item_type) DESC;').fetchall() item_types = [(r[0], r[0].title()) for r in item_types] brands_query = "SELECT * FROM brands;" brands = ml_model.brands[0:100] brand_names = sorted([b[1] for b in engine.execute(brands_query).fetchall()], key=str.lower) brand_names.remove('other brand') brands = [(b, b) for b in brand_names] brands = [('', 'Choose a brand')] + [('other brand', 'Other')] + brands class DescriptionForm(Form): description = StringField('Description') item_type = SelectField( label='Category', choices=item_types, validators=[validators.Required(message='Category is required')]) brand = SelectField( label='Brand', choices=brands, validators=[validators.Required(message='Please choose a brand')]) est_price = FloatField( label='Retail Price', validators=[ validators.Required(message="""Please enter a retail price. If you
if part[1] in speech_parts: p_stemmer = PorterStemmer() word = p_stemmer.stem(part[0].lower()) if (word not in existing_words) and (word not in l) and (word not in banned_words): l.append("{}\n".format(word)) print("l is: {}".format(l)) print("word is: {}".format(word)) except Exception as e: print(str(e)) print("l is: {}".format(l)) with open('key_words2.txt', 'a') as fp: for word in l: fp.write(word) def get_first_sentence(string): try: return re.split(r'(?<=[.:;])\s', string)[0] except TypeError: return '' res = engine.execute("SELECT title, description FROM items WHERE brand != 'LENDER SUBMISSION FILL IN' AND rent_per_week < 1000").fetchall() for text in res[0:10]: first_sentence = get_first_sentence(text[1]) item_title = text[0] combined = ' '.join([item_title, first_sentence]) process_sentence(combined) # print(res[0][0]) # process_sentence2(0)
# Brands are listed most-popular first. Normally every brand is used, but the
# ordering makes it easy to keep only the most popular ones when the set has
# to be limited for testing.
brands_query = (
    "SELECT brand, count(brand) FROM items "
    "WHERE brand != 'LENDER SUBMISSION FILL IN' AND rent_per_week < {} "
    "GROUP BY brand ORDER BY count(brand) DESC;"
).format(rent_per_week_max)
brand_df = pd.read_sql_query(brands_query, engine)

# Quote every brand (doubling embedded single quotes) and join them into the
# comma-separated list used inside the IN (...) clause below.
brands_escaped = ', '.join(
    "'" + name.replace("'", "''") + "'" for name in brand_df['brand']
)

# Expose the brand list at module level so other modules can see which brands
# the model is built from.
res = engine.execute(brands_query).fetchall()
brands = [name for name, _count in res]
brand_length = len(brands)

# Fetch the training items from the database.
# This is the canonical query the machine learning model is based on: if it
# changes, the model has to be reconstructed.
canonical_query = (
    "SELECT brand, item_type, title, cost, rent_per_week, description "
    "FROM items WHERE brand in ({}) AND rent_per_week < {}"
).format(brands_escaped, rent_per_week_max)
canonical_df = pd.read_sql_query(canonical_query, engine)

# Replace the textual brand column with one one-hot column per brand;
# canonical_df keeps the frame exactly as it was fetched.
dummified_brands = pd.get_dummies(canonical_df['brand'])
df = pd.concat([canonical_df, dummified_brands], axis=1).drop('brand', axis=1)
def get_brands():
    """Return the distinct ``brand`` values found in the ``items`` table.

    Returns:
        list: the first column of every row produced by the query, in the
        order the database returned them.
    """
    rows = engine.execute('SELECT DISTINCT brand FROM items;').fetchall()
    brand_names = []
    for row in rows:
        brand_names.append(row[0])
    return brand_names
def get_item_types():
    """Return the distinct ``item_type`` values found in the ``items`` table.

    Returns:
        list: the first column of every row produced by the query, in the
        order the database returned them.
    """
    rows = engine.execute('SELECT DISTINCT item_type FROM items;').fetchall()
    type_names = []
    for row in rows:
        type_names.append(row[0])
    return type_names