예제 #1
0
def one_hot_form_input(brand, item_type, title, est_price):
    """
    Build the one-hot feature vector for a single form submission.

    Starts from the sample vector stored as the first element of the object
    saved at ``reg_model_path`` and fills it in from the form values.

    brand -- brand name; when it is not one of the vector's index labels the
        'other brand' entry is set instead
    item_type -- item category; ignored when it is not one of the index labels
    title -- item title, searched case-insensitively for every adjective in
        the item_adjectives table
    est_price -- estimated price, stored under 'cost'

    Returns pandas series
    """
    one_hot_array, _ = joblib.load(reg_model_path)
    one_hot_array['cost'] = est_price

    if brand in one_hot_array.index.values:
        one_hot_array[brand] = 1
    else:
        one_hot_array['other brand'] = 1

    # An unknown item type sets nothing at all.
    if item_type in one_hot_array.index.values:
        one_hot_array[item_type] = 1

    adjectives_query = "SELECT * FROM item_adjectives;"
    # The adjective text is read from the second column of each row.
    adjectives = [row[1] for row in engine.execute(adjectives_query).fetchall()]
    for adj in adjectives:
        # Escape the adjective so it is matched as literal text; an adjective
        # containing a regex metacharacter would otherwise mis-match or raise.
        if re.search(re.escape(adj), title, re.IGNORECASE):
            try:
                one_hot_array[adj] = 1
            except TypeError:
                pass

    # NOTE: the model was trained on the log of rent values, so a prediction
    # made from this vector has to be passed through np.exp.
    return one_hot_array
예제 #2
0
def find_by_type_and_brand(item_type, brand):
    """
    Fetch every item of the given type and brand together with its rentals.

    items is LEFT JOINed to rental_items and rentals, so an item without any
    rental still produces one row (with NULL rental columns) and an item with
    several rentals produces one row per rental.

    item_type -- value compared for equality with the item_type column
    brand -- value compared for equality with the brand column

    Returns the list of rows produced by fetchall().
    """
    # Imported here so the function carries its own dependency.
    from sqlalchemy import text

    # The values are sent as bound parameters instead of being spliced into
    # the SQL text, so quoting is handled by the database driver.
    # The column list (including the repeated rent_per_week) is unchanged so
    # that positional access to the rows keeps working.
    query = text(
        "SELECT I.id, I.brand, I.item_type, I.cost, I.sku, "
        "I.rent_per_week, I.created_at, I.title, I.description, "
        "I.year_purchased, I.rent_per_week, "
        "R.rental_date, R.return_date, "
        "RI.item_price, RI.refunded, RI.fit_return, "
        "RI.created_at, "
        "RI.updated_at "
        "FROM items I "
        "LEFT JOIN rental_items RI ON I.id = RI.item_id "
        "LEFT JOIN rentals R ON R.id = RI.rental_id "
        "WHERE item_type=:item_type and brand=:brand;"
    )
    params = {'item_type': item_type, 'brand': brand}
    return engine.execute(query, params).fetchall()
예제 #3
0
from wombat.engine import ml_model
from wombat.models import engine, dbsession, Item
from wtforms import Form, StringField, SubmitField, SelectField, FloatField, validators

# Category choices as (value, label) pairs, most frequent item type first.
# The label is the title-cased item type.
item_types = [
    (row[0], row[0].title())
    for row in engine.execute(
        'SELECT DISTINCT item_type, count(item_type) FROM items '
        'GROUP BY item_type ORDER BY count(item_type) DESC;').fetchall()
]

# Brand choices: a blank placeholder, then the catch-all 'other brand' entry,
# then every remaining brand name sorted case-insensitively.
brands_query = "SELECT * FROM brands;"
# 'other brand' is filtered out of the alphabetical list because it is placed
# explicitly right after the placeholder. Filtering (rather than list.remove)
# keeps the module importable when the table has no such row.
brand_names = sorted(
    (row[1] for row in engine.execute(brands_query).fetchall()
     if row[1] != 'other brand'),
    key=str.lower)
brands = [('', 'Choose a brand'), ('other brand', 'Other')]
brands += [(name, name) for name in brand_names]


class DescriptionForm(Form):
    description = StringField('Description')
    item_type = SelectField(
        label='Category',
        choices=item_types,
        validators=[validators.Required(message='Category is required')])
    brand = SelectField(
        label='Brand',
        choices=brands,
        validators=[validators.Required(message='Please choose a brand')])
    est_price = FloatField(
        label='Retail Price',
        validators=[
            validators.Required(message="""Please enter a retail price. If you
예제 #4
0
                if part[1] in speech_parts:
                    p_stemmer = PorterStemmer()
                    word = p_stemmer.stem(part[0].lower())
                    if (word not in existing_words) and (word not in l) and (word not in banned_words):
                        l.append("{}\n".format(word))
                print("l is: {}".format(l))
                print("word is: {}".format(word))

    except Exception as e:
        print(str(e))

    print("l is: {}".format(l))
    with open('key_words2.txt', 'a') as fp:
        for word in l:
            fp.write(word)

def get_first_sentence(string):
    """
    Return ``string`` up to its first sentence break.

    A sentence break is a whitespace character that directly follows '.',
    ':' or ';'. The punctuation is kept, the whitespace is dropped. Text
    without such a break is returned whole. Input that the regex engine
    rejects with TypeError (for example None) gives ''.
    """
    try:
        boundary = re.search(r'(?<=[.:;])\s', string)
    except TypeError:
        return ''
    if boundary is None:
        return string
    return string[:boundary.start()]

# Run the keyword extraction over the first ten matching items, feeding it
# each item's title followed by the first sentence of its description.
res = engine.execute(
    "SELECT title, description FROM items "
    "WHERE brand != 'LENDER SUBMISSION FILL IN' "
    "AND rent_per_week < 1000"
).fetchall()
for text in res[:10]:
    item_title = text[0]
    first_sentence = get_first_sentence(text[1])
    combined = ' '.join((item_title, first_sentence))
    process_sentence(combined)
# print(res[0][0])
# process_sentence2(0)
예제 #5
0
# Brands ordered from most to least items. Normally every brand is used, but
# for testing it is sometimes useful to limit the number of brands, and the
# most popular ones are the sensible ones to keep.
brands_query = (
    "SELECT brand, count(brand) FROM items "
    "WHERE brand != 'LENDER SUBMISSION FILL IN' AND rent_per_week < {} "
    "GROUP BY brand ORDER BY count(brand) DESC;"
).format(rent_per_week_max)

brand_df = pd.read_sql_query(brands_query, engine)

# Turn each brand into a single-quoted SQL literal (embedded quotes doubled)
# and join them into the comma-separated list used by the IN (...) clause.
brands_escaped = ', '.join(
    "'" + brand.replace("'", "''") + "'" for brand in brand_df['brand'])

# Publish the list of brands so other modules can see which brands the model
# is built from.
res = engine.execute(brands_query).fetchall()
brands = [row[0] for row in res]
brand_length = len(brands)

# Fetch the items from the db to train the model.
# This is the canonical query that the machine learning model is based on.
# If you change it then you have to reconstruct the model.
canonical_query = (
    "SELECT brand, item_type, title, cost, rent_per_week, description "
    "FROM items WHERE brand in ({}) AND rent_per_week < {}"
).format(brands_escaped, rent_per_week_max)
df = canonical_df = pd.read_sql_query(canonical_query, engine)

# Replace the brand column with one one-hot column per brand.
dummified_brands = pd.get_dummies(df['brand'])
df = pd.concat([df, dummified_brands], axis=1).drop('brand', axis=1)
예제 #6
0
def get_brands():
    """Return the distinct brand values of the items table as a list."""
    rows = engine.execute('SELECT DISTINCT brand FROM items;').fetchall()
    # Every row holds exactly one column, so unpack it directly.
    return [brand for (brand,) in rows]
예제 #7
0
def get_item_types():
    """Return the distinct item_type values of the items table as a list."""
    rows = engine.execute('SELECT DISTINCT item_type FROM items;').fetchall()
    # Every row holds exactly one column, so unpack it directly.
    return [item_type for (item_type,) in rows]