def main():
    """Predict a clothing size label for every complete row of ``testing.csv``.

    Calls ``pre()``, loads the estimator stored in ``model.pkl``, reads
    ``testing.csv``, removes the ``Unnamed: 0`` column and every row that
    contains a missing value, arranges the columns in a fixed order and
    asks the model for one prediction per row. The raw predictions are
    printed before being translated.

    Returns:
        Array holding, for each prediction, the label looked up in the
        index-to-size table ('XS' .. 'XXL').
    """
    pre()
    model = joblib.load("model.pkl")

    frame = pd.read_csv('testing.csv')
    frame = frame.drop(['Unnamed: 0'], axis=1).dropna()

    # Fixed column order handed to the model
    # (presumably the order it was trained with -- TODO confirm).
    feature_order = [
        'fit',
        'body_type',
        'category',
        'weight',
        'rating',
        'height',
        'age',
        'bust_size',
        'cup_size_start_in_cms',
        'cup_size_end_in_cms',
    ]
    features = np.array(frame.reindex(columns=feature_order))

    predictions = model.predict(features)
    print(predictions)

    # Class index -> size label: 0 is 'XS', 5 is 'XXL'.
    size_labels = dict(enumerate(['XS', 'S', 'M', 'L', 'XL', 'XXL']))
    to_label = np.vectorize(size_labels.get)
    return to_label(predictions)
# Finish the plot that was started above: small x tick labels, tight x
# axis, compact layout, then display it.
plt.rc('xtick', labelsize=5)
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.show()

# Plot the rainfall training series against the training dates.
plt.plot(train_date, train_rain)
plt.title("Rainfall")
plt.xticks(rotation=90)
plt.rc('xtick', labelsize=5)
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.show()

# Run the preprocessing helper on both training series; its return value
# is not used here.
for series_train in (train_temp, train_rain):
    pre(series_train)

# Univariate fits: all three models on temperature first, then all three
# on rainfall (same call order as writing the six calls out by hand).
for series_train, series_test in ((train_temp, test_temp), (train_rain, test_rain)):
    for fit_univariate in (model_arima, model_arma, model_sarima):
        fit_univariate(series_train, series_test)

# Multivariate fits use both series together.
model_sarimax(train_temp, test_temp, train_rain, test_rain)
model_var(train_temp, test_temp, train_rain, test_rain)
# Split every "<date> <time>" stamp on the single space and collect the two
# halves in the `date` and `time` lists.
for item in datetime:
    a, b = item.split(' ')
    date.append(a)
    time.append(b)

# Data pre-processing: forward-fill missing values.
# `.ffill()` is the direct equivalent of `fillna(method='ffill')`, which
# has been deprecated since pandas 2.1.
humidity = humidity.ffill(axis=0)
pressure = pressure.ffill(axis=0)
temp = temp.ffill(axis=0)

# Typecasting and data splitting: the first 36000 samples feed the training
# set and the remainder the test set, keeping every 168th sample of each
# (presumably one reading per week of hourly data -- TODO confirm).
humidity = np.asarray(humidity)
train_humidity, test_humidity = humidity[0:36000:168], humidity[36000::168]
train_date, test_date = date[0:36000:168], date[36000::168]
print(np.shape(test_humidity), len(train_date))

# Plot the humidity training series against the training dates.
plt.plot(train_date, train_humidity)
plt.title("Humidity")
plt.xticks(rotation=90)
plt.rc('xtick', labelsize=5)
plt.autoscale(enable=True, axis='x', tight=True)
plt.tight_layout()
plt.show()

# Run the preprocessing helper on the training series (return value unused).
pre(train_humidity)

# Fit the ARIMA model on the train/test split.
model_arima(train_humidity, test_humidity)
]) pred = lgb_model.predict_proba(test[col])[:, 1] test['predicted_score'] = pred #processHasTrade(test,"predicted_score") sub1 = test[['instance_id', 'predicted_score']] sub = pd.read_csv("te.csv") sub = pd.merge(sub, sub1, on=['instance_id'], how='left') sub = sub.fillna(0) sub[['instance_id', 'predicted_score']].to_csv('result0326.txt', sep=" ", index=False) if __name__ == "__main__": pre() online = True train = pd.read_csv("tr.csv") test = pd.read_csv("te.csv") data = pd.concat([train, test]) data = data.drop_duplicates(subset='instance_id') data = base_process(data) #data = process_prop(data,"item_property_list") #svdSpareMat(data,"item_property_list") #data = user_click_comm_level(data) #No obvious change #data = comm_extra(data)- #cause bigger loss #data = comm_gender(data) data = brand_extra(data) #-Good feature data = user_extra(data) #-Good feature data = shop_extra(data) #-Good feature #data = user_click_comm_level(data)
# -*- coding: utf8 -*-
# Manual smoke test (Python 2): print a sample sentence, then print what
# `pre` returns for it.
import sys
import re

from preprocess import pre

# Python 2 only: re-expose and call sys.setdefaultencoding so that implicit
# str/unicode conversions use UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")

if __name__ == "__main__":
    test_str = u'二、变更上海市浦东新区人民法院(2013)浦民一(民)初字第30748号民事判决第十三项为:上述第四、七、八、九、十项,于本判决生效之日起十日内,由傅甲支付阎丙37,655.50元、姚丁37,655.50元、阎乙96,919.90元,合计172,230.90元(减去已支付的113,500元,实际应支付五万八千七百元九角)。'
    # A parenthesised single argument prints exactly like the bare
    # Python 2 print statement.
    print(test_str)
    print(pre(test_str))
def test_sep_senfin_pun(self):
    """With mode "e", the comma and the final period become separate tokens."""
    raw = "separate comma, and other."
    wanted = "separate comma , and other ."
    self.assertEqual(pre(raw, "e"), wanted)
def test_double_quot(self):
    """With mode "e", each '' pair is spaced off and the trailing blank is dropped."""
    raw = "he said ''i dont know'' "
    wanted = "he said '' i dont know ''"
    self.assertEqual(pre(raw, "e"), wanted)
def test_dash_in_bracket(self):
    """With mode "e", brackets and dashes are spaced out while "don't" stays whole."""
    raw = "(i-don't-know)"
    wanted = "( i - don't - know )"
    self.assertEqual(pre(raw, "e"), wanted)
def test_bracket(self):
    """With mode "e", both round brackets are separated from the words they enclose."""
    raw = "(i don't know)"
    wanted = "( i don't know )"
    self.assertEqual(pre(raw, "e"), wanted)
def test_leading_c_a(self):
    """With mode "f", the elided "t'" is split from the word that follows it."""
    raw = "je t'aime"
    wanted = "je t' aime"
    self.assertEqual(pre(raw, "f"), wanted)
def test_leading_l_punctuation(self):
    """With mode "f", the elided "l'" is split from the word that follows it."""
    raw = "l'election"
    wanted = "l' election"
    self.assertEqual(pre(raw, "f"), wanted)