def phase_offset_set():
    """Read the phase and offset entry widgets and apply them when valid.

    The offset entry is parsed as an int and the phase entry as a float.
    An entry that cannot be parsed is cleared and its value falls back
    to 0.

    The phase must lie in [-pi, pi] and the offset in
    [0, P.Params['osr']).  When both hold, the values are stored in
    ``P.Params`` and ``P.offphaseReset()`` is called.  Otherwise the
    offending entry is reset to the currently stored value and an error
    dialog is shown; the phase is checked first, so when both values are
    invalid only the phase error is reported.
    """
    try:
        offset_tmp = int(offsetEntry.get())
    except ValueError:
        # Unparseable text: fall back to 0 and clear the widget.
        offset_tmp = 0
        offsetEntry.delete(0, END)
    try:
        phase_tmp = float(phaseEntry.get())
    except ValueError:
        phase_tmp = 0
        phaseEntry.delete(0, END)

    # The chained comparison is False for NaN, so "nan" is reported as a
    # phase error instead of falling through to the offset branch.
    if not (-np.pi <= phase_tmp <= np.pi):
        phaseEntry.delete(0, END)
        phaseEntry.insert(0, P.Params['phase'])
        showerror('error', 'The phase is out of range! Please enter again.')
    elif not (0 <= offset_tmp < P.Params['osr']):
        offsetEntry.delete(0, END)
        offsetEntry.insert(0, P.Params['offset'])
        showerror('error', 'The offset is out of range! Please enter again.')
    else:
        P.Params['phase'] = phase_tmp
        P.Params['offset'] = offset_tmp
        P.offphaseReset()
def AddParameterToModelFromIndex(self,index):
    """
    Record the data point at ``index`` as the next model parameter.

    See AddParameterFromIndex. This does *not* update the view.

    Once as many parameters have been stored as the model's meta
    information lists, further calls overwrite the stored parameters in
    round-robin order.

    Args:
        index: location of the clicked data point; must be smaller than
            the size of the current x array
    Returns:
        the slot (index into the stored parameters) that was written
    Raises:
        IndexError: if ``index`` is not smaller than the array size
        ValueError: if the model's meta information lists no parameters
    """
    size = self.CurrentX.size
    if not index < size:
        raise IndexError(
            "No index {:d} in array of size {:d}".format(index, size))
    point = Parameter.ParameterData(index,
                                    self.CurrentX[index],
                                    self.CurrentY[index])
    total = len(self.ParamMeta.mParams)
    if total == 0:
        raise ValueError("Model instantiated without Parameters")
    slot = self.CurrentParamNum % total
    if self.CurrentParamNum < total:
        # Still filling up: the new point simply goes at the end.
        self.CurrentParams.append(point)
    else:
        # Every slot is taken already, so wrap around and overwrite.
        self.CurrentParams[slot] = point
    self.CurrentParamNum += 1
    # Tell the specific model which slot was used and which one is next.
    self.ParameterMade(slot, self.CurrentParamNum % total)
    if self.AutoUpdate and self.CurrentParamNum >= total:
        self.PushToDatabase()
    return slot
def extractWeatherInfo(part_data, startNum=0, city_name=None):
    """
    Build a 0/1 weather flag for every sample from ``startNum`` onwards.

    For each row the weather description of ``city_name`` on that row's
    date is looked up in ``Parameter.getWeather_info()``.  The flag is 0
    when no description is available, or when the description contains
    "云" or "晴", or contains both "小雨" and "阴"; otherwise it is 1.

    :param part_data: frame with a "time" column whose values are
        strings using "-" as date separator (the lookup replaces "-"
        with "/").  NOTE(review): the original documentation asks for
        date-completed data so rows line up with the weather table -
        confirm with the caller.
    :param startNum: number of leading samples to skip
    :param city_name: city whose weather records are used
    :return: ndarray with two columns: the weather flag and the date of
        the sample
    :raises Exception: if ``city_name`` is None or ``startNum`` is
        outside ``[0, part_data.shape[0])``
    """
    if city_name is None:
        raise Exception("city name is None")
    if startNum < 0 or startNum >= part_data.shape[0]:
        raise Exception("parameter skipNum error")
    weathers = []
    times = part_data["time"].values
    weather_ = Parameter.getWeather_info()
    weather_part = weather_[weather_.area == city_name][["date", "weather"]]
    for i in range(startNum, part_data.shape[0]):
        day = times[i]
        found = weather_part[weather_part.date == day.replace(
            "-", "/")]["weather"].values
        if len(found) == 0 or pd.isnull(found[0]):
            # No record for that day counts as flag 0.
            flag = 0
        else:
            description = found[0]
            if ("云" in description or "晴" in description
                    or ("小雨" in description and "阴" in description)):
                flag = 0
            else:
                flag = 1
        weathers.append(flag)
        weathers.append(day)
    # Floor division keeps the shape integral on Python 3 as well.
    return np.reshape(weathers, (len(weathers) // 2, 2))
def test_parameter():
    """A Parameter built from a name and a value exposes both and starts
    out not fixed and without bounds."""
    param = Parameter("a", 1)
    # What was passed in comes back unchanged.
    assert param.value == 1
    assert param.name == "a"
    # Defaults: no limits, not fixed.
    assert param.min is None
    assert param.max is None
    assert param.fixed is False
def is_done(self, job_id, url):
    """
    Ask the service whether a job has finished.

    The "is finish" request for the job is serialised to JSON,
    encrypted, stored in ``self.data["data"]`` and posted to ``url``.

    :param job_id: id of the job to query
    :param url: endpoint the request is posted to
    :return: the reply's ``isDone`` value as a string when the reply's
        error code is 200, otherwise None
    """
    query = Parameter.parameter(0, job_id).is_finish()
    cipher = DataEncoding.DataEncoding(json.dumps(query)).DesEncrypt()
    # The encrypted body is bytes; newlines are stripped before sending.
    self.data["data"] = str(cipher.replace(b'\n', b'').decode('utf-8'))
    payload = json.dumps(self.data)
    response = requests.post(url,
                             headers=self.headers,
                             timeout=3,
                             data=payload)
    # The reply is JSON whose "string" field holds another JSON document.
    reply = json.loads(json.loads(response.text)["string"])
    if int(str(reply["errorCode"])) != 200:
        return None
    done_flag = str(reply["data"]["isDone"])
    logger.info("job id " + ":" + str(job_id) + "---" + "done is " +
                done_flag)
    return done_flag
def is_error(self, job_id, url):
    """
    Log the failed records of a job and how many there are.

    The "is error" request for the job is serialised to JSON, encrypted,
    stored in ``self.data["data"]`` and posted to ``url``.  When the
    reply's error code is 200, every processed record whose own error
    code is not 200 is logged and counted.  A summary line with the
    count is logged in every case.

    :param job_id: id of the job to query
    :param url: endpoint the request is posted to
    :return: None
    """
    query = Parameter.parameter(0, job_id).is_error()
    cipher = DataEncoding.DataEncoding(json.dumps(query)).DesEncrypt()
    self.data["data"] = str(cipher.replace(b'\n', b'').decode('utf-8'))
    payload = json.dumps(self.data)
    response = requests.post(url,
                             headers=self.headers,
                             timeout=3,
                             data=payload)
    # The reply is JSON whose "string" field holds another JSON document.
    reply = json.loads(json.loads(response.text)["string"])
    failures = 0
    if int(str(reply["errorCode"])) == 200:
        records = list(reply["data"]["jobProcessedRecordExecResultList"])
        for record in records:
            if int(record["errorCode"]) == 200:
                continue
            failures += 1
            logger.info(record)
    summary = "job id: " + str(job_id) + ' error number: ' + str(failures)
    logger.info(summary.center(50, '='))
def run_ad(self, job_id, url, total_count):
    """
    Fetch and log the per-country processed counts of a job.

    The request built by ``go_on_ad_to_order()`` is serialised to JSON,
    encrypted, stored in ``self.data["data"]`` and posted to ``url``.
    When the reply's error code is 200 and it carries a
    ``totalAdCount`` list, each entry's country code and processed count
    are logged and the counts are summed.  A summary line comparing
    ``total_count`` with the sum is logged in every case.

    :param job_id: id of the job to query
    :param url: endpoint the request is posted to
    :param total_count: expected total, used only in the summary log
    :return: None
    """
    query = Parameter.parameter(0, job_id).go_on_ad_to_order()
    cipher = DataEncoding.DataEncoding(json.dumps(query)).DesEncrypt()
    self.data["data"] = str(cipher.replace(b'\n', b'').decode('utf-8'))
    payload = json.dumps(self.data)
    response = requests.post(url,
                             headers=self.headers,
                             timeout=3,
                             data=payload)
    # The reply is JSON whose "string" field holds another JSON document.
    reply = json.loads(json.loads(response.text)["string"])
    processed = 0
    if (int(str(reply["errorCode"])) == 200
            and reply["data"]["totalAdCount"] is not None):
        for entry in list(reply["data"]["totalAdCount"]):
            per_country = dict(entry)
            logger.info(per_country["countryCode"] + "---" +
                        str(per_country["totalProcessedCount"]))
            processed += int(per_country["totalProcessedCount"])
    logger.info("job id:" + str(job_id) + " --- total count: " +
                str(total_count) + ", Run total count: " + str(processed))
def simulate(self, sim_length):
    """ Simulate this patient until death or until ``sim_length`` is reached.

    A new random number generator seeded with the patient id is created
    on every call.  In each time step the transition probabilities of
    the current state are used to sample the next state, and the state
    monitor is updated with it.

    :param sim_length: simulation length, compared against
        ``step * self._delta_t``
    """
    self._rng = rndClasses.RNG(self._id)

    step = 0
    while True:
        # Stop as soon as the patient has died ...
        if not self._stateMonitor.get_if_alive():
            break
        # ... or the simulated time has reached the requested length.
        if not step * self._delta_t < sim_length:
            break

        current = self._stateMonitor.get_current_state()
        probabilities = self._param.get_transition_prob(current)
        # The sample is an integer index (0, 1, 2, ...) of the next state.
        next_index = rndClasses.Empirical(probabilities).sample(self._rng)
        self._stateMonitor.update(step, P.HealthStats(next_index))

        step += 1
def onAdd(self):
    """Create a parameter from the form contents.

    When the form is not valid, an error is printed to the FreeCAD
    console and nothing is created.  Otherwise a new parameter is
    created, filled from the form widgets (the name and the min/max
    ranges only when provided/enabled), and the form is reset through
    ``self.default()``.
    """
    if not self.isFormValid():
        FreeCAD.Console.PrintError(
            "Invalid data. Could not create parameter.\n")
        return

    param = Parameter.createParameter()
    form = self.form

    entered_name = form.nameLineEdit.text()
    if entered_name:
        param.Name = entered_name
    param.ObjectLabel = str(form.objectComboBox.currentText())
    param.ObjectProperty = str(form.propertyComboBox.currentText())

    # Range limits are only stored when their check box is ticked.
    if form.minRangeCheckBox.isChecked():
        param.MinRangeEnabled = True
        param.MinRange = form.minRangeSpinBox.value()
    if form.maxRangeCheckBox.isChecked():
        param.MaxRangeEnabled = True
        param.MaxRange = form.maxRangeSpinBox.value()
    param.Value = form.valueSpinBox.value()

    # Put the widgets back into their default state.
    self.default()
def setParamSpec():
    """Set span and center frequency from their entry widgets.

    Both entries are parsed as floats.  An entry that cannot be parsed
    is cleared and the currently stored value is used instead.

    The span must lie in (0, 1920] and the center frequency in
    [70, 6000].  When both hold, the values are stored in ``P.Params``
    (the center frequency rounded to 4 decimals and, if it changed, also
    sent through ``ParaSetCliSock.set_param('rx_freq', ...)``), the
    span-dependent parameters are recomputed via ``P.cfspanReset()`` and
    the spectrum axis and the two labels are refreshed.  Otherwise the
    offending entry is reset to the stored value and an error dialog is
    shown; the span is checked first.
    """
    try:
        span_tmp = float(P.spanEntry.get())
    except ValueError:
        span_tmp = P.Params['span']
        P.spanEntry.delete(0, END)
    try:
        center_freq_tmp = float(P.CFEntry.get())
    except ValueError:
        center_freq_tmp = P.Params['center_freq']
        # The entry widget lives on P like every other widget used here.
        P.CFEntry.delete(0, END)

    # Chained comparisons are False for NaN, so "nan" is reported against
    # the field it was typed into.
    if not (0 < span_tmp <= 1920):
        P.spanEntry.delete(0, END)
        P.spanEntry.insert(0, P.Params['span'])
        showerror('error', 'The span is out of range! Please enter again.')
        return
    if not (70 <= center_freq_tmp <= 6000):
        P.CFEntry.delete(0, END)
        P.CFEntry.insert(0, P.Params['center_freq'])
        showerror(
            'error',
            'The center frequency is out of range! Please enter again.')
        return

    if P.Params['center_freq'] != center_freq_tmp:
        # Keep at most 4 decimal places.
        P.Params['center_freq'] = round(center_freq_tmp, 4)
        ParaSetCliSock.set_param('rx_freq', P.Params['center_freq'])
    P.Params['span'] = span_tmp
    P.cfspanReset()

    # Redraw the x axis: 11 unlabeled ticks across the new range.
    P.as_spec.set_xlim(P.Params['cf_start'], P.Params['cf_end'])
    P.as_spec.set_xticks(
        np.linspace(P.Params['cf_start'], P.Params['cf_end'], 11))
    P.as_spec.set_xticklabels([''] * 11)
    P.as_spec.grid(True, color='k', linewidth='1.5')

    # CF
    P.CFLabel['text'] = ' CF: {}MHz'.format(P.Params['center_freq'])
    # span
    P.spanLabel['text'] = ' Span: {}KHz'.format(P.Params['span'])
def _setup(self):
    """Re-register every weight named in ``self.weights`` under ``<name>_raw``.

    For modules derived from ``torch.nn.RNNBase`` the module's
    ``flatten_parameters`` is first replaced by
    ``self.widget_demagnetizer_y2k_edition`` (a stop-gap for a problem
    with weight compaction in cuDNN RNNs).  Each listed weight is then
    removed from the module's parameters and a new Parameter wrapping
    the same data is registered with the ``_raw`` suffix.
    """
    module = self.module
    # Terrible temporary solution to an issue regarding compacting
    # weights re: CUDNN RNN
    if issubclass(type(module), torch.nn.RNNBase):
        module.flatten_parameters = self.widget_demagnetizer_y2k_edition

    for weight_name in self.weights:
        print('Applying weight drop of {} to {}'.format(
            self.dropout, weight_name))
        original = getattr(module, weight_name)
        del module._parameters[weight_name]
        raw_name = weight_name + '_raw'
        module.register_parameter(raw_name, Parameter(original.data))
def cosmomc2april(self, line):
    """Translate one chain row into a list of Parameter objects.

    "Obh2", "Om" and "h" are always produced, from the columns
    'omegabh2', 'omegam*' and 'H0*' (the latter divided by 100).  The
    optional parameters are added, in a fixed order, only for columns
    listed in ``self.names``.

    :param line: mapping from column name to value
    :return: list of Parameter objects
    """
    # (column in the chain, name of the resulting parameter)
    optional = (
        ("nnu", "Nnu"),
        ("w", "w"),
        ("wa", "wa"),
        ("omegak", "Ok"),
        ("mnu", "mnu"),
    )
    plist = [
        Parameter("Obh2", line['omegabh2']),
        Parameter("Om", line['omegam*']),
        Parameter("h", line['H0*'] / 100.),
    ]
    for column, label in optional:
        if column in self.names:
            plist.append(Parameter(label, line[column]))
    return plist
def __init__(self, id, therapy):
    """ Create a cohort of ``Data.POP_SIZE`` patients.

    Patient ``i`` gets the id ``id * pop_size + i`` and its own fixed
    parameter set built for ``therapy``.

    :param id: an integer to specify the seed of the random number generator
    :param therapy: therapy passed on to ``P.ParametersFixed``
    """
    self._initial_pop_size = Data.POP_SIZE
    pop_size = self._initial_pop_size
    # list of patients, in order of creation
    self._patients = [
        Patient(id * pop_size + i, P.ParametersFixed(therapy))
        for i in range(pop_size)
    ]
def __init__(self, joint_handles):
    """Set up timing constants, the event state, and one 'sma'
    Parameter per joint handle.

    :param joint_handles: handles stored as ``self.sma_handles``
    """
    # timing constants
    self.idle_wait_time = 4
    self.idle_gap = 3.0
    self.propagate_gap = 2.0
    self.light_duration = 4  # unit: second
    self.sma_duration = 4  # unit: second

    # event state; 0 means idle, which is where it starts
    self.state = 0
    self.active_list = []
    self.propagate_list = []
    self.idle_event_start_time = 0.0
    self.active_event_start_time = 0.0

    self.sma_handles = joint_handles
    self.parameter_list = [Parameter('sma') for _ in self.sma_handles]
def fitparameter(self):
    """
    Collect the current widget and configuration values into a Parameter.

    Widget values are read through their getters, the raster/measurement
    switches and DC range come from ``self.parser``, and the amplitude
    and phase fit limits are bundled in two Fitparameter objects.

    :rtype: Parameter
    """
    spin = self.spinbutton
    combo = self.combobox
    parser = self.parser
    # Entries are evaluated top to bottom, i.e. in keyword order.
    settings = {
        'fmin': self.fmin.get_value(),
        'fmax': self.fmax.get_value(),
        'df': self.df.get_value(),
        'raster': parser.getboolean(rst, 'Raster'),
        'pixel': self.pixel.get_value_as_int(),
        'dim': self.dim.get_value(),
        'spektroskopie': parser.getboolean(man, 'Spektroskopie'),
        'hysterese': parser.getboolean(man, 'Hysterese'),
        'dcmin': parser.getfloat(man, 'Umin'),
        'dcmax': parser.getfloat(man, 'Umax'),
        'ddc': parser.getfloat(man, 'dU'),
        'mittelungen': self.mittelungen.get_value_as_int(),
        'amp_fitfkt': combo('methode_amp').get_active(),
        'ph_fitfkt': combo('methode_phase').get_active(),
        'filter_breite': spin('savgol_koeff').get_value_as_int(),
        'filter_ordnung': spin('savgol_ordnung').get_value_as_int(),
        'phase_versatz': spin('phase_versatz').get_value(),
        'bereich_min': self.bereich_min.get_value(),
        'bereich_max': self.bereich_max.get_value(),
        'amp': Fitparameter(
            guete_min=spin('q_min').get_value(),
            guete_max=spin('q_max').get_value(),
            off_min=spin('off_min').get_value(),
            off_max=spin('off_max').get_value()
        ),
        'amp_min': spin('amp_min').get_value(),
        'amp_max': spin('amp_max').get_value(),
        'phase': Fitparameter(
            guete_min=spin('phase_q_min').get_value(),
            guete_max=spin('phase_q_max').get_value(),
            off_min=spin('phase_off_min').get_value(),
            off_max=spin('phase_off_max').get_value()
        ),
        'konf': self.konf,
        'kanal': self.kanal,
        'version': self.version,
    }
    return Parameter(**settings)
def fitparameter(self):
    """
    Build a Parameter from the values currently shown in the widgets.

    The amplitude and phase fit limits are bundled in two Fitparameter
    objects; ``self.konf`` and ``self.version`` are passed through.

    :rtype: Parameter
    """
    def spin_value(name):
        # Current value of the named spin button.
        return self.spinbutton(name).get_value()

    def spin_int(name):
        # Current value of the named spin button as an integer.
        return self.spinbutton(name).get_value_as_int()

    def combo_choice(name):
        # Active entry of the named combo box.
        return self.combobox(name).get_active()

    return Parameter(
        fmin=self.fmin.get_value(),
        fmax=self.fmax.get_value(),
        df=self.df.get_value(),
        pixel=self.pixel.get_value_as_int(),
        dim=self.dim.get_value(),
        mittelungen=self.mittelungen.get_value_as_int(),
        amp_fitfkt=combo_choice('methode_amp'),
        ph_fitfkt=combo_choice('methode_phase'),
        filter_breite=spin_int('savgol_koeff'),
        filter_ordnung=spin_int('savgol_ordnung'),
        phase_versatz=spin_value('phase_versatz'),
        bereich_min=self.bereich_min.get_value(),
        bereich_max=self.bereich_max.get_value(),
        amp=Fitparameter(guete_min=spin_value('q_min'),
                         guete_max=spin_value('q_max'),
                         off_min=spin_value('off_min'),
                         off_max=spin_value('off_max')),
        amp_min=spin_value('amp_min'),
        amp_max=spin_value('amp_max'),
        phase=Fitparameter(guete_min=spin_value('phase_q_min'),
                           guete_max=spin_value('phase_q_max'),
                           off_min=spin_value('phase_off_min'),
                           off_max=spin_value('phase_off_max')),
        konf=self.konf,
        version=self.version)
def predictAllShop_ANN_part_together(all_data,
                                     trainAsTest=False,
                                     saveFilePath=None,
                                     featurePath=None,
                                     cate_level=0,
                                     cate_name=None,
                                     featureSavePath=None,
                                     needSaveFeature=False,
                                     time=1):
    """
    Train one dense network on the data of all shops and predict 14 days
    for every shop.

    Features per sample are the values of the same weekday in previous
    weeks, the values of the previous days and a one-hot work/weekend
    flag.  Predictions are made day by day; each predicted count is
    written back into the shop's data so that it feeds the features of
    the following days.

    :param all_data: frame with (at least) the columns "shopid", "count",
        "time" and "weekday"
    :param trainAsTest: if True, the last 14 days of every shop are held
        out as test data and the model is trained on the rest
    :param saveFilePath: prefix of the csv file the result is written to;
        nothing is written when None
    :param featurePath: csv file with precomputed features (last column
        is the label); features are extracted from ``all_data`` when None
    :param cate_level: 0 selects shops 1..2000, any other value selects
        the shops returned by ``Parameter.extractShopValueByCate``
    :param cate_name: category name passed to that selection
    :param featureSavePath: where extracted features are written; a
        default name is generated when None
    :param needSaveFeature: whether to write the extracted features
    :param time: number of the run, only used in the result file name
    :return: integer array with one row per predicted shop: shop id, the
        14 predictions and, if ``trainAsTest``, the 14 real values
    """
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = range(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])
    weekOrWeekend = True
    day_back_num = 21
    sameday_backNum = 7
    week_backnum = 3
    # Weekly statistics (mean / median) are currently switched off.
    other_features = []

    # One-hot encoder over the cate1 indices.
    # NOTE(review): during feature extraction this encoder is replaced by
    # the work/weekend encoder, so it is only used for prediction when
    # featurePath is given - confirm that this is intended.
    cate_dup = set(shop_info['cate1'].tolist())
    cates = [[i] for i in range(len(cate_dup))]
    hot_encoder = OneHotEncoder().fit(cates)

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if shopid in Parameter.ignore_shopids:
                print("ignore get train %s" % (shopid,))
                continue
            print("get  %s  train" % (shopid,))
            part_data = all_data[all_data.shopid == shopid]
            if trainAsTest:
                # The last 14 days are the test set; train on the rest.
                part_data = part_data[0:len(part_data) - 14]
            # Only the most recent 128 rows produce training samples.
            skipNum = max(part_data.shape[0] - 128, 0)
            train_x = None
            if sameday_backNum != 0:
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:
                day_back = getOneWeekdayFomExtractedData(
                    extractBackDay(part_data, day_back_num, skipNum,
                                   nan_method_sameday_mean))
                if train_x is not None:
                    train_x = np.concatenate((train_x, day_back), axis=1)
                else:
                    train_x = day_back
            if weekOrWeekend:
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                hot_encoder = onehot(ws)
                train_x = np.concatenate(
                    (train_x, hot_encoder.transform(ws).toarray()), axis=1)
            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

        train_x = all_x
        train_y = all_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            # No column names: the number of features depends on the
            # settings above, and the file is read back by position.
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = "train_feature/%df_%d_%s.csv" % (
                        flDF.shape[1] - 1, cate_level, cate_name)
                else:
                    featureSavePath = "feature/%df_%d_%s.csv" % (
                        flDF.shape[1] - 1, cate_level, cate_name)
            flDF.to_csv(featureSavePath)
    else:
        # A feature file was given: last column is the label.
        flDF = pd.read_csv(featurePath, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]

    # Scale features and labels to [0, 1].
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)

    # Build the network: one hidden layer, linear output.
    h1_activation = "relu"
    rnn_epoch = 60
    verbose = 0
    h_unit = 16
    batch_size = 5
    np.random.seed(128)
    model = Sequential()
    model.add(
        Dense(h_unit,
              init="normal",
              input_dim=train_x.shape[1],
              activation=h1_activation))
    model.add(
        Dense(1,
              init="normal",
              activation='linear',
              activity_regularizer=activity_l2(0.01)))
    adam = Adam(0.0001)
    model.compile(loss="mse", optimizer=adam)

    model.fit(train_x,
              train_y,
              nb_epoch=rnn_epoch,
              batch_size=batch_size,
              verbose=verbose)

    date_format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", date_format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", date_format)
    one_day = datetime.timedelta(1)

    # Predict every shop.
    preficts_all = None
    real_all = None
    predicted_ids = []
    for j in shopids:
        if j in Parameter.ignore_shopids:
            print("ignore predict %s" % (j,))
            continue
        print("predict: %s" % (j,))
        predicted_ids.append(j)
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None
        if trainAsTest:
            # The last 14 days are the test set; predict from the rest.
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]

        # Predict 14 days, one at a time.
        for i in range(14):
            currentTime = startTime + one_day * i
            strftime = currentTime.strftime(date_format)
            # Append a placeholder row for the day to predict; its
            # features are taken from the rows before it.
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            last_row = part_data.shape[0] - 1
            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum, last_row,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                day_back = getOneWeekdayFomExtractedData(
                    extractBackDay(part_data, day_back_num, last_row,
                                   nan_method_sameday_mean))
                if x is None:
                    x = day_back
                else:
                    x = np.concatenate((x, day_back), axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(part_data,
                                                  last_row))).toarray()),
                    axis=1)
            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, last_row,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)

            x = x_scaler.transform(x)
            predict = model.predict(x)
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]

            if predict <= 0:
                # Assignment, not comparison: non-positive predictions
                # are replaced by 1 before being fed back.
                predict = 1
            preficts.append(predict)
            part_data.set_value(last_row, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
            print("%s,score: %s" %
                  (j, scoreoneshop(preficts, last_14_real_y)))

    # One row of 14 values per predicted shop.  Inferring the row count
    # stays correct when shops were skipped, whichever way the features
    # were obtained.
    preficts_all = preficts_all.reshape((-1, 14))
    if trainAsTest:
        real_all = real_all.reshape((-1, 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    # Prepend the ids of exactly those shops that were predicted.
    preficts_all = np.insert(preficts_all, 0, predicted_ids, axis=1)
    if saveFilePath is not None:
        path = saveFilePath + "_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dtime.csv" \
                              % (sameday_backNum, day_back_num, train_x.shape[1],cate_level,cate_name
                                 ,rnn_epoch,batch_size,h_unit,h1_activation,time)
        print("save in : %s" % (path,))
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
def Main(contigfile_, tuple_of_bamfiles, tuple_of_means, tuple_of_thresholds, edge_support, read_len, cont_threshold, ratio, output_dest, std_dev, covcutoff, haplratio, haplthreshold, detect_haplotype, detect_duplicate, gff_file, fosmidpool, mapquality):
    """Run the scaffolding pipeline once per library.

    For every library (one entry of ``tuple_of_bamfiles``) a graph is
    created with ``CreateGraph.PE``, scaffolds are made with
    ``MakeScaffolds.Algorithm``, the intermediate result is written out
    with ``GenerateOutput.PrintOutput`` and N50/L50 of the current
    scaffolds are stored on the parameter object.  Processing stops
    early when graph creation returns no graph.

    Timing and progress information is written to ``Statistics.txt`` in
    ``output_dest``, which is created if it does not exist; an empty
    ``haplotypes.fa`` is created there as well.

    The per-library arguments (``tuple_of_means``,
    ``tuple_of_thresholds``, ``edge_support``, ``read_len``,
    ``cont_threshold``, ``std_dev``, ``covcutoff``) are indexed by
    library; the remaining options are copied onto the parameter object
    unchanged.

    :param contigfile_: path of the file with the contig sequences
    :param tuple_of_bamfiles: one BAM file per library
    :param output_dest: output directory
    :return: None
    """
    from time import time
    import CreateGraph as CG
    import MakeScaffolds as MS
    import GenerateOutput as GO
    import Parameter
    from copy import deepcopy
    tot_start = time()
    # List of (ordered) lists of tuples (contig_name, direction,
    # position, length, links).  A tuple is a contig within a scaffold
    # and a list of tuples is the scaffold.
    F = []
    Scaffolds = {}  # scaffold dict with contig objects for easy fetching
    n = len(tuple_of_bamfiles)  # number of libraries we have
    # Object containing all parameters (user specified, defaulted and
    # computed along the way).
    param = Parameter.parameter()
    # Global indicator used to index scaffolds when they are created.
    param.scaffold_indexer = 1
    param.map_quality = mapquality
    param.rel_weight = ratio
    Contigs = {}  # contig dict that stores contig objects
    if not os.path.exists(output_dest):
        os.makedirs(output_dest)
    # Join with a separator so the file lands inside output_dest even
    # when the directory is given without a trailing slash.
    param.information_file = open(
        os.path.join(output_dest, 'Statistics.txt'), 'w')
    Information = param.information_file
    # Create (or empty) the haplotype file without leaking the handle.
    open(os.path.join(output_dest, 'haplotypes.fa'), 'w').close()
    # Read the sequences of the contigs into memory.
    # NOTE(review): assumes ReadInContigseqs consumes the file completely
    # before returning - confirm.
    with open(contigfile_, 'r') as contigfile:
        C_dict = ReadInContigseqs(contigfile)
    param.gff_file = gff_file

    # Iterate over libraries.
    param.first_lib = True
    for i in range(0, n):
        start = time()
        param.bamfile = tuple_of_bamfiles[i]
        param.mean_ins_size = tuple_of_means[i]
        param.ins_size_threshold = tuple_of_thresholds[i]
        param.edgesupport = edge_support[i]
        param.read_len = read_len[i]
        param.output_directory = output_dest
        param.std_dev_ins_size = std_dev[i]
        param.contig_threshold = cont_threshold[i]
        param.cov_cutoff = covcutoff[i]
        param.hapl_ratio = haplratio
        param.hapl_threshold = haplthreshold
        param.detect_haplotype = detect_haplotype
        param.detect_duplicate = detect_duplicate
        param.fosmidpool = fosmidpool
        Information.write('\nPASS ' + str(i + 1) + '\n\n' + '\n')
        print('Starting scaffolding with library:  %s' % (param.bamfile,))
        # Create graph, single out too short contigs/scaffolds and store
        # them in F.
        (G, Contigs, Scaffolds, F, param) = CG.PE(Contigs, Scaffolds, F, Information, output_dest, C_dict, param)
        param.first_lib = False  # not the first lib any more
        if G is None:
            print('0 contigs/super-contigs passed the length criteria of this step. Exiting and printing results.. ')
            break
        elapsed = time() - start
        Information.write('Time elapsed for creating graph, iteration ' + str(i) + ': ' + str(elapsed) + '\n' + '\n')
        start = time()
        # Make scaffolds, store the complex areas (consisting of
        # contig/scaffold) in F, store the created scaffolds in
        # Scaffolds, update Contigs.
        (Contigs, Scaffolds, F, param) = MS.Algorithm(G, Contigs, Scaffolds, F, Information, C_dict, param)
        elapsed = time() - start
        Information.write('Time elapsed for making scaffolds, iteration ' + str(i) + ': ' + str(elapsed) + '\n' + '\n')

        print('Writing out scaffolding results for step %s  ...' % (i + 1,))
        # Work on copies so the intermediate output does not consume the
        # structures the next library pass still needs.
        Scaffolds_copy = deepcopy(Scaffolds)
        Contigs_copy = deepcopy(Contigs)
        F_copy = deepcopy(F)
        # Iterate over a snapshot of the keys so entries can be removed.
        for scaffold_ in list(Scaffolds_copy.keys()):
            S_obj = Scaffolds_copy[scaffold_]
            list_of_contigs = S_obj.contigs  # contig objects of the scaffold
            Contigs_copy, F_copy = GO.WriteToF(F_copy, Contigs_copy, list_of_contigs)
            del Scaffolds_copy[scaffold_]
        GO.PrintOutput(F_copy, C_dict, Information, output_dest, param, i + 1)

        # Calculate stats for the last scaffolding step.
        scaf_lengths = [scaffold.s_length for scaffold in Scaffolds.values()]
        sorted_lengths = sorted(scaf_lengths, reverse=True)
        N50, L50 = CG.CalculateStats(sorted_lengths, param)
        param.current_L50 = L50
        param.current_N50 = N50

    elapsed = time() - tot_start
    Information.write('Total time for scaffolding: ' + str(elapsed) + '\n' + '\n')
    Information.close()
    print('Finished\n\n ')
__author__ = "github.com/wardsimon"
__version__ = "0.0.1"

import importlib

import numpy as np

# The package directory is called "curly-telegram".  A hyphen is not
# allowed in an import statement, so the modules are loaded by name.
_interface = importlib.import_module("curly-telegram.interface")
_fitting = importlib.import_module("curly-telegram.Objects.fitting")
Interface = _interface.Interface
calculators_list = _interface.calculators_list
Model = _fitting.Model
Parameter = _fitting.Parameter

# Synthetic data: a straight line (slope 3, intercept 2) plus normally
# distributed noise with mean -1 and standard deviation 1.
x = np.linspace(0, 10, 100)
y = 3.0 * x + 2.0 + np.random.normal(-1.0, 1.0, len(x))

# Starting values for slope and intercept.
p1 = Parameter("m", 1.5)
p2 = Parameter("c", 0.5)

f = lambda x, m, c: m * x + c  # noqa: E731
m = Model(f, [p1, p2])

interface = Interface(model=m)
interface.x = x
interface.y = y
interface.ftol = 1e-4

# Fit and plot once with the scipy calculator ...
interface.set_calculator("scipy")
interface.fit()
interface.plot()

# ... then once with every calculator the interface knows about.
for calc in calculators_list:
    interface.set_calculator(calc.name)
    interface.fit()
    interface.plot()
def predictAllShop_MultiCNN_HPS(all_data,
                                trainAsTest=False,
                                saveFilePath=None,
                                featurePath=None,
                                cate_level=0,
                                cate_name=None,
                                featureSavePath=None,
                                needSaveFeature=False,
                                ignore_shopids=None,
                                needCV=False,
                                model_path=None,
                                Augmented=False,
                                ignore_get_train=True,
                                ignore_predict=True,
                                addNoiseInResult=False,
                                time=1):
    """Train (or load) the two-input CNN and forecast 14 days for every shop.

    Two training matrices are built with ``getTrainXY``: one driven by
    ``day_back_num`` and one driven by ``sameday_backNum``. Both, and the
    label, are min-max scaled, reshaped to ``(samples, features, 1)`` and fed
    to ``create_model_MultiCNN``. Each shop is then forecast one day at a
    time; every prediction is written back into the shop's data so that it
    becomes a feature for the following days.

    :param all_data: table with at least the columns ``shopid`` and
        ``count``; rows with ``count``/``shopid``/``time``/``weekday`` are
        appended to it during prediction.
    :param trainAsTest: if true, hold back the last 14 rows of every shop as
        ground truth and start the forecast on 2016-10-18 instead of
        2016-11-1.
    :param saveFilePath: path prefix of the result CSV; nothing is written
        when ``None``.
    :param featurePath: not supported by this function (see Raises).
    :param cate_level: 0 selects shop ids 1..2000, anything else selects the
        shops returned by ``Parameter.extractShopValueByCate``.
    :param cate_name: category name passed to
        ``Parameter.extractShopValueByCate``; also part of the file names.
    :param featureSavePath: where to store the feature CSV; a default name is
        derived when ``None``.
    :param needSaveFeature: store the first feature matrix and the label.
    :param ignore_shopids: shop ids to leave out (defaults to none).
    :param needCV: run a ``GridSearchCV`` to pick epochs, batch size and
        ``h1_unit`` instead of using the fixed values.
    :param model_path: load this model with ``getModel`` instead of training.
    :param Augmented: augment the training set with ``augmentTrainX``.
    :param ignore_get_train: forwarded to ``getTrainXY``.
    :param ignore_predict: skip ``ignore_shopids`` while predicting.
    :param addNoiseInResult: inflate every prediction by a small random,
        non-negative factor that grows with the forecast day.
    :param time: integer tag that only ends up in the result file name.
    :return: integer array with one row per predicted shop: shop id, the 14
        predictions and, with ``trainAsTest``, the 14 real values.
    :raises ValueError: if ``featurePath`` is given, or if ``model_path``
        does not follow the model file naming scheme while a result file is
        requested.
    """
    if ignore_shopids is None:
        ignore_shopids = []
    if featurePath is not None:
        # The feature CSV handled here holds only the first input matrix and
        # the label. The second input matrix was never restored on this path,
        # so it used to die later with an UnboundLocalError.
        raise ValueError(
            "featurePath is not supported by predictAllShop_MultiCNN_HPS: "
            "the second CNN input cannot be restored from a feature file")

    augument_time = 1
    verbose = 2
    last_N_days = 70
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])
    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 8
    week_backnum = 3
    # Statistic features and per-shop features are switched off.
    other_features = []
    shop_features = []
    # One-hot encoder for the "is weekend" flag.
    hot_encoder = onehot([[1], [0]])
    # One-hot encoder for the first-level shop category.
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    train_x, train_y = getTrainXY(
        all_data, cate1_hot_encoder, cate1_label_encoder, day_back_num,
        hot_encoder, ignore_get_train, ignore_shopids, last_N_days,
        other_features, 0, shop_features, shop_info, shopids, trainAsTest,
        weather, weekOrWeekend, week_backnum)
    train_x2 = getTrainXY(
        all_data, cate1_hot_encoder, cate1_label_encoder, 0, hot_encoder,
        ignore_get_train, ignore_shopids, last_N_days, other_features,
        sameday_backNum, shop_features, shop_info, shopids, trainAsTest,
        weather, weekOrWeekend, week_backnum)[0]

    # Augment the training set.
    if Augmented:
        train_xs, train_y = augmentTrainX(augument_time, [train_x, train_x2],
                                          train_y)
        train_x = train_xs[0]
        train_x2 = train_xs[1]

    if needSaveFeature:
        featureAndLabel = np.concatenate((train_x, train_y), axis=1)
        flDF = pd.DataFrame(featureAndLabel)
        if featureSavePath is None:
            feature_dir = "lzj/train_feature/" if trainAsTest else "lzj/feature/"
            featureSavePath = Parameter.projectPath + feature_dir + \
                "%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                    cate_level, cate_name, flDF.shape[1] - 1, sameday_backNum,
                    day_back_num, last_N_days)
            if Augmented:
                featureSavePath += ("_Augment%d" % augument_time)
            featureSavePath += ".csv"
        print("save feature in : %s" % (featureSavePath,))
        flDF.to_csv(featureSavePath)

    # Scale both inputs and the label; the fitted scalers are reused on the
    # prediction rows further down.
    x_scaler = MinMaxScaler().fit(train_x)
    x2_scaler = MinMaxScaler().fit(train_x2)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_x2 = x2_scaler.transform(train_x2)
    train_y = y_scaler.transform(train_y)

    # The CNN expects (samples, features, 1).
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    train_x2 = np.reshape(train_x2, (train_x2.shape[0], train_x2.shape[1], 1))

    if model_path is None:
        input_dim = (train_x.shape[1], train_x.shape[2])
        input_dim2 = (train_x2.shape[1], train_x2.shape[2])
        if needCV:
            # Grid search over batch size, epochs and hidden units.
            model = KerasRegressor(build_fn=create_model_MultiCNN,
                                   verbose=verbose)
            param_grid = dict(batch_size=[3, 5, 7, 10],
                              nb_epoch=[10, 15, 20, 25, 30],
                              h1_unit=[8, 12, 16, 20],
                              input_shape1=[input_dim],
                              input_shape2=[input_dim2])
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)
            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"
        else:
            h1_unit = 24
            h1_activation = "relu"
            batch_size = 3
            epochs = 40

        print(train_x.shape)
        print(train_x2.shape)
        print(train_y.shape)
        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_MultiCNN(input_shape1=input_dim,
                                           input_shape2=input_dim2,
                                           h1_unit=h1_unit,
                                           h1_activation=h1_activation)
        hist = best_model.fit([train_x, train_x2],
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print(hist.history)

        # Save the model; the file name records the hyper-parameters.
        model_dir = "lzj/train_model/" if trainAsTest else "lzj/model/"
        model_save_path = Parameter.projectPath + model_dir + \
            "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
            % (last_N_days, sameday_backNum, day_back_num, train_x.shape[1],
               cate_level, cate_name, epochs, batch_size, h1_unit,
               h1_activation)
        saveModel(model_save_path, best_model)
    else:
        print("get model from " + model_path)
        best_model = getModel(model_path)

    date_format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", date_format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", date_format)
    one_day = datetime.timedelta(1)

    # Predict every shop.
    model = best_model
    preficts_all = None
    real_all = None
    for j in shopids:
        if ignore_predict and j in ignore_shopids:
            print("ignore predict %s" % (j,))
            continue
        print("predict: %s" % (j,))
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None
        if trainAsTest:
            # The last 14 days are the test set; train on what precedes them.
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]

        # Predict 14 days, one at a time.
        for i in range(14):
            currentTime = startTime + one_day * i
            strftime = currentTime.strftime(date_format)
            # Placeholder row for the day being predicted; its count is
            # filled in once the prediction is known.
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            last_row = part_data.shape[0] - 1
            x = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder,
                                  day_back_num, hot_encoder, other_features,
                                  part_data, 0, shop_features, shop_info, j,
                                  last_row, None, weather, weekOrWeekend,
                                  week_backnum)[0]
            x2 = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder, 0,
                                   hot_encoder, other_features, part_data,
                                   sameday_backNum, shop_features, shop_info,
                                   j, last_row, None, weather, weekOrWeekend,
                                   week_backnum)[0]
            x = x_scaler.transform(x)
            x2 = x2_scaler.transform(x2)
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            x2 = np.reshape(x2, (x2.shape[0], x2.shape[1], 1))
            predict = model.predict([x, x2])

            # Undo the label scaling.
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]

            # Clamp before the value is fed back as a feature. This used to
            # be the no-op comparison `predict == 0`.
            if predict <= 0:
                predict = 0
            if addNoiseInResult:
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
            print("%s,score: %s" %
                  (j, scoreoneshop(preficts, last_14_real_y)))

    # Rebuild the list of shops that were actually predicted.
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except ValueError:
                # The ignored id was not among the selected shops.
                pass
    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)

    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                % (last_N_days, sameday_backNum, day_back_num,
                   train_x.shape[1], cate_level, cate_name, epochs,
                   batch_size, h1_unit, h1_activation, len(shopids))
        else:
            # Recover the hyper-parameters from the model file name.
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+).json"""
            )
            m = r.search(model_path)
            if m is None:
                raise ValueError(
                    "model_path does not match the model file naming scheme: "
                    + model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                % (int(m.group(1)), int(m.group(2)), int(m.group(3)),
                   int(m.group(4)), int(m.group(5)), m.group(6),
                   int(m.group(7)), int(m.group(8)), int(m.group(9)),
                   m.group(10), len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        path = path + "_%dtime" % time
        if trainAsTest:
            path = path + "_train"
        path += ".csv"
        print("save in : %s" % (path,))
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
'K_MIN': symbolset.ix[i, 'K_MIN'], 'startdate': symbolset.ix[i, 'startdate'], 'enddate': symbolset.ix[i, 'enddate'], 'result_para_dic': Parameter.result_para_dic } forward_mode_dic = {} for k, v in Parameter.forward_mode_para_dic.items(): enable = symbolset.ix[i, k] if enable: sub_stop_loss_dic = {} for k1 in v.keys(): if k1 == k: sub_stop_loss_dic[k1] = True else: sub_stop_loss_dic[ k1] = Parameter.para_str_to_float( symbolset.ix[i, k1]) forward_mode_dic[k] = sub_stop_loss_dic symbol_para_dic['forward_mode_dic'] = forward_mode_dic strategy_forward_para.append(symbol_para_dic) strategyParameterSet[strategy_name] = strategy_forward_para for strategy_name, strategy_bt_parameter in strategyParameterSet.items(): strategy_folder = "%s%s\\" % (Parameter.root_path, strategy_name) for strategyParameter in strategy_bt_parameter: strategy_name = strategyParameter['strategy_name'] exchange_id = strategyParameter['exchange_id'] sec_id = strategyParameter['sec_id'] bar_type = strategyParameter['K_MIN'] startdate = strategyParameter['startdate'] enddate = strategyParameter['enddate']
def predictAllShop_LC_HPS(all_data,
                          trainAsTest=False,
                          saveFilePath=None,
                          featurePath=None,
                          cate_level=0,
                          cate_name=None,
                          featureSavePath=None,
                          needSaveFeature=False,
                          ignore_shopids=None,
                          needCV=False,
                          model_path=None,
                          Augmented=False,
                          ignore_get_train=True,
                          ignore_predict=True,
                          addNoiseInResult=False,
                          time=1):
    """Train (or load) the locally-connected net and forecast 14 days per shop.

    Features are built per shop (or read from a feature CSV), min-max scaled
    together with the label, reshaped to ``(samples, features, 1)`` and fed to
    ``create_model_LocallyConnected``. Each shop is then forecast one day at a
    time; every prediction is written back into the shop's data so that it
    becomes a feature for the following days.

    :param all_data: table with at least the columns ``shopid`` and
        ``count``; rows with ``count``/``shopid``/``time``/``weekday`` are
        appended to it during prediction.
    :param trainAsTest: if true, hold back the last 14 rows of every shop as
        ground truth and start the forecast on 2016-10-18 instead of
        2016-11-1.
    :param saveFilePath: path prefix of the result CSV; nothing is written
        when ``None``.
    :param featurePath: name of a feature CSV (inside the project's feature
        directory) to load instead of extracting features.
    :param cate_level: 0 selects shop ids 1..2000, anything else selects the
        shops returned by ``Parameter.extractShopValueByCate``.
    :param cate_name: category name passed to
        ``Parameter.extractShopValueByCate``; also part of the file names.
    :param featureSavePath: where to store the feature CSV; a default name is
        derived when ``None``.
    :param needSaveFeature: store the extracted features and the label.
    :param ignore_shopids: shop ids to leave out (defaults to none).
    :param needCV: run a ``GridSearchCV`` to pick epochs, batch size and
        ``h1_unit`` instead of using the fixed values.
    :param model_path: load this model with ``getModel`` instead of training.
    :param Augmented: append ``augument_time`` copies of every sample whose
        label is scaled by 1.05.
    :param ignore_get_train: skip ``ignore_shopids`` while extracting
        training samples.
    :param ignore_predict: skip ``ignore_shopids`` while predicting.
    :param addNoiseInResult: inflate every prediction by a small random,
        non-negative factor that grows with the forecast day.
    :param time: integer; sets ``h1_unit`` (``16 + time * 4``) when no grid
        search is run and is part of the result file name.
    :return: integer array with one row per predicted shop: shop id, the 14
        predictions and, with ``trainAsTest``, the 14 real values.
    :raises ValueError: if ``model_path`` does not follow the model file
        naming scheme while a result file is requested.
    """
    if ignore_shopids is None:
        ignore_shopids = []

    augument_time = 1
    verbose = 2
    last_N_days = 60
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])
    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 0
    week_backnum = 3
    # Statistic features and per-shop features are switched off.
    other_features = []
    shop_features = []
    # One-hot encoder for the "is weekend" flag.
    hot_encoder = onehot([[1], [0]])
    # One-hot encoder for the first-level shop category.
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    def build_features(shop_data, start, shop_id):
        """Build the feature matrix of one shop for the rows from ``start`` on.

        Shared by training and prediction so both produce the same columns
        in the same order.
        """
        x = None
        if sameday_backNum != 0:
            x = getOneWeekdayFomExtractedData(
                extractBackSameday(shop_data, sameday_backNum, start,
                                   nan_method_sameday_mean))
        if day_back_num != 0:
            day_x = getOneWeekdayFomExtractedData(
                extractBackDay(shop_data, day_back_num, start,
                               nan_method_sameday_mean))
            x = day_x if x is None else np.concatenate((x, day_x), axis=1)
        if weekOrWeekend:
            ws = getOneWeekdayFomExtractedData(
                extractWorkOrWeekend(shop_data, start))
            x = np.concatenate((x, hot_encoder.transform(ws)), axis=1)
        for feature in other_features:
            value = getOneWeekdayFomExtractedData(
                extractBackWeekValue(shop_data, week_backnum, start,
                                     nan_method_sameday_mean, feature))
            x = np.append(x, value, axis=1)

        # Shop information; missing values count as 0.
        oneshopinfo = shop_info.ix[shop_id - 1]
        for column in ("perpay", "score", "comment", "level"):
            if column in shop_features:
                value = oneshopinfo[column]
                if pd.isnull(value):
                    value = 0
                x = np.insert(x, x.shape[1], value, axis=1)
        if "cate1" in shop_features:
            encoded = cate1_hot_encoder.transform(
                cate1_label_encoder.transform([oneshopinfo['cate1']]).reshape(
                    (-1, 1)))
            # A dedicated loop name: the caller's day index must not be
            # overwritten here.
            for col in range(encoded.shape[1]):
                x = np.insert(x, x.shape[1], encoded[0][col], axis=1)

        # Weather features.
        if weather:
            weathers = getOneWeekdayFomExtractedData(
                extractWeatherInfo(shop_data, start, oneshopinfo['cityname']))
            x = np.append(x, weathers, axis=1)
        return x

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if ignore_get_train and shopid in ignore_shopids:
                print("ignore get train %s" % (shopid,))
                continue
            print("get  %s  train" % (shopid,))
            part_data = all_data[all_data.shopid == shopid]
            if trainAsTest:
                # The last 14 days are the test set; train on what precedes.
                part_data = part_data[0:len(part_data) - 14]
            # Number of leading days to skip so only last_N_days are used.
            skipNum = part_data.shape[0] - last_N_days
            if skipNum < 0:
                skipNum = 0
            train_x = build_features(part_data, skipNum, shopid)
            train_y = getOneWeekdayFomExtractedData(
                extractCount(part_data, skipNum))
            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)
        train_x = all_x
        train_y = all_y

        # Augment the training set.
        if Augmented:
            print("augment data")
            end = train_x.shape[0]
            new_train_x = np.ndarray(
                (end * (augument_time + 1), train_x.shape[1]))
            new_train_y = np.ndarray(
                (train_y.shape[0] * (augument_time + 1), train_y.shape[1]))
            # The originals come first, the augmented copies follow.
            new_train_x[:end] = train_x
            new_train_y[:end] = train_y
            sert_index = end
            for index in range(end):
                print("%d / %d" % (index, end))
                for t in range(augument_time):
                    new_train_x[sert_index] = train_x[index]
                    # Same features, label scaled by a fixed 5 %.
                    new_train_y[sert_index] = [train_y[index][0] * 1.05]
                    sert_index += 1
            print("augment finish")
            train_x = new_train_x
            train_y = new_train_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                feature_dir = ("lzj/train_feature/"
                               if trainAsTest else "lzj/feature/")
                featureSavePath = Parameter.projectPath + feature_dir + \
                    "%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                if Augmented:
                    featureSavePath += ("_Augment%d" % augument_time)
                featureSavePath += ".csv"
            print("save feature in : %s" % (featureSavePath,))
            flDF.to_csv(featureSavePath)
    else:
        # A feature file was supplied: the last column is the label.
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]

    # Scale features and label; the fitted scalers are reused on the
    # prediction rows further down.
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)

    # The network expects (samples, features, 1).
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))

    if model_path is None:
        input_dim = (train_x.shape[1], train_x.shape[2])
        if needCV:
            # Grid search over batch size, epochs and hidden units.
            model = KerasRegressor(build_fn=create_model_LocallyConnected,
                                   verbose=verbose)
            param_grid = dict(batch_size=[3, 5, 7, 10],
                              nb_epoch=[10, 15, 20, 25, 30],
                              h1_unit=[8, 12, 16, 20],
                              input_shape=[input_dim])
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)
            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"
        else:
            h1_unit = 16 + (time) * 4
            h1_activation = "sigmoid"
            batch_size = 3
            epochs = 40

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_LocallyConnected(
            input_shape=input_dim,
            h1_unit=h1_unit,
            h1_activation=h1_activation)
        hist = best_model.fit(train_x,
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print(hist.history)

        # Save the model; the file name records the hyper-parameters.
        model_dir = "lzj/train_model/" if trainAsTest else "lzj/model/"
        model_save_path = Parameter.projectPath + model_dir + \
            "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
            % (last_N_days, sameday_backNum, day_back_num, train_x.shape[1],
               cate_level, cate_name, epochs, batch_size, h1_unit,
               h1_activation)
        saveModel(model_save_path, best_model)
    else:
        print("get model from " + model_path)
        best_model = getModel(model_path)

    date_format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", date_format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", date_format)
    one_day = datetime.timedelta(1)

    # Predict every shop.
    model = best_model
    preficts_all = None
    real_all = None
    for j in shopids:
        if ignore_predict and j in ignore_shopids:
            print("ignore predict %s" % (j,))
            continue
        print("predict: %s" % (j,))
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None
        if trainAsTest:
            # The last 14 days are the test set; train on what precedes them.
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]

        # Predict 14 days, one at a time.
        for i in range(14):
            currentTime = startTime + one_day * i
            strftime = currentTime.strftime(date_format)
            # Placeholder row for the day being predicted; its count is
            # filled in once the prediction is known.
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = build_features(part_data, part_data.shape[0] - 1, j)
            x = x_scaler.transform(x)
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            predict = model.predict(x)

            # Undo the label scaling.
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]

            # Clamp before the value is fed back as a feature. This used to
            # be the no-op comparison `predict == 0`.
            if predict <= 0:
                predict = 0
            if addNoiseInResult:
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
            print("%s,score: %s" %
                  (j, scoreoneshop(preficts, last_14_real_y)))

    # Rebuild the list of shops that were actually predicted.
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except ValueError:
                # The ignored id was not among the selected shops.
                pass
    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)

    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                % (last_N_days, sameday_backNum, day_back_num,
                   train_x.shape[1], cate_level, cate_name, epochs,
                   batch_size, h1_unit, h1_activation, len(shopids))
        else:
            # Recover the hyper-parameters from the model file name.
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+).json"""
            )
            m = r.search(model_path)
            if m is None:
                raise ValueError(
                    "model_path does not match the model file naming scheme: "
                    + model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                % (int(m.group(1)), int(m.group(2)), int(m.group(3)),
                   int(m.group(4)), int(m.group(5)), m.group(6),
                   int(m.group(7)), int(m.group(8)), int(m.group(9)),
                   m.group(10), len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        if trainAsTest:
            path = path + "_train"
        path = path + "_%dtime.csv" % time
        print("save in : %s" % (path,))
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
def startElement(self, name, attrs):
    """Process the opening tag of one element of the parameter definition XML.

    The signature matches ``xml.sax`` ``ContentHandler.startElement``;
    presumably this is called by a SAX parser (the handler class is not
    visible from here).

    Depending on the element name this either registers a managed object or
    child managed object, creates and stores a new ``Parameter.Parameter``
    for a ``p`` element, or copies attributes of a nested element
    (``creation``, ``modification``, ``feature``, ``interface``,
    ``property``, ``simpleType``, ``complexType``, ``editing``, ``range``,
    ``enumeration``, ``bit``, ``default``, ``special``, ``minLength``,
    ``maxLength``) onto the parameter returned by ``__getParamInstance()``.

    Args:
        name: element name.
        attrs: attribute mapping of the element; only ``get`` and ``keys``
            are used.

    Side effects:
        Calls ``sys.exit(1)`` when parameters are nested more than two
        levels deep.
    """
    if name == 'managedObject':
        managedObject = attrs.get('class', None)
        self.__managedObjectList.append(managedObject)
        self.__currentObject = managedObject
        # new object, reset child object list
        self.__currentChildObjectList = []
        # no params yet
        self.__paramNameDictionary[self.__currentObject] = []

    if name == 'childManagedObject':
        child = attrs.get('class', None)
        self.__currentChildObjectList.append(child)

    if name == 'p':
        self.__currentParamLevel += 1
        # Keep the parameter's own name in a separate variable: rebinding
        # `name` here would make a parameter called e.g. 'default' or
        # 'range' fall into the element branches further down.
        paramName = attrs.get('name', None)
        fullName = attrs.get('fullName', None)
        maxOccurs = attrs.get('maxOccurs', None)
        status = attrs.get('status', None)
        hidden = attrs.get('hidden', None)

        # skip parameter, if status is 'deleted' or
        # it is excluded (e.g. change origins)
        if status == 'deleted' or hidden == 'true':
            if not self.__skipParam:
                self.__skipParam = True
                # ignore inner elements too
                self.__skipLevel = self.__currentParamLevel
        if self.__skipParam:
            return

        # instantiate Parameter object to store the data
        paramInstance = Parameter.Parameter()
        paramInstance.setName(paramName)
        paramInstance.setFullName(fullName)
        paramInstance.setMaxOccurs(maxOccurs)
        paramInstance.setStatus(status)
        paramInstance.setManagedObject(self.__currentObject)

        # initialize children list to empty
        # e.g. child parameters are missing, but can't know if there are any yet
        self.__currentChildDictionary[self.__currentParamLevel] = []
        if self.__currentParamLevel > 0:
            self.__currentChildDictionary[self.__currentParamLevel - 1].append(paramName)
        self.__currentParamStackList.append(paramName)

        # RAML2.1: no nested lists anymore
        if len(self.__currentParamStackList) > 2:
            print(ERROR_MSG)
            print("multi-level parameter structure definition in PDDB, not allowed in RAML2.1!")
            print("managedObject: %s" % (self.__currentObject,))
            print("param stack: %s" % (self.__currentParamStackList,))
            sys.exit(1)

        parentParam = self.__getParenParam()
        paramInstance.setParentParam(parentParam)
        self.__storeParamInstance(paramInstance, paramName)

    # no need to check parameter value fields
    if self.__skipParam:
        return

    if name == 'creation':
        creationPriority = attrs.get('priority', None)
        paramInstance = self.__getParamInstance()
        # reference, so object in dictionary is updated
        paramInstance.setCreationPriority(creationPriority)

    if name == 'modification':
        modificationType = attrs.get('type', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setModificationType(modificationType)

    if name == 'feature':
        features = attrs.get('name', None)
        feaType = attrs.get('type', None)
        # PDDB definitions not very consistent: a 'standard' feature whose
        # name mentions 'optional' is treated as optional.
        if feaType == 'standard' and features is not None and 'optional' in features:
            feaType = 'optional'
        # ignore standard features; a missing type attribute counts as
        # non-standard instead of raising AttributeError on None
        if features is not None and (feaType is None or 'standard' not in feaType):
            paramInstance = self.__getParamInstance()
            paramInstance.setFeatures(features)

    # one-way parameters should not be present in download plan
    if name == 'interface':
        source = attrs.get('source', None)
        target = attrs.get('target', None)
        bidirection = attrs.get('bidirectional', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setInterface()

        if (source == 'RAC' and target == 'RNC') or (source == 'RNC' and target == 'RAC') or \
           (source == 'RAC' and target == 'IADA') or (source == 'IADA' and target == 'RAC'):
            if bidirection == 'no':
                paramInstance.setPlanDirection('uni')  # unidirectional
                if (source == 'RAC' and target == 'RNC') or (source == 'RAC' and target == 'IADA'):
                    paramInstance.setInterfaceDirection('uni-down')
                else:
                    paramInstance.setInterfaceDirection('uni-up')
            else:
                paramInstance.setPlanDirection('bi')  # bidirectional
                paramInstance.setInterfaceDirection('bi')

        if (source == 'EM' and target == 'RNC') or (source == 'RNC' and target == 'EM'):
            if bidirection == 'no':
                paramInstance.setGuiDirection('uni')  # unidirectional
            else:
                paramInstance.setGuiDirection('bi')  # bidirectional

    if name == 'property':
        location = attrs.get('name', None)
        if location == 'Location in GUI':
            value = attrs.get('value', None)
            paramInstance = self.__getParamInstance()
            paramInstance.setLocationInGui(value)

    if name == 'simpleType':
        valueBase = attrs.get('base', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setParamType(name)
        paramInstance.setValueBase(valueBase)

    if name == 'complexType':
        paramInstance = self.__getParamInstance()
        paramInstance.setParamType(name)

    if name == 'editing':
        for attrName in attrs.keys():
            if attrName == 'divisor':
                paramInstance = self.__getParamInstance()
                paramInstance.setDivisor(attrs.get(attrName))
            if attrName == 'shift':
                paramInstance = self.__getParamInstance()
                paramInstance.setShift(attrs.get(attrName))
            if attrName == 'multiplicand':
                paramInstance = self.__getParamInstance()
                paramInstance.setMultiplicand(attrs.get(attrName))
            if attrName == 'internalValue':
                paramInstance = self.__getParamInstance()
                paramInstance.setInternalValue(attrs.get(attrName))

    if name == 'range':
        for attrName in attrs.keys():
            if attrName == 'minIncl':
                paramInstance = self.__getParamInstance()
                paramInstance.setMinValue(attrs.get(attrName))
            if attrName == 'maxIncl':
                paramInstance = self.__getParamInstance()
                paramInstance.setMaxValue(attrs.get(attrName))
            if attrName == 'step':
                paramInstance = self.__getParamInstance()
                paramInstance.setStep(attrs.get(attrName))

    if name == 'enumeration':
        enum = attrs.get('value', None)
        text = attrs.get('text', "")
        paramInstance = self.__getParamInstance()
        paramInstance.addEnumerationValue(enum)
        paramInstance.addEnumerationText(enum, text)

    if name == 'bit':
        default = attrs.get('default', None)
        fixed = attrs.get('fixed', False)
        paramInstance = self.__getParamInstance()
        paramInstance.addBitValue(default, fixed)

    if name == 'default':
        # default value inside 'bit' element
        defaultValue = attrs.get('value', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setDefaultValue(defaultValue)

    if name == 'special':
        special = attrs.get('value', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setSpecialValue(special)

    if name == 'minLength':
        minLength = attrs.get('value', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setMinValue(minLength)

    if name == 'maxLength':
        maxLength = attrs.get('value', None)
        paramInstance = self.__getParamInstance()
        paramInstance.setMaxValue(maxLength)
##
## This module has parameter definitions for all
## parameters used in this code.
##
## Change here for bounds, or import and rewrite.
##
##

from Parameter import *

## Parameter arguments are name, value, variation, bounds, followed by a
## LaTeX-style label string.  The labels are raw strings so that the
## backslashes are never read as (invalid) escape sequences.
Om_par = Parameter("Om", 0.3038, 0.1, (0.05, 1.5), r"\Omega_m")
Obh2_par = Parameter("Obh2", 0.02234, 0.0002, (0.02, 0.025), r"\Omega_{b}h^2")
h_par = Parameter("h", 0.6821, 0.05, (0.4, 1.0), "h")
mnu_par = Parameter("mnu", 0.06, 0.1, (0, 1.0), r"\Sigma m_{\nu}")
Nnu_par = Parameter("Nnu", 3.046, 0.5, (3.046, 5.046), r"N_{\rm eff}")
Ok_par = Parameter("Ok", 0.0, 0.1, (-1.5, 1.5), r"\Omega_k")
w_par = Parameter("w", -1.0, 0.1, (-2.0, 0.0), "w_0")
wa_par = Parameter("wa", 0.0, 0.1, (-2.0, 2.0), "w_a")

## this is the prefactor parameter c/rdH0
Pr_par = Parameter("Pr", 28.6, 4, (5, 70), "c/(H_0r_d)")

## Poly Cosmology Parameters
Om1_par = Parameter("Om1", 0.0, 0.1, (-3, 3), r"\Omega_1")
Om2_par = Parameter("Om2", 0.0, 0.1, (-3, 3), r"\Omega_2")

## JordiCDM Cosmology Parameters
q_par = Parameter("q", 0.0, 0.2, (0, 1), "q")
za_par = Parameter("za", 3, 1.0, (2, 10), "z_a")
paramType = "prtype"
paramName = "prname"

tree = ET.ElementTree(file='protocol.xml')
root = tree.getroot()

# Read all classes: every direct child of the root carrying the class tag
# becomes a ClassNode, filled with its methods and their parameters.
cVector = []
for elem in root:
    if elem.tag != classTag:
        continue
    tClass = N.ClassNode(elem.attrib[className])
    for methods in elem:
        if methods.tag != methodTag:
            continue
        tMethods = M.Method(methods.attrib[methodName],
                            methods.attrib[methodFlow])
        for params in methods:
            if params.tag != paramTag:
                continue
            tParam = Parameter.Parameter(params.attrib[paramName],
                                         params.attrib[paramType])
            tMethods.appendPara(tParam)
        tClass.addMethod(tMethods)
    cVector.append(tClass)

print(cVector)

# Display every class first, then run the decoder over each of them.
for t in cVector:
    t.display()

for t in cVector:
    dec = decode.Decode(t)
    dec.decode()
def main(self, is_all):
    """
    Run AD main function.

    Submits one request per country group through ``self.Exce``, parses each
    JSON reply, and publishes the job ids, the number of submitted jobs and
    the per-job ad totals through the ``Global`` setters.

    :param is_all: when its string form equals "all", a single request for
        all countries is sent to master 1; otherwise JPN goes to master 1
        and CHN, USA and Other go to master 2.
    :return: None
    """
    job_ids = []
    job_totals = {}
    country_params = Parameter.parameter(Global.get_comm_week(), 0)

    if str(is_all) == str("all"):
        payload = country_params.AllCountry()
        reply = json.loads(self.Exce(Global.get_master_1_url(), payload))
        job_ids = [reply["data"]["jobExecutationId"]]
        submitted = 1
        job_totals[reply["data"]["jobExecutationId"]] = \
            reply["data"]["totalAdCount"]
        logger.info(reply)
    else:
        # (payload builder on the parameter object, URL getter on Global);
        # both are looked up lazily so the call order matches one request
        # after the other.
        schedule = (
            ("JPN", "get_master_1_url"),
            ("CHN", "get_master_2_url"),
            ("USA", "get_master_2_url"),
            ("Other", "get_master_2_url"),
        )
        for builder_name, url_getter_name in schedule:
            payload = getattr(country_params, builder_name)()
            raw_reply = self.Exce(getattr(Global, url_getter_name)(), payload)
            reply = json.loads(raw_reply)
            # NOTE(review): each id is appended wrapped in its own list, so
            # this branch yields [[id], [id], ...] while the "all" branch
            # yields [id] -- confirm which shape the consumers expect.
            job_ids.append([reply["data"]["jobExecutationId"]])
            job_totals[reply["data"]["jobExecutationId"]] = \
                reply["data"]["totalAdCount"]
            logger.info(reply)
        submitted = 4

    Global.set_job_id_list(job_ids)
    Global.set_job_done(submitted)
    Global.set_job_total_count(job_totals)
##
## This module has parameter definitions for all
## parameters used in this code.
##
## Change here for bounds, or import and rewrite.
##
##

from Parameter import *

## Parameter arguments are name, value, variation, bounds, followed by a
## LaTeX-style label string.  The labels are raw strings so that the
## backslashes are never read as (invalid) escape sequences.
#0.3038, 0.02234, 0.6821
Om_par = Parameter("Om", 0.3038, 0.05, (0.05, 1.5), r"\Omega_m*")
Obh2_par = Parameter("Obh2", 0.02234, 0.0002, (0.02, 0.025), r"\Omega_bh^2")
h_par = Parameter("h", 0.6821, 0.05, (0.4, 1.0), "h")
mnu_par = Parameter("mnu", 0.06, 0.1, (0, 1.0), r"\Sigma m_{\nu}")
Nnu_par = Parameter("Nnu", 3.046, 0.5, (3.046, 5.046), r"N_{\rm eff}")
Ok_par = Parameter("Ok", 0.0, 0.1, (-1.5, 1.5), r"\Omega_k")
w_par = Parameter("w", 1.0, 0.1, (-0.5, 2.0), "w_0")
wa_par = Parameter("wa", 1.0, 0.1, (-0.5, 2.0), "w_a")
wb_par = Parameter("wb", 0.7, 0.2, (-2., 3.0), "w_b")
wc_par = Parameter("wc", 0.7, 0.2, (-3., 5.0), "w_c")

## this is the prefactor parameter c/rdH0
Pr_par = Parameter("Pr", 28.6, 4, (5, 70), "c/(H_0r_d)")

## Poly Cosmology Parameters
Om1_par = Parameter("Om1", 0.0, 0.1, (-3, 3), r"\Omega_1")
Om2_par = Parameter("Om2", 0.0, 0.1, (-3, 3), r"\Omega_2")
def initialize():
    """Create the parameter object and the units object built from it.

    Returns:
        tuple: ``(pm, us)`` -- the ``Parameter.Parameter()`` instance and the
        ``units.units`` instance constructed from that parameter object.
    """
    # The parameters are initialised first (in Parameter.py) ...
    parameters = Parameter.Parameter()
    # ... then the units are initialised from them.
    unit_system = units.units(parameters)
    return parameters, unit_system
import TradeCost import Environment import Condition import MoneyManager import Signal import Stoploss import ProfitGoal import Slippage import AllocateFunds if __name__ == "__main__": suite = unittest.TestSuite() suite.addTest(Datetime.suite()) suite.addTest(Parameter.suite()) suite.addTest(MarketInfo.suite()) suite.addTest(StockTypeInfo.suite()) suite.addTest(Stock.suite()) suite.addTest(KData.suite()) suite.addTest(Indicator.suite()) suite.addTest(TradeCost.suite()) suite.addTest(Environment.suite()) suite.addTest(Environment.suiteTestCrtEV()) suite.addTest(Condition.suite()) suite.addTest(Condition.suiteTestCrtCN()) suite.addTest(MoneyManager.suite()) suite.addTest(MoneyManager.suiteTestCrtMM()) suite.addTest(Signal.suite())
def predictAllShop_ANN2_HPS(all_data,
                            trainAsTest=False,
                            saveFilePath=None,
                            featurePath=None,
                            cate_level=0,
                            cate_name=None,
                            featureSavePath=None,
                            needSaveFeature=False,
                            ignore_shopids=None,
                            needCV=False,
                            model_path=None):
    """
    Train (optionally with a grid search over hyper-parameters) or load a
    model, then predict 14 days of counts for every selected shop.

    :param all_data: table of all shops; it is filtered with
        ``all_data.shopid == shopid`` and its "count" column is read. Rows
        with the keys count/shopid/time/weekday are appended while predicting.
    :param trainAsTest: use the last 14 days of each shop as the test set
        (training uses the part before them; prediction then starts at
        2016-10-18 instead of 2016-11-1)
    :param saveFilePath: prefix of the result CSV; nothing is saved when None
    :param featurePath: file name of previously saved features (below
        ``Parameter.projectPath``); features are extracted when None
    :param cate_level: 0 selects shops 1..2000, anything else selects shops
        through ``Parameter.extractShopValueByCate(cate_level, cate_name)``
    :param cate_name: category name passed along with ``cate_level``
    :param featureSavePath: where to write the extracted features; a default
        path is built when None
    :param needSaveFeature: write the extracted features and labels to CSV
    :param ignore_shopids: shop ids to leave out of training and prediction
        (None means none are ignored)
    :param needCV: run ``GridSearchCV`` and train with its best parameters
    :param model_path: load this model with ``getModel`` instead of training
    :return: integer array with one row per predicted shop: the shop id, the
        14 predictions and, when ``trainAsTest``, the 14 real values
    """
    import os
    import warnings

    # A shared mutable default list is avoided; None means "ignore nothing".
    if ignore_shopids is None:
        ignore_shopids = []

    verbose = 2
    last_N_days = 70
    shop_need_to_predict = 2000
    # `==` rather than `is`: identity with an int literal is not reliable.
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    # Feature switches. The second assignment of other_features and
    # shop_features deliberately disables the sets listed just above them.
    weather = True
    weekOrWeekend = True
    day_back_num = 21
    sameday_backNum = 7
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []

    # one-hot encoder for the workday/weekend flag
    hot_encoder = onehot([[1], [0]])
    # one-hot encoder for category 1
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if shopid in ignore_shopids:
                print("ignore get train %s" % (shopid,))
                continue
            print("get  %s  train" % (shopid,))
            part_data = all_data[all_data.shopid == shopid]
            # take out the part used for training
            if trainAsTest:
                # the last 14 days are the test set, the rest is training data
                part_data = part_data[0:len(part_data) - 14]

            # decide how many leading days to skip
            skipNum = part_data.shape[0] - last_N_days
            if skipNum < 0:
                skipNum = 0

            # extract the features
            train_x = None
            if sameday_backNum != 0:
                # same weekday of the previous weeks
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:
                # previous days
                back_days = getOneWeekdayFomExtractedData(
                    extractBackDay(part_data, day_back_num, skipNum,
                                   nan_method_sameday_mean))
                if train_x is not None:
                    train_x = np.concatenate((train_x, back_days), axis=1)
                else:
                    train_x = back_days
            if weekOrWeekend:
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                train_x = np.concatenate((train_x, hot_encoder.transform(ws)),
                                         axis=1)

            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)

            # add the shop information
            index = shopid - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo['cityname']
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            shop_cate1 = oneshopinfo['cate1']
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=DeprecationWarning)
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]))
            if "perpay" in shop_features:
                train_x = np.insert(train_x, train_x.shape[1], shop_perpay,
                                    axis=1)
            if "score" in shop_features:
                train_x = np.insert(train_x, train_x.shape[1], shop_score,
                                    axis=1)
            if "comment" in shop_features:
                train_x = np.insert(train_x, train_x.shape[1], shop_comment,
                                    axis=1)
            if "level" in shop_features:
                train_x = np.insert(train_x, train_x.shape[1], shop_level,
                                    axis=1)
            if "cate1" in shop_features:
                for col in range(shop_cate1_encoder.shape[1]):
                    train_x = np.insert(train_x, train_x.shape[1],
                                        shop_cate1_encoder[0][col], axis=1)
            # shop information done

            # weather feature
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, skipNum, shop_city))
                train_x = np.append(train_x, weathers, axis=1)

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

        train_x = all_x
        train_y = all_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                feature_dir = "lzj/train_feature/" if trainAsTest else "lzj/feature/"
                featureSavePath = Parameter.projectPath + feature_dir + \
                    "%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast.csv" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            flDF.to_csv(featureSavePath)
    else:
        # a feature file was given: the last column is the label
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]

    # scale features and labels
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)

    if model_path is None:
        if needCV:
            # grid search over the hyper-parameters
            param_grid = dict(batch_size=[3, 5, 7, 10],
                              nb_epoch=[10, 15, 20, 25, 30, 40],
                              h1_unit=[8, 12, 16, 20],
                              h2_unit=[2, 4, 6, 8, 10],
                              input_dim=[train_x.shape[1]])
            model = KerasRegressor(build_fn=create_model2, verbose=verbose)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)
            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, _mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h2_unit = grid_result.best_params_["h2_unit"]
        else:
            h1_unit = 12
            h2_unit = 8
            batch_size = 3
            epochs = 80
        input_dim = train_x.shape[1]
        h1_activation = "relu"

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model2(input_dim=input_dim,
                                   h1_unit=h1_unit,
                                   h2_unit=h2_unit)
        hist = best_model.fit(train_x,
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print(hist.history)

        # save the model
        model_dir = "lzj/train_model/" if trainAsTest else "lzj/model/"
        model_save_path = Parameter.projectPath + model_dir + \
            "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%d_%s.json" \
            % (last_N_days, sameday_backNum, day_back_num, train_x.shape[1],
               cate_level, cate_name, epochs, batch_size, h1_unit,
               h1_activation, h2_unit, h1_activation)
        print("model save in : %s" % (model_save_path,))
        saveModel(model_save_path, best_model)

        result_stem = "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%d_%s" \
            % (last_N_days, sameday_backNum, day_back_num, train_x.shape[1],
               cate_level, cate_name, epochs, batch_size, h1_unit,
               h1_activation, h2_unit, h1_activation)
    else:
        # a saved model was given; no hyper-parameters are known here, so the
        # result file is named after the model file instead of failing with a
        # NameError when the result is saved.
        best_model = getModel(model_path)
        result_stem = os.path.splitext(os.path.basename(model_path))[0]

    date_format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", date_format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", date_format)
    one_day = datetime.timedelta(1)

    # predict every shop
    model = best_model
    preficts_all = None
    real_all = None
    # Ids of the shops that were actually predicted. Counting them here keeps
    # the row count right even when the training loop above did not run.
    predicted_shopids = []
    for j in shopids:
        if j in ignore_shopids:
            print("ignore predict %s" % (j,))
            continue
        print("predict: %s" % (j,))
        predicted_shopids.append(j)
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None
        if trainAsTest:
            # the last 14 days are the test set, the rest is training data
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]

        # predict 14 days, feeding each prediction back as the next input
        for i in range(14):
            currentTime = startTime + one_day * i
            day_str = currentTime.strftime(date_format)
            # placeholder row for the day to predict
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": day_str,
                    "weekday": getWeekday(day_str)
                },
                ignore_index=True)
            last_row = part_data.shape[0] - 1

            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum, last_row,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                back_days = getOneWeekdayFomExtractedData(
                    extractBackDay(part_data, day_back_num, last_row,
                                   nan_method_sameday_mean))
                if x is None:
                    x = back_days
                else:
                    x = np.concatenate((x, back_days), axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(part_data, last_row)))),
                    axis=1)
            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, last_row,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)

            # add the shop information
            index = j - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo["cityname"]
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            if "perpay" in shop_features:
                x = np.insert(x, x.shape[1], shop_perpay, axis=1)
            if "score" in shop_features:
                x = np.insert(x, x.shape[1], shop_score, axis=1)
            if "comment" in shop_features:
                x = np.insert(x, x.shape[1], shop_comment, axis=1)
            if "level" in shop_features:
                x = np.insert(x, x.shape[1], shop_level, axis=1)
            shop_cate1 = oneshopinfo['cate1']
            if "cate1" in shop_features:
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]).reshape(
                        (-1, 1)))
                # separate index name so the day counter `i` is not reused
                for col in range(shop_cate1_encoder.shape[1]):
                    x = np.insert(x, x.shape[1], shop_cate1_encoder[0][col],
                                  axis=1)
            # shop information done

            # weather feature
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, last_row, shop_city))
                x = np.append(x, weathers, axis=1)

            x = x_scaler.transform(x)
            predict = model.predict(x)
            # undo the label scaling
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]

            if predict <= 0:
                # was `predict == 1`, a comparison with no effect
                predict = 1
            preficts.append(predict)
            part_data.set_value(last_row, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all, preficts_all.shape[0],
                                     preficts, axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(
                toInt(np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all, real_all.shape[0],
                                     last_14_real_y, axis=0)
            print("%s,score: %s" % (j, scoreoneshop(preficts,
                                                     last_14_real_y)))

    # one row of 14 predictions per predicted shop
    preficts_all = preficts_all.reshape((len(predicted_shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(predicted_shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    preficts_all = np.insert(preficts_all, 0, predicted_shopids, axis=1)

    if saveFilePath is not None:
        path = saveFilePath + result_stem + \
            "_%dshops" % len(predicted_shopids)
        if trainAsTest:
            path = path + "_train.csv"
        else:
            path = path + ".csv"
        print("save in : %s" % (path,))
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
i += 1 finalresults = ("%s %s %d finalresult.csv" % (strategyName, domain_symbol, K_MIN)) resultlist.to_csv(finalresults) return resultlist if __name__ == '__main__': # ====================参数和文件夹设置====================================== indexcols = Parameter.ResultIndexDic strategyParameterSet = {} if not Parameter.multi_symbol_bt_swtich: strategy_bt_parameter = [] # 策略参数设置 strategy_name = Parameter.strategy_name strategy_para_name_list = Parameter.get_strategy_para_name_list( strategy_name) # 单品种单周期模式 default_para_dic = Parameter.strategy_para_dic[strategy_name] paradic = { 'strategy_name': strategy_name, 'exchange_id': Parameter.exchange_id, 'sec_id': Parameter.sec_id, 'K_MIN': Parameter.K_MIN, 'startdate': Parameter.startdate, 'enddate': Parameter.enddate, 'result_para_dic': Parameter.result_para_dic, 'new_para': default_para_dic['new_para'] } if default_para_dic['new_para']: # 参数新增模式下,加载默认参数 for para_name in strategy_para_name_list:
def Main(contigfile_, tuple_of_bamfiles, tuple_of_means, tuple_of_thresholds,
         edge_support, read_len, cont_threshold, ratio, output_dest, std_dev,
         covcutoff, haplratio, haplthreshold, detect_haplotype,
         detect_duplicate, gff_file, fosmidpool, mapquality):
    """Run the scaffolding passes, one per library, and write the results.

    For every library (one entry of ``tuple_of_bamfiles``) the per-library
    settings are copied onto a ``Parameter.parameter`` object, a graph is
    built with ``CreateGraph.PE``, scaffolds are made with
    ``MakeScaffolds.Algorithm`` and the state after that pass is written with
    ``GenerateOutput.PrintOutput``. Timing information goes to
    ``Statistics.txt`` inside ``output_dest``.

    Args:
        contigfile_: path of the contig sequence file.
        tuple_of_bamfiles, tuple_of_means, tuple_of_thresholds, edge_support,
        read_len, cont_threshold, std_dev, covcutoff: per-library sequences,
            indexed by the library number.
        ratio, haplratio, haplthreshold, detect_haplotype, detect_duplicate,
        gff_file, fosmidpool, mapquality: settings shared by all libraries.
        output_dest: output directory; created when it does not exist.
    """
    from time import time
    import CreateGraph as CG
    import MakeScaffolds as MS
    import GenerateOutput as GO
    import Parameter
    from copy import deepcopy

    tot_start = time()
    # list of (ordered) lists of tuples containing (contig_name, direction,
    # position, length, links). The tuple is a contig within a scaffold and
    # the list of tuples is the scaffold.
    F = []
    # scaffold dict with contig objects for easy fetching of all contigs in a scaffold
    Scaffolds = {}
    n = len(tuple_of_bamfiles)  # number of libraries we have
    # object containing all parameters (user specified, defaulted and
    # computed along the way.)
    param = Parameter.parameter()
    # global indicator for scaffolds, used to index scaffolds when they are created
    param.scaffold_indexer = 1
    param.map_quality = mapquality
    param.rel_weight = ratio
    Contigs = {}  # contig dict that stores contig objects

    if not os.path.exists(output_dest):
        os.makedirs(output_dest)

    # os.path.join needs the directory and the file name as separate
    # arguments; joining their concatenation put the file next to the output
    # directory whenever output_dest had no trailing separator.
    Information = open(os.path.join(output_dest, 'Statistics.txt'), 'w')
    param.information_file = Information
    try:
        # create (or truncate) the haplotype file
        with open(output_dest + '/haplotypes.fa', 'w'):
            pass

        # Read in the sequences of the contigs in memory
        with open(contigfile_, 'r') as contigfile:
            C_dict = ReadInContigseqs(contigfile)

        param.gff_file = gff_file

        # iterate over libraries
        param.first_lib = True
        for i in range(0, n):
            start = time()
            param.bamfile = tuple_of_bamfiles[i]
            param.mean_ins_size = tuple_of_means[i]
            param.ins_size_threshold = tuple_of_thresholds[i]
            param.edgesupport = edge_support[i]
            param.read_len = read_len[i]
            param.output_directory = output_dest
            param.std_dev_ins_size = std_dev[i]
            param.contig_threshold = cont_threshold[i]
            param.cov_cutoff = covcutoff[i]
            param.hapl_ratio = haplratio
            param.hapl_threshold = haplthreshold
            param.detect_haplotype = detect_haplotype
            param.detect_duplicate = detect_duplicate
            param.fosmidpool = fosmidpool

            Information.write('\nPASS ' + str(i + 1) + '\n\n' + '\n')
            print('Starting scaffolding with library:  %s' % (param.bamfile,))

            # Create graph, single out too short contigs/scaffolds and store
            # them in F
            (G, Contigs, Scaffolds, F, param) = CG.PE(
                Contigs, Scaffolds, F, Information, output_dest, C_dict, param)
            param.first_lib = False  # not the first lib any more
            if G is None:
                print('0 contigs/super-contigs passed the length criteria of this step. Exiting and printing results.. ')
                break
            elapsed = time() - start
            Information.write('Time elapsed for creating graph, iteration ' +
                              str(i) + ': ' + str(elapsed) + '\n' + '\n')

            start = time()
            # Make scaffolds, store the complex areas (consisting of
            # contig/scaffold) in F, store the created scaffolds in
            # Scaffolds, update Contigs
            (Contigs, Scaffolds, F, param) = MS.Algorithm(
                G, Contigs, Scaffolds, F, Information, C_dict, param)
            elapsed = time() - start
            Information.write('Time elapsed for making scaffolds, iteration ' +
                              str(i) + ': ' + str(elapsed) + '\n' + '\n')

            print('Writing out scaffolding results for step %s  ...' % (i + 1,))

            # Work on copies so that the state carried into the next library
            # is left untouched by the output step.
            Scaffolds_copy = deepcopy(Scaffolds)
            Contigs_copy = deepcopy(Contigs)
            F_copy = deepcopy(F)
            # iterate over a snapshot of the keys, so that keys can be
            # removed while iterating
            for scaffold_ in list(Scaffolds_copy.keys()):
                S_obj = Scaffolds_copy[scaffold_]
                # list of contig objects contained in scaffold object
                list_of_contigs = S_obj.contigs
                Contigs_copy, F_copy = GO.WriteToF(F_copy, Contigs_copy,
                                                   list_of_contigs)
                del Scaffolds_copy[scaffold_]
            GO.PrintOutput(F_copy, C_dict, Information, output_dest, param,
                           i + 1)

            ### Calculate stats for last scaffolding step
            # NOTE(review): placed inside the library loop because `param`
            # is only consumed by later passes -- confirm against the
            # original layout.
            scaf_lengths = [
                Scaffolds[scaffold_].s_length for scaffold_ in Scaffolds.keys()
            ]
            sorted_lengths = sorted(scaf_lengths, reverse=True)
            N50, L50 = CG.CalculateStats(sorted_lengths, param)
            param.current_L50 = L50
            param.current_N50 = N50

        elapsed = time() - tot_start
        Information.write('Total time for scaffolding: ' + str(elapsed) +
                          '\n' + '\n')
        print('Finished\n\n ')
    finally:
        # the statistics file was previously never closed
        Information.close()