def query_apis():
    from data_params import APIs
    for params in APIs:
        logger.info('requesting api: ' + params['api'])
        if params['api'] == 'wb':
            api_wb(params)

def mt_app_handler(msg):
    logger.debug('app handler')
    if len(msg.data):
        if msg.data[0] == TEMP_HUM_DATA:
            logger.debug('temp hum data')
            temp_hum_handler(msg)
            return
        if msg.data[0] == COOR_START:
            logger.warning('coor start')
            coor_start_handler()
            return
        if msg.data[0] == END_REPORT_STATUS_CMD:
            logger.warning('end report status')
            end_report_status_handler(msg)
            return
        if msg.data[0] == ROUTER_STATUS_CMD:
            logger.warning('router report status')
            router_report_status_handler(msg)
            return
        if msg.data[0] == MASTER_GET_ADDR_COUNT_CMD:
            logger.warning('addr info')
            addr_info_handler(msg)
            return
        if msg.data[0] == COOR_REPORT_NV_PARAMS_CMD:
            logger.warning('coor report nv params')
            coor_report_nv_params_handler(msg)
            return
        if msg.data[0] == SUCCESS:
            logger.info('cmd success')
            return
        logger.warning('unknown cmd id')
    else:
        logger.warning('app msg data empty')

def LoadConfig(json_file):
    global g_config
    logger.info("config file is " + json_file)
    with open(json_file, 'r') as b_oj:
        settings = json.load(b_oj)
    g_config = settings
    return g_config

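# Usage sketch ('config.json' is a placeholder path; the 'email' key matches
# the keys read by SendEMail further down):
config = LoadConfig('config.json')
logger.info(config.get('email', {}))
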
def prepare_data_params(PROJECT_NAME):
    try:
        shutil.copytree('data/.boiler_plate', 'data/' + PROJECT_NAME)
    except OSError:
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()
    path = 'data/' + PROJECT_NAME + '/data_params.py'
    rename_project(path, PROJECT_NAME)

def register_func(data_func):
    logger.info('register func name: %s, code: %s' % (data_func['name'], data_func['code']))
    handler_funcs[data_func['code']] = {
        'func': data_func['func'],
        'name': data_func['name']
    }
    # persist the handler table (assumes "import pickle" at module level)
    with open(handler_funcs_filename, 'wb') as f:
        pickle.dump(handler_funcs, f)

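# Usage sketch: a hypothetical handler registered under an illustrative code.
# Assumes handler_funcs / handler_funcs_filename are defined at module level,
# as register_func implies.
def on_ping(msg):
    logger.info('ping received')

register_func({'name': 'ping', 'code': 0x01, 'func': on_ping})
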
def pars_img():
    """Fetches the schedule image from the site."""
    try:
        image = soup.find(class_="page_raspis_block_img").find("img").get("src")
        logger.info("Starting file download")
        urlretrieve(image, filename="schedule/schedule.png")
    except NotSchedule:
        logger.warning("NOT schedule")

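# `soup` is assumed to be a module-level BeautifulSoup of the schedule page,
# built elsewhere along these lines (SCHEDULE_URL is a placeholder):
# soup = BeautifulSoup(requests.get(SCHEDULE_URL).text, 'html.parser')
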
def prepare_template(PROJECT_NAME):
    try:
        shutil.copyfile('app/templates/.boiler_plate.html', 'app/templates/' + PROJECT_NAME + '.html')
    except OSError:
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()
    template_path = 'app/templates/' + PROJECT_NAME + '.html'
    rename_project(template_path, PROJECT_NAME)

def __init__(self, feature_size, field_size,
             embedding_size=8, dropout_fm=[1.0, 1.0],
             deep_layers=[32, 32], dropout_deep=[0.5, 0.5, 0.5],
             deep_layers_activation=tf.nn.relu,
             epoch=10, batch_size=256,
             learning_rate=0.001, optimizer_type="adam",
             batch_norm=0, batch_norm_decay=0.995,
             verbose=False, random_seed=2016,
             use_fm=True, use_deep=True,
             loss_type="logloss", eval_metric=roc_auc_score,
             l2_reg=0.0, greater_is_better=True):
    assert use_fm or use_deep
    assert loss_type in ["logloss", "mse"], \
        "loss_type can be either 'logloss' for classification task or 'mse' for regression task"

    self.feature_size = feature_size  # denote as M=259, size of the feature dictionary
    self.field_size = field_size      # denote as F=39, size of the feature fields
    logger.info("{} / {}".format(feature_size, field_size))
    self.embedding_size = embedding_size  # denote as K, size of the feature embedding
    self.dropout_fm = dropout_fm
    self.deep_layers = deep_layers
    self.dropout_deep = dropout_deep
    self.deep_layers_activation = deep_layers_activation
    self.use_fm = use_fm
    self.use_deep = use_deep
    self.l2_reg = l2_reg
    self.epoch = epoch
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.optimizer_type = optimizer_type
    self.batch_norm = batch_norm
    self.batch_norm_decay = batch_norm_decay
    self.verbose = verbose
    self.random_seed = random_seed
    self.loss_type = loss_type
    self.eval_metric = eval_metric
    self.greater_is_better = greater_is_better
    self.train_result, self.valid_result = [], []

    self._init_graph()

def LoadCheckPoint():
    global g_file_name
    global g_cur_point
    fd = open(g_file_name, "a+")
    fd.seek(0, 0)
    ct = fd.readline()
    if ct != "":
        g_cur_point = ct.split(',')[1].strip("\n")
        logger.info("get record check point from " + g_file_name + ", url: " + ct)
    fd.close()

def GetPageCnt(base_url):
    logger.info("get page count, url: " + base_url)
    html = request_common.GetHtml(base_url)
    soup = BeautifulSoup(html, "html.parser")
    datanow = soup.find(name="div", attrs={"class": "spagenext"})
    # e.g. "页数 1/14 总数:189套(含限制房产)"
    # ("Page 1/14, total: 189 units, incl. restricted properties")
    a = datanow.span.text
    logger.info(a)
    cnt = a.split("/")[1].split(" ")[0]
    return cnt

def prepare_static(PROJECT_NAME):
    try:
        shutil.copytree('app/static/viz/.boiler_plate', 'app/static/viz/' + PROJECT_NAME)
    except OSError as e:
        if e.strerror == 'File exists':
            logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
            sys.exit()
        else:
            raise
    path = 'app/static/viz/' + PROJECT_NAME + '/parameters.js'
    rename_project(path, PROJECT_NAME)

def __handler_login(self, err_code):
    logger.info(f"login result: {err_code}")
    if self.__login_dialog is True:
        self.dynamicCall("KOA_Functions(QString, QString)", "ShowAccountWindow", "")
    self.__receive_event = True
    # logger.debug(f"account count: {self.GetLoginInfo('ACCOUNT_CNT')}")
    self.__accounts = self.GetLoginInfo('ACCNO').split(';')
    del self.__accounts[-1]  # drop the empty entry after the trailing ';'
    logger.debug(f"full account list: {self.__accounts}")

def add_data_db(user_id, first_name, last_name, user_name):
    """
    Adds a user to the database, checking first:
    if the user is already in the DB, do not add them again.
    """
    list_arg = [(user_id, first_name, last_name, user_name)]
    cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id, ))
    check1 = cursor.fetchall()
    if not check1:
        cursor.executemany('INSERT INTO users VALUES(?,?,?,?)', list_arg)
    else:
        logger.info("User is already registered in the database")
    conn.commit()

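# Usage sketch (hypothetical user values; cursor/conn are the module-level
# sqlite3 handles this function relies on):
add_data_db(123456789, 'Ivan', 'Ivanov', 'ivan_iv')
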
def my_cleaning(PROJECT_NAME):
    sys.path.insert(0, '../' + PROJECT_NAME + '/')
    from data_params import DATABASE
    sys.path.insert(0, '../' + PROJECT_NAME + '/2.cleaning/')
    from my_cleaning_methods import *
    for collection in TO_CLEAN:
        df = mongo_to_dataframe(DATABASE, collection['collection_name'])
        logger.info('cleaning ' + collection['collection_name'] + ', with shape: ' + str(df.shape))
        df = collection['method'](df)
        dataframe_to_mongo(df, DATABASE, collection['collection_name'], erase=True)
        logger.info('cleaning of ' + collection['collection_name'] + ' successful, output shape: ' + str(df.shape))

def update_net_param(net_param: NetParams):
    sess = DBSession()
    try:
        query = sess.query(NetParams)
        rst = query.filter_by(net_param_id=1).first()
        rst.remote_uart_addr = 1234
        logger.info(rst)
        sess.commit()
    except Exception as e:
        logger.error(e)
        sess.rollback()
        logger.warning('update router_device failed %s' % net_param)
    finally:
        sess.close()

def GetAndSavePriceInfo(bdinfo, files2mail):
    buildings = {}
    title = bdinfo.name
    build_info_url = request_common.GetHostUrl() + bdinfo.short_url
    presell_id = GetFirstPreSellId(build_info_url)
    url_param = "?isopen=&presellid=" + presell_id + "&buildingid=&area=&allprice=&housestate=&housetype=&page="
    base_url = request_common.GetHostUrl() + bdinfo.short_url + url_param
    strcnt = GetPageCnt(base_url)
    if not unicode(strcnt).isnumeric():
        logger.info("get total page count failed")
        return False
    cnt = int(strcnt)
    logger.info("total page count: " + str(cnt))
    all_page = [base_url + str(num) for num in range(1, cnt + 1)]
    for i, url in enumerate(all_page, start=1):
        time.sleep(1)
        logger.info("start parse, page: " + str(i))
        html = request_common.GetHtml(url)
        ParseBuildingDetail(buildings, html)
        logger.info("parse end, page: " + str(i))
    if format_out.IsCanGoodFormat(buildings):
        format_out.GoodOut(buildings, title, files2mail)
    else:
        format_out.SimpleOut(buildings, title, files2mail)
    return True

def mt_sys_handler(msg: Msg):
    logger.debug('sys handler')
    if len(msg.data):
        if msg.data[0] == SUCCESS:
            logger.info('success')
        handler_info = get_handler_func(msg.cmd_state1, msg.cmd_state2)
        if handler_info:
            handler_name = handler_info['name']
            handler_func = handler_info['func']
            logger.info('handler func %s' % handler_name)
            handler_func(msg)
        else:
            logger.warning('handler func not found')

def addr_info_handler(msg):
    data = msg.data
    idx = 1
    count = data[idx:idx + 1]
    logger.info('addr count %d' % bytes_to_int(count))
    idx += 1
    addrs = []
    for i in range(bytes_to_int(count)):
        addr = data[idx:idx + 2]
        idx += 2
        addrs.append('0X%04X' % bytes_to_int(addr))
    logger.warning('addr info %s' % addrs)

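# Assumed payload layout for this handler: data[0] = cmd id, data[1] = address
# count, then `count` two-byte short addresses. bytes_to_int is a project
# helper; a minimal sketch, assuming big-endian byte order:
# def bytes_to_int(b):
#     return int.from_bytes(b, 'big')
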
def ParseBuildingInfo(tr, infos):
    td = tr.td
    name = td.a.text.strip()
    url = ''
    for sib in td.next_siblings:
        if type(sib) == type(td):
            url = sib.a.attrs['href']
            break
    if not IsNewBuildingsAlias(url):
        logger.info("Already Saved This Building Info: " + name)
        return False
    infos.append(PreSellInfo(name, url))
    logger.info("Need To Save This Building Info: " + name)
    return True

def fcs_state(ser):
    global state
    ch = ser.read()
    if len(ch) == 0:
        return
    ch = int.from_bytes(ch, 'big')
    fcs_token = cal_fcs()
    if fcs_token == ch:
        mt_msg = Msg(msg)
        msg_queue.put(mt_msg)
        logger.debug(mt_msg)
        logger.debug('fcs success')
    else:
        logger.info('fcs failed')
    state = SOP_STATE
    init_msg()
    timer.cancel()

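# cal_fcs() is a project helper; in TI's MT/ZNP-style framing the FCS is the
# XOR of the length, command, and payload bytes. A minimal sketch under that
# assumption:
def xor_fcs(frame_bytes):
    fcs = 0
    for b in frame_bytes:
        fcs ^= b
    return fcs
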
def api_wb(params):
    from data_params import DATABASE
    country_codes = mongo_to_dataframe('utilities', 'country_code')
    df = wb.download(
        indicator=params['indicator'],
        country=params['country'],
        start=params['start'],
        end=params['end']
    )
    df = df.reset_index()
    df.rename(columns=params['col_rename'], inplace=True)
    df = pd.merge(df, country_codes, left_on='country', right_on='country_name')
    df = df[['GDP_cst_dollars', 'ISO3', 'population', 'year']]
    logger.info('inserting df with shape: ' + str(df.shape))
    dataframe_to_mongo(df, DATABASE, params['collection_name'], erase=True)
    logger.info('insertion successful in db ' + DATABASE + ' of collection: ' + params['collection_name'])

def handle(self):
    data = self.request.recv(1024)
    data = str(data, 'utf-8')
    data = json.loads(data)
    items = []
    pv = data['pv']
    data = data['param']
    for d in data:
        param = find_params_by_name(d['name'])
        item = NvItem(int(param['item_id']), int(param['item_len']), int(d['value']))
        items.append(item)
    logger.info(items)
    msg_data = mt_sys.app_msg_req(pv, items)
    serial_out_msg_queue.put({'data': msg_data})
    msg_data = mt_sys.app_msg_net_param_update()
    serial_out_msg_queue.put({'data': msg_data})

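# Client-side sketch of the JSON this handler parses (the field name, pv
# value, and port are illustrative; find_params_by_name maps the name to an
# NV item id/length):
import json
import socket

payload = {'pv': 1, 'param': [{'name': 'remote_uart_addr', 'value': '1234'}]}
with socket.create_connection(('127.0.0.1', 9000)) as s:
    s.send(json.dumps(payload).encode('utf-8'))
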
def update_archive(PROJECT_NAME):
    df = pd.read_csv('app/archive.csv')
    if PROJECT_NAME in df.project_name.unique():
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()
    last_id = df.index[-1]
    previous_url = df.loc[:, 'project_name'][last_id]
    df = df.append({
        'project_name': PROJECT_NAME,
        'template_name': PROJECT_NAME + '.html',
        'previous_url': previous_url,
    }, ignore_index=True)
    df.loc[df.index == last_id, 'next_url'] = PROJECT_NAME
    df.to_csv('app/archive.csv', index=False)

def wrapper(**kwargs):
    tries = kwargs['tries']
    delay = kwargs['delay']
    db_name = kwargs['db_name']
    for i in range(int(tries)):
        logger.info(f"Connection attempt {i + 1}")
        conn = None
        if os.path.isfile(db_name):
            try:
                conn = sqlite3.connect(db_name)
            except Exception:
                logger.error(f"Attempt {i + 1} failed", exc_info=True)
                time.sleep(delay)
            else:
                logger.info("Connection established")
                return conn
        else:
            logger.error(f"No database {db_name}")

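# Usage sketch (argument values are illustrative; wrapper returns None when
# every attempt fails):
conn = wrapper(tries=3, delay=1, db_name='bot.db')
if conn is not None:
    cursor = conn.cursor()
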
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    print("length of lines:", len(lines))
    for (i, line) in enumerate(lines):
        # print('#i:', i, line)
        if i == 0:  # TODO: why is the first row skipped? (presumably the header row)
            logger.info(line)
            continue
        guid = "%s-%s" % (set_type, i)
        try:
            label = tokenization.convert_to_unicode(line[0])
            text_a = tokenization.convert_to_unicode(line[1])
            text_b = tokenization.convert_to_unicode(line[2])
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        except Exception:
            print('###error.i:', i, line)
    return examples

def run(self):
    while True:
        try:
            send_data = serial_out_msg_queue.get()
            data = send_data['data']
            self.client.send_data(data)
            try:
                rep = serial_rep_msg_queue.get(timeout=2)
                if rep == SUCCESS:
                    logger.info('rep success, data = %s' % data)
                if rep == FAILED:
                    logger.warning('rep failed, data = %s' % data)
            except Empty:
                logger.warning('rep timeout, data = %s' % data)
        except Exception:
            time.sleep(5)
            logger.error(traceback.format_exc())

def GetHtml(url):
    config = my_config.GetConfig()
    cookie = config['remote_server']['cookie']
    html = ""
    request = urllib2.Request(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'
        })
    request.add_header("Cookie", cookie)
    while True:
        try:
            response = urllib2.urlopen(request, timeout=60)
            html = response.read()
            break
        except urllib2.HTTPError as e:
            traceback.print_stack()
            logger.info(e.code)
            logger.error(e.reason)
            logger.error(traceback.format_exc())
    return html

def insert_scrapped_in_mongo():
    from data_params import PROJECT, SCRAPPED, DATABASE
    # path to the scraped data
    common_path = "../" + PROJECT + "/1.getting the data/1.scrapping/" + PROJECT + '/'
    # insert each scraped file in mongodb
    for file in SCRAPPED:
        logger.info('inserting scraped data')
        try:
            df = pd.read_json(open(common_path + file['file']), orient=file['orient'])
            logger.info('inserting df with shape: ' + str(df.shape))
            dataframe_to_mongo(df, DATABASE, file['collection_name'], erase=True)
            logger.info('insertion successful in db ' + DATABASE + ' of collection: ' + file['collection_name'])
        except Exception:
            logger.info('No file ' + file['file'] + ' of type ' + file['type'] + ' in project scraping folder')

def SendEMail(files2mail):
    config = my_config.GetConfig()
    # third-party SMTP service
    mail_host = "smtp.qq.com"  # SMTP server
    sender = config['email']['sender']
    mail_user = config['email']['mail_user']  # user name
    mail_pass = config['email']['mail_pass']  # password
    receivers = []
    for recv in config['email']['receivers']:
        receivers.append(recv['qq'])
    msg = MIMEMultipart()
    msg['From'] = formataddr([u'Chant', sender])
    msg['To'] = ",".join(receivers)  # the To header expects a string, not a list
    msg['Subject'] = Header(config['email']['topic'], 'utf-8').encode()
    msg.attach(MIMEText(config['email']['text'], 'plain', 'utf-8'))
    for file in files2mail:
        # attach each file
        att1 = MIMEText(open(file, 'rb').read(), 'base64', 'utf-8')
        att1["Content-Type"] = 'application/octet-stream'
        # encode the filename to avoid garbled non-ASCII attachment names
        att1.add_header('Content-Disposition', 'attachment', filename=Header(file, 'utf-8').encode())
        msg.attach(att1)
    try:
        smtpObj = smtplib.SMTP()
        smtpObj.connect(mail_host, 25)  # 25 is the SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.set_debuglevel(1)
        smtpObj.sendmail(sender, receivers, msg.as_string())
        smtpObj.quit()
        logger.info("Send EMail OK")
    except smtplib.SMTPException as e:
        logger.error("Send EMail Error: " + str(e))

def _run_base_model_dfm(dfTrain, dfTest, folds, dfm_params):
    # wrap the training data in a feature dictionary
    fd = FeatureDictionary(
        dfTrain=dfTrain,
        dfTest=dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS)
    logger.info(fd.feat_dim)
    logger.info(fd.feat_dict)
    # data preprocessing
    data_parser = DataParser(feat_dict=fd)
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    dfm_params["feature_size"] = fd.feat_dim
    dfm_params["field_size"] = len(Xi_train[0])
    logger.info("number of fields / number of features after binarization: {}/{}".format(
        dfm_params["field_size"], dfm_params["feature_size"]))

    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    _get = lambda x, l: [x[i] for i in l]
    gini_results_cv = np.zeros(len(folds), dtype=float)
    gini_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    gini_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    for i, (train_idx, valid_idx) in enumerate(folds):  # k-fold
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)
        pprint(dfm_params)
        dfm = DeepFM(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        y_train_meta[valid_idx, 0] = dfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += dfm.predict(Xi_test, Xv_test)

        gini_results_cv[i] = gini_norm(y_valid_, y_train_meta[valid_idx])
        gini_results_epoch_train[i] = dfm.train_result
        gini_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result
    if dfm_params["use_fm"] and dfm_params["use_deep"]:
        clf_str = "DeepFM"
    elif dfm_params["use_fm"]:
        clf_str = "FM"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)" % (clf_str, gini_results_cv.mean(), gini_results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv" % (clf_str, gini_results_cv.mean(), gini_results_cv.std())
    _make_submission(ids_test, y_test_meta, filename)

    _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta

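# A dfm_params sketch built from the DeepFM.__init__ defaults above
# (feature_size and field_size are filled in by _run_base_model_dfm itself):
dfm_params = {
    "use_fm": True,
    "use_deep": True,
    "embedding_size": 8,
    "dropout_fm": [1.0, 1.0],
    "deep_layers": [32, 32],
    "dropout_deep": [0.5, 0.5, 0.5],
    "epoch": 10,
    "batch_size": 256,
    "learning_rate": 0.001,
    "optimizer_type": "adam",
    "random_seed": 2016,
}
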
def insert_file_in_mongo(file, DATABASE='utilities', collection_name='country_code'):
    logger.info('inserting ' + file['name'])
    if file['type'] == 'csv':
        df = pd.read_csv(file['name'])
    elif file['type'] == 'excel':
        df = pd.read_excel(
            file['name'],
            sheetname=file['sheetname'],
            skiprows=file['skiprows']
        )
    logger.info('inserting df with shape: ' + str(df.shape))
    dataframe_to_mongo(df, DATABASE, collection_name, erase=True)
    logger.info('insertion successful in db ' + DATABASE + ' of collection ' + collection_name)

def UpdateCheckPoint(update_info):
    global g_cur_point
    new_point = update_info.name + "," + update_info.short_url
    if g_cur_point == update_info.short_url:
        logger.info("old record url, no need to update: " + new_point)
        return
    logger.info("record check point is: " + g_cur_point + ", will update to: " + new_point)
    global g_file_name
    fd = open(g_file_name, "w")
    fd.write(new_point + "\n")
    fd.close()
    g_cur_point = update_info.short_url
    logger.info(g_file_name + " record check point has been updated to: " + g_cur_point + " for " + update_info.name)

def _load_data():
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)
    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    cols = [c for c in dfTrain.columns if c not in ["id", "target"]]
    cols = [c for c in cols if c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values
    cat_features_indices = [i for i, c in enumerate(cols) if c in config.CATEGORICAL_COLS]

    assert len(X_train[0]) == len(X_test[0])
    logger.info("test/train: {}/{}".format(len(X_test), len(X_train)))
    logger.info("features: {}".format(cols))
    logger.info(cat_features_indices)
    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices

def GetFirstPreSellId(build_info_url):
    logger.info("get first presell id, url: " + build_info_url)
    html = request_common.GetHtml(build_info_url)
    soup = BeautifulSoup(html, "html.parser")
    datanow = soup.find(name="div", attrs={
        "class": "lptypebar",
        "id": "building_dd"
    })
    # narrow the search to the building_dd block
    datanow = datanow.find(name="div", attrs={"class": "lptypebarin"})
    presell_id = ""
    tab = datanow.a
    i = 1
    for sib in tab.next_siblings:
        if type(sib) == type(tab):
            presell_id = sib.attrs["id"]
            logger.info(str(i) + " " + str(presell_id))
            break
    id = presell_id.split('_')[1]
    logger.info("presell id: " + presell_id)
    return id

added = []
for name in after:
    if name not in before:
        ext = os.path.splitext(name)[-1].lower()
        if ext == ".csv":
            added.append(name)
removed = []
for name in before:
    if name not in after:
        removed.append(name)
if added:
    for added_file in added:  # for each file in the added list:
        if 'coronavirus' in added_file:
            print(added_file)
            db.coronavirus_analysis(r'E:\Moje\Python szkolenie\Projekt-zaliczenie\files\coronavirus_cases.csv')
        if 'vacinnes' in added_file:
            db.vacinnes_analysis(r'E:\Moje\Python szkolenie\Projekt-zaliczenie\files\vacinnes.csv')
else:
    logger.info("Nothing was added")
if removed:
    logger.info(f'Removed {removed}')
else:
    logger.info('Nothing was removed')
before = after
db.close_conn()
time.sleep(10)

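# This block presumably sits inside a polling loop; a minimal sketch of how
# `before`/`after` would be produced (path_to_watch is an assumption):
# before = set(os.listdir(path_to_watch))
# while True:
#     after = set(os.listdir(path_to_watch))
#     ...diff and analysis logic above...
#     before = after
#     time.sleep(10)
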
def GoodOut(buildings, title, files2mail):
    logger.info("----------- GoodOut: " + title)
    for rd_list in buildings.values():
        rd_list.sort(key=price_common.takeSecond, reverse=True)
    mosthighlevl = 1
    maxroomonelevel = 2
    # iterate over the building map
    fmtbuildings = {}
    for k, rd_list in buildings.items():
        # get the building and unit
        tmpbuilding, tmpUnit = price_common.getBuilding(k)
        # get the unit map of the building
        if tmpbuilding not in fmtbuildings:
            fmtbuildings[tmpbuilding] = {}
        tmpUnitMap = fmtbuildings.get(tmpbuilding)
        if tmpUnit not in tmpUnitMap:
            tmpUnitMap[tmpUnit] = {}
        levelMap = tmpUnitMap.get(tmpUnit)
        # iterate over the rooms of each building
        for rd in rd_list:
            level = int(price_common.getRoomNum(rd.room)) / 100
            if level > mosthighlevl:
                mosthighlevl = level
            if level not in levelMap:
                levelMap[level] = []
            roomlist = levelMap.get(level)
            roomlist.append(rd)
            if len(roomlist) > maxroomonelevel:
                maxroomonelevel = len(roomlist)
    writebuildings = {}
    startrow = 1
    startqueue = 2
    bdst = sorted(fmtbuildings.iteritems(), key=price_common.bd_sort_key)
    for bditem in bdst:
        bd = bditem[0]
        utmp = bditem[1]
        startqueue = startqueue + 2
        currow = startrow
        curqueue = startqueue
        startqueue = startqueue + 2 + (len(utmp) * maxroomonelevel)
        # building header
        bdqueue = curqueue + (len(utmp) * maxroomonelevel) / 2
        logger.info("%s %s %s" % (currow, bdqueue, bd))
        insertrecord(writebuildings, currow, bdqueue, bd)
        unitrow = currow + 1
        unitidx = 1
        leftlevelqueue = curqueue - 1
        rightlevelqueue = curqueue + (len(utmp) * maxroomonelevel)
        roomstartqueue = curqueue
        needlevelqueue = True
        utst = sorted(utmp.iteritems(), key=price_common.ut_sort_key)
        for utitem in utst:
            ut = utitem[0]
            lvmp = utitem[1]
            # unit header
            unitqueue = curqueue + maxroomonelevel * (unitidx - 1)
            logger.info("%s %s %s" % (unitrow, unitqueue, ut))
            insertrecord(writebuildings, unitrow, unitqueue, ut)
            levelrow = unitrow + 2
            roomstartqueue = unitqueue
            realneedlevelqueue = True
            if needlevelqueue:
                realneedlevelqueue = True
                needlevelqueue = False
            else:
                realneedlevelqueue = False
            lvst = sorted(lvmp.iteritems(), reverse=True)
            for roommark in range(0, maxroomonelevel):
                insertrecord(writebuildings, unitrow + 1, roomstartqueue + roommark,
                             str(roommark + 1) + u'号室', 0, 5000)  # u'号室' = "Room No."
            for lvitem in lvst:
                lv = lvitem[0]
                rl = lvitem[1]
                if realneedlevelqueue:
                    logger.info("%s %s %s" % (levelrow, leftlevelqueue, lv))
                    logger.info("%s %s %s" % (levelrow, rightlevelqueue, lv))
                    insertrecord(writebuildings, levelrow, leftlevelqueue, str(lv) + u'楼')  # u'楼' = "floor"
                    insertrecord(writebuildings, levelrow, rightlevelqueue, str(lv) + u'楼')
                curroomqueue = roomstartqueue
                curroomrow = levelrow
                for rmdt in rl:
                    logger.info("%s %s %s" % (curroomrow, curroomqueue, rmdt.room))
                    detail = rmdt.area + "\n" + rmdt.record_price + "\n" + rmdt.total_price
                    insertrecord(writebuildings, curroomrow, curroomqueue, detail, 3000, 0)
                    curroomqueue = curroomqueue + 1
                levelrow = levelrow + 1
            unitidx = unitidx + 1
        # logger.info('-------------')
    xlfile = xlwt.Workbook(encoding='utf-8')
    xltable = xlfile.add_sheet(title)
    for row, ql in writebuildings.items():
        for queuerecord in ql:
            logger.info(str(row) + " " + str(queuerecord[0]) + " " + str(queuerecord[1]))
            xltable.write(row, queuerecord[0], queuerecord[1])
            if queuerecord[2] != 0:
                xltable.row(row).height = queuerecord[2]
            if queuerecord[3] != 0:
                xltable.col(queuerecord[0]).width = queuerecord[3]
    postfix = time.strftime("_%Y%m%d_%H%M%S.xls", time.localtime())
    filename = "./" + title + postfix
    xlfile.save(filename)
    files2mail.append(filename)
    return True

if __name__ == "__main__":
    update_info = PreSellInfo("府", "price.htm")
    LoadCheckPoint()
    UpdateCheckPoint(update_info)
    al = []
    al.append("aaaa")
    al.append("bbb")
    logger.info(al)

### 3.1. Compute random samples for which shap values will be computed
df_distribution = get_saltelli_sample(problem, 10000, calc_second_order=False)
# saltelli_sample is used as a random generator; the final number of samples
# is set on the next line.
df_sample = df_distribution.sample(300)

### 3.2. Init KernelShap
# These samples are used to compute the expectations of variables that are
# "shut down" by shap.
reference = shap.sample(df_distribution, 50)
explainer = shap.KernelExplainer(f_algo, reference)

### 3.3. Perform shap values computation
shap_values = explainer.shap_values(df_sample)

### 3.4. Aggregate over samples defined in 3.1. and log it
df_shap = pd.Series(np.abs(shap_values).mean(axis=0), index=problem['names'])
shap_total_list = []
for variable in sorted(VAR_DICT_NO_MSF.keys()):
    shap_total_list.append("{:.5f}".format(df_shap[variable]))
shap_str = ';'.join(shap_total_list)
logger.setLevel(logging.DEBUG)
logger.info('SHAP_IMPORTANCE;{};{}'.format(scenario, shap_str))
logger.setLevel(logging.ERROR)

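# get_saltelli_sample appears to wrap SALib's Saltelli sampler (note the
# calc_second_order kwarg); the `problem` spec it consumes presumably follows
# SALib's convention, e.g. (variable names and bounds are placeholders):
problem = {
    'num_vars': 2,
    'names': ['x1', 'x2'],
    'bounds': [[0.0, 1.0], [0.0, 1.0]],
}
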
def _init_graph(self):
    self.graph = tf.Graph()
    with self.graph.as_default():
        tf.set_random_seed(self.random_seed)

        # placeholders
        self.feat_index = tf.placeholder(tf.int32, shape=[None, self.field_size], name="feat_index")  # None * F
        self.feat_value = tf.placeholder(tf.float32, shape=[None, self.field_size], name="feat_value")  # None * F
        logger.info(self.feat_index.shape)
        logger.info(self.feat_value.shape)
        self.label = tf.placeholder(tf.float32, shape=[None, 1], name="label")  # None * 1
        self.dropout_keep_fm = tf.placeholder(tf.float32, shape=[None], name="dropout_keep_fm")
        self.dropout_keep_deep = tf.placeholder(tf.float32, shape=[None], name="dropout_keep_deep")
        self.train_phase = tf.placeholder(tf.bool, name="train_phase")

        # initialize model parameters
        self.weights = self._initialize_weights()
        pprint(self.weights)

        # model
        self.embeddings = tf.nn.embedding_lookup(
            self.weights["feature_embeddings"],  # feature_embeddings = 259 * K
            self.feat_index)  # None * F(39) * K
        feat_value = tf.reshape(self.feat_value, shape=[-1, self.field_size, 1])  # None * 39 * 1
        # scale embeddings by the (continuous) feature values
        self.embeddings = tf.multiply(self.embeddings, feat_value)
        logger.info(self.embeddings)  # None * 39 * K(8)

        # ---------- first order term ----------
        self.y_first_order = tf.nn.embedding_lookup(
            self.weights["feature_bias"], self.feat_index)  # None * F * 1; feature_bias = 259 * 1
        # linear combination part (no constant term?)
        self.y_first_order = tf.reduce_sum(
            tf.multiply(self.y_first_order, feat_value), 2)  # None * F(39)
        self.y_first_order = tf.nn.dropout(
            self.y_first_order, self.dropout_keep_fm[0])  # None * F

        # ---------- second order term ----------
        # sum_square part: square of the sum of the elements
        self.summed_features_emb = tf.reduce_sum(self.embeddings, 1)  # None * K
        self.summed_features_emb_square = tf.square(self.summed_features_emb)  # None * K

        # square_sum part: sum of the squares
        self.squared_features_emb = tf.square(self.embeddings)
        self.squared_sum_features_emb = tf.reduce_sum(self.squared_features_emb, 1)  # None * K

        # second order
        self.y_second_order = 0.5 * tf.subtract(
            self.summed_features_emb_square, self.squared_sum_features_emb)  # None * K
        self.y_second_order = tf.nn.dropout(
            self.y_second_order, self.dropout_keep_fm[1])  # None * K

        # ---------- Deep component ----------
        # FM and deep share the same input
        self.y_deep = tf.reshape(self.embeddings, shape=[-1, self.field_size * self.embedding_size])  # None * (F*K)
        self.y_deep = tf.nn.dropout(self.y_deep, self.dropout_keep_deep[0])
        for i in range(0, len(self.deep_layers)):
            self.y_deep = tf.add(
                tf.matmul(self.y_deep, self.weights["layer_%d" % i]),
                self.weights["bias_%d" % i])  # None * layer[i] * 1
            if self.batch_norm:
                self.y_deep = self.batch_norm_layer(
                    self.y_deep, train_phase=self.train_phase, scope_bn="bn_%d" % i)  # None * layer[i] * 1
            self.y_deep = self.deep_layers_activation(self.y_deep)
            self.y_deep = tf.nn.dropout(
                self.y_deep, self.dropout_keep_deep[1 + i])  # dropout at each Deep layer

        # ---------- DeepFM ----------
        if self.use_fm and self.use_deep:
            concat_input = tf.concat(
                [self.y_first_order, self.y_second_order, self.y_deep],
                axis=1)  # None * (F + K + deep_layers[-1] nodes)
        elif self.use_fm:
            concat_input = tf.concat(
                [self.y_first_order, self.y_second_order], axis=1)
        elif self.use_deep:
            concat_input = self.y_deep
        logger.info(concat_input)
        self.out = tf.add(
            tf.matmul(concat_input, self.weights["concat_projection"]),
            self.weights["concat_bias"])

        # loss
        if self.loss_type == "logloss":
            self.out = tf.nn.sigmoid(self.out)
            self.loss = tf.losses.log_loss(self.label, self.out)
        elif self.loss_type == "mse":
            self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out))
        # l2 regularization on weights
        if self.l2_reg > 0:
            self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights["concat_projection"])
            if self.use_deep:
                for i in range(len(self.deep_layers)):
                    self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights["layer_%d" % i])

        # optimizer
        if self.optimizer_type == "adam":
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate, beta1=0.9, beta2=0.999,
                epsilon=1e-8).minimize(self.loss)
        elif self.optimizer_type == "adagrad":
            self.optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.learning_rate,
                initial_accumulator_value=1e-8).minimize(self.loss)
        elif self.optimizer_type == "gd":
            self.optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate).minimize(self.loss)
        elif self.optimizer_type == "momentum":
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)
        elif self.optimizer_type == "yellowfin":
            self.optimizer = YFOptimizer(
                learning_rate=self.learning_rate, momentum=0.0).minimize(self.loss)

        # init
        self.saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        self.sess = self._init_session()
        self.sess.run(init)
        # save_path = self.saver.save(self.sess, save_path=os.path.join(SUB_DIR, "model"), global_step=0)
        # logger.info("model initialized, saved to: {}".format(save_path))
        # writer = tf.summary.FileWriter("./logs", self.sess.graph)

        # number of params
        total_parameters = 0
        for variable in self.weights.values():
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        if self.verbose > 0:
            print("#params: %d" % total_parameters)

def insert_static_in_mongo():
    from data_params import PROJECT, STATIC, DATABASE
    # path to the static files
    common_path = "../" + PROJECT + "/1.getting the data/3.static_files/"
    # insert each static file in mongodb
    for file in STATIC:
        logger.info('inserting static data')
        if file['type'] == 'csv':
            try:
                df = pd.read_csv(common_path + file['file'])
            except Exception:
                logger.info('No file ' + file['file'] + ' of type ' + file['type'] + ' in project statics folder')
                sys.exit()
        elif file['type'] == 'excel':
            try:
                df = pd.read_excel(
                    common_path + file['file'],
                    sheetname=file['sheetname'],
                    skiprows=file['skiprows']
                )
            except Exception:
                logger.info('No file ' + file['file'] + ' of type ' + file['type'] + ' in project statics folder')
                sys.exit()
        elif file['type'] == 'json':
            try:
                df = pd.read_json(common_path + file['file'])
            except Exception:
                logger.info('No file ' + file['file'] + ' of type ' + file['type'] + ' in project statics folder')
                sys.exit()
        logger.info('inserting df with shape: ' + str(df.shape))
        dataframe_to_mongo(df, DATABASE, file['collection_name'], erase=True)
        logger.info('insertion successful in db ' + DATABASE + ' of collection: ' + file['collection_name'])