def from_file_airport_insert(config, param, airport_paths):
    path = ''.join([base_path, str(param), '/'])
    insert_sql = "insert into airport(iata_code,name,name_en,city_id,belong_city_id,map_info,status,time2city_center,inner_order) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    conn = pymysql.connect(**config)
    cursor = conn.cursor()
    _count = 0
    save_result = []
    logger = get_logger('step3', path)
    for airport_path in airport_paths:
        # logger.debug("function: {0}, airport input file: {1}".format(from_file_airport_insert.__name__, airport_path))
        with open(path + airport_path, 'r+') as airport:
            reader = csv.DictReader(airport)
            for row in reader:
                _count += 1
                # logger.debug(row)
                save_result.append(
                    (row['iata_code'], row['name'], row['name_en'],
                     row['city_id'], row['belong_city_id'], row['map_info'],
                     row['status'], row['time2city_center'],
                     row['inner_order']))
                # Flush to MySQL in batches of 1000 rows
                if len(save_result) >= 1000:
                    cursor.executemany(insert_sql, save_result)
                    conn.commit()
                    save_result = []
    # Insert the final partial batch, if any
    if save_result:
        cursor.executemany(insert_sql, save_result)
        conn.commit()
        save_result = []
    return _count
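# Hedged usage sketch for the batch insert above. `base_path` is a module-level
# constant and `config` a pymysql connection dict elsewhere in this module; the
# values and file name below are illustrative assumptions, not the real
# deployment settings.
if __name__ == '__main__':
    demo_config = {'host': '127.0.0.1', 'user': 'demo', 'password': 'demo',
                   'charset': 'utf8', 'db': 'add_city_706'}
    inserted = from_file_airport_insert(demo_config, 706, ['share_airport.csv'])
    print('inserted {0} airport rows'.format(inserted))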
def __init__(self, cfg, name):
    tf.keras.Model.__init__(self)
    self.model_name = name
    self.logger = get_logger()
    self.logger.info('Using model: {:s}'.format(self.model_name))
    self._num_classes = cfg.dataset.num_classes
    self._input_shape = None
    return
def __init__(self, db_name, coll_name):
    self.logger = get_logger("test_count")
    self.tasks = []
    client = pymongo.MongoClient(
        'mongodb://*****:*****@10.19.2.103:27017/')
    self.collections = client[db_name][coll_name]
    self.create_index()
    # Data cursor offset, used to restore the cursor position if a query raises
    self.offset = 0
    # Leading cursor offset, used to restore the cursor position during inserts
    self.pre_offset = 0
def __init__(self, args):
    self.cfg = get_config(args)  # Get dictionary with configuration parameters
    self.logger = get_logger()
    self._pipeline = dict()
    self.loss_fn = tf.losses.categorical_crossentropy  # Loss function
    self.net = None  # Main network instance
    self.opt = None  # Optimizer instance
    self._lr_scheduler = CustomLearningRateScheduler()
    self._ckpt_hdl = CheckpointHandler(self.cfg)
    return
def __init__(self, cfg):
    """
    :param cfg: Configuration
    """
    self._logger = get_logger()
    self._cfg = cfg
    # A tensor to hold the checkpoint integer id
    self._ckpt_id = None
    self._ckpt = None  # An object of class tf.train.Checkpoint()
    self._ckpt_mngr = None  # An object of class tf.train.CheckpointManager()
    self._ckpt_status = None  # Holds result of self._ckpt.restore(). Used to verify a successful restore
    return
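# Hedged sketch of how the attributes above are typically populated later.
# The attribute names mirror this class, but the exact wiring (method name,
# arguments, max_to_keep) is an assumption, not the original handler's code.
def _build(self, net, opt, ckpt_dir):
    self._ckpt_id = tf.Variable(0, dtype=tf.int64)  # monotonically increasing checkpoint id
    self._ckpt = tf.train.Checkpoint(net=net, opt=opt, ckpt_id=self._ckpt_id)
    self._ckpt_mngr = tf.train.CheckpointManager(self._ckpt, ckpt_dir, max_to_keep=3)
    if self._ckpt_mngr.latest_checkpoint:
        # Restore returns a status object that can later assert a clean restore
        self._ckpt_status = self._ckpt.restore(self._ckpt_mngr.latest_checkpoint)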
def main():
    logger = get_logger()
    args = parse_args()
    sqz = DevelopSqueezenet(args)
    sqz.load_checkpointables(sqz.cfg.model_saver.checkpoint_id)  # Load checkpoint
    # A call to make sure that a concrete function exists for this polymorphic function
    concrete_fn = sqz.net.call.get_concrete_function(
        batch_x=tf.TensorSpec([None, 224, 224, 3], tf.float32), training=False)
    logger.info('Saving the model in directory: {:s}'.format(
        sqz.cfg.directories.dir_model))
    tf.saved_model.save(sqz.net, sqz.cfg.directories.dir_model)  # Save model
    return
def __init__(self, worker, source, _type, task_name, routine_key, queue, **kwargs):
    # Basic task information
    self.worker = worker
    self.source = source
    self.type = _type
    self.task_name = task_name
    self.routine_key = routine_key
    self.queue = queue
    self.task_type = kwargs.get('task_type', TaskType.NORMAL)
    self.priority = int(kwargs.get("priority", 3))
    self.logger = get_logger("InsertMongoTask")
    self.tasks = TaskList()
    self.collection_name = self.generate_collection_name()
    # Data cursor offset, used to restore the cursor position if a query raises
    self.offset = 0
    # Leading cursor offset, used to restore the cursor position during inserts
    self.pre_offset = 0
    client = pymongo.MongoClient(host='10.10.231.105')
    self.db = client['MongoTask']
    # Create all required indexes
    self.create_mongo_indexes()
    # For CITY tasks, build the date_list
    if self.task_type == TaskType.CITY_TASK:
        self.date_list = self.generate_list_date()
    else:
        self.date_list = None
    # Modify each handler's formatter so every log line carries task context
    datefmt = "%Y-%m-%d %H:%M:%S"
    file_log_format = "%(asctime)-15s %(threadName)s %(filename)s:%(lineno)d %(levelname)s " \
                      "[source: {}][type: {}][task_name: {}][collection_name: {}]: %(message)s".format(
                          self.source, self.type, self.task_name, self.collection_name)
    formatter = logging.Formatter(file_log_format, datefmt)
    for each_handler in self.logger.handlers:
        each_handler.setFormatter(formatter)
    self.logger.info("[init InsertTask]")
def main():
    # log_option:
    #   0: No logging
    #   1: On screen logs
    #   2: 1 + File logging to logs/latest.log
    #   3: 2 + File logging to logs/<timestamp>.log
    setup_logger(log_option=2)
    logger = get_logger()
    args = parse_args()
    dev = DevelopSqueezenet(args)
    try:
        dev.run()
    except Exception as e:
        logger.exception(str(e))
    return
def insert_airport(path=None):
    logger = get_logger('update_airport')
    cursor = city_conn.cursor()
    select_sql = "SELECT id FROM city WHERE name=%s"
    update_sql = "UPDATE airport SET iata_code=%s,NAME=%s,name_en=%s,city_id=%s,belong_city_id=%s,map_info=%s,STATUS=%s,time2city_center=%s,inner_order=%s WHERE id=%s"
    insert_sql = "INSERT INTO airport(iata_code,name,name_en,city_id,belong_city_id,map_info,status,time2city_center,inner_order) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    with open(path, 'r+') as airport:
        reader = csv.reader(airport)
        next(reader)  # skip the header row
        logger.debug("Updated fields: iata_code,name,name_en,city_id,belong_city_id,map_info,status,time2city_center,inner_order")
        for row in reader:
            try:
                # Normalize map_info: replace a full-width comma and swap the
                # two coordinates (e.g. "lon,lat" -> "lat,lon")
                map_info = row[6].replace('，', ',').split(',')
                map_info = ','.join([map_info[1].strip(), map_info[0].strip()])
                if not str(row[4]).isdigit():
                    # City given by name: resolve its id first
                    cursor.execute(select_sql, (row[4], ))
                    city_id = cursor.fetchone()[0]
                    if city_id:
                        cursor.execute(update_sql,
                                       (row[1], row[2], row[3], city_id, city_id,
                                        map_info, row[7], row[8], row[9], row[0]))
                        city_conn.commit()
                        logger.debug(
                            "Updated row: {0},{1},{2},{3},{4},{5},{6},{7},{8},{9}".format(
                                row[0], row[1], row[2], row[3], city_id, city_id,
                                map_info, row[7], row[8], row[9]))
                elif str(row[4]).isdigit():
                    cursor.execute(update_sql,
                                   (row[1], row[2], row[3], row[4], row[5],
                                    map_info, row[7], row[8], row[9], row[0]))
                    city_conn.commit()
                    logger.debug(
                        "Updated row: {0},{1},{2},{3},{4},{5},{6},{7},{8},{9}".format(
                            row[0], row[1], row[2], row[3], row[4], row[5],
                            map_info, row[7], row[8], row[9]))
            except Exception as e:
                print(traceback.format_exc())
                city_conn.rollback()
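# Hedged illustration of the map_info normalization above: a full-width comma
# is replaced with an ASCII one and the two coordinates are swapped. The
# sample value is made up for demonstration.
raw = '116.4074，39.9042'  # contains a full-width comma
parts = raw.replace('，', ',').split(',')
print(','.join([parts[1].strip(), parts[0].strip()]))  # -> '39.9042,116.4074'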
def _set_directories(cfg):
    """
    Sets up directory paths in cfg and creates them if directory doesn't exist
    :param cfg: An EasyDict dictionary for configuration parameters
    :return: None
    """
    logger = get_logger()
    # Repository path
    cfg.directories.dir_repo = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))
    # Convert all directory paths to absolute paths
    # (check the configured path value, not the key name)
    for directory in cfg.directories.keys():
        if not os.path.isabs(cfg.directories[directory]):
            cfg.directories[directory] = os.path.join(
                cfg.directories.dir_repo, cfg.directories[directory])
    # Create directories if they don't exist
    os.makedirs(cfg.directories.dir_model, exist_ok=True)
    logger.debug('Model save directory: {:s}'.format(cfg.directories.dir_model))
    os.makedirs(cfg.directories.dir_log, exist_ok=True)
    logger.debug('Log dump directory: {:s}'.format(cfg.directories.dir_log))
    os.makedirs(cfg.directories.dir_tb_home, exist_ok=True)
    # Tensorboard directory, timestamped per run
    cfg.directories.dir_tb = os.path.join(cfg.directories.dir_tb_home,
                                          time.strftime("%Y-%m-%d_%H-%M-%S"))
    logger.debug('Tensorboard directory: {:s}'.format(cfg.directories.dir_tb))
    os.makedirs(cfg.directories.dir_ckpt, exist_ok=True)
    logger.debug('Checkpoint directory: {:s}'.format(cfg.directories.dir_ckpt))
    cfg.directories.dir_ckpt_train = os.path.join(cfg.directories.dir_ckpt)
    os.makedirs(cfg.directories.dir_ckpt_train, exist_ok=True)
    logger.debug('Checkpoint train parameters directory: {:s}'.format(
        cfg.directories.dir_ckpt_train))
    return
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/26 8:16 PM
# @Author  : Hou Rong
# @Site    :
# @File    : MkContent.py
# @Software: PyCharm
from warnings import filterwarnings
import pymysql
from service_platform_conn_pool import base_data_pool, fetchall, new_service_platform_pool
from itertools import permutations
from my_logger import get_logger

logger = get_logger("train_content")
filterwarnings('ignore', category=pymysql.err.Warning)
offset = 0


def get_task():
    sql = '''SELECT DISTINCT city_id, src_city_code
             FROM station_src, station_relation
             WHERE station_src.src_station_code IS NOT NULL
                   AND station_src.station_id = station_relation.station_id
                   AND city_id != ''
             GROUP BY city_id;'''
    for line in fetchall(base_data_pool, sql):
        yield line
# @Author  : Hou Rong
# @Site    :
# @File    : multi_city.py
# @Software: PyCharm
import gevent.monkey

gevent.monkey.patch_all()
import time
import gevent.pool
from my_logger import get_logger
from service_platform_conn_pool import base_data_pool
from poi_ori.poi_insert_db import poi_insert_data
from poi_ori.already_merged_city import init_already_merged_city

logger = get_logger("multi_city_insert_db")
pool = gevent.pool.Pool(size=16)


def poi_ori_insert_data(poi_type, cids=None):
    already_merged_city = init_already_merged_city(
        poi_type="{}_data".format(poi_type))
    if not cids:
        conn = base_data_pool.connection()
        cursor = conn.cursor()
        cursor.execute('''SELECT id FROM city;''')
        cids = list(map(lambda x: x[0], cursor.fetchall()))
        cursor.close()
        conn.close()
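# Hedged sketch of how the module-level gevent pool above is typically driven.
# The skip-and-spawn loop is an assumption based on the names in this module;
# the original function body is truncated before this point.
def _spawn_per_city(poi_type, cids, already_merged_city):
    for cid in cids:
        if cid in already_merged_city:
            continue  # this city's POIs were merged already
        pool.spawn(poi_insert_data, poi_type, cid)
    pool.join()  # block until every greenlet finishes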
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import connectmongo
import my_logger
from datetime import datetime
import time
from send_mail import send_warning
import settings

logger = my_logger.get_logger('PI-COVID')
logger.info("Starting update")

# Select the access scope
scope = ['https://www.googleapis.com/auth/spreadsheets']

# # Credentials
# credentials = service_account.Credentials.from_service_account_file(filename="credentials.json")
# scopedCreds = credentials.with_scopes(scope)
# gc = gspread.Client(auth=scopedCreds)
# gc.session = AuthorizedSession(scopedCreds)

# Authorize access
creds = ServiceAccountCredentials.from_json_keyfile_name(
    filename=settings.CRED_PATH, scopes=scope)
gc = gspread.authorize(creds)

# Spreadsheet information
PICOVID = gc.open_by_key(settings.SPREAD_ID)
PAGINAS = PICOVID.worksheets()
REGEX_FALSE = r'^=IF\(\$A.*<>\"\";false;""\)$'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/23 11:29 AM
# @Author  : Hou Rong
# @Site    :
# @File    : poi_city_mapping.py
# @Software: PyCharm
import re
import pandas
import dataset
from service_platform_conn_pool import base_data_pool, source_info_str, source_info_pool, fetchall
from my_logger import get_logger

logger = get_logger("city_mapping")

conn = base_data_pool.connection()
cursor = conn.cursor()
cursor.execute('''SELECT
  city.id      AS city_id,
  country.mid  AS country_id,
  city.name    AS city_name,
  country.name AS country_name
FROM city
  JOIN country ON city.country_id = country.mid;''')
city_info = {line[0]: line for line in cursor.fetchall()}
conn.close()


def hotels_get_geo_id_by_dest_id(dest_id):
    sql = '''SELECT sid
FROM ota_location
# @Author  : Hou Rong
# @Site    :
# @File    : load_final_data.py
# @Software: PyCharm
import os
import pymysql
import time
import pymysql.err
from warnings import filterwarnings
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

# ignore pymysql warnings
filterwarnings('ignore', category=pymysql.err.Warning)

logger = get_logger("load_data")

final_database = 'BaseDataFinal'
final_table = {
    "hotel": "hotel_detail.sql",
    "attr": "daodao_attr_detail.sql",
    "rest": "daodao_rest_detail.sql",
    "total": "qyer_detail.sql"
}
time_key = {
    "hotel_detail": "update_time",
    "attr_detail": "utime",
    "rest_detail": "utime",
    "total_detail": "insert_time",
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/18 9:30 AM
# @Author  : Hou Rong
# @Site    :
# @File    : unknown_keywords.py
# @Software: PyCharm
from service_platform_conn_pool import poi_ori_pool
from toolbox.Hash import encode
from my_logger import func_time_logger, get_logger

logger = get_logger("insert unknown keywords")
count = 0


@func_time_logger
def insert_unknown_keywords(_type, _keyword_or_keywords):
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    sql = '''INSERT IGNORE INTO unknown_keywords (`type`, `key_hash`, `keywords`) VALUES (%s, %s, %s);'''
    if isinstance(_keyword_or_keywords, str):
        _hash_key = encode(_keyword_or_keywords)
        cursor.execute(sql, (_type, _hash_key, _keyword_or_keywords))
    elif isinstance(_keyword_or_keywords, (list, set, tuple)):
        for each_keyword in _keyword_or_keywords:
            _hash_key = encode(each_keyword)
            cursor.execute(sql, (_type, _hash_key, each_keyword))
    else:
        logger.debug(
            "[unknown _keyword_or_keywords type: {}][_type: {}][_keyword_or_keywords: {}]".format(
                type(_keyword_or_keywords), _type, _keyword_or_keywords))
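# Illustrative calls (hypothetical keyword values): the function accepts a
# single keyword string or any iterable of keywords.
insert_unknown_keywords('attr', 'some unmatched keyword')
insert_unknown_keywords('rest', ['keyword a', 'keyword b'])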
# project name: pyranometer
# created by diego aliaga daliaga_at_chacaltaya.edu.bo
import datetime
import sqlalchemy as sa
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base
import pandas as pd
import my_logger
import db_sync.functions as functions

logger = my_logger.get_logger(name=__name__, level='DEBUG')


class Database(object):
    """
    a class

    Attributes:
        dic: None
        table: None
        user: None
        psw: None
        ip: None
        database_name: None
        time_column_name: None
        url: None
        eng: None
        meta: None
        base: None
        time_column: None
        session: None
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/7 12:27 AM
# @Author  : Hou Rong
# @Site    :
# @File    : img_larger_than_90_update.py
# @Software: PyCharm
import json
import pymysql.cursors
from data_source import MysqlSource
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

logger = get_logger("img_larger_than_90_update")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'BaseDataFinal'
}


def update_sql(sid, data):
    # Note: sid is interpolated via str.format; the remaining values are bound
    # by executemany as (info, file_name) parameter pairs
    sql = '''UPDATE poi_images
SET `use` = 0, info = %s
WHERE source = 'qyer' AND sid = '{}' AND file_name = %s;'''.format(sid)
    conn = base_data_final_pool.connection()
    cursor = conn.cursor()
    _res = cursor.executemany(sql, data)
def task_start():
    param = sys.argv[1]
    # param = '706'
    zip_path = get_zip_path(param)
    file_name = zip_path.split('/')[-1]
    zip_path = ''.join([base_path, file_name])
    zip_file = zipfile.ZipFile(zip_path)  # renamed from `zip` to avoid shadowing the builtin
    file_name = zip_file.filename.split('.')[0].split('/')[-1]
    path = ''.join([base_path, str(param), '/'])
    logger = get_logger('step3', path)
    save_path = []
    database_name = ''.join(['add_city_', param])
    temp_config = config
    temp_config['db'] = database_name
    if path.endswith('/'):
        file_path = ''.join([path, file_name])
    else:
        file_path = '/'.join([path, file_name])
    file_list = os.listdir(file_path)
    hotels_path = None
    # File-name markers (kept verbatim, they match the delivered files):
    # '新增城市' = new cities, '新增机场' = new airports,
    # '酒店配置' = hotel config, '景点配置' = attraction config
    for child_file in file_list:
        child_file_path = '/'.join([file_path, child_file])
        if ('新增城市' in child_file) and (len(child_file.split('.')[0]) == 4):
            city_path = child_file_path
        elif ('新增机场' in child_file) and (len(child_file.split('.')[0]) == 4):
            airport_path = child_file_path
        elif os.path.isdir(child_file_path):
            picture_path = child_file_path
        elif '酒店配置' in child_file:
            hotels_path = child_file_path
        elif '景点配置' in child_file:
            attr_path = child_file_path
    conf = configparser.ConfigParser()
    conf.read('/search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf', encoding='utf-8')
    conf.set('city', 'srcPrefix', picture_path)
    conf.write(open('/search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf', 'w'))
    judge_city_id = 1
    try:
        return_result = defaultdict(dict)
        return_result['data'] = {}
        return_result['error']['error_id'] = 0
        return_result['error']['error_str'] = ''
        conn = pymysql.connect(**test_config)
        cursor = conn.cursor()
        logger.debug("Start inserting new cities")
        city_base_path = ''.join([base_path, str(param), '/'])
        city_infos = read_file(city_path, temp_config, city_base_path)
        # if city_infos:
        #     select_sql = "select * from city where id=%s"
        #     with open(path + 'city_id.csv', 'r+') as city:
        #         reader = csv.DictReader(city)
        #         for row in reader:
        #             city_id = row['city_id']
        #             cursor.execute(select_sql, (city_id,))
        #             if cursor.fetchall():
        #                 judge_city_id = 0
        #                 break
        logger.debug("[finished inserting new cities]")
        logger.debug("[start renaming new city pictures]")
        if judge_city_id:
            city_map_path = revise_pictureName(picture_path, temp_config, param)
            logger.debug("[finished renaming new city pictures]")
            logger.debug("Start writing renamed picture names to city.new_product_pic")
            update_city_picture = update_city_pic(picture_path, temp_config, param)
            logger.debug("Finished writing renamed picture names to city.new_product_pic")
        logger.debug("Start inserting new airports")
        # if judge_city_id:
        #     new_airport_insert(temp_config, param)
        logger.debug("Finished inserting new airports")
        logger.debug("Start generating shared airports for cities")
        share_airport_to_data_path = []
        if not airport_path:
            need_share_airport_path = list(update_share_airport(temp_config, param))
        elif airport_path:
            share_airport_path = from_file_get_share_airport(config, param)
            citys = share_airport_path[2]
            airport_infos = share_airport_path[3]
            if citys:
                need_share_airport_path = list(
                    update_share_airport(temp_config, param, citys, airport_infos))
            else:
                need_share_airport_path = []
            share_airport_to_data_path = list(share_airport_path)[:2]
            with open(path + 'city_airport_info.csv', 'w+') as city:
                writer = csv.writer(city)
                writer.writerow(
                    ('id_number', 'city_id', 'city_name', 'city_name_en',
                     'country_id', 'city_map_info', 'new_product_city_pic',
                     'airport_name', 'airport_name_en', 'airport_map_info',
                     'airport_belong_city_id', 'airport_from'))
                for city_id in city_infos.keys():
                    temp_save = []
                    temp_save.extend([
                        city_infos[city_id]['id_number'], city_id,
                        city_infos[city_id]['city_name'],
                        city_infos[city_id]['city_name_en'],
                        city_infos[city_id]['country_id'],
                        city_infos[city_id]['city_map_info']
                    ])
                    if not update_city_picture.get(str(city_id), None):
                        pic_name = ''
                    else:
                        pic_name = update_city_picture[str(city_id)]['new_product_city_pic']
                    temp_save.append(pic_name)
                    if airport_infos.get(city_id, None):
                        temp_save.extend([
                            airport_infos[city_id]['airport_name'],
                            airport_infos[city_id]['airport_name_en'],
                            airport_infos[city_id]['airport_map_info'],
                            airport_infos[city_id]['airport_belong_city_id'],
                            airport_infos[city_id]['airport_from']
                        ])
                    writer.writerow(temp_save)
            need_share_airport_path.append('city_airport_info.csv')
        if need_share_airport_path and judge_city_id:
            for airport_file_path in need_share_airport_path:
                airport_file_path = '/'.join([param, airport_file_path])
                save_path.append(airport_file_path)
                temp_path = ''.join([base_path, airport_file_path])
                os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
            city_airport_data = pandas.read_csv(path + 'city_airport_info.csv')
            data = {}
            # Report keys kept verbatim (they are the CSV headers):
            # total new cities / cities without an airport / cities with a
            # tagged airport ('标注机场') / cities with a generated shared
            # airport ('生成共享机场')
            data['新增城市总数'] = len(city_airport_data.values)
            data['无机场城市数量'] = len(
                city_airport_data[city_airport_data['airport_from'].isnull()].values)
            data['有机场城市数量'] = len(
                city_airport_data[city_airport_data['airport_from'].apply(
                    lambda x: x == '标注机场')].values)
            data['有共享机场城市数量'] = len(
                city_airport_data[city_airport_data['airport_from'].apply(
                    lambda x: x == '生成共享机场')].values)
            with open(path + 'city_airport_count.csv', 'w+') as city:
                writer = csv.DictWriter(
                    city, fieldnames=['新增城市总数', '有机场城市数量', '有共享机场城市数量', '无机场城市数量'])
                writer.writeheader()
                writer.writerow(data)
            count_file = '/'.join([param, 'city_airport_count.csv'])
            save_path.append(count_file)
            temp_path = ''.join([base_path, count_file])
            os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
        logger.debug("Finished generating shared airports")
        logger.debug("Start inserting shared airports")
        if judge_city_id and share_airport_to_data_path:
            count = from_file_airport_insert(temp_config, param, share_airport_to_data_path)
            logger.debug("Finished inserting shared airports; total rows: {0}".format(count))
        logger.debug("Start pushing new cities to each ota_location source")
        if hotels_path and judge_city_id:
            add_others_source_city(city_path, hotels_path, attr_path, temp_config, param)
        logger.debug("Finished pushing new cities to each ota_location source")
        return_result = json.dumps(return_result)
        print('[result][{0}]'.format(return_result))
        csv_path = ';'.join(save_path)
        update_step_report(csv_path, param, 1, 0)
        logger.debug("Start uploading pictures")
        if judge_city_id:
            os.system(
                'java -jar /search/cuixiyi/ks3up-tool-2.0.6-20170801/ks3up-2.0.6.jar '
                '-c /search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf start')
        logger.debug("Finished uploading pictures")
        # logger.debug("Start updating the ota_location table")
        # collection_name, task_name = create_task(city_path, path, database_name, param)
        # tasks = modify_status('step3', param, [collection_name, task_name])
        # hotel_file_name, poi_file_name = from_ota_get_city(temp_config, param)
        # temp_path = ''.join([base_path, hotel_file_name])
        # os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
        # temp_path = ''.join([base_path, poi_file_name])
        # os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
        # logger.debug("Finished updating the ota_location table")
    except Exception as e:
        print(traceback.format_exc())
        csv_path = ';'.join(save_path)
        return_result['error']['error_id'] = 1
        return_result['error']['error_str'] = traceback.format_exc()
        return_result = json.dumps(return_result)
        print('[result][{0}]'.format(return_result))
        update_step_report(csv_path, param, -1, 0)
        logger.debug('[result][{0}]'.format(return_result))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/12 12:03 PM
# @Author  : Hou Rong
# @Site    :
# @File    : ImgErrorMd5Search.py
# @Software: PyCharm
import random
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool, fetchall
from my_logger import get_logger
from collections import defaultdict

logger = get_logger("img_error_md5_search")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'ServicePlatform'
}


def used_file_name():
    sql = '''SELECT first_image, image_list FROM chat_shopping;'''
    _img_set = set()
    _count = 0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/30 9:27 PM
# @Author  : Hou Rong
# @Site    :
# @File    : reset_task.py
# @Software: PyCharm
import pymongo
import time
from my_logger import get_logger

logger = get_logger("reset_mongo_task")
client = pymongo.MongoClient(host='10.10.231.105')
db = client['MongoTask']

for each_name in db.collection_names():
    if each_name.startswith('Task_Queue_hotel_detail_TaskName_') and each_name.endswith('20171127a'):
        if 'expedia' not in each_name:
            continue
        start = time.time()
        collections = db[each_name]
        # Reset every unfinished task so it can be picked up again
        res = collections.update(
            {'finished': 0},
            {'$set': {
                'running': 0,
                'used_times': 0
            }},
            multi=True)
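# Side note: `Collection.update(..., multi=True)` is the legacy pymongo API.
# On pymongo 3+ the equivalent call (a sketch with the same filter and update
# document) would be:
#     res = collections.update_many({'finished': 0},
#                                   {'$set': {'running': 0, 'used_times': 0}})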
#!/usr/bin/env python
# encoding: utf-8
import pymysql
from pymysql.cursors import SSDictCursor, SSCursor
from my_logger import get_logger

logger = get_logger("data_source")


class MysqlSource:
    """
    Database data source
    """

    def __init__(self, db_config, table_or_query='', size=500, is_table=True,
                 is_dict_cursor=False):
        self._db_config = db_config
        # Use a server-side (streaming) cursor so large result sets are not
        # materialized in memory all at once
        if is_dict_cursor:
            self._db_config['cursorclass'] = SSDictCursor
        else:
            self._db_config['cursorclass'] = SSCursor
        self._db_config['charset'] = 'utf8mb4'
        self._size = size
        self._table = table_or_query
        if is_table:
            self._sql = 'select * from {0}'.format(self._table)
        else:
            self._sql = table_or_query
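# Hedged usage sketch: construction mirrors the signature above, with a
# made-up connection dict. How rows are consumed depends on methods of
# MysqlSource not shown here, so the loop is an assumption.
demo_config = {'host': '127.0.0.1', 'user': 'demo', 'password': 'demo', 'db': 'demo'}
source = MysqlSource(demo_config, table_or_query='poi_images', size=500,
                     is_table=True, is_dict_cursor=True)
# for row in source:  # assumed iteration protocol
#     process(row)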
import threading
from collections import defaultdict
from toolbox.Common import is_legal
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool, poi_ori_pool
from my_logger import get_logger, func_time_logger
from poi_ori.already_merged_city import update_already_merge_city

poi_type = None
online_table_name = None
data_source_table = None
white_list = list()
max_id = None
lock = threading.Lock()
logger = get_logger("poi_ori")


def init_global_name(_poi_type):
    global poi_type
    global online_table_name
    global data_source_table
    poi_type = _poi_type
    if poi_type == 'attr':
        online_table_name = 'chat_attraction'
        data_source_table = 'attr'
    elif poi_type == 'rest':
        online_table_name = 'chat_restaurant'
        data_source_table = 'rest'
    elif poi_type == 'shop':
        online_table_name = 'chat_shopping'
import logging
import my_logger
from db_layer.models import *
from db_layer.type_const import *
from telegraph import Telegraph
import requests

logger = my_logger.get_logger()
logger.setLevel(logging.INFO)


def init_db():
    logger.info('Connect to db')
    db.connect()
    logger.info('Init models')
    db.create_tables([User, QueuePost], safe=True)


def create_user(name: str, telegram_id: int, nickname: str):
    try:
        logger.info('Creating user with nickname {nick} ...'.format(nick=nickname))
        User.create(name=name, telegram_id=telegram_id, nickname=nickname)
    except Exception:
        logger.error('Failed to create user!')
        return False
    else:
        logger.info('User {nick} created successfully'.format(nick=nickname))
        return True
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/29 7:20 PM
# @Author  : Hou Rong
# @Site    :
# @File    : hotel_too_far.py
# @Software: PyCharm
import os
import numpy as np
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool
from my_logger import get_logger

logger = get_logger("hotel_too_far")

SQL_PATH = '/search/hourong/data_sql'

spider_task_data_config = {
    # 'host': '10.10.238.148',
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    # 'db': 'tmp'
    'db': 'tmp'
}


def dist_from_coordinates(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
    # The original body is truncated after the radius constant; the lines
    # below complete it with the standard haversine formula as a sketch.
    dlat = np.radians(lat2 - lat1)
    dlon = np.radians(lon2 - lon1)
    a = (np.sin(dlat / 2) ** 2 +
         np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon / 2) ** 2)
    return 2 * R * np.arcsin(np.sqrt(a))  # great-circle distance in km
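# Quick sanity check with illustrative coordinates (New York -> Los Angeles);
# the great-circle distance is roughly 3940 km.
print(dist_from_coordinates(40.7128, -74.0060, 34.0522, -118.2437))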
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/14 10:46 AM
# @Author  : Hou Rong
# @Site    :
# @File    : detail_list_crawl_diff.py
# @Software: PyCharm
import pymongo
from my_logger import get_logger
from service_platform_conn_pool import service_platform_pool, fetchall

logger = get_logger('check_list_crawl_diff')

RespClient = pymongo.MongoClient(host='10.10.213.148')
RespDB = RespClient['data_result']

check_collection = [
    # ('Qyer20171214a', 'Task_Queue_poi_list_TaskName_list_total_qyer_20171209a', 'detail_total_qyer_20171209a'),
    ('qyer', 'Task_Queue_poi_list_TaskName_list_total_qyer_20171214a',
     'detail_total_qyer_20171214a')
]


def task_resp_url(collection_name, task_collection_name):
    __set = set()
    resp_collections = RespDB[collection_name]
    _count = 0
    for line in resp_collections.find({'collections': task_collection_name}):
        # result loop
        for each in line['result']:
            _count += 1
import requests
from serviceplatform_data.load_final_data import main as load_final_data
from serviceplatform_data.load_final_data_test import main as load_final_data_qyer
from service_platform_report.routine_report import main as routine_report
from service_platform_report.send_error_email import send_error_report_email
from serviceplatform_data.insert_data_mongo import insert_hotel_data, insert_city_data
from serviceplatform_data.get_nearby_hotel_city import get_nearby_city
from serviceplatform_data.update_hotel_validation import UpdateHotelValidation
from serviceplatform_data.insert_poi_detect_task_info import get_task_info
from serviceplatform_data.delete_already_scanned_file import delete_already_scanned_file
from my_logger import get_logger
from service_platform_report.merge_report import poi_merged_report
from service_platform_report.task_progress_report_mongo_split_task import task_progress_report_split_task_main

SEND_TO = ['*****@*****.**', "*****@*****.**"]
logger = get_logger('cron_task_monitor')


def send_email(title, mail_info, mail_list, want_send_html=False):
    try:
        mail_list = ';'.join(mail_list)
        data = {
            'mailto': mail_list,
            'content': mail_info,
            'title': title,
        }
        if want_send_html:
            data['mail_type'] = 'html'
        requests.post('http://10.10.150.16:9000/sendmail', data=data)
    except Exception as e:
        logger.exception(msg="[send email error]", exc_info=e)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/30 9:37 PM
# @Author  : Hou Rong
# @Site    :
# @File    : move_data.py
# @Software: PyCharm
from data_source import MysqlSource
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

logger = get_logger("move_data")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'ServicePlatform'
}


def insert_data(data):
    update_sql = '''REPLACE INTO poi_images
    (file_name, source, sid, url, pic_size, bucket_name, url_md5, pic_md5,
     `use`, part, date, info)
    VALUES (%(file_name)s, %(source)s, %(sid)s, %(url)s, %(pic_size)s,
            %(bucket_name)s, %(url_md5)s, %(pic_md5)s, %(use)s, %(part)s,
            %(date)s, %(info)s);'''
    conn = base_data_final_pool.connection()
    cursor = conn.cursor()
    _res = cursor.executemany(update_sql, data)
    conn.commit()
    cursor.close()
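# Illustrative payload (hypothetical values): executemany with %(name)s
# placeholders expects one dict per row, keyed by placeholder name.
rows = [{
    'file_name': 'abc123.jpg', 'source': 'qyer', 'sid': '1001',
    'url': 'http://example.com/abc123.jpg', 'pic_size': '800,600',
    'bucket_name': 'demo-bucket',
    'url_md5': 'd41d8cd98f00b204e9800998ecf8427e',
    'pic_md5': '9e107d9d372bb6826bd81d3542a419d6',
    'use': 1, 'part': 0, 'date': '20171030', 'info': '{}',
}]
insert_data(rows)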
# @Software: PyCharm
import pymongo
import datetime
import json
import hashlib
import toolbox.Date
from data_source import MysqlSource
from patched_mongo import mongo_patched_insert
from my_logger import get_logger

toolbox.Date.DATE_FORMAT = "%Y%m%d"

client = pymongo.MongoClient(host='10.10.231.105')
collections = client['MongoTask']['Task']
logger = get_logger("insert_mongo_task")

base_data_final_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'BaseDataFinal'
}

offset = 0
pre_offset = 0


def insert_mongo(data):
    global offset
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/6 9:34 PM
# @Author  : Hou Rong
# @Site    :
# @File    : select_and_update_table.py
# @Software: PyCharm
import re
import json
from data_source import MysqlSource
from service_platform_conn_pool import poi_ori_pool, poi_face_detect_pool, service_platform_pool, \
    base_data_final_pool, fetchall
from my_logger import get_logger

logger = get_logger("select_and_update_table")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'poi_merge'
}


def update_sql(data):
    sql = '''UPDATE chat_attraction
SET beentocount = %s, plantocount = %s, commentcount = %s
WHERE id = %s;'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/25 9:49 AM
# @Author  : Hou Rong
# @Site    :
# @File    : insert_poi_detect_task_info.py
# @Software: PyCharm
import pymysql
from warnings import filterwarnings
from service_platform_conn_pool import service_platform_pool, base_data_pool
from data_source import MysqlSource
from my_logger import get_logger

filterwarnings('ignore', category=pymysql.err.Warning)
logger = get_logger("insert_poi_detect_task_info")

service_platform_conf = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    # 'db': 'ServicePlatform'
    'db': 'poi_merge'
}

offset = 0
cid2grade = None


def insert_task_data(data, _count):