Пример #1
0
def structure_rank_create(environment):
    """
    Create the structured rank mapping, optimize it, and upload the result.

    Steps, in order: run ``sparkTask.structureMapRank_task``, dump its
    ``structureMapRank`` output from the cluster into a local file, run the
    ``structure-optimize`` jar over that file, then put the optimized file
    into the cluster directory ``structureOptimizeRank``.

    :param environment: environment name, forwarded to the sparkTask job
    :return: None
    """
    started_at = time.time()
    sparkTask.structureMapRank_task(environment, rank_output_path)
    utils.rm_mkdir(current_rank_version + "/result", constant.local_sign)

    # Download the structured rank into a single local file
    remote_parts = rank_output_path + "/structureMapRank/part*"
    local_rank_file = current_rank_version + constant.local_structure_rank_path
    utils.execute_command("hadoop fs -text " + remote_parts + " > " + local_rank_file, shell=True)

    # Optimize the structured rank with the external jar
    jar_file = constant.java_jar_path + "structure-optimize-1.0-SNAPSHOT.jar"
    local_optimized_file = current_rank_version + constant.local_structure_optimize_path
    optimize_command = " ".join(
        ["java -Xms4096M -Xmx7096M -jar", jar_file, local_rank_file, local_optimized_file]
    )
    utils.execute_command(optimize_command, shell=True)

    # Upload the optimized structured rank
    remote_optimized_dir = rank_output_path + "/structureOptimizeRank/"
    utils.rm_mkdir(remote_optimized_dir, constant.cluster_sign)
    utils.execute_command(
        "hadoop fs -put " + local_optimized_file + " " + remote_optimized_dir, shell=True
    )

    logger.info("spark structure_rank_create finished,used time:%s s", str(time.time() - started_at))
Пример #2
0
def main(environment='beta'):
    """
    Run the complete rank workflow for one environment.

    Logs the configured input/output paths, points ``current_rank_version``
    at a new timestamped directory under ``constant.rank_version_path``,
    runs every workflow stage in a fixed order, and logs the total time.

    :param environment: environment name forwarded to the stages that take one
    :return: None
    """
    global zeus_poi_path, zeus_myself_path, zeus_structure_path, zeus_polygon_path
    global rank_output_path, current_rank_version

    workflow_started_at = time.time()
    logger.info("rank work flow begin")
    now = datetime.datetime.now()

    logger.info("current environment:" + environment)
    configured_paths = (
        ("zeus_poi_path", zeus_poi_path),
        ("zeus_myself_path", zeus_myself_path),
        ("zeus_structure_path", zeus_structure_path),
        ("zeus_polygon_path", zeus_polygon_path),
        ("rank_output_path", rank_output_path),
    )
    for label, path in configured_paths:
        logger.info(label + ":" + path)

    # Each run works inside its own timestamped local directory
    current_rank_version = constant.rank_version_path + now.strftime('%Y%m%d_%H:%M')
    utils.rm_mkdir(current_rank_version, constant.local_sign)

    # Workflow stages; the order is significant
    parse_excel_upload()
    feature_poi_create(environment)
    download_feature_poi()
    rank_create()
    rank_combine_upload(environment)
    brand_rank_create(environment)
    structure_rank_create(environment)
    rank_optimization(environment)

    elapsed = str(time.time() - workflow_started_at)
    summary = "rank work flow finished,total time:{time}s,environment:{environment}"
    logger.info(summary.format(time=elapsed, environment=environment))
Пример #3
0
def brand_rank_create(environment):
    """
    Produce the brand rank and upload it to the cluster.

    Steps, in order: run ``sparkTask.brandFeature_task``, fetch the
    ``brandFeatureValue`` feature files into the local version directory,
    run ``brandRankPrediction.files_rank_cluster`` over them, and put the
    resulting brand rank directory under ``rank_output_path``.

    :param environment: environment name, forwarded to the sparkTask job
    :return: None
    """
    started_at = time.time()
    sparkTask.brandFeature_task(environment, rank_output_path)

    # Fetch the brand feature files from the cluster
    local_feature_dir = current_rank_version + constant.local_brandfeaturePoi_path
    utils.rm_mkdir(local_feature_dir, constant.local_sign)
    remote_features = rank_output_path + "/brandFeatureValue/*-feature"
    utils.execute_command("hadoop fs -get " + remote_features + " " + local_feature_dir, shell=True)

    # Brand prediction: feature files in, brand rank files out
    local_brand_rank_dir = current_rank_version + constant.brand_rank_path
    utils.rm_mkdir(local_brand_rank_dir, constant.local_sign)
    brandRankPrediction.files_rank_cluster(local_feature_dir, local_brand_rank_dir)

    # Upload the brand rank directory
    utils.execute_command(
        "hadoop fs -put " + local_brand_rank_dir + " " + rank_output_path, shell=True
    )

    logger.info("brand rank create and upload finished,used time:%s s", str(time.time() - started_at))
Пример #4
0
def download_feature_poi():
    """
    Download the feature POI files in two ways (by category, by city).

    Fetches ``featureValue`` and then ``cityFeatureValue`` from
    ``rank_output_path`` into the local version directory, logging the time
    each download took.

    :return: None
    """

    def _fetch(remote_dir_name, local_dir, finished_message):
        # Prepare the local directory, pull every "*-feature" entry, log elapsed time.
        fetch_started_at = time.time()
        utils.rm_mkdir(local_dir, constant.local_sign)
        get_command = (
            "hadoop fs -get " + rank_output_path + "/" + remote_dir_name + "/*-feature " + local_dir
        )
        utils.execute_command(get_command, shell=True)
        logger.info(finished_message, str(time.time() - fetch_started_at))

    # Feature value data
    _fetch(
        "featureValue",
        current_rank_version + constant.local_featurePoi_path,
        "featurePoi download finished,used time:%s s",
    )

    # Per-city feature value data
    _fetch(
        "cityFeatureValue",
        current_rank_version + constant.local_city_featurePoi_path,
        "cityfeaturePoi download finished,used time:%s s",
    )
Пример #5
0
def parse_excel_upload():
    """
    Parse the feature thresholds and upload them to the cluster.

    Steps, in order:
      1. Download ``poi-rank.xlsx`` with wget.
      2. Run ``excelparse.jar`` to generate the feature threshold and weight
         configuration files.
      3. Copy ``poi-threshold.txt`` and ``poi-weight.txt`` into
         ``<current_rank_version>/config/``.
      4. Prepare ``rank_output_path`` and its ``config/`` directory on the
         cluster and put both files there.

    :return: None
    """

    download_rank_config_command = 'wget "http://svn.sogou-inc.com/svn/go2map/data/poi/edit/trunk/rank/poi-rank.xlsx"  --user=svnsogoumap --password="******" -O /search/odin/taoyongbo/rank/input/poi-rank.xlsx'
    utils.execute_command(download_rank_config_command, shell=True)

    # Parse the excel file to generate the feature threshold and weight config files.
    # The timer must start BEFORE the command runs; starting it afterwards
    # (as the code previously did) always logged an elapsed time of ~0 s.
    parse_excel_time = time.time()
    parse_command = "java  -Xms800M -Xmx2g -jar " + constant.java_jar_path + "excelparse.jar"
    utils.execute_command(parse_command, shell=True)
    logger.info("parse_excel finished,used time:%s s", str(time.time() - parse_excel_time))

    # NOTE(review): unlike the other rm_mkdir calls, no sign argument is passed
    # here; presumably this relies on a default in utils.rm_mkdir - confirm.
    utils.rm_mkdir(current_rank_version + "/config/")
    copy_config_command = "cp /search/odin/taoyongbo/rank/result/poi-threshold.txt /search/odin/taoyongbo/rank/result/poi-weight.txt " + current_rank_version + "/config/"
    utils.execute_command(copy_config_command, shell=True)

    upload_config_time = time.time()
    utils.rm_mkdir(rank_output_path, constant.cluster_sign)
    utils.rm_mkdir(rank_output_path + "/config/", constant.cluster_sign)

    # Upload the feature threshold file
    upload_threshold_command = "hadoop fs -put " + current_rank_version + constant.poi_threshold_path + " " + rank_output_path + "/config/poi-threshold.txt"
    utils.execute_command(upload_threshold_command, shell=True)

    # Upload the weight file
    upload_weight_command = "hadoop fs -put " + current_rank_version + constant.weight_path + " " + rank_output_path + "/config/poiWeight.txt"
    utils.execute_command(upload_weight_command, shell=True)

    logger.info("upload_threshold finished,used  time:%s s", str(time.time() - upload_config_time))
Пример #6
0
def rank_create():
    """
    Produce the rank files and back them up.

    Splits the feature files, clusters the multi-dimension features, clusters
    the single-field features once for ``hotCount`` and once for ``hitCount``,
    then concatenates each set of rank files into one file in the local
    version directory (``multiRank``, ``hotCountRank``, ``hitCountRank``).

    :return: None
    """
    # Split the feature file
    splitFeatureFile.split_file(current_rank_version)

    multi_rank_dir = current_rank_version + constant.local_multi_path
    single_rank_dirs = (
        ("hotCount", current_rank_version + constant.hotCount_single_rank_path),
        ("hitCount", current_rank_version + constant.hitCount_single_rank_path),
    )

    utils.rm_mkdir(multi_rank_dir, constant.local_sign)
    for _, single_dir in single_rank_dirs:
        utils.rm_mkdir(single_dir, constant.local_sign)

    # Cluster the multi-dimension feature files
    stage_started_at = time.time()
    rankPrediction.files_rank_cluster(
        current_rank_version + constant.local_split_featurePoi_path,
        multi_rank_dir,
        current_rank_version + constant.weight_path
    )
    logger.info("multi featurePoi cluster finished,used time:%s s", str(time.time() - stage_started_at))

    # Cluster the single-field feature files, one pass per field
    for field, single_dir in single_rank_dirs:
        stage_started_at = time.time()
        poiRankCluster.files_rank_cluster(
            current_rank_version + constant.local_city_featurePoi_path,
            single_dir,
            "single",
            field
        )
        logger.info(field + " featurePoi cluster finished,used time:%s s",
                    str(time.time() - stage_started_at))

    # Concatenate each set of rank files into a single file
    utils.execute_command(
        "cat " + multi_rank_dir + "*-rank > " + current_rank_version + "/multiRank", shell=True
    )
    for field, single_dir in single_rank_dirs:
        merge_command = "cat " + single_dir + "*_rank > " + current_rank_version + "/" + field + "Rank"
        utils.execute_command(merge_command, shell=True)