def main():
    options = get_cmd_line_args()
    config = util.load_unstripped_json(options.config)
    g_logger.info("Fetching failed jobs from progress db")
    start_dt = date_util.from_date_iso(options.start_date)
    mongo = gae_download.open_db_conn(config)
    coordinator_cfg = config["coordinator_cfg"]
    # Don't touch tasks that were recently started.
    # TODO(yunfang): parameterize this
    two_hours_ago = datetime.datetime.now() - datetime.timedelta(hours=2)
    results = ka_download_coordinator.get_failed_jobs(mongo, coordinator_cfg)
    if not results:
        g_logger.info("Empty result set. Nothing to reprocess.")
        exit(0)
    for rec in results:
        if rec["history"]["1"] < start_dt:
            continue
        if rec["history"]["1"] >= two_hours_ago:
            # Started less than 2 hours ago
            continue
        # Reprocess
        fetch_interval = config['kinds'][rec['kind']][1]
        gae_download.fetch_and_process_data(rec["kind"], rec["start_dt"],
                                            rec["end_dt"], fetch_interval,
                                            config)
    g_logger.info("Done reprocessing!!")

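# Illustrative only: a minimal record of the shape the reprocessing loop
# above consumes. The field names come from the loop itself; the values
# are hypothetical.
example_failed_job = {
    "kind": "ProblemLog",  # hypothetical GAE kind
    "start_dt": datetime.datetime(2012, 5, 1, 0, 0),
    "end_dt": datetime.datetime(2012, 5, 1, 1, 0),
    # history["1"] appears to record when the first attempt started
    "history": {"1": datetime.datetime(2012, 5, 1, 0, 5)},
}
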
def main():
    options = get_cmd_line_args()
    config = util.load_unstripped_json(options.config)
    # Hard-code some args
    config['max_threads'] = 2
    config['coordinator_cfg']['control_db'] = "ka_backpopulate_cntrl"
    config["sub_process_time_out"] = 86400 * 3  # 3 days
    with open(options.file_list) as f:
        file_list = f.readlines()
    processes = []
    for gzfile in file_list:
        # Block until a worker slot frees up, checking on the running
        # subprocesses while we wait.
        while True:
            if len(active_children()) < config['max_threads']:
                g_logger.info("Starting loading %s ...", gzfile)
                p = Process(target=gz_pickle_to_mongo,
                            args=(config, gzfile.strip()))
                processes.append((p, gzfile.strip(), time.time()))
                p.start()
                time.sleep(5)
                break
            else:
                monitor(config, processes)
                time.sleep(10)
    # Wait for the remaining subprocesses to drain.
    while len(active_children()) > 0:
        monitor(config, processes)
        time.sleep(10)

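# monitor() is referenced above but not shown here. A minimal sketch of
# what it plausibly does, given that `processes` holds
# (Process, filename, start_time) tuples and the config carries
# "sub_process_time_out" -- an assumption, not the repo's actual code:
def monitor_sketch(config, processes):
    """Terminate loader subprocesses that have exceeded the timeout."""
    now = time.time()
    for proc, gzfile, started in processes:
        if proc.is_alive() and now - started > config["sub_process_time_out"]:
            g_logger.error("Loading %s timed out; terminating", gzfile)
            proc.terminate()
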
def main():
    options = get_cmd_line_args()
    # NOTE: the Mongo connection specified on the command line will
    # override what may be specified in the config file for gae_download.
    mongo_conn = pymongo.Connection(options.server, options.port)
    config = util.load_unstripped_json(options.config)
    insert_dumpfile_into_mongo(options.file, config, mongo_conn,
                               options.offset, options.limit)

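# get_cmd_line_args() is defined elsewhere in the script. For context, a
# minimal optparse sketch of the options this main() reads; the flag
# names and defaults are assumptions:
import optparse

def get_cmd_line_args_sketch():
    parser = optparse.OptionParser()
    parser.add_option("--server", default="localhost",
                      help="mongo host to load the dump into")
    parser.add_option("--port", type="int", default=27017)
    parser.add_option("--config", help="path to the analytics config json")
    parser.add_option("--file", help="path to the GAE dump file")
    parser.add_option("--offset", type="int", default=0)
    parser.add_option("--limit", type="int", default=0)
    options, _ = parser.parse_args()
    return options
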
def get_db(db_name, config_location):
    """Return a pymongo Database reference as configured in 'config'."""
    config = util.load_unstripped_json(config_location)
    db_config = config['databases']['mongo'][db_name]
    server_name = db_config['server']
    mongo_db_name = db_config['database']
    return get_connection(server_name, config_location)[mongo_db_name]

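# Example usage; the logical db name "report" and the config path are
# hypothetical:
#
#     report_db = get_db("report", "analytics_config.json")
#     doc = report_db.some_collection.find_one()
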
def get_connection(mongo_server_name, config_location):
    """Return a pymongo.Connection to the named server.

    NOTE: mongo_server_name is not the hostname of the machine or the
    name of the EC2 instance; it is the name given to the mongo server
    in the main analytics config file, the location of which is the
    second argument.
    """
    config = util.load_unstripped_json(config_location)
    server_config = config['servers']['mongo'][mongo_server_name]
    host = server_config['host']
    port = server_config['port']
    return pymongo.Connection(host, port)

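# A sketch of the config layout that get_db()/get_connection() index
# into. The keys are taken straight from the code above; the server and
# database names are made up:
example_config = {
    "servers": {
        "mongo": {
            "main": {"host": "10.0.0.1", "port": 27017},
        },
    },
    "databases": {
        "mongo": {
            "report": {"server": "main", "database": "report"},
        },
    },
}
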
def main():
    options = get_cmd_line_args()
    config = load_unstripped_json(options.config)
    for key in DEFAULT_DOWNLOAD_SETTINGS.keys():
        if key not in config:
            config[key] = DEFAULT_DOWNLOAD_SETTINGS[key]
    if options.start_date and options.end_date:
        start_dt = date_util.from_date_iso(options.start_date)
        end_dt = date_util.from_date_iso(options.end_date)
    else:
        # No explicit window given: snap to the most recent complete
        # processing interval.
        ts = time.time()
        end_ts = ts - (ts % int(options.proc_interval))
        start_ts = end_ts - int(options.proc_interval)
        start_dt = dt.datetime.fromtimestamp(start_ts)
        end_dt = dt.datetime.fromtimestamp(end_ts)
    if options.archive_dir:
        # Override the archive directory, if specified.
        config['archive_dir'] = options.archive_dir
    start_data_process(config, start_dt, end_dt)

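# Worked example of the window arithmetic above (values hypothetical):
# with proc_interval = 3600 and ts = 1339180230,
#   end_ts   = 1339180230 - (1339180230 % 3600) = 1339178400
#   start_ts = 1339178400 - 3600                = 1339174800
# i.e. [start_dt, end_dt) is the most recent complete hour.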