Example no. 1
def main():
    options = get_cmd_line_args()
    config = util.load_unstripped_json(options.config)
    g_logger.info("Fetching failed jobs from progress db")
    start_dt = date_util.from_date_iso(options.start_date)
    mongo = gae_download.open_db_conn(config)
    coordinator_cfg = config["coordinator_cfg"]
    # Don't touch tasks that were recently started
    # TODO(yunfang): parameterize this
    two_hours_ago = datetime.datetime.now() - datetime.timedelta(hours=2)
    results = ka_download_coordinator.get_failed_jobs(mongo, coordinator_cfg)
    if not results:
        g_logger.info("Empty result set. Nothing to reprocess.")
        exit(0)
    for rec in results:
        if rec["history"]["1"] < start_dt:
            continue
        if rec["history"]["1"] >= two_hours_ago:
            # Started less than 2 hours ago
            continue
        # Reprocess
        fetch_interval = config['kinds'][rec['kind']][1]
        gae_download.fetch_and_process_data(rec["kind"], rec["start_dt"],
                                            rec["end_dt"], fetch_interval,
                                            config)
    g_logger.info("Done reprocessing!!")
Example no. 2
def main():
    options = get_cmd_line_args()
    config = util.load_unstripped_json(options.config)
    # Hard-code some args
    config['max_threads'] = 2
    config['coordinator_cfg']['control_db'] = "ka_backpopulate_cntrl"
    config["sub_process_time_out"] = 86400 * 3  # 3 days, in seconds
    with open(options.file_list) as f:
        file_list = f.readlines()
    processes = []
    for gzfile in file_list:
        while True:
            if len(active_children()) < config['max_threads']:
                g_logger.info("Starting loading %s ...", gzfile)
                p = Process(target=gz_pickle_to_mongo,
                            args=(config, gzfile.strip()))
                processes.append((p, gzfile.strip(), time.time()))
                p.start()
                time.sleep(5)
                break
            else:
                monitor(config, processes)
                time.sleep(10)
    while len(active_children()) > 0:
        monitor(config, processes)
        time.sleep(10)
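
The monitor helper is not shown on this page. Given the sub_process_time_out key set above, a minimal sketch of what it plausibly does (an assumption, not the project's actual implementation) is to terminate workers that have run past the timeout:

import time

def monitor(config, processes):
    # Hypothetical sketch: kill workers that have exceeded
    # config["sub_process_time_out"]. g_logger is the module logger
    # used in the example above.
    timeout = config["sub_process_time_out"]
    for proc, filename, started_at in processes:
        if proc.is_alive() and time.time() - started_at > timeout:
            g_logger.error("Loading %s timed out; terminating", filename)
            proc.terminate()
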
Example no. 3
def main():
    options = get_cmd_line_args()
    # NOTE: the Mongo connection specified on the command line overrides
    # whatever may be specified in the config file for gae_download
    mongo_conn = pymongo.Connection(options.server, options.port)
    config = util.load_unstripped_json(options.config)
    insert_dumpfile_into_mongo(options.file, config, mongo_conn,
                               options.offset, options.limit)
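
Note that pymongo.Connection is the legacy constructor and was removed in pymongo 3.0 in favor of MongoClient; on a modern driver the equivalent line would be:

import pymongo

# options.server / options.port as parsed above.
mongo_conn = pymongo.MongoClient(options.server, options.port)
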
Example no. 4
def get_db(db_name, config_location):
    """Return a pymongo Database reference as configured in 'config'."""
    config = util.load_unstripped_json(config_location)
    db_config = config['databases']['mongo'][db_name]

    server_name = db_config['server']
    mongo_db_name = db_config['database']

    return get_connection(server_name, config_location)[mongo_db_name]
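
A hypothetical call, with illustrative database and config-file names:

# Both names are illustrative; the real ones live in the analytics config.
report_db = get_db('report', 'analytics_config.json')
print(report_db.name)
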
Example no. 5
def get_connection(mongo_server_name, config_location):
    """Return a pymongo.Connection to the named server.
    
    NOTE: the mongo_server_name is not the hostname of the machine or 
    the name of EC2 instance, it is the name given to the mongo server in 
    the main analytics config file, the location of which is 
    the second argument.
    """
    config = util.load_unstripped_json(config_location)
    server_config = config['servers']['mongo'][mongo_server_name]

    host = server_config['host']
    port = server_config['port']

    return pymongo.Connection(host, port)
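
Taken together, get_db and get_connection imply a config layout roughly like the following, shown here as a Python dict with purely illustrative names:

config = {
    'servers': {
        'mongo': {
            'analytics-primary': {'host': 'localhost', 'port': 27017},
        },
    },
    'databases': {
        'mongo': {
            'report': {'server': 'analytics-primary', 'database': 'report'},
        },
    },
}
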
Example no. 6
def main():
    options = get_cmd_line_args()
    config = load_unstripped_json(options.config)
    for key, value in DEFAULT_DOWNLOAD_SETTINGS.items():
        config.setdefault(key, value)
    if options.start_date and options.end_date:
        start_dt = date_util.from_date_iso(options.start_date)
        end_dt = date_util.from_date_iso(options.end_date)
    else:
        # No explicit dates given: align to the most recent proc_interval
        # boundary and process the preceding interval.
        ts = time.time()
        end_ts = ts - (ts % int(options.proc_interval))
        start_ts = end_ts - int(options.proc_interval)
        start_dt = dt.datetime.fromtimestamp(start_ts)
        end_dt = dt.datetime.fromtimestamp(end_ts)
    if options.archive_dir:
        # Override the archive directory, if specified.
        config['archive_dir'] = options.archive_dir
    start_data_process(config, start_dt, end_dt)
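
To make the interval alignment concrete, here is a small worked example of the bucketing arithmetic above, using round numbers:

proc_interval = 3600                 # one-hour buckets (illustrative)
ts = 10000.0                         # example "current time", in seconds
end_ts = ts - (ts % proc_interval)   # 7200.0: floored to the boundary
start_ts = end_ts - proc_interval    # 3600.0: the previous boundary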