Example No. 1
def setup():
    delegate.fakeload()
    from infogami.utils import types
    # Register pages whose keys match this pattern as /type/i18n.
    types.register_type('/i18n(/.*)?/strings.[^/]*', '/type/i18n')

    # Load the translation strings of every site.
    for site in db.get_all_sites():
        load_strings(site)
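The first argument to register_type is a regular expression over page keys. A quick standalone check of which keys the pattern accepts (hypothetical keys, using only Python's re module):

import re

pattern = re.compile('/i18n(/.*)?/strings.[^/]*')
for key in ['/i18n/strings.en', '/i18n/pt/strings.pt', '/about/strings.en']:
    # Prints True for the first two keys and False for the last one.
    print(key, bool(pattern.match(key)))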
Example No. 2
def spark_split_jobs():
    # state: state name -> list of site_ids; site: site_id -> site_info
    state = {}
    site = {}
    db = get_all_sites()
    print("TOTAL SITES %u" % len(db))
    for site_info in db:
        # Accumulate site_ids by state
        if site_info['state'] not in state:
            state[site_info['state']] = [site_info['site_id']]
        else:
            state[site_info['state']].append(site_info['site_id'])

        # Reverse index by site_id
        if site_info['site_id'] not in site:
            site[site_info['site_id']] = site_info
    '''
    # Print states and sites
    min_sites = 9999999
    max_sites = 0
    min_state = ""
    max_state = ""
    for s in state.keys():
        num_sites = len(state[s])
        print("STATE: %s SITES: %u" % (s, len(state[s])))
        if s is None:
            continue
        if num_sites < min_sites:
            min_sites = num_sites
            min_state = s
        if num_sites > max_sites:
            max_sites = num_sites
            max_state = s
    print("TOTAL STATES => %u" % (len(state.keys())))
    print("MAX SITES STATE %s SITES %u" % (max_state, max_sites))
    print("MIN SITES STATE %s SITES %u" % (min_state, min_sites))
    '''

    site_list_of_lists = []
    for s in state:
        site_list_of_lists.append(state[s])

    # Distribute the states evenly across the list
    zipped_list = evenly_spaced(site_list_of_lists)

    # Break list into chunks
    run_id = 1
    for i in range(0, len(zipped_list), SITES_PER_JOB):
        split_list = zipped_list[i:i + SITES_PER_JOB]
        #print_state_count(split_list, site)
        # Move parquet files to RUNx folder
        move_parquet(split_list, run_id)
        # Run the spark job on the RUNx folder files
        #spark_run_split_job(run_id)
        run_id = run_id + 1
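evenly_spaced is not defined in this snippet; given how the result is chunked above, a plausible reading is a round-robin interleave of the per-state site_id lists so that each chunk mixes states. A minimal sketch under that assumption (not the original helper):

from itertools import chain, zip_longest

_MISSING = object()

def evenly_spaced(list_of_lists):
    # Take one element from each inner list in turn, dropping the padding,
    # e.g. [[1, 2, 3], [4, 5], [6]] -> [1, 4, 6, 2, 5, 3].
    interleaved = chain.from_iterable(zip_longest(*list_of_lists, fillvalue=_MISSING))
    return [x for x in interleaved if x is not _MISSING]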
Example No. 3
def load_all():
    def load_macros(site):
        for m in db.get_all_macros(site):
            _load_macro(m, lazy=True)

    def load_templates(site):
        for t in db.get_all_templates(site):
            _load_template(t, lazy=True)

    # Register the macros and templates of every site.
    for site in db.get_all_sites():
        context.site = site
        load_macros(site)
        load_templates(site)
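The lazy=True flag suggests that macros and templates are only registered here and compiled when first used. A hypothetical illustration of that deferred-compilation pattern (not infogami's implementation; LazyTemplateCache and compile_func are made-up names):

class LazyTemplateCache:
    def __init__(self, compile_func):
        self._compile = compile_func   # turns raw source into a template object
        self._sources = {}             # name -> raw source
        self._compiled = {}            # name -> compiled template

    def register(self, name, source, lazy=True):
        # Always remember the source; compile right away only when lazy is False.
        self._sources[name] = source
        if not lazy:
            self._compiled[name] = self._compile(source)

    def get(self, name):
        # Compile on first access and cache the result.
        if name not in self._compiled:
            self._compiled[name] = self._compile(self._sources[name])
        return self._compiled[name]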
Example No. 4
def spark_split_jobs():
    # state: state name -> list of site_ids; site: site_id -> site_info
    state = {}
    site = {}
    db = get_all_sites()
    print("TOTAL SITES %u" % len(db))
    for site_info in db:
        # Accumulate site_ids by state
        if site_info['state'] not in state:
            state[site_info['state']] = [site_info['site_id']]
        else:
            state[site_info['state']].append(site_info['site_id'])

        # Reverse index by site_id
        if site_info['site_id'] not in site:
            site[site_info['site_id']] = site_info

    site_list_of_lists = []
    for s in state:
        site_list_of_lists.append(state[s])

    # Distribute the states evenly across the list
    zipped_list = evenly_spaced(site_list_of_lists)

    run_id = 2
    split_list = zipped_list[5000:10000]
    move_parquet(split_list, run_id)
    run_id = run_id + 1

    threads = []

    for i in range(0, len(zipped_list), SITES_PER_JOB):
        if i < 2:
            continue
        split_list = zipped_list[i:i + SITES_PER_JOB]
        #print_state_count(split_list, site)
        # Move parquet files to RUNx folder
        t = threading.Thread(target=move_parquet, args=(split_list, run_id))
        t.start()
        time.sleep(1)
        threads.append(t)
        #move_parquet(split_list, run_id)
        # Run the spark job on the RUNx folder files
        run_id = run_id + 1

    for t in threads:
        t.join()
    print("ALL FILES COPIED!")
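This variant assumes threading and time are imported at module level. The same copy step can also be written with a bounded worker pool instead of a hand-managed thread list; a sketch assuming the same move_parquet(split_list, run_id) signature and SITES_PER_JOB constant:

from concurrent.futures import ThreadPoolExecutor

def copy_all_chunks(zipped_list, start_run_id=2, max_workers=8):
    # Submit one move_parquet call per chunk; the executor bounds how many
    # copies run at once, replacing the manual start/sleep/join bookkeeping.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = []
        run_id = start_run_id
        for i in range(0, len(zipped_list), SITES_PER_JOB):
            split_list = zipped_list[i:i + SITES_PER_JOB]
            futures.append(pool.submit(move_parquet, split_list, run_id))
            run_id += 1
        for f in futures:
            f.result()   # waits for the copy and re-raises any exception
    print("ALL FILES COPIED!")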