def control(batch_enhanced, s):
    """Merge the new enhanced batch into the full S3 archive, then build
    the filtered label dict.

    NOTE(review): the dict is built from the archive as it was read
    (pre-merge), not from the merged result — preserved as-is.
    """
    batch_enhanced_full = utils.read_from_s3(
        utils.file_name(sufix="_batch_enhanced_full"),
        seed=[],
        directory=s["s3dir"])
    print("Length of enhanced pre batch: " + str(len(batch_enhanced_full)))

    # Append the fresh batch and persist the grown archive back to S3.
    merged = add_batch(batch_enhanced_full, batch_enhanced)
    utils.write_to_s3(merged,
                      utils.file_name(sufix="_batch_enhanced_full"),
                      directory=s["s3dir"])

    return make_filtered_label_dict(batch_enhanced_full)
Exemplo n.º 2
0
    def save(self):
        """Dump this config object's attributes to a YAML file in model_dir.

        Data/vocab paths are made absolute so the dumped config is usable
        from any working directory. ``pretrain_data`` is dropped entirely
        when unset; the other path keys are kept even when falsy.
        """
        hparams_file = os.path.join(
            self.model_dir, "{}_config.yml".format(file_name(self.config)))
        print_out("  saving config to %s" % hparams_file)

        to_dump_dict = dict(self.__dict__)
        # Absolutize every set path key (replaces four copy-pasted if-blocks).
        for key in ('train_data', 'test_data', 'dev_data', 'vocab_file'):
            if to_dump_dict[key]:
                to_dump_dict[key] = os.path.abspath(to_dump_dict[key])
        if to_dump_dict['pretrain_data']:
            to_dump_dict['pretrain_data'] = os.path.abspath(
                to_dump_dict['pretrain_data'])
        else:
            # Unlike the keys above, an unset pretrain_data is removed.
            to_dump_dict.pop('pretrain_data')

        with codecs.getwriter("utf-8")(open(hparams_file, "wb")) as f:
            yaml.dump(to_dump_dict, f, default_flow_style=False)
Exemplo n.º 3
0
def change_png2eps(root='./result/figures'):
    """Re-save every figure under *root* as a 500-dpi EPS file named
    ``test_<stem>.eps`` in the same directory."""
    for fname in file_name(root)[0]:
        image = plt.imread('{}/{}'.format(root, fname))
        # Stem is everything before the first dot in the file name.
        stem = fname[:fname.find('.', 0)]
        plt.imsave('{}/test_{}.eps'.format(root, stem), image, format='eps', dpi=500)
def control(filtered_label_dict, s):
    """Look up user info, fold it into the label dict, and persist the
    enriched dict to S3. Returns the enriched dict."""
    user_data, filtered_label_dict = update_user_info(filtered_label_dict, s)
    enriched = update_dict_with_user_info(filtered_label_dict, user_data)
    utils.write_to_s3(
        enriched,
        utils.file_name(prefix='_batch_filt_label_dict_enhanced_'),
        directory=s["s3dir"])
    return enriched
Exemplo n.º 5
0
def get_library_list():
    """Get library actionlists.

    Lazily loads every ``*.phatch`` file under the library path into the
    module-level ``__LIBRARY_ACTIONLISTS`` cache (name -> parsed content)
    and returns it; subsequent calls return the cache unchanged.
    """
    global __LIBRARY_ACTIONLISTS
    if __LIBRARY_ACTIONLISTS:
        return __LIBRARY_ACTIONLISTS
    pattern = os.path.join(config.PHATCH_ACTIONLISTS_PATH, '*.phatch')
    actionlists = {}
    for actionlist_file in glob.glob(pattern):
        # BUGFIX: the original never closed the handle from open(...).read();
        # the with-block closes it deterministically even if eval_safe raises.
        with open(actionlist_file) as fh:
            actionlists[utils.file_name(actionlist_file)] = eval_safe(fh.read())
    __LIBRARY_ACTIONLISTS = actionlists
    return __LIBRARY_ACTIONLISTS
Exemplo n.º 6
0
def control(batch_enhanced, s):
    """Load the full enhanced-batch archive from S3 and build the filtered
    label dict from it.

    NOTE(review): *batch_enhanced* is accepted but unused — the merge/write
    steps present in sibling variants appear intentionally disabled here.
    """
    archive = utils.read_from_s3(
        utils.file_name(sufix="_batch_enhanced_full"),
        seed=[],
        directory=s["s3dir"])
    print("Length of enhanced pre batch: " + str(len(archive)))
    return make_filtered_label_dict(archive)
Exemplo n.º 7
0
def get_library_list():
    """Get library actionlists.

    Builds and caches a mapping of actionlist name -> parsed ``*.phatch``
    content in the module-level ``__LIBRARY_ACTIONLISTS``; later calls
    return the cache directly.
    """
    global __LIBRARY_ACTIONLISTS
    if __LIBRARY_ACTIONLISTS:
        return __LIBRARY_ACTIONLISTS
    pattern = os.path.join(config.PHATCH_ACTIONLISTS_PATH, '*.phatch')
    loaded = {}
    for path in glob.glob(pattern):
        # BUGFIX: open(...).read() leaked the file handle; close it via with.
        with open(path) as fh:
            loaded[utils.file_name(path)] = eval_safe(fh.read())
    __LIBRARY_ACTIONLISTS = loaded
    return __LIBRARY_ACTIONLISTS
Exemplo n.º 8
0
def get_actions():
    """Get phatch actions.

    Returns (and caches in module-level ``__ACTIONS``) a dictionary that
    maps action names to instantiated Action objects, with default field
    values applied from get_defaults().
    """
    global __ACTIONS
    if __ACTIONS:
        return __ACTIONS
    py_files = glob.glob(os.path.join(config.PHATCH_ACTIONS_PATH, '*.py'))
    action_names = [utils.file_name(f) for f in py_files]
    default_values = get_defaults()
    actions = {}
    for name in action_names:
        if name == '__init__':
            continue
        module = __import__('actions.%s' % name, name, fromlist=['actions'])
        actions[name] = module.Action()
    __ACTIONS = actions
    for name, fields in default_values.iteritems():
        set_action_fields(__ACTIONS[name], fields)
    return __ACTIONS
Exemplo n.º 9
0
def execute_actionlists(input, actionlists=None, options=''):
    """Execute a list of actionlists on input path.
    If no actionlist was given all actionlists will be executed.
    Returns the names of actionlists that failed.
    """
    failed = []
    if not actionlists:
        # Default to every actionlist found in the output directory.
        actionlists = {}
        for path in os.listdir(config.OUT_ACTIONLISTS_PATH):
            actionlists[utils.file_name(path)] = os.path.join(
                config.OUT_ACTIONLISTS_PATH, path)

    total = len(actionlists)
    for i, name in enumerate(sorted(actionlists)):
        # Single-line progress indicator, overwritten in place via \r.
        sys.stdout.write('\rRunning %s/%s %s' %
                         (i + 1, total, name[:50].ljust(50)))
        sys.stdout.flush()
        if not execute_actionlist(input, actionlists[name], options):
            failed.append(name)
    print
    return failed
Exemplo n.º 10
0
def assemble_segements(file_path):
    """Reassemble segment files ``<file_path>.0``, ``<file_path>.1``, ...
    into *file_path*, deleting each segment after it is appended."""
    utils.log("assembling segements ... ")

    # BUGFIX: the original pattern left the dot unescaped (matched any
    # character) and did not escape the base name, so names containing
    # regex metacharacters could miscount segments.
    seg_pattern = re.escape(utils.file_name(file_path)) + r"\.\d+"
    num_file = 0
    for fn in os.listdir(utils.dir_name(file_path)):
        if re.findall(seg_pattern, fn):
            num_file += 1

    # with-blocks guarantee the handles close even if a write/read fails
    # (the original only closed them on the happy path).
    with open(file_path, 'wb') as fp:
        for i in range(num_file):
            seg_path = file_path + '.' + str(i)
            with open(seg_path, 'rb') as f:
                fp.write(f.read())
            os.remove(seg_path)

    utils.log("finished assembling segements")
Exemplo n.º 11
0
def get_actions():
    """Get phatch actions.

    Returns a dictionary mapping action names to Action instances, cached
    in the module-level ``__ACTIONS`` and seeded with default field values.
    """
    global __ACTIONS
    if __ACTIONS:
        return __ACTIONS
    pattern = os.path.join(config.PHATCH_ACTIONS_PATH, '*.py')
    names = [utils.file_name(f) for f in glob.glob(pattern)]
    default_values = get_defaults()
    # The package __init__ module is not an action; skip it.
    __ACTIONS = {
        name: __import__(
            'actions.%s' % name, name, fromlist=['actions']).Action()
        for name in names
        if name != '__init__'}
    for name, fields in default_values.iteritems():
        set_action_fields(__ACTIONS[name], fields)
    return __ACTIONS
Exemplo n.º 12
0
def execute_actionlists(input, actionlists=None, options=''):
    """Execute a list of actionlists on input path.
    If no actionlist was given all actionlists will be executed.
    Returns the list of actionlist names whose execution failed.
    """
    errors = []
    if not actionlists:
        base = config.OUT_ACTIONLISTS_PATH
        actionlists = {utils.file_name(p): os.path.join(base, p)
                       for p in os.listdir(base)}

    total = len(actionlists)
    for idx, name in enumerate(sorted(actionlists), start=1):
        # Overwrite the same terminal line each iteration (\r, no newline).
        progress = '\rRunning %s/%s %s' % (idx, total, name[:50].ljust(50))
        sys.stdout.write(progress)
        sys.stdout.flush()
        if not execute_actionlist(input, actionlists[name], options):
            errors.append(name)
    print
    return errors
    # all_users_lookup = [u for u in all_users if u not in user_data]
    ## Cache code##
    user_data, all_users_lookup = utils.get_from_cache_m(
        all_users, "user_data")
    utils.log(len(all_users), "Number total users in set: ")
    utils.log(len(all_users_lookup), "Number users needing lookup: ")
    if len(all_users_lookup) > 0:
        user_chunks = make_user_chunks(all_users_lookup, 100)
        for this_lookup in user_chunks:
            user_dict_lookup = do_user_lookup(this_lookup, s)
            user_data.update(user_dict_lookup)
    return user_data, filtered_label_dict


def control(filtered_label_dict, s):
    """Enrich the filtered label dict with user info and write it to S3."""
    user_data, filtered_label_dict = update_user_info(filtered_label_dict, s)
    filtered_label_dict = update_dict_with_user_info(
        filtered_label_dict, user_data)
    target_key = utils.file_name(prefix='_batch_filt_label_dict_enhanced_')
    utils.write_to_s3(filtered_label_dict, target_key, directory=s["s3dir"])
    return filtered_label_dict


if __name__ == "__main__":
    # Standalone entry point: reload the stored label dict and rerun the step.
    settings = utils.getDefaultSettings()
    stored_dict = utils.read_from_s3(
        utils.file_name(prefix='_batch_filt_label_dict_enhanced_'),
        directory=settings["s3dir"])
    control(stored_dict, settings)
Exemplo n.º 14
0
            dt[key]["statuses"].append(value)
    else:
        dt[key] = {"statuses":[value]}
    return None

def make_filtered_label_dict(batch_enhanced, label_dict, threshhold = 1):
    """Group enhanced statuses into *label_dict* by their first combined
    label, annotate each entry with a status count, and collect the ids of
    all processed statuses.

    Returns (label_dict, processed_list). NOTE(review): *threshhold* is
    currently unused; kept for interface compatibility.
    """
    processed_list = set()
    for status in batch_enhanced:
        norm_labels = status["satellite_enhanced"]["combined_labels"]
        if len(norm_labels) > 0:
            # Only the first label is used for grouping.
            uate_dict_nodup(label_dict, norm_labels[0], status)
    for l, v in label_dict.items():
        # BUGFIX: len(v) counted the entry dict's keys (always 1, or 2 on a
        # rerun), not the number of grouped statuses.
        label_dict[l]["count"] = len(v["statuses"])
        for st in label_dict[l]["statuses"]:
            processed_list.add(int(st["id"]))

    return label_dict, list(processed_list)
    
def control(batch_enhanced, s):
    """Fold a new enhanced batch into the persisted label dict and write
    both the updated dict and the processed-id list back to S3."""
    label_dict = utils.read_from_s3(
        utils.file_name(prefix="_batch_filt_label_dict_enhanced_fld"),
        directory=s["s3dir"])
    label_dict, processed_list = make_filtered_label_dict(
        batch_enhanced, label_dict)
    utils.write_to_s3(
        label_dict,
        utils.file_name(prefix="_batch_filt_label_dict_enhanced_fld"),
        directory=s["s3dir"])
    utils.write_to_s3(
        processed_list,
        utils.file_name(sufix="_processed_list_fld"),
        directory=s["s3dir"])
    return label_dict

if __name__ == "__main__":
    # Standalone run: pull the current enhanced batch and process it.
    settings = utils.getDefaultSettings()
    batch = utils.read_from_s3(
        utils.file_name(sufix="_batch_enhanced_d"),
        directory=settings["s3dir"])
    control(batch, settings)
Exemplo n.º 15
0
                max_embed = this_embed
            #for links, add to "labels" so we can do the short url processing after
            #for refs, create entry "labels_proc so we preserve the original ref and can store the quoted status.
            root["satellite_enhanced"]["labels"]["quoted_labels_links_deep"] = max_url
            root["satellite_enhanced"]["labels"]["quoted_labels_twrefs_deep"] = max_embed
            if max_embed != None and max_embed != []:
                root["satellite_enhanced"]["labels"]["quoted_labels_twrefs_deep_status"] = qt
        else:
            existing_cnt += 1
    utils.log(existing_cnt, "Existing Count: ")
    return statuses_enhanced


def control(batch_enhanced, s):
    """Run the deep link trace over an enhanced batch and return the
    traced batch. Writing the result back to S3 appears intentionally
    disabled in this variant."""
    utils.log("", "Starting deep trace")
    return trace_links_down(batch_enhanced, s)


if __name__ == "__main__":
    # Standalone run: load the enhanced batch and trace its links.
    settings = utils.getDefaultSettings()
    batch = utils.read_from_s3(
        utils.file_name(sufix="_batch_enhanced"),
        directory=settings["s3dir"])

    control(batch, settings)
 
Exemplo n.º 16
0
def filter_on_day(stat):
    """Return True iff *stat* was created on the current day.

    The day key format "D-M-Y" must match what utils.file_date() produces.
    """
    local_dt = utils.make_local(stat["created_at"])
    stat_key = str(local_dt.day) + "-" + str(local_dt.month) + "-" + str(local_dt.year)
    return stat_key == utils.file_date()

def enhance(batch_enhanced):
    """Attach a "satellite_enhanced" payload (timezone-aware date plus
    combined labels) to each status, keeping only statuses that produced
    at least one label entry."""
    utils.log(len(batch_enhanced), "Number batch statuses: ")
    kept = []
    for stat in batch_enhanced:
        payload = {
            "created_at_tz": add_time_zone_date(stat),
            "labels": get_combined_labels(stat),
        }
        # Total number of label references across all label categories.
        ref_total = sum(len(vals) for vals in payload["labels"].values())
        if ref_total > 0:
            stat["satellite_enhanced"] = payload
            kept.append(stat)
    utils.log(len(kept), "Number enhanced batch statuses: ")
    return kept

def control(date_filtered_batch, s):
    """Pipeline step wrapper around enhance().

    *s* is accepted for interface parity with the other steps but unused.
    """
    return enhance(date_filtered_batch)

if __name__ == "__main__":
    # Standalone run: enhance the full archived batch.
    settings = utils.getDefaultSettings()
    archived = utils.read_from_s3(
        utils.file_name(sufix="_batch_enhanced_full"),
        directory=settings["s3dir"])
    control(archived, settings)
Exemplo n.º 17
0
def control(batch_enhanced, s):
    """Merge a new enhanced batch into the stored filtered label dict,
    persist the dict and the processed-id list, and return the dict."""
    stored = utils.read_from_s3(
        utils.file_name(prefix="_batch_filt_label_dict_enhanced_fld"),
        directory=s["s3dir"])
    updated_dict, processed = make_filtered_label_dict(batch_enhanced, stored)
    utils.write_to_s3(
        updated_dict,
        utils.file_name(prefix="_batch_filt_label_dict_enhanced_fld"),
        directory=s["s3dir"])
    utils.write_to_s3(
        processed,
        utils.file_name(sufix="_processed_list_fld"),
        directory=s["s3dir"])
    return updated_dict