Example #1
import sys
import numpy as np
from six import iteritems  # assumption: iteritems comes from six (or an equivalent compat helper)

def estimate_probability(seqs, n_states, n_items, epsilon=0.5, normalized=True):
    """
    Estimate transition probabilities from a collection of item sequences, i.e. fit a bigram model in which each state encodes a past history of items.

    Args:
     * ``seqs`` (*dict: user -> sequence*): state sequences.
     * ``n_states`` (*int*): number of states in the model.
     * ``n_items`` (*int*): number of items/actions in the model.
     * ``epsilon`` (*float, optional*): smoothing parameter. Defaults to 0.5.
     * ``normalized`` (*bool, optional*): if True, return smoothed transition probabilities; if False, return raw co-occurrence counts. Defaults to True.

    Returns:
     * ``js_count`` (*n_states x n_items ndarray*): estimated transition probabilities (or raw counts when ``normalized`` is False).

    """
    ### Count co-occurrences
    js_count = np.zeros((n_states, n_items), dtype=float) # js[s1, a] = P(s1.a | s1; cluster)
    for _, session in iteritems(seqs):
        s1 = 0  # start in the empty-history state
        for item in session[args.history:]:  # `args` is a module-level CLI namespace defined elsewhere
            s2 = get_next_state_id(s1, item)
            js_count[s1, item - 1] += 1
            s1 = s2

    ### Normalize
    if normalized:
        for s1, s1_counts in enumerate(js_count):
            print("      state: %d / %d   \r" % (s1 + 1, n_states), file=sys.stderr, end=' ')
            nrm = np.sum(s1_counts) + n_items * epsilon
            js_count[s1, :] = (s1_counts + epsilon) / nrm  # additive (Laplace-style) smoothing

    ### Return
    return js_count
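
A minimal usage sketch for the estimator above. The helper get_next_state_id and the global args live elsewhere in the project, so hypothetical stand-ins are defined here (history of 1, state = last item seen):

from types import SimpleNamespace

args = SimpleNamespace(history=1)  # stand-in for the module's CLI namespace

def get_next_state_id(s1, item):   # hypothetical stand-in for the project's helper
    return item                    # next state = last item seen (0 = empty history)

seqs = {'user_a': [0, 1, 2, 2, 3], 'user_b': [0, 2, 2, 1, 1]}
probs = estimate_probability(seqs, n_states=4, n_items=3, epsilon=0.5)
print(probs.sum(axis=1))  # every row sums to 1 after smoothing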
Example #2
 def add_lookaheads(cls, lookbacks, followset):
     for trans, lb in iteritems(lookbacks):
         for state, p in lb:
             f = followset.get(trans, [])
             laheads = p.lookaheads.setdefault(state, [])
             for a in f:
                 if a not in laheads:
                     laheads.append(a)
Example #3
 def from_cache(cls, grammar, data):
     lr_action = [
         dict([(str(k), v) for k, v in iteritems(action)])
         for action in data["lr_action"]
     ]
     lr_goto = [
         dict([(str(k), v) for k, v in iteritems(goto)])
         for goto in data["lr_goto"]
     ]
     return LRTable(
         grammar,
         lr_action,
         lr_goto,
         data["default_reductions"],
         data["sr_conflicts"],
         data["rr_conflicts"]
     )
Example #4
File: download.py  Project: T2BE/lfm
def download_gist(uri, dest):
	gid = GitURL(uri).repo
	clip.echo('Downloading Gist "{}" to "{}"...'.format(gid, dest))
	ret = []
	req = requests.get('https://api.github.com/gists/{}'.format(gid))
	res = req.json()
	for k, v in utils.iteritems(res['files']):
		ret.append(k)
		with open(os.path.join(dest, k), 'w') as f:
			f.write(v['content'])
	return ret
Example #5
def download_gist(uri, dest):
    gid = GitURL(uri).repo
    clip.echo('Downloading Gist "{}" to "{}"...'.format(gid, dest))
    ret = []
    req = requests.get('https://api.github.com/gists/{}'.format(gid))
    res = req.json()
    for k, v in utils.iteritems(res['files']):
        ret.append(k)
        with open(os.path.join(dest, k), 'w') as f:
            f.write(v['content'])
    return ret
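
A hedged usage example; the Gist id and destination below are made up, and dest must be an existing directory (GitURL is assumed to extract the Gist id from the URL):

files = download_gist('https://gist.github.com/octocat/aa5a315d61ae9438b18d', '.')
print(files)  # names of the files written into the destination directory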
Example #6
File: ok.py  Project: pombredanne/ok
def list_tasks():
    okapi.notify_on_close = False
    from inspect import getmembers, isfunction
    d = {
        e[0]: e[1]
        for e in getmembers(utils.load_config(), isfunction)
        if not e[0].startswith('_')
    }
    col_width = max(len(k) for k, v in utils.iteritems(d)) + 2
    for k in sorted(d):
        clip.echo('{}{}'.format(k.ljust(col_width), d[k].__doc__ or ''))
Example #7
 def compute_grammar_hash(self, g):
     hasher = hashlib.sha1()
     hasher.update(g.start.encode())
     hasher.update(json.dumps(sorted(g.terminals)).encode())
     for term, (assoc, level) in sorted(iteritems(g.precedence)):
         hasher.update(term.encode())
         hasher.update(assoc.encode())
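         # NOTE: on Python 3, bytes(level) with an int yields that many zero
         # bytes (b'\x00' * level); on Python 2 it is str(level). Both are
         # deterministic, which is all this cache fingerprint needs.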
         hasher.update(bytes(level))
     for p in g.productions:
         hasher.update(p.name.encode())
         hasher.update(json.dumps(p.prec).encode())
         hasher.update(json.dumps(p.prod).encode())
     return hasher.hexdigest()
Example #8
def run():
    config = LambdaConfig().load_from_cwd()
    clip.echo(PROMPT)
    user_input = {
        'FunctionName': clip.prompt('Function name: ', skip=True),
        'Handler': clip.prompt('Handler: ', skip=True),
        'Description': clip.prompt('Description: ', skip=True),
        'Runtime': clip.prompt('Runtime', default='nodejs'),
        'Timeout': clip.prompt('Timeout: ', type=int, skip=True),
        'MemorySize': clip.prompt('Memory size: ', type=int, skip=True)
    }
    config.update_config({k: v for k, v in utils.iteritems(user_input) if v})
    install_task = clip.prompt('Install task: ', skip=True)
    if install_task is not None:
        config.update({'install': install_task})
    config.dump_to_cwd()
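
The dict comprehension handed to update_config drops every entry whose prompt was skipped (presumably None, given the `is not None` check on install_task). A tiny self-contained illustration of that filtering pattern, with made-up values:

user_input = {'FunctionName': 'demo', 'Handler': None, 'Timeout': 0}
print({k: v for k, v in user_input.items() if v})  # {'FunctionName': 'demo'}

Note that the `if v` test also discards falsy but meaningful values, e.g. a Timeout of 0.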
Example #9
File: init.py  Project: T2BE/lfm
def run():
	config = LambdaConfig().load_from_cwd()
	clip.echo(PROMPT)
	user_input = {
		'FunctionName': clip.prompt('Function name: ', skip=True),
		'Handler': clip.prompt('Handler: ', skip=True),
		'Description': clip.prompt('Description: ', skip=True),
		'Runtime': clip.prompt('Runtime', default='nodejs'),
		'Timeout': clip.prompt('Timeout: ', type=int, skip=True),
		'MemorySize': clip.prompt('Memory size: ', type=int, skip=True)
	}
	config.update_config({k: v for k, v in utils.iteritems(user_input) if v})
	install_task = clip.prompt('Install task: ', skip=True)
	if install_task is not None:
		config.update({
			'install': install_task
		})
	config.dump_to_cwd()
Example #10
 def data_is_valid(self, g, data):
     if g.start != data["start"]:
         return False
     if sorted(g.terminals) != data["terminals"]:
         return False
     if sorted(g.precedence) != sorted(data["precedence"]):
         return False
     for key, (assoc, level) in iteritems(g.precedence):
         if data["precedence"][key] != [assoc, level]:
             return False
     if len(g.productions) != len(data["productions"]):
         return False
     for p, (name, prod, (assoc, level)) in zip(g.productions, data["productions"]):
         if p.name != name:
             return False
         if p.prod != prod:
             return False
         if p.prec != (assoc, level):
             return False
     return True
Example #11
def load_data(base_name, plevel, ulevel, hlength, sv=False):
    """
    Load and pre-format the Foodmart data (products, customers and user sessions).

    Args:
     * ``base_name`` (*str*): path to the main data folder.
     * ``plevel`` (*int*): level parameter for the product clustering.
     * ``ulevel`` (*int*): number of user profiles (clusters) to build.
     * ``hlength`` (*int*): history length.
     * ``sv`` (*bool, optional*): if True, store the computed information in .items, .profiles, .train and .test files.

    Returns:
     * ``product_to_cluster`` (*ndarray*): maps a product ID to a cluster ID (index 0 is the empty selection).
     * ``user_sessions`` (*dict*): maps a customer ID to its item-cluster sequence, padded with ``hlength`` empty selections.
     * ``actions`` (*list*): sorted cluster IDs, used as the action set.
     * ``output_base`` (*str or None*): base path for the output files (None when ``sv`` is False).
    """

    # Init output folder; default to None so the final return statement
    # never references an undefined name when sv is False
    output_base = init_output_dir(plevel, ulevel, hlength) if sv else None


    ###### Load and Cluster items
    #########################################################################

    print("\n\033[92m-----> Load and Cluster products\033[0m")
    product_to_cluster = np.zeros(line_count(load_datafile(base_name, "product.csv")) + 1, dtype=int)      # Product ID -> Cluster ID
    tmp_index = {}                          # Cluster name -> Cluster ID
    tmp_clusters = defaultdict(lambda: [])  # Cluster name -> Product ID list

    # Load product list
    if plevel == 0:
        f = load_datafile(base_name, "product.csv")
        r = csv.reader(f)
        next(r)
        for product in r:
            tmp_clusters[product[3]].append(int(product[1]))
            try:
                product_to_cluster[int(product[1])] = tmp_index[product[3]]
            except KeyError:
                tmp_index[product[3]] = len(tmp_index) + 1
                product_to_cluster[int(product[1])] = tmp_index[product[3]]
        f.close()

    else:
        # Load product categories
        product_classes = {}
        f = load_datafile(base_name, "product_class.csv")
        r = csv.reader(f)
        next(r)
        for categories in r:
            product_classes[int(categories[0])] = categories[plevel]
        f.close()

        # Cluster products
        f = load_datafile(base_name, "product.csv")
        r = csv.reader(f)
        next(r)
        for product in r:
            try:
                product_to_cluster[int(product[1])] = tmp_index[product_classes[int(product[0])]]
            except KeyError:
                tmp_index[product_classes[int(product[0])]] = len(tmp_index) + 1
                product_to_cluster[int(product[1])] = tmp_index[product_classes[int(product[0])]]
            tmp_clusters[product_classes[int(product[0])]].append(int(product[1]))
        f.close()

    # Print summary
    print("   %d product profiles (%d products)" % (len(tmp_index), (len(product_to_cluster) - 1)))
    print('\n'.join("     > %s: %.2f%%" % (k, 100 * float(len(v)) / (len(product_to_cluster) - 1)) for k, v in iteritems(tmp_clusters)))
    actions = sorted(itervalues(tmp_index))
    product_to_cluster[0] = 0 # Empty selection

    # Init states
    print("\n\033[92m-----> [Optional] Export states description\033[0m")
    init_base_writing(len(actions), args.history)
    if sv:
        rv_tmp_indx = {v: k for k, v in tmp_index.items()}
        rv_tmp_indx[0] = '#'  # i.e. chr(35); symbol for the empty selection
        with open("%s.states" % output_base, 'w') as f:
            f.write('\n'.join("%f\t%s" % (x, '|'.join(rv_tmp_indx[y] for y in id_to_state(x))) for x in xrange(get_nstates(len(actions), args.history))))

    ###### Load and Store user sessions
    #########################################################################

    print("\n\033[92m-----> Load user sessions and shop profits \033[0m")
    user_sessions = defaultdict(lambda: [0] * hlength)  # pad each session with hlength empty selections (initial history)

    # Load session
    f = load_datafile(base_name, "sales.csv")
    r = csv.reader(f)
    next(r)
    for sale in r:
        product_clusterID = product_to_cluster[int(sale[0])]
        user_sessions[int(sale[2])].append(product_clusterID)
    f.close()

    # Save product clusters information
    if sv:
        with open("%s.items" % output_base, 'w') as f:
            f.write('\n'.join("%d\t%s\t%d" %(tmp_index[k], k, len(tmp_clusters[k])) for k in sorted(tmp_index.keys(), key=lambda x: tmp_index[x])))

    # Return values
    return product_to_cluster, user_sessions, actions, output_base
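
A hedged invocation sketch; the data path and parameter values below are made up (see example #13 for how the returned values feed the clustering step):

product_to_cluster, user_sessions, actions, output_base = load_data(
    "Foodmart/data", plevel=4, ulevel=10, hlength=2, sv=True)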
Example #12
File: cli.py  Project: kereyroper/lfm
def lfm_deploy(path, **kwargs):
	deploy.run(path, {k: v for k, v in utils.iteritems(kwargs) if v})
Example #13
    assert args.D * args.ulevel < len(user_sessions), "not enough data to fit the choice of parameters D and ulevel"

    ###### 2. Store rewards
    print("\n\033[91m-----> Reward function\033[0m")
    print("   %d States in the database" % n_states)
    print("   %d Actions in the database" % n_items)
    with open("%s.rewards" % output_base, 'w') as f:
        for item in actions:
            f.write("%d\t%s\n" % (item, 1.0))

    ###### 3. Cluster sequences with a perplexity criterion
    print("\n\033[91m-----> Clustering\033[0m")
    clusters = defaultdict(lambda: {})
    f_prop = open("%s.profiles" % output_base, 'w')
    alternate = 1
    seqs = {k:v for k, v in iteritems(user_sessions) if len(v) > (args.history + 20)} # to ensure reliable perplexity
    #seqs = dict(user_sessions)
    while len(clusters) < args.ulevel:
        # estimate probability over all sequences still available
        js_count = estimate_probability(seqs, n_states, n_items, epsilon)

        # estimate perplexity and sort sequences in decreasing order
        aux = [(user, seq, compute_perplexity(seq, js_count)) for user, seq in iteritems(seqs)]
        aux.sort(key=lambda x: x[2], reverse=True)  # decreasing perplexity

        # form a cluster out of the sequences with the highest perplexity
        cluster_id = len(clusters)
        mean_perp = 0
        browse = aux[:args.D] if alternate else aux[-args.D:]
        for user, seq, p in browse:
            clusters[cluster_id][user] = seq
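
compute_perplexity is not shown in this fragment. A minimal reconstruction under the standard definition exp(-mean log-likelihood), reusing the state update from estimate_probability in example #1 (hypothetical, not the project's actual code):

import numpy as np

def compute_perplexity(seq, js_count):
    log_prob, n, s1 = 0.0, 0, 0
    for item in seq[args.history:]:       # same history offset as estimate_probability
        log_prob += np.log(js_count[s1, item - 1])
        s1 = get_next_state_id(s1, item)  # same state update as estimate_probability
        n += 1
    return np.exp(-log_prob / max(n, 1))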
Example #14
File: cli.py  Project: sysalexis/lfm
def lfm_deploy(path, **kwargs):
    deploy.run(path, {k: v for k, v in utils.iteritems(kwargs) if v})
Example #15
 def unused_productions(self):
     return [p for p, prods in iteritems(self.nonterminals) if not prods]
Example #16
File: cli.py  Project: leonid-s-usov/lfm
def lfm_deploy(uri, **kwargs):
	if kwargs['profile'] is not None:
		boto3.setup_default_session(profile_name=kwargs['profile'])
		del kwargs['profile']
	deploy.run(uri, {k: v for k, v in utils.iteritems(kwargs) if v})
Example #17
    if direction == 'S':
        return maze[i + 1][j] == '1'


left = {'N': 'W', 'W': 'S', 'S': 'E', 'E': 'N'}


def turnLeft(orient):
    """
    Return the orientation to the left of ``orient``.
    """
    return left[orient]


right = {v: k for (k, v) in iteritems(left)}


def turnRight(orient):
    """
    Returns the orientation to the right of ``orient``.
    """
    return right[orient]
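
A quick sanity check of the two orientation maps (right is built as the inverse of left, so a left turn followed by a right turn is the identity):

assert turnLeft('N') == 'W'
assert turnRight('W') == 'N'
assert all(turnRight(turnLeft(o)) == o for o in 'NWSE')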


def mazeBoundaries(maze):
    """
    Returns the reachable boundaries of the maze.
    """
    width = len(maze)
Example #18
 def unused_terminals(self):
     return [
         t for t, prods in iteritems(self.terminals)
         if not prods and t != "error"
     ]
Example #19
    if direction == 'E':
        return maze[i][j + 1] == '1'
    if direction == 'W':
        return maze[i][j - 1] == '1'
    if direction == 'S':
        return maze[i + 1][j] == '1'

left = {'N': 'W', 'W': 'S', 'S': 'E', 'E': 'N'}
def turnLeft(orient):
    """
    Return the orientation to the left of ``orient``.
    """
    return left[orient]

right = {v: k for (k, v) in iteritems(left)}
def turnRight(orient):
    """
    Returns the orientation to the right of ``orient``.
    """
    return right[orient]

def mazeBoundaries(maze):
    """
    Returns the reachable boundaries of the maze.
    """
    width, height = len(maze), len(maze[0])
    # Find min x
    for x in xrange(width):
        if not all(z == '1' for z in maze[x]):