from __future__ import print_function

import sys
import numpy as np
# ``iteritems`` is assumed to come from six (or an equivalent py2/py3 compat helper);
# ``args`` (parsed command-line options) and ``get_next_state_id`` are module-level globals.
from six import iteritems


def estimate_probability(seqs, n_states, n_items, epsilon=0.5, normalized=True):
    """
    Estimate the transition probabilities from a list of item sequences
    (<-> bigram model on the states = past histories).

    Args:
     * ``seqs`` (*dict: user -> sequence*): state sequences.
     * ``n_states`` (*int*): number of states in the model.
     * ``n_items`` (*int*): number of items/actions in the model.
     * ``epsilon`` (*float, optional*): smoothing parameter. Defaults to 0.5.
     * ``normalized`` (*bool, optional*): if True, return smoothed probabilities
       instead of raw counts. Defaults to True.

    Returns:
     * ``js_count`` (*n_states x n_items ndarray*): estimated transition
       probabilities (or raw counts if ``normalized`` is False).
    """
    ### Count co-occurrences
    js_count = np.zeros((n_states, n_items), dtype=float)  # js[s1, a] = P(s1.a | s1; cluster)
    for _, session in iteritems(seqs):
        s1 = 0
        for item in session[args.history:]:  # skip the initial padding of length args.history
            s2 = get_next_state_id(s1, item)
            js_count[s1, item - 1] += 1
            s1 = s2

    ### Normalize (additive smoothing with parameter epsilon)
    if normalized:
        for s1, s1_counts in enumerate(js_count[:, :]):
            print(" state: %d / %d \r" % (s1 + 1, n_states), file=sys.stderr, end=' ')
            nrm = np.sum(s1_counts) + n_items * epsilon
            js_count[s1, :] = (s1_counts + epsilon) / nrm

    ### Return
    return js_count
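# A minimal usage sketch, assuming a history length of 1 so that a state is just
# the last item seen. ``args`` and ``get_next_state_id`` below are stubs standing
# in for the module globals the function relies on.
from types import SimpleNamespace

args = SimpleNamespace(history=1)   # stub for the parsed command-line options

def get_next_state_id(s1, item):
    # Hypothetical stand-in: with a history of 1, the next state is the item itself.
    return item

seqs = {"user_a": [0, 1, 2, 1, 2],  # leading 0 = empty-selection padding
        "user_b": [0, 2, 1, 1, 3]}
probs = estimate_probability(seqs, n_states=4, n_items=3)
print(probs.sum(axis=1))            # every row sums to 1 after smoothing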
def add_lookaheads(cls, lookbacks, followset):
    for trans, lb in iteritems(lookbacks):
        for state, p in lb:
            f = followset.get(trans, [])
            laheads = p.lookaheads.setdefault(state, [])
            for a in f:
                if a not in laheads:
                    laheads.append(a)
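# A toy illustration of the data flow (plain dicts and SimpleNamespace, not the
# real parser-generator structures): ``lookbacks`` maps a transition to
# (state, production) pairs, ``followset`` maps the same transition to the
# terminals that may follow it, and the call merges each follow set into the
# production's per-state lookaheads. ``cls`` is unused, so None stands in for it.
from types import SimpleNamespace

prod = SimpleNamespace(lookaheads={})
lookbacks = {(0, "expr"): [(2, prod)]}
followset = {(0, "expr"): ["PLUS", "$end"]}
add_lookaheads(None, lookbacks, followset)
print(prod.lookaheads)   # {2: ['PLUS', '$end']}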
def from_cache(cls, grammar, data):
    lr_action = [
        dict([(str(k), v) for k, v in iteritems(action)])
        for action in data["lr_action"]
    ]
    lr_goto = [
        dict([(str(k), v) for k, v in iteritems(goto)])
        for goto in data["lr_goto"]
    ]
    return LRTable(
        grammar, lr_action, lr_goto, data["default_reductions"],
        data["sr_conflicts"], data["rr_conflicts"]
    )
import os
import requests
# ``clip``, ``utils`` and ``GitURL`` are project-local helpers from the surrounding CLI.

def download_gist(uri, dest):
    """ Downloads every file of the gist at ``uri`` into ``dest``; returns the file names. """
    gid = GitURL(uri).repo
    clip.echo('Downloading Gist "{}" to "{}"...'.format(gid, dest))
    ret = []
    req = requests.get('https://api.github.com/gists/{}'.format(gid))
    res = req.json()
    for k, v in utils.iteritems(res['files']):
        ret.append(k)
        with open(os.path.join(dest, k), 'w') as f:
            f.write(v['content'])
    return ret
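# Hypothetical usage: fetch every file of a public gist into ./out (the gist URL
# below is a placeholder, and the ./out directory is assumed to exist).
saved = download_gist('https://gist.github.com/octocat/aa5a315d61ae9438b18d', './out')
print(saved)   # names of the files that were written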
def list_tasks():
    okapi.notify_on_close = False
    from inspect import getmembers, isfunction
    d = {
        e[0]: e[1] for e in getmembers(utils.load_config(), isfunction)
        if not e[0].startswith('_')
    }
    col_width = max(len(k) for k, v in utils.iteritems(d)) + 2
    for k in sorted(d):
        clip.echo('{}{}'.format(k.ljust(col_width), d[k].__doc__ or ''))
def compute_grammar_hash(self, g):
    hasher = hashlib.sha1()
    hasher.update(g.start.encode())
    hasher.update(json.dumps(sorted(g.terminals)).encode())
    for term, (assoc, level) in sorted(iteritems(g.precedence)):
        hasher.update(term.encode())
        hasher.update(assoc.encode())
        hasher.update(bytes(level))
    for p in g.productions:
        hasher.update(p.name.encode())
        hasher.update(json.dumps(p.prec).encode())
        hasher.update(json.dumps(p.prod).encode())
    return hasher.hexdigest()
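# Sanity check with a stub grammar: SimpleNamespace stands in for the real grammar
# and production objects (attribute names taken from the method above); ``self``
# is unused, so None suffices. The digest is stable across runs for the same grammar,
# which is what makes it usable as a cache key.
import hashlib
import json
from types import SimpleNamespace

g = SimpleNamespace(
    start="expr",
    terminals=["NUMBER", "PLUS"],
    precedence={"PLUS": ("left", 1)},
    productions=[SimpleNamespace(name="expr", prec=("left", 1),
                                 prod=["expr", "PLUS", "expr"])],
)
print(compute_grammar_hash(None, g))   # a 40-character sha1 hex digest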
def run():
    config = LambdaConfig().load_from_cwd()
    clip.echo(PROMPT)
    user_input = {
        'FunctionName': clip.prompt('Function name: ', skip=True),
        'Handler': clip.prompt('Handler: ', skip=True),
        'Description': clip.prompt('Description: ', skip=True),
        'Runtime': clip.prompt('Runtime', default='nodejs'),
        'Timeout': clip.prompt('Timeout: ', type=int, skip=True),
        'MemorySize': clip.prompt('Memory size: ', type=int, skip=True)
    }
    config.update_config({k: v for k, v in utils.iteritems(user_input) if v})
    install_task = clip.prompt('Install task: ', skip=True)
    if install_task is not None:
        config.update({'install': install_task})
    config.dump_to_cwd()
def data_is_valid(self, g, data):
    if g.start != data["start"]:
        return False
    if sorted(g.terminals) != data["terminals"]:
        return False
    if sorted(g.precedence) != sorted(data["precedence"]):
        return False
    for key, (assoc, level) in iteritems(g.precedence):
        if data["precedence"][key] != [assoc, level]:
            return False
    if len(g.productions) != len(data["productions"]):
        return False
    for p, (name, prod, (assoc, level)) in zip(g.productions, data["productions"]):
        if p.name != name:
            return False
        if p.prod != prod:
            return False
        if p.prec != (assoc, level):
            return False
    return True
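# Round-trip check with a stub grammar mirrored by its cached ``data`` (attribute
# names taken from the method above; ``self`` is unused, so None suffices). Note
# that tuples come back from a JSON cache as lists, which is why the method
# compares against [assoc, level] rather than (assoc, level).
from types import SimpleNamespace

g = SimpleNamespace(
    start="expr",
    terminals=["NUMBER"],
    precedence={"PLUS": ("left", 1)},
    productions=[SimpleNamespace(name="expr", prod=["NUMBER"], prec=("left", 1))],
)
data = {
    "start": "expr",
    "terminals": ["NUMBER"],
    "precedence": {"PLUS": ["left", 1]},
    "productions": [["expr", ["NUMBER"], ["left", 1]]],
}
print(data_is_valid(None, g, data))   # True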
def load_data(base_name, plevel, ulevel, hlength, sv=False):
    """
    Load and pre-format the Foodmart data (products, customers and user sessions).

    Args:
     * ``base_name`` (*str*): path to the main data folder.
     * ``plevel`` (*int*): level parameter for the product clustering.
     * ``ulevel`` (*int*): level parameter for the user clustering.
     * ``hlength`` (*int*): history length.
     * ``sv`` (*bool, optional*): if True, store the computed information in
       .items, .profiles, .train and .test files.

    Returns:
     * ``product_to_cluster`` (*ndarray*): maps a productID to a clusterID.
       Note 0 -> -1 is the empty selection.
     * ``user_sessions`` (*dict*): maps a customerID to its item sequence.
     * ``actions`` (*list*): sorted cluster/action IDs.
     * ``output_base`` (*str*): base path of the output files (None if ``sv`` is False).
    """
    # Init output folder
    output_base = None
    if sv:
        output_base = init_output_dir(plevel, ulevel, hlength)

    ###### Load and Cluster items #########################################################################
    print("\n\033[92m-----> Load and Cluster products\033[0m")
    product_to_cluster = np.zeros(line_count(load_datafile(base_name, "product.csv")) + 1,
                                  dtype=int)   # Product ID -> Cluster ID
    tmp_index = {}                             # Cluster name -> Cluster ID
    tmp_clusters = defaultdict(lambda: [])     # Cluster name -> Product ID list

    # Load product list
    if plevel == 0:
        f = load_datafile(base_name, "product.csv")
        r = csv.reader(f)
        next(r)
        for product in r:
            tmp_clusters[product[3]].append(int(product[1]))
            try:
                product_to_cluster[int(product[1])] = tmp_index[product[3]]
            except KeyError:
                tmp_index[product[3]] = len(tmp_index) + 1
                product_to_cluster[int(product[1])] = tmp_index[product[3]]
        f.close()
    else:
        # Load product categories
        product_classes = {}
        f = load_datafile(base_name, "product_class.csv")
        r = csv.reader(f)
        next(r)
        for categories in r:
            product_classes[int(categories[0])] = categories[plevel]
        f.close()

        # Cluster products
        f = load_datafile(base_name, "product.csv")
        r = csv.reader(f)
        next(r)
        for product in r:
            try:
                product_to_cluster[int(product[1])] = tmp_index[product_classes[int(product[0])]]
            except KeyError:
                tmp_index[product_classes[int(product[0])]] = len(tmp_index) + 1
                product_to_cluster[int(product[1])] = tmp_index[product_classes[int(product[0])]]
            tmp_clusters[product_classes[int(product[0])]].append(int(product[1]))
        f.close()

    # Print summary
    print(" %d product profiles (%d products)" % (len(tmp_index), (len(product_to_cluster) - 1)))
    print('\n'.join(" > %s: %.2f%%" % (k, 100 * float(len(v)) / (len(product_to_cluster) - 1))
                    for k, v in iteritems(tmp_clusters)))
    actions = sorted(itervalues(tmp_index))
    product_to_cluster[0] = 0   # Empty selection

    # Init states
    print("\n\033[92m-----> [Optional] Export states description\033[0m")
    init_base_writing(len(actions), args.history)
    if sv:
        rv_tmp_indx = {v: k for k, v in tmp_index.items()}
        rv_tmp_indx[0] = str(chr(35))   # '#' marks the empty selection
        with open("%s.states" % output_base, 'w') as f:
            f.write('\n'.join("%f\t%s" % (x, '|'.join(rv_tmp_indx[y] for y in id_to_state(x)))
                              for x in xrange(get_nstates(len(actions), args.history))))

    ###### Load and Store user sessions ###################################################################
    print("\n\033[92m-----> Load user sessions and shop profits\033[0m")
    user_sessions = defaultdict(lambda: [0] * hlength)   # pad each session with hlength empty selections

    # Load session
    f = load_datafile(base_name, "sales.csv")
    r = csv.reader(f)
    next(r)
    for sale in r:
        product_clusterID = product_to_cluster[int(sale[0])]
        user_sessions[int(sale[2])].append(product_clusterID)
    f.close()

    # Save product clusters information
    if sv:
        with open("%s.items" % output_base, 'w') as f:
            f.write('\n'.join("%d\t%s\t%d" % (tmp_index[k], k, len(tmp_clusters[k]))
                              for k in sorted(tmp_index.keys(), key=lambda x: tmp_index[x])))

    # Return values
    return product_to_cluster, user_sessions, actions, output_base
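# Hypothetical invocation, assuming the Foodmart CSV dump (product.csv,
# product_class.csv, sales.csv) lives under ./Foodmart/data: cluster products at
# level 1, target 10 user profiles, pad histories to length 2, save artifacts.
product_to_cluster, user_sessions, actions, output_base = load_data(
    "./Foodmart/data", plevel=1, ulevel=10, hlength=2, sv=True)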
def lfm_deploy(path, **kwargs):
    deploy.run(path, {k: v for k, v in utils.iteritems(kwargs) if v})
assert args.D * args.ulevel < len(user_sessions), "not enough data to fit choice of parameters D and n"

###### 2. Store rewards
print("\n\033[91m-----> Reward function\033[0m")
print(" %d States in the database" % n_states)
print(" %d Actions in the database" % n_items)
with open("%s.rewards" % output_base, 'w') as f:
    for item in actions:
        f.write("%d\t%s\n" % (item, 1.0))

###### 3. Cluster sequences with a perplexity criterion
print("\n\033[91m-----> Clustering\033[0m")
clusters = defaultdict(lambda: {})
f_prop = open("%s.profiles" % output_base, 'w')
alternate = 1
seqs = {k: v for k, v in iteritems(user_sessions)
        if len(v) > (args.history + 20)}   # to ensure reliable perplexity
#seqs = dict(user_sessions)
while len(clusters) < args.ulevel:
    # estimate probability over all sequences still available
    js_count = estimate_probability(seqs, n_states, n_items, epsilon)
    # estimate perplexity and sort sequences in decreasing order
    aux = [(user, seq, compute_perplexity(seq, js_count))
           for user, seq in iteritems(seqs)]
    aux = sorted(aux, key=lambda x: -x[2])
    # form a cluster out of the sequences with the highest perplexity
    cluster_id = len(clusters)
    mean_perp = 0
    browse = aux[:args.D] if alternate else aux[-args.D:]
    for user, seq, p in browse:
        clusters[cluster_id][user] = seq
def unused_productions(self):
    return [p for p, prods in iteritems(self.nonterminals) if not prods]
def lfm_deploy(uri, **kwargs):
    if kwargs['profile'] is not None:
        boto3.setup_default_session(profile_name=kwargs['profile'])
    del kwargs['profile']
    deploy.run(uri, {k: v for k, v in utils.iteritems(kwargs) if v})
def unused_terminals(self):
    return [
        t
        for t, prods in iteritems(self.terminals)
        if not prods and t != "error"
    ]
def isWallTowards(maze, i, j, direction):
    # Reconstructed enclosing signature: the original def line is missing from this
    # excerpt, so the function name and argument order here are assumed.
    """ Returns True if the cell next to (i, j) in ``direction`` is a wall ('1'). """
    if direction == 'N':
        return maze[i - 1][j] == '1'   # assumed by symmetry; the N case fell outside the excerpt
    if direction == 'E':
        return maze[i][j + 1] == '1'
    if direction == 'W':
        return maze[i][j - 1] == '1'
    if direction == 'S':
        return maze[i + 1][j] == '1'

left = {'N': 'W', 'W': 'S', 'S': 'E', 'E': 'N'}

def turnLeft(orient):
    """ Returns the orientation to the left of ``orient``. """
    global left
    return left[orient]

right = {v: k for (k, v) in iteritems(left)}

def turnRight(orient):
    """ Returns the orientation to the right of ``orient``. """
    global right
    return right[orient]

def mazeBoundaries(maze):
    """ Returns the reachable boundaries of the maze. """
    width, height = len(maze), len(maze[0])
    # Find min x
    for x in xrange(width):
        if not all(z == '1' for z in maze[x]):
            break   # assumed completion: stop at the first row containing a free cell
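# Quick check of the orientation helpers on all four headings (turnRight is
# derived by inverting the ``left`` mapping, so the two are mutual inverses).
for o in ('N', 'E', 'S', 'W'):
    print(o, '-> left:', turnLeft(o), '| right:', turnRight(o))
# N -> left: W | right: E, and similarly around the compass.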