Example #1
def enter():
    global forest, retsim, ui
    retsim = Retsim()
    forest = Forest()
    ui = Ui()

    forest.set_center_object(retsim)
Example #2
    def _print_schedule_default(self):
        """
        Print the scheduling table in normal or detailed mode.
        """
        from forest import Forest
        from rcColor import color
        tree = Forest()
        head_node = tree.add_node()
        head_node.add_column("Action", color.BOLD)
        head_node.add_column("Last Run", color.BOLD)
        if self.options.verbose:
            head_node.add_column("Next Run", color.BOLD)
        head_node.add_column("Config Parameter", color.BOLD)
        head_node.add_column("Schedule Definition", color.BOLD)

        for data in self._print_schedule_data():
            node = head_node.add_node()
            node.add_column(data["action"], color.LIGHTBLUE)
            node.add_column(data["last_run"])
            if self.options.verbose:
                node.add_column(data["next_run"])
            node.add_column(data["config_parameter"])
            node.add_column(data["schedule_definition"])

        tree.out()
Example #3
def main():
    debug = False
    threshold = 1
    for i in range(0, len(sys.argv)):
        if sys.argv[i] == '-d':
            debug = True
        elif sys.argv[i] == '-t':
            threshold = float(sys.argv[i+1])
        elif sys.argv[i] == '-h':
            print "Usage: python main.py [OPTIONS]\n"
            print "--------- Options -------------"
            print "'-d' : debug mode"
            print "'-t [VALUE]' : set mean square error threshold to value"
            return

    (data, training, test) = readData(
                filename = 'whitewine.csv',
                debug = False,
                label_index = 11,
                variable_index = (0,10),
                separator= ';')

    forest = Forest(filename = 'whitewine.csv',
                    label_index = 11,
                    variable_index = (0,10),
                    separator=';',
                    mse_threshold= 0.02,
                    debug = debug,
                    num_trees=5)
    forest.build()

    best = forest.predict(test)
    print best
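The manual `sys.argv` scan above is brittle: flags are matched by hand, values must be converted explicitly, and the usage text is maintained separately. A minimal standard-library sketch of the same two options with `argparse`:

import argparse

def parse_args():
    # Same options as the manual loop; argparse handles typing and
    # generates the -h usage text automatically.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', dest='debug', action='store_true',
                        help='debug mode')
    parser.add_argument('-t', dest='threshold', type=float, default=1.0,
                        help='set mean square error threshold to VALUE')
    return parser.parse_args()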
Example #4
def format_service(path, idata, mon_data=None, discard_disabled=False, nodename=None):
    name, namespace, kind = split_path(path)
    svc_notice = get_svc_notice(idata)

    tree = Forest(
        separator=" ",
        widths=(
            (14, None),
            None,
            10,
            None,
        ),
    )
    node_name = tree.add_node()
    node_name.add_column(strip_path(path, os.environ.get("OSVC_NAMESPACE")), color.BOLD)
    node_name.add_column()
    if "cluster" in idata:
        node_name.add_column(idata["cluster"].get("avail", "n/a"), STATUS_COLOR[idata["cluster"].get("avail", "n/a")])
    else:
        node_name.add_column()
    node_name.add_column(svc_notice)
    node_instances = node_name.add_node()
    node_instances.add_column("instances")
    add_instances(node_instances, path, nodename, mon_data)
    if nodename in service_nodes(path, mon_data):
        add_node_node(node_instances, nodename, idata, mon_data, discard_disabled=discard_disabled)
    add_parents(node_name, idata, mon_data, namespace)
    add_children(node_name, idata, mon_data, namespace)
    add_scaler_slaves(node_name, idata, mon_data, namespace)
    add_slaves(node_name, idata, mon_data, namespace)

    tree.out()
Example #5
def predict():
    trainingRowIds = random.sample(range(1, len(data)), int(.8 * len(data)))
    forest = Forest(data, outcomeLabel, continuousColumns, trainingRowIds,
                    columnsToIgnore)
    correct = sum(1 for rowId, row in enumerate(data)
                  if rowId > 0 and rowId not in trainingRowIds
                  and forest.get_prediction(row) == row[1])
    return 100 * correct / (len(data) - 1 - len(trainingRowIds))
Example #6
def main():
    (X_train, y_train), (X_test, y_test) = tf.contrib.keras.datasets.mnist.load_data()
    X_train = (X_train / 255.).reshape(-1, 28*28)
    X_test = (X_test / 255.).reshape(-1, 28*28)

    forest = Forest(28*28, 10)
    forest.fit(X_train, y_train)
    print("final testing accuracy: %.4f" % (forest.predict(X_test) == y_test).mean())
Example #7
 def train_forest(self, sample_indices, training_context, training_parameters):
     forest = Forest()
     for i in xrange(training_parameters.numOfTrees):
         # TODO: perform bagging on the samples
         tree = ArrayTree(training_parameters.maximumDepth)
         self.train_tree(tree, sample_indices, training_context, training_parameters)
         forest.append(tree)
     return forest
Example #8
def main(args):
    parser = argparse.ArgumentParser(description='Random forest classifier')
    parser.add_argument(
        'training_dataset',
        help='CSV file containing the training dataset file',
        metavar='training_dataset',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        'test_dataset',
        help='CSV file containing the test dataset file',
        metavar='test_dataset',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        '-t',
        '--target_column',
        help='Name of the target dataset column (default: last column)',
        metavar='target_column',
        type=str,
    )
    args = parser.parse_args(args)

    training_rows = [
        row for row in csv.DictReader(
            args.training_dataset, delimiter=',', quotechar='"')
    ]
    assert len(training_rows) > 0

    training_columns = list(training_rows[0].keys())

    target_column = args.target_column or training_columns[-1]
    assert target_column in training_columns

    type_cast(training_rows, training_columns)

    test_rows = [
        row for row in csv.DictReader(
            args.test_dataset, delimiter=',', quotechar='"')
    ]
    test_columns = list(test_rows[0].keys())
    type_cast(test_rows, test_columns)

    forest = Forest(
        columns=training_columns,
        target_column=target_column,
        rows=training_rows,
    )

    # TODO CSV output
    for row in test_rows:
        print('{} -> {}'.format(
            ', '.join([
                ':'.join((key, str(value)))
                for (key, value) in list(row.items())
            ]), forest.classify(row)))
Example #9
def main():
    args = parameter_check()
    forest = Forest(args.forest_size, args.tree_init_percentage, args.lumberjack_init_percentage,
                    args.bear_init_percentage, args.base_percentage, args.tree_init_age_min, args.tree_init_age_max,
                    args.tree_age_limit, args.sapling_age_limit, args.tree_spawn_rate, args.elder_tree_spawn_rate)
    for x in range(0, 4800):
        forest.action()
        if len(forest.trees) == 0:
            print("Simulation Halt: no more trees exist")
            break
Example #11
    def toforest(self, forest):
        '''generate a forest object (for kbest)'''

        cache = {}
        lmforest = Forest(forest.sent, forest.cased_sent, is_tforest=True, tag=forest.tag)
        lmforest.refs = forest.refs

        self._toforest(lmforest, forest.sent, cache)

        return lmforest
Example #12
 def test_eq(self):
     self.forest.add_children(3, [4, 5])
     self.forest.add_children(4, [6, 7])
     self.forest.add_children(5, [8, 9])
     self.assert_size(7)
     other_forest = Forest()
     other_forest.add_children(3, [4, 5])
     other_forest.add_children(4, [6, 7])
     other_forest.add_children(5, [8, 9])
     self.assertEqual(other_forest.size, self.forest.size)
     self.assertEqual(other_forest, self.forest)
Example #13
def runForest(datasetName, **kwargs):
    '''Train and evaluate the forest; data sets: iris or mnist.'''
    # Get data
    dataTrain, dataTest = dl.get_data(datasetName)
    Xtrain, ytrain = dataTrain
    Xtest, ytest = dataTest
    # Build forest
    forest = Forest(**kwargs)
    N = forest.train(Xtrain, ytrain)
    # Test forest
    correct = numpy.equal(forest.apply(Xtest), ytest).mean()
    return (correct, N)
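A hypothetical call: `runForest` forwards `**kwargs` verbatim to this project's `Forest` constructor, so the keyword names below are illustrative rather than taken from the source:

# Illustrative kwargs -- they are passed straight into Forest(**kwargs).
acc, n = runForest('iris', n_trees=10, max_depth=8)
print("test accuracy: %.3f" % acc)  # n is whatever Forest.train reports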
Example #14
class ForestFactory(MapFactory):
    def __init__(self):
        self.forest = Forest()

    def get_wall_tile_data(self):
        return self.forest.get_wall_tile_data()

    def get_floor_tile_data(self):
        return self.forest.get_floor_tile_data()

    def get_inaccessible_tile_data(self):
        return self.forest.get_inaccessible_tile_data()
Example #15
def test5():
    f = Forest([11, 5, 9, 7])
    f.rotate(1)
    for i in range(3):
        f.rotate(2)
    f.draw(bg=" ")
Example #16
 def network_show(self):
     data = {}
     for name, netdata in self.networks_data().items():
         if self.options.name and name != self.options.name:
             continue
         data[name] = netdata
     if self.options.format in ("json", "flat_json"):
         return data
     if not data:
         return
     from forest import Forest
     from rcColor import color
     tree = Forest()
     tree.load(data, title="networks")
     print(tree)
Example #17
def main():
  from ngram import Ngram
  from model import Model
  from forest import Forest
  
  flags.DEFINE_integer("beam", 100, "beam size", short_name="b")
  flags.DEFINE_integer("debuglevel", 0, "debug level")
  flags.DEFINE_boolean("mert", True, "output mert-friendly info (<hyp><cost>)")
  flags.DEFINE_boolean("cube", True, "using cube pruning to speedup")
  flags.DEFINE_integer("kbest", 1, "kbest output", short_name="k")
  flags.DEFINE_integer("ratio", 3, "the maximum items (pop from PQ): ratio*b", short_name="r")
  

  argv = FLAGS(sys.argv)

  weights = Model.cmdline_model()
  lm = Ngram.cmdline_ngram()
  
  false_decoder = CYKDecoder(weights, lm)
  
  def non_local_scorer(cedge, ders):
    (lmsc, alltrans, sig) = false_decoder.deltLMScore(cedge.lhsstr, ders)
    fv = Vector()
    fv["lm"] = lmsc
    return ((weights.dot(fv), fv), alltrans, sig)
  cube_prune = CubePruning(FeatureScorer(weights), non_local_scorer, FLAGS.k, FLAGS.ratio)

  for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):
    a = false_decoder.beam_search(forest, b = FLAGS.beam)
    b = cube_prune.run(forest.root)

    assert a[0] == b[0].score[0]
    assert a[1] == b[0].score[1]
    print a
    print b[0]
Example #18
class Map():

    #translates the strings we receive as returns to which function to call next
    scenes = {
        'birth': Birth(),
        'grove': Grove(),
        'metropolis': Metropolis(),
        'death': Death(),
        'doorway': Doorway(),
        'forest': Forest(),
        'plains': Plains(),
        'elfkingdom': ElfKingdom()
    }

    #we provide a place to start
    def __init__(self, start_scene):
        self.start_scene = start_scene

    def next_scene(self, scene_name):
        val = Map.scenes.get(scene_name)
        return val

    #plays the first scene in the game
    def opening_scene(self):
        return self.next_scene(self.start_scene)
Example #19
 def load(self, filenames):
     # new: multiple files
     for filename in filenames.split():
         for forest in Forest.load(filename):
             if forest is not None:
                 yield forest
             else:
                 yield None ## special treatment above
Example #20
 def test_forest_class():
     """
     Forest class
     """
     tree = Forest()
     tree.load({})
     tree.out()
     overall_node = tree.add_node()
     overall_node.add_column("overall")
     node = overall_node.add_node()
     node.add_column("avail")
     node.add_column()
     node.add_column("up", color.GREEN)
     node = node.add_node()
     node.add_column("res#id")
     node.add_column("....")
     node.add_column("up", color.GREEN)
     col = node.add_column(
         "docker container [email protected]"
         "nsvc.com/busybox:latest")
     col.add_text("warn", color.BROWN)
     col.add_text("err", color.RED)
     node = overall_node.add_node()
     node.add_column("accessory")
     node = overall_node.add_node()
     node.load("loaded text", title="loaded title")
     node = overall_node.add_node()
     node.load({"text": "loaded dict"})
     node = overall_node.add_node()
     node.load([{"text": "loaded list"}])
     buff = str(tree)
     assert "loaded" in buff
Example #21
    def fit(self,
            trajectories,
            targets,
            n_estimators,
            max_radius,
            min_trajectories,
            sample_share=0.66,
            processes=1):
        """
        Fit random forest
        :param trajectories: list of trajectories
        :param targets: list of targets
        :param n_estimators: the size of the forest
        :param max_radius: maximum radius in searching for decision point in trees
        :param min_trajectories: minimum number of trajectories to split further in trees
        :param sample_share: share of the sample used for each tree
        :param processes: number of worker processes (1 = serial)
        :return: the built forest
        """
        forest = Forest()

        if processes == 1:
            #serial option
            trees = [
                self.tree_fit(trajectories, targets, max_radius,
                              min_trajectories, sample_share)
                for i in range(n_estimators)
            ]

        else:
            #several processess option
            pool = mp.Pool(processes=processes)
            results = [
                pool.apply_async(self.tree_fit,
                                 args=(trajectories, targets, max_radius,
                                       min_trajectories, sample_share))
                for i in range(n_estimators)
            ]
            trees = [p.get() for p in results]

        #add trees to the forest
        for tree in trees:
            forest.add(tree)

        return forest
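A hypothetical invocation using the parameter names from the docstring; the model instance, `trajectories`, and `targets` come from the surrounding project:

# Illustrative values; parameter names match the docstring above.
forest = model.fit(trajectories, targets,
                   n_estimators=100,
                   max_radius=5.0,
                   min_trajectories=10,
                   sample_share=0.66,
                   processes=4)  # >1 fits trees in a multiprocessing pool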
Example #22
def cross_valid_values(list_of_specimen, num_chunks, n_estimators,
                       max_features_select):
    """
        A function to perform cross validation of the model

        :param list_of_specimen:        a list of training examples
        :param num_chunks:              a K number in K-fold cross validation
        :param n_estimators:            a number of trees in RF
        :param max_features_select:     a number of features per tree

        :return:                    the mean accuracy of the model
        :rtype:                     float
    """

    if (num_chunks <= 1):
        print("number of chunks has to be greater than 1")
        return -1
    scores = np.array([])
    loss = [0 for i in range(num_chunks)]
    list_of_specimen = list(np.random.permutation(list_of_specimen))

    for i in range(num_chunks):

        begin, end = int(i * len(list_of_specimen) / num_chunks), int(
            (i + 1) * len(list_of_specimen) / num_chunks)
        list_of_testing_specimen = list_of_specimen[begin:end].copy()

        testing_predictions = list_of_testing_specimen

        list_of_training_specimen = list_of_specimen.copy(
        )[:begin] + list_of_specimen.copy()[end:]

        classifier = Forest(n_estimators,
                            list_of_training_specimen,
                            max_feat_select=max_features_select)
        testing_predictions = classifier.predict(testing_predictions)

        acc = accuracy(list_of_testing_specimen, testing_predictions)
        loss[i] = 1 - acc

        scores = np.append(scores, acc)
    avg_acc = sum(scores) / len(scores)
    return avg_acc
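A hypothetical invocation; `list_of_specimen` is whatever training-example type the surrounding module's `Forest` and `accuracy` expect:

# Illustrative values: 5-fold CV of a 100-tree forest, 8 features per tree.
avg_acc = cross_valid_values(list_of_specimen, num_chunks=5,
                             n_estimators=100, max_features_select=8)
print("mean CV accuracy: %.3f" % avg_acc)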
Example #23
 def print_checks(self, data):
     from forest import Forest
     from rcColor import color
     tree = Forest()
     head_node = tree.add_node()
     head_node.add_column(rcEnv.nodename, color.BOLD)
     for chk_type, instances in data.items():
         node = head_node.add_node()
         node.add_column(chk_type, color.BROWN)
         for instance in instances:
             _node = node.add_node()
             _node.add_column(str(instance["instance"]), color.LIGHTBLUE)
             _node.add_column(instance["path"])
             _node.add_column(str(instance["value"]))
             if instance["driver"] == "generic":
                 _node.add_column()
             else:
                 _node.add_column(instance["driver"])
     tree.out()
Example #24
    def to_forest(self):
        """
        Returns a L{Forest} of graphs.

        @rtype:  L{Forest}
        @return: a forest containing (weakly) disconnected graph components
        """
        from forest import Forest
        graphs = [self.induced_graph(component) for component in self.weak_components()]
        return Forest(graphs, 'iter_nodes')
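For comparison, `networkx` (not used by this example) makes the "weakly disconnected components" idea concrete:

import networkx as nx

# Two edges with no path between the pairs: two weakly connected components.
G = nx.DiGraph([(1, 2), (3, 4)])
components = list(nx.weakly_connected_components(G))  # [{1, 2}, {3, 4}]
subgraphs = [G.subgraph(c).copy() for c in components]
assert len(subgraphs) == 2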
Example #25
    def print_tree(self, devices=None, verbose=False):
        ftree = Forest()
        node = ftree.add_node()
        node.add_column(rcEnv.nodename, color.BOLD)
        node.add_column("Type", color.BOLD)
        node.add_column("Size", color.BOLD, align="right")
        node.add_column("Pct of Parent", color.BOLD, align="right")

        filtered = devices is not None and len(devices) > 0
        if filtered:
            devs = [self.get_dev_by_devpath(devpath) for devpath in devices]
        else:
            devs = [self.dev[r.child] for r in self.root]
        for dev in devs:
            if dev is None or (not filtered and dev.parents != []):
                continue
            dev.print_dev(node=node, highlight=devices, verbose=verbose)

        ftree.out()
Example #26
 def network_status(self):
     data = self.network_status_data(self.options.name)
     if self.options.format in ("json", "flat_json"):
         return data
     from forest import Forest
     from rcColor import color
     tree = Forest()
     head = tree.add_node()
     head.add_column("name", color.BOLD)
     head.add_column("type", color.BOLD)
     head.add_column("network", color.BOLD)
     head.add_column("size", color.BOLD)
     head.add_column("used", color.BOLD)
     head.add_column("free", color.BOLD)
     head.add_column("pct", color.BOLD)
     for name in sorted(data):
         ndata = data[name]
         net_node = head.add_node()
         net_node.add_column(name, color.BROWN)
         net_node.add_column(data[name]["type"])
         net_node.add_column(data[name]["network"])
         net_node.add_column("%d" % data[name]["size"])
         net_node.add_column("%d" % data[name]["used"])
         net_node.add_column("%d" % data[name]["free"])
         net_node.add_column("%.2f%%" % data[name]["pct"])
         if not self.options.verbose:
             continue
         ips_node = net_node.add_node()
         ips_node.add_column("ip", color.BOLD)
         ips_node.add_column("node", color.BOLD)
         ips_node.add_column("service", color.BOLD)
         ips_node.add_column("resource", color.BOLD)
         for ip in sorted(ndata.get("ips", []),
                          key=lambda x:
                          (x["ip"], x["node"], x["path"], x["rid"])):
             ip_node = ips_node.add_node()
             ip_node.add_column(ip["ip"])
             ip_node.add_column(ip["node"])
             ip_node.add_column(ip["path"])
             ip_node.add_column(ip["rid"])
     print(tree)
Example #27
    def print_tree_bottom_up(self, devices=None, verbose=False):
        ftree = Forest()
        node = ftree.add_node()
        node.add_column(rcEnv.nodename, color.BOLD)
        node.add_column("Type", color.BOLD)
        node.add_column("Parent Use", color.BOLD, align="right")
        node.add_column("Size", color.BOLD, align="right")
        node.add_column("Ratio", color.BOLD, align="right")

        if devices is None:
            devices = set()
        else:
            devices = set(devices)
        for dev in self.get_bottom_devs():
            if len(devices) > 0 and len(set(dev.devpath) & devices) == 0:
                continue
            dev.print_dev_bottom_up(node=node,
                                    highlight=devices,
                                    verbose=verbose)

        ftree.out()
Example #28
    def __init__(self, graph):
        self._graph = graph
        self._forest = Forest()
        self._subgraph = {} # { category id: { category name: [ node id ]}}
        self._version = 0
        self._deleted_nodes = set()
        self._current_level = dict([(category, "root") for category in self._forest.get_categories()])
        self._constraints = {} # {include, not_include}

        self.setup_constraints()
        self.set_logging()
        self.build_subgraphs()
Example #29
 def __call__(self, data):
   vec = Vector()
   for i, (key, val) in enumerate(data):
     splits = val.split("****")
      if len(splits) != 2:
        print >>sys.stderr, "skipping sent"
       continue
     sent, oracle = splits 
     s2 = sent.replace("\t\t\t", "\n")
     o2 = oracle.replace("\t\t\t", "\n")
     sent_forest = Forest.load(StringIO(s2), True, lm=None).next()
     oracle_forest = Forest.load(StringIO(o2), True, lm=None).next()
      assert sent_forest and oracle_forest  # both forests must load
     #print >>sys.stderr, len(sent_forest)
     #print >>sys.stderr, len(oracle_forest)
     example_marg, example_partition  = fast_inside_outside.collect_marginals(sent_forest, self.weights)
     oracle_marg, oracle_partition  = fast_inside_outside.collect_marginals(oracle_forest, self.weights)
     vec += example_marg - oracle_marg
     vec["log_likelihood"] += example_partition-oracle_partition
     #vec["log_likelihood"] += example_partition-oracle_partition 
     self.processed += 1
   for feat in vec:
     yield feat, vec[feat]
Example #30
def main():
  from ngram import Ngram
  from model import Model
  from forest import Forest
  
  flags.DEFINE_integer("beam", 100, "beam size", short_name="b")
  flags.DEFINE_integer("debuglevel", 0, "debug level")
  flags.DEFINE_boolean("mert", True, "output mert-friendly info (<hyp><cost>)")
  flags.DEFINE_boolean("cube", True, "using cube pruning to speedup")
  flags.DEFINE_integer("kbest", 1, "kbest output", short_name="k")
  flags.DEFINE_integer("ratio", 3, "the maximum items (pop from PQ): ratio*b", short_name="r")
  

  argv = FLAGS(sys.argv)
  [outfile] = argv[1:]
  weights = Model.cmdline_model()
  lm = Ngram.cmdline_ngram()
  

  false_decoder = CYKDecoder(weights, lm)
  out = utility.getfile(outfile, 1)
  old_bleu = Bleu()
  new_bleu = Bleu()
  
  for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):
    
    oracle_forest, oracle_item = oracle_extracter(forest, weights, false_decoder, 100, 2, extract=100)
    print >>sys.stderr, "processed sent %s " % i
    oracle_forest.dump(out)
    bleu, hyp, fv, edgelist = forest.compute_oracle(weights, 0.0, 1)

    forest.bleu.rescore(hyp)
    old_bleu += forest.bleu
    forest.bleu.rescore(oracle_item[0].full_derivation)
    new_bleu += forest.bleu

    bad_bleu, _, _, _ = oracle_forest.compute_oracle(weights, 0.0, -1)
    #for i in range(min(len(oracle_item), 5)):
     # print >>sys.stderr, "Oracle Trans: %s %s %s" %(oracle_item[i].full_derivation, oracle_item[i].score, str(oracle_item[i].score[2]))
     # print >>sys.stderr, "Oracle BLEU Score: %s"% (forest.bleu.rescore(oracle_item[i].full_derivation))
    print >>sys.stderr, "Oracle BLEU Score: %s"% (forest.bleu.rescore(oracle_item[0].full_derivation))
    print >>sys.stderr, "Worst new Oracle BLEU Score: %s"% (bad_bleu)
    print >>sys.stderr, "Old Oracle BLEU Score: %s"% (bleu)
    
    print >>sys.stderr, "Running Oracle BLEU Score: %s"% (new_bleu.compute_score())
    print >>sys.stderr, "Running Old Oracle BLEU Score: %s"% (old_bleu.compute_score())
Example #31
def kruskal(G):
    forest = Forest([Graph([v]) for v in G.V])
    edges = sorted(G.E)
    for e in edges:
        if len(forest) == 1:
            break
        t1 = forest.find_tree(e.origin)
        t2 = forest.find_tree(e.destination)
        if t1 == t2:
            continue
        forest.merge_trees(t1, t2)
        t1.add_edge(e)
    return forest[0]
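The same algorithm with an explicit disjoint-set in place of the project's `Forest`/`Graph` helpers; a self-contained sketch that assumes edges are `(weight, u, v)` tuples:

def kruskal_dsu(vertices, edges):
    """Kruskal's MST with a union-find; edges are (weight, u, v) tuples."""
    parent = {v: v for v in vertices}

    def find(v):
        # Walk to the root, halving the path as we go.
        while parent[v] != v:
            parent[v] = parent[parent[v]]
            v = parent[v]
        return v

    mst = []
    for weight, u, v in sorted(edges):
        ru, rv = find(u), find(v)
        if ru != rv:  # different trees: merging cannot form a cycle
            parent[ru] = rv
            mst.append((weight, u, v))
    return mst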
Example #32
    def __init__(self, n, stocha, obs):

        self.gold_mines = []
        self.forests = []
        self.obstacles = []
        self.board = []
        self.quotas = [False for k in range(NB_RESOURCES)]
        self.n = n
        self.time = 0
        self.reward = 0
        self.stocha = stocha

        # Board instantiation
        for i in range(n):
            for j in range(n):
                if (i, j) in OBSTACLES and obs:
                    obstacle = Obstacle([i, j])
                    self.board.append(obstacle)
                    self.obstacles.append(obstacle)
                elif (i,j) in GOLD_MINES:
                    gold_mine = GoldMine([i,j])
                    self.board.append(gold_mine)
                    self.gold_mines.append(gold_mine)
                elif (i,j) in FORESTS:
                    forest = Forest([i,j])
                    self.board.append(forest)
                    self.forests.append(forest)
                elif (i,j) == PLAYER:
                    self.player = Player([i,j], NOTHING)
                    self.board.append(FreeTile([i,j]))
                elif (i,j) == CHEST:
                    self.chest = Chest([i,j])
                    self.chest_next = True
                    self.board.append(self.chest)
                else:
                    self.board.append(FreeTile([i,j]))

        self.gold_mines_next = [False for k in self.gold_mines]
        self.forests_next = [False for k in self.forests]
Example #33
def forest2bmp(forest: Forest, filename: str):
    bmp = Image.new('RGB', (forest.x * 32, forest.y * 32), (255, 255, 255))
    for y in range(forest.y):
        for x in range(forest.x):
            cell = str(forest.get_cell(x, y))
            dx = x * 32
            dy = y * 32

            if cell == 'W':
                bmp.paste(wood, (dx, dy))
            elif cell == 'B':
                bmp.paste(bamboo, (dx, dy))
            elif cell == 'F':
                bmp.paste(fire, (dx, dy))
            elif cell == 'S':
                bmp.paste(soil, (dx, dy))
            elif cell == 'P':
                bmp.paste(pool, (dx, dy))
            elif cell == 'R':
                bmp.paste(road, (dx, dy))
            elif cell == 'M':
                bmp.paste(mountain, (dx, dy))

    bmp.save(filename, 'PNG')
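The `if`/`elif` ladder above is a lookup table in disguise. A sketch of the same loop body with the mapping as a dict, reusing the module-level tile images and loop variables the example assumes:

# Cell code -> tile image, equivalent to the if/elif chain above.
TILES = {'W': wood, 'B': bamboo, 'F': fire, 'S': soil,
         'P': pool, 'R': road, 'M': mountain}

tile = TILES.get(cell)
if tile is not None:
    bmp.paste(tile, (dx, dy))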
Example #34
    def __init__(self):
        self.mr = MountainRange(MOUNTAIN, 330, 330, 60, 0.25)
        self.bgd = MountainRange(BACKGROUND_TREES, 310, 310, 10, 0.5)
        self.bg_trees = Forest(BACKGROUND_TREES, 310, 10, 40, 40, 50, 0.5)
        self.lake = MountainRange(LAKE, 290, 290, 0, 0)
        self.mg = MountainRange(MIDGROUND_TREES, 60, 60, 10, 1.5)
        self.mg_trees = Forest(MIDGROUND_TREES, 60, 5, 40, 55, 75, 1.5)
        self.fg = MountainRange(FOREGROUND_TREES, 0, 0, 10, 2.5)
        self.fg_trees = Forest(FOREGROUND_TREES, 10, 30, 90, 90, 170, 2.5)
        self.side_checkpoint_marker = SideCheckPointMarker(0)

        #these could be done after construction of the Stage object
        self.bg_trees.load_tree_image('./resources/trees/small_trees_bg.png')
        self.mg_trees.load_tree_image('./resources/trees/med_trees_mg.png')
        self.fg_trees.load_tree_image('./resources/trees/large_trees_fg.png')

        #lane buoys could be processed by this class too

        self.bg = pygame.Surface((32, 32))
        self.bg.convert()
        self.bg.fill(pygame.Color("#FFE5C1"))
        self.bg.fill(pygame.Color("#dff1ff"))
Example #35
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--tree_estimator_directory","-td",default="/infolab/node4/lukuang/2015-RTS/src/my_code/post_analysis/predictor_analysis/disk4-5/predictor_data/post/tree_estimator")
    parser.add_argument("--number_of_iterations","-ni",type=int,default=50)
    parser.add_argument("--error_threshold","-et",type=int,default=30)
    parser.add_argument("--silent_query_info_file","-sf",default="/infolab/node4/lukuang/2015-RTS/disk4-5/eval/silent_query_info")
    parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int,
        help="""
            Choose the retrieval method:
                0:f2exp
                1:dirichlet
                2:pivoted
                3:bm25
        """)
    parser.add_argument("--use_auc","-ua",action="store_true")
    parser.add_argument("--metric_string","-ms",default="P_10")
    args=parser.parse_args()

    index_type = IndexType.processed
    eval_data = EvalData(index_type,args.metric_string)
    args.retrieval_method = RetrievalMethod(args.retrieval_method)
    result_dir = R_DIR[index_type][args.retrieval_method]
    print "result dir %s" %(result_dir)
    result_files = get_result_files(result_dir)
    query_data_file = os.path.join(args.tree_estimator_directory,index_type.name,args.retrieval_method.name)
    query_data_file = os.path.join(query_data_file,"data")
    print "get value pair %s" %(query_data_file)
    values = json.load(open(query_data_file))

    all_metrics = {}
    for day in values:
        all_metrics[day] =  eval_data.get_metric(result_files[day])


    silent_query_info = json.load(open(args.silent_query_info_file))

    # print all_metrics
    query_data = []
    silent_judgments = []
    silent_days = {}
    day = "10"
    silent_list = {}
    for qid in values.values()[0].keys():
        # m = re.search("^(\d+)_",qid)
        # if m:
        #     q_num = int(m.group(1))
        #     if q_num > 650:
        #         continue
        # else:
        #     raise RuntimeError("Mal qid format %s" %(qid))
        day_qid = "10_%s" %(qid)
        # print day_qid
        
        
        # print results[day]

        if qid in all_metrics[day]:
            
            day_query_metric = all_metrics[day][qid]

            m = re.search("^(\d+)_",qid)
            if m:
                q_num = m.group(1)
            else:
                raise RuntimeError("Mal qid format %s" %(qid))
            
            if q_num in silent_query_info :
                silent_days[day_qid] = 1
                silent_judgments.append(1)
            else:
                if day_query_metric == .0:
                    silent_list[q_num] = 0
                silent_judgments.append(0)
                silent_days[day_qid] = 0
        
        else: 
            day_query_metric = .0
            silent_judgments.append(1)
            silent_days[day_qid] = 1

        single_data = {}
        single_data["day_qid"] = day_qid
        single_data["metric"] = day_query_metric
        single_data["values"] = values[day][qid]
        query_data.append(single_data)
        
            

    print "There are %d queries" %(len(query_data))
    print "%d of them are silent" %(sum(silent_judgments))
    print "There are %d queries with silent list" %(len(silent_list))
 
    skf = StratifiedKFold(n_splits=10)
    eval_metrics = []
    for training_index, test_index in skf.split(query_data, silent_judgments):
        training_data = []
        testing_data = []
        metrics = {}
        # print "%d training %d testing" %(len(training_index),len(test_index))
        for i in training_index:
            training_data.append( deepcopy(query_data[i]))

        for j in test_index:
            testing_data.append( deepcopy(query_data[j]))
            day_qid = query_data[j]["day_qid"]
            metrics[day_qid] = query_data[j]["metric"]

        # print training_data
        forest = Forest(training_data,args.error_threshold,args.number_of_iterations)
        
        forest.start_training()

        predicted_values = forest.output_result(testing_data)
        y_true, y_score = make_score_prediction_lists(predicted_values,silent_days)

        if args.use_auc:
            reversed_score = []
            for i in y_score:
                reversed_score.append(-1*i)
            score = roc_auc_score(y_true, reversed_score)
            print "the auc score is %f"  %(score)
            eval_metrics.append(score)
        else:
            best_f1_score = best_f1(y_true, y_score)
            print "the best f1 score is %f" %(best_f1_score)
            eval_metrics.append(best_f1_score)


    print "Average performance: %f" %(sum(eval_metrics)/(1.0*len(eval_metrics)))
Example #36
                    # define circle radius based on tree stage
                    mrad = tree.stage * 0.24 * rad
                    xe = x + ((cell_w - mrad) / 2)
                    ye = y + ((cell_h - mrad) / 2)
                    scene.addEllipse(xe, ye, mrad, mrad, QPen(trans),
                                     QBrush(qcol))

    def colorFromTree(self, tree):
        col = "transparent"
        if tree is not None:
            col_switch = {
                config.V: "red",
                config.HV: "turquoise",
                config.HEALTHY: "green"
            }
            col = col_switch.get(tree.rating)
        return QColor(col)


if __name__ == '__main__':

    import sys

    app = QApplication(sys.argv)
    test_forest = Forest(50, 50)
    test_forest.set_random_grid()
    fv = ForestViewer(test_forest)
    fv.show()
    # Use this when debugging w/ IPython in Spyder IDE
    app.aboutToQuit.connect(app.deleteLater)
    sys.exit(app.exec_())
Example #37
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--tree_estimator_directory","-td",default="/infolab/node4/lukuang/2015-RTS/src/my_code/post_analysis/predictor_analysis/disk4-5/predictor_data/post/tree_estimator")
    parser.add_argument("--number_of_iterations","-ni",type=int,default=50)
    parser.add_argument("--error_threshold","-et",type=int,default=30)
    parser.add_argument("--silent_query_info_file","-sf",default="/infolab/node4/lukuang/2015-RTS/disk4-5/eval/silent_query_info")
    parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int,
        help="""
            Choose the retrieval method:
                0:f2exp
                1:dirichlet
                2:pivoted
                3:bm25
        """)
    parser.add_argument("--use_auc","-ua",action="store_true")
    parser.add_argument("--title_only","-to",action="store_true")
    parser.add_argument("--metric_string","-ms",default="P_10")
    parser.add_argument("tree_store_dir")
    args=parser.parse_args()

    index_type = IndexType.processed
    eval_data = EvalData(index_type,args.metric_string)
    args.retrieval_method = RetrievalMethod(args.retrieval_method)
    result_dir = R_DIR[index_type][args.retrieval_method]
    print "result dir %s" %(result_dir)
    result_files = get_result_files(result_dir)
    query_data_file = os.path.join(args.tree_estimator_directory,index_type.name,args.retrieval_method.name)
    query_data_file = os.path.join(query_data_file,"data")
    print "get value pair %s" %(query_data_file)
    values = json.load(open(query_data_file))

    all_metrics = {}
    for day in values:
        all_metrics[day] =  eval_data.get_metric(result_files[day])


    silent_query_info = json.load(open(args.silent_query_info_file))
    # print all_metrics
    title_query_data = []
    desc_query_data = []
    query_data = []
    silent_judgments = []

    silent_days = {}
    day = "10"
    for qid in values.values()[0].keys():
        # m = re.search("^(\d+)_",qid)
        # if m:
        #     q_num = int(m.group(1))
        #     if q_num > 650:
        #         continue
        # else:
        #     raise RuntimeError("Mal qid format %s" %(qid))
        day_qid = "10_%s" %(qid)
        # print day_qid
        
        
        # print results[day]
        if args.title_only:
            if "title" not in qid:
                continue
        if qid in all_metrics[day]:
            
            day_query_metric = all_metrics[day][qid]

            m = re.search("^(\d+)_",qid)
            if m:
                q_num = m.group(1)
            else:
                raise RuntimeError("Mal qid format %s" %(qid))
            
            if q_num in silent_query_info :
                silent_days[day_qid] = 1
            else:
                silent_days[day_qid] = 0
        
        else: 
            print "%s query has no metric!" %(qid)
            day_query_metric = .0
            silent_days[day_qid] = 1

        single_data = {}
        single_data["day_qid"] = day_qid
        single_data["metric"] = day_query_metric
        single_data["values"] = values[day][qid]

        if "title" in qid:
            title_query_data.append(single_data)
        else:
            desc_query_data.append(single_data)

        query_data.append(single_data)
        silent_judgments.append( silent_days[day_qid] )
        
    title_tree = load_tree(args.tree_store_dir,QueryPart.title,args.retrieval_method,args.metric_string)
    title_predicted = title_tree.output_result(title_query_data)
    if not args.title_only:
        desc_tree = load_tree(args.tree_store_dir,QueryPart.desc,args.retrieval_method,args.metric_string)
        desc_predicted = desc_tree.output_result(desc_query_data)


    # print "There are %d queries" %(len(query_data))
    # print "%d of them are silent" %(sum(silent_judgments))


    print "There are %d samples" %(len(query_data))
    # print thresholds
        
    num_of_split = 10
    f1_macro_average = .0
    f1_average = .0
    skf = StratifiedKFold(n_splits=num_of_split,shuffle=True)
    for training_index, test_index in skf.split(query_data, silent_judgments):
        all_training_data = []
        training_title_query_data = []
        training_desc_query_data = []


        # print "%d training %d testing" %(len(training_index),len(test_index))
        for i in training_index:
            single_data = deepcopy(query_data[i])
            day_qid = single_data["day_qid"]
                    
            all_training_data.append(single_data )
            if "title" in day_qid:
                training_title_query_data.append(single_data)
            else:
                if not args.title_only:
                    training_desc_query_data.append(single_data)
        
        train_title_predicted = title_tree.output_result(training_title_query_data)
        if not args.title_only:
            train_desc_predicted = desc_tree.output_result(training_desc_query_data)
        else:
            train_desc_predicted = {0:0}
        thresholds = get_threshold(train_title_predicted.values(),train_desc_predicted.values(),args.title_only)
        best_tree_threshold = {}
        best_f1_score = -1000
        best_f1_threshold = .0
        for threshold in thresholds:
            sub_training_data = []
            training_pre_y_true = []
            training_pre_y_score = []
            for single_data in all_training_data:
                day_qid = single_data["day_qid"]
                if "title" in day_qid:
                    if (title_predicted[day_qid] <= threshold["title"]):
                        
                        sub_training_data.append(single_data )
                    else:
                        training_pre_y_score.append(1000)
                        training_pre_y_true.append(silent_days[day_qid])
                else:
                    if not args.title_only:
                        if (desc_predicted[day_qid]  <= threshold["desc"]):
                            sub_training_data.append(single_data) 
                        else:
                            training_pre_y_score.append(1000)
                            training_pre_y_true.append(silent_days[day_qid])


            forest = Forest(sub_training_data,args.error_threshold,args.number_of_iterations)
            forest.start_training()

            training_predicted_values = forest.output_result(sub_training_data)
            training_y_true, training_y_score = make_score_prediction_lists(training_predicted_values,silent_days)
            training_y_true  = training_pre_y_true + training_y_true
            training_y_score  = training_pre_y_score + training_y_score
            threshold_best_f1_threshold, threshold_best_f1_score = get_best_f1_threshold(training_y_true, training_y_score)
            if threshold_best_f1_score > best_f1_score:
                best_tree_threshold = threshold
                best_f1_score = threshold_best_f1_score
                best_f1_threshold = threshold_best_f1_threshold
        
        print "best f1 threshold:%f, best f1 %f:" %(best_f1_threshold,best_f1_score)
        print best_tree_threshold

        testing_data = []
        testing_pre_y_true = []
        testing_pre_y_score = []

        for j in test_index:
            single_data = deepcopy(query_data[j])
            day_qid = single_data["day_qid"]
                    

            if "title" in day_qid:
                if (title_predicted[day_qid] <= best_tree_threshold["title"]):
                        
                    testing_data.append(single_data )
                else:
                    testing_pre_y_score.append(1000)
                    testing_pre_y_true.append(silent_days[day_qid])
            else:
                if not args.title_only:
                    if (desc_predicted[day_qid] <= best_tree_threshold["desc"]):
                            
                        testing_data.append(single_data )
                    else:
                        testing_pre_y_score.append(1000)
                        testing_pre_y_true.append(silent_days[day_qid])

        # test_forest = Forest(testing_data,args.error_threshold,args.number_of_iterations)
        # test_forest.start_training()

        test_predicted_values = forest.output_result(testing_data)
        testing_y_true, testing_y_score = make_score_prediction_lists(test_predicted_values,silent_days)
        testing_y_true  = testing_pre_y_true + testing_y_true
        testing_y_score  = testing_pre_y_score + testing_y_score
        test_y_predict = []
        for single_score in testing_y_score:
            if single_score < best_f1_threshold:
                test_y_predict.append(1)
            else:
                test_y_predict.append(0)
        f1_macro_average += f1(testing_y_true, test_y_predict,average="macro")/(1.0*num_of_split)
        f1_average += f1(testing_y_true, test_y_predict)/(1.0*num_of_split)

    

    print "Positive f1: %f" %(f1_average)
    print "Average f1: %f" %(f1_macro_average)
    print "-"*20
Example #38
# run_forest.py
# -------------
# by Chris Proctor

# This module is just a short script setting up a forest with a few boring
# animals. If you run this, the animals will chat with each other.

from forest import Forest
from animal import Animal

forest = Forest()
for name in ["Lily", "Todd", "Fred", "Suzanne"]:
    animal = Animal(name)
    forest.add_animal(animal)

forest.run()
    
Example #39
def test3():
    f = Forest([5, 2, 3, 7], widen=2, margin=10)
    f.draw()
Example #40
import sys
sys.path.append("..")
from forest import Forest
from StringIO import StringIO
f = open("/tmp/features_oracle")
for l in f:
  sent, _ = l.split("****")
  s2 = sent.replace("\t\t\t", "\n")
  f = open("/tmp/blah4", 'w')
  f.write(s2)
  f.close()
  for sent_forest in Forest.load(StringIO(s2), True, lm=None):
    sent_forest.dump()











Example #41
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--year","-y",choices=list(map(int, Year)),default=0,type=int,
        help="""
            Choose the year:
                0:2015
                1:2016
                2:2011
        """)
    parser.add_argument("--tree_estimator_directory","-td",default="/infolab/node4/lukuang/2015-RTS/src/my_code/post_analysis/predictor_analysis/predictor_data/post/tree_estimator")
    parser.add_argument("--number_of_iterations","-ni",type=int,default=50)
    parser.add_argument("--error_threshold","-et",type=int,default=50)
    parser.add_argument("--expansion","-e",choices=list(map(int, Expansion)),default=0,type=int,
        help="""
            Choose the expansion:
                0:raw
                1:static
                2:dynamic
        """)
    parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int,
        help="""
            Choose the retrieval method:
                0:f2exp
                1:dirichlet
                2:pivoted
                3:bm25
        """)
    parser.add_argument("dest_file")
    args=parser.parse_args()

    # if args.error_threshold >= 50:
    #     raise ValueError("Threshold cannot be greater than 50!")

    args.year = Year(args.year)
    args.retrieval_method = RetrievalMethod(args.retrieval_method)
    args.expansion = Expansion(args.expansion)


    eval_data = EvalData(args.year)
    result_dir = R_DIR[args.year][args.expansion][args.retrieval_method]
    results = read_results(result_dir,eval_data)
    query_data_file = os.path.join(args.tree_estimator_directory,args.year.name,args.expansion.name,args.retrieval_method.name)
    query_data_file = os.path.join(query_data_file,"data")
    print "get value pair %s" %(query_data_file)
    values = json.load(open(query_data_file))

    # print results

    # create query_data
    query_data = []
    ndcgs = {}
    for qid in eval_data.days:
        for day in eval_data.days[qid]:
            day_qid = "%s_%s" %(day,qid)
            # print day_qid
            # print results[day]
            if qid in results[day]:
                day_results = {qid: results[day][qid]}
                day_query_ndcg = eval_data.ndcg(day,day_results)
            else:
                day_query_ndcg = .0
            ndcgs[day_qid] = day_query_ndcg
            
            single_data = {}
            single_data["day_qid"] = day_qid
            single_data["ndcg"] = day_query_ndcg
            single_data["values"] = values[day][qid]
            query_data.append(single_data)

    # print ndcgs

    forest = Forest(query_data,args.error_threshold,args.number_of_iterations)
    
    forest.start_training()

    predicted_values = forest.output_result(query_data)

    # print predicted_values
    kt = evaluate_kt(ndcgs,predicted_values)
    print kt
    # print "The predicted kendall's tau is %f" %(kt)

    with open(args.dest_file,'w') as f:
        cPickle.dump(forest, f, protocol=cPickle.HIGHEST_PROTOCOL)
Example #42
 def make_x_tree(self):
     f = Forest()
     f.add_children(0, [1, 2])
     f.add_parents(0, [3, 4])
     return f
Example #43
def run():
    config_dict = yaml.safe_load(open(sys.argv[1], 'r'))
    print config_dict
    data_location = config_dict['data_location']
    uniq_map_file = config_dict['uniq_map_file']
    runiq_map_file = config_dict['runiq_map_file']
    vertices_map, runiq_map = load_data(data_location)
    broken, unequal = fix_similarity_symmetry(vertices_map)
    print "* Fixed similarity relation symmetry (%d unidirected, %d unequal)" % (broken, unequal)

    print "* Vertices map generated"
    _, deleted = purge_invalid_vertices(vertices_map, runiq_map, uniq_map_file, runiq_map_file)
    print "* Cleaned up vertices map (deleted %d isolated vertices)" % (deleted)
 
    if 'min_elems' in config_dict:
        forest = Forest(vertices_map, min_graph_elems=config_dict['min_elems'])
    else:
        forest = Forest(vertices_map)
    ccs = forest.build_connected_components()
    print "* Built connected components"
    forest.build_forest(ccs)
    print "* Built graphs out of connected components"
    forest.reduce()
    print "* Forest reduced!"

    for graph in forest.elements:
        print graph.distance_matrix()

    print len(forest.elements)
    print forest.elements_size_hist()
    forest.pickle(config_dict['pickle_dir'])
Example #44
class ForestTests(unittest.TestCase):
    def setUp(self):
        self.forest = Forest()

    def assert_equal_sets(self, a, b):
        self.assertEqual(set(a), set(b))

    def assert_size(self, size):
        self.assertEqual(self.forest.size, size)

    def make_x_tree(self):
        f = Forest()
        f.add_children(0, [1, 2])
        f.add_parents(0, [3, 4])
        return f

    def test_empty_forest(self):
        self.assertTrue(self.forest.empty())
        self.assert_size(0)

    def test_exceptions(self):
        with self.assertRaises(NotInForest):
            self.forest.parents(0)

        with self.assertRaises(NotInForest):
            self.forest.children(1)

    def test_is_root(self):
        for node in range(5):
            self.forest.add_node(node)
        
        for node in range(5):
            self.assertTrue(self.forest.is_root(node))

    def test_contains(self):
        for i in range(5):
            self.forest.add_node(i)

        for i in range(5):
            self.assertTrue(i in self.forest)
        
    def test_add_node(self):
        self.forest.add_node(0)
        self.assertEqual(self.forest.size, 1)

    def test_add_children(self):
        self.forest.add_node(0)
        self.forest.add_children(0, [1, 2])
        self.assert_size(3)
        self.assert_equal_sets(self.forest.children(0), [1, 2])

    def test_add_children_to_existing_parent(self):
        self.forest.add_node(0)
        self.forest.add_children(0, [1, 2, 3])
        self.forest.add_children(0, [4, 5])
        self.assert_size(6)
        self.assert_equal_sets(self.forest.children(0), [1, 2, 3, 4, 5])

    def test_add_preexisting_children(self):
        self.forest.add_children(0, [1, 2])
        self.forest.add_children(0, [1, 2, 3])
        self.assert_size(4)
        self.assert_equal_sets(self.forest.children(0), [1, 2, 3])

    def test_add_grandchildren(self):
        self.forest.add_children(0, [1, 2])
        self.forest.add_children(1, [3, 4, 5])
        self.forest.add_children(2, [6, 7, 5])
        self.assert_size(8)
        self.assert_equal_sets(self.forest.children(0), [1, 2])
        self.assert_equal_sets(self.forest.children(1), [3, 4, 5])
        self.assert_equal_sets(self.forest.children(2), [6, 7, 5])

    def test_add_parent(self):
        self.forest.add_node(0)
        self.forest.add_parent(0, 1)
        self.assert_size(2)
        self.assert_equal_sets(self.forest.parents(0), [1])

    def test_add_parent_to_existing_child(self):
        self.forest.add_child(0, 1)
        self.forest.add_parent(1, 2)
        self.assert_size(3)
        self.assert_equal_sets(self.forest.parents(1), [0, 2])

    def test_add_preexisting_parent(self):
        self.forest.add_parents(0, [1, 2, 3])
        self.forest.add_parents(0, [2, 3, 4])
        self.assert_size(5)
        self.assert_equal_sets(self.forest.parents(0), [1, 2, 3, 4])

    def test_add_grandparent(self):
        self.forest.add_parent(0, 1)
        self.forest.add_parent(1, 2)
        self.assert_size(3)
        self.assert_equal_sets(self.forest.parents(0), [1])
        self.assert_equal_sets(self.forest.parents(1), [2])

    def test_roots(self):
        self.forest.add_children(0, [1, 2])
        self.forest.add_children(3, [4, 5])
        self.forest.add_children(6, [5, 1])
        self.assert_equal_sets(self.forest.roots, [0, 3, 6])

    def test_roots_with_intersected_tree(self):
        pass

    def test_eq(self):
        self.forest.add_children(3, [4, 5])
        self.forest.add_children(4, [6, 7])
        self.forest.add_children(5, [8, 9])
        self.assert_size(7)
        other_forest = Forest()
        other_forest.add_children(3, [4, 5])
        other_forest.add_children(4, [6, 7])
        other_forest.add_children(5, [8, 9])
        self.assertEqual(other_forest.size, self.forest.size)
        self.assertEqual(other_forest, self.forest)

    def test_neq(self):
        self.forest.add_children(3, [4, 5])
        self.forest.add_children(4, [6, 7])
        self.forest.add_children(5, [8, 9])
        self.assert_size(7)
        other_forest = Forest()
        self.assertEqual(other_forest.size, 0)
        self.assertNotEqual(other_forest, self.forest)

    def test_replace(self):
        self.forest = self.make_x_tree()
        self.assert_size(5)
        self.forest.replace(0, 5)
        self.assert_size(5)
        self.assertTrue(5 in self.forest)
        self.assert_equal_sets(self.forest.children(5), [1, 2])
        self.assert_equal_sets(self.forest.parents(5), [3, 4])
        self.assertFalse(0 in self.forest)

    def test_replace_nonexistent_raises_error(self):
        with self.assertRaises(NotInForest):
            self.forest.replace(0, 1)

    def test_subtree_leaf(self):
        self.forest.add_children(0, [1, 2, 3])
        expected = Forest().add_node(1)

        leaf = self.forest.subtree(1)

        self.assert_size(4)
        self.assertEqual(leaf.size, 1)
        self.assertEqual(leaf, expected)

    def test_subtree_tree(self):
        self.forest = self.make_x_tree()
        expected = Forest().add_children(0, [1, 2])

        tree = self.forest.subtree(0)

        self.assert_size(5)
        self.assertEqual(tree.size, 3)
        self.assertEqual(tree, expected)

    def test_subtree_with_loop(self):
        self.forest = self.make_x_tree()
        self.forest.add_parents(5, [1, 2])
        self.forest.add_child(5, 6)
        expected = self.make_x_tree()
        expected.add_child(5, 6)

        tree = self.forest.subtree(0)

        self.assert_size(7)
        self.assertEqual(tree.size, 5)
        self.assertEqual(tree, expected)

    def test_bfs(self):
        pass
Example #45
    def partition_chart_to_rule_chart(self, chart):

        graph_edge_list = chart.graph.triples()
        node_order = chart.graph.get_ordered_nodes()

        result = Forest()
        seen = {}

        fragment_counter = [0]

        def convert_chart(partition, external_nodes, nt, first=False):
            nt = NonterminalLabel(nt.label)  # Get rid of the index

            if partition in seen:
                node = seen[partition]
                result.use_counts[node] += 1
                return node

            leaves = chart.tree.leaves()

            edges_in_partition = [graph_edge_list[i] for i in range(len(partition.edges)) if partition.edges[i] == 1]

            if partition not in chart:  # leaf

                graph = Hgraph.from_triples(edges_in_partition, {}, warn=False)
                graph.roots = graph.find_roots()
                graph.roots.sort(lambda x, y: node_order[x] - node_order[y])
                graph.external_nodes = external_nodes
                str_rhs = [leaves[i] for i in range(partition.str_start, partition.str_end + 1)]
                rule = Rule(0, nt.label, graph, tuple(str_rhs), 1)
                rule_id = self.add_rule(rule)
                fragment = fragment_counter[0]
                result[fragment] = [(rule_id, [])]
                result.use_counts[fragment] += 1
                seen[partition] = fragment
                fragment_counter[0] += 1
                return fragment

            poss = []
            count = 0
            for possibility in chart[partition]:
                count += 1
                partition_graph = Hgraph.from_triples(edges_in_partition, {}, warn=False)  # This is the parent graph
                partition_graph.roots = partition_graph.find_roots()
                partition_graph.roots.sort(lambda x, y: node_order[x] - node_order[y])
                partition_graph.external_nodes = external_nodes
                children = []
                # print partition_graph.to_amr_string()

                spans_to_nt = {}
                old_pgraph = partition_graph

                index = 1
                for subpartition in possibility:  # These are the different sub-constituents

                    edges_in_subpartition = [
                        graph_edge_list[i] for i in range(len(subpartition.edges)) if subpartition.edges[i] == 1
                    ]
                    if edges_in_subpartition:  # Some constituents do not have any edges aligned to them
                        sub_graph = Hgraph.from_triples(edges_in_subpartition, {}, warn=False)
                        sub_graph.roots = sub_graph.find_roots()
                        sub_graph.roots.sort(lambda x, y: node_order[x] - node_order[y])
                        external_node_list = partition_graph.find_external_nodes2(sub_graph)
                        external_node_list.sort(lambda x, y: node_order[x] - node_order[y])
                        sub_external_nodes = dict([(k, v) for v, k in enumerate(external_node_list)])
                        sub_graph.external_nodes = sub_external_nodes
                        sub_nt = NonterminalLabel("%s%i" % (subpartition.phrase, len(sub_external_nodes)), index)
                        children.append(convert_chart(subpartition, sub_external_nodes, sub_nt))  # Recursive call
                        old_pgraph = partition_graph
                        partition_graph = partition_graph.collapse_fragment2(
                            sub_graph, sub_nt, external=external_node_list, warn=False
                        )

                        spans_to_nt[subpartition.str_start] = (sub_nt, subpartition.str_end)
                    else:
                        sub_nt = NonterminalLabel(subpartition.phrase, index)

                    # assert partition_graph.is_connected()
                    index += 1

                partition_graph.roots = partition_graph.find_roots()
                partition_graph.roots.sort(lambda x, y: node_order[x] - node_order[y])

                # Assemble String rule
                str_rhs = []
                i = partition.str_start
                while i <= partition.str_end:
                    if i in spans_to_nt:
                        new_nt, i = spans_to_nt[i]
                        str_rhs.append(new_nt)
                    else:
                        str_rhs.append(leaves[i])
                    i = i + 1

                rule = Rule(0, nt.label, partition_graph, tuple(str_rhs), 1)
                rule_id = self.add_rule(rule)

                poss.append((rule_id, children))

            fragment = fragment_counter[0]
            result[fragment] = poss
            result.use_counts[fragment] += 1
            seen[partition] = fragment
            fragment_counter[0] += 1
            return fragment

        result.root = convert_chart(chart.root, {}, NonterminalLabel(chart.root.phrase), first=True)
        return result
Example #46
0
 def setUp(self):
     self.forest = Forest()
Example #47
0
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--index_type","-it",choices=list(map(int, IndexType)),default=0,type=int,
        help="""
            Choose the index type:
                0:full
                1:processed
        """)
    parser.add_argument("--query_part","-qp",choices=list(map(int, QueryPart)),default=0,type=int,
        help="""
            Choose the query part:
                0:title
                1:desc
        """)
    parser.add_argument("--tree_estimator_directory","-td",default="/infolab/node4/lukuang/2015-RTS/src/my_code/post_analysis/predictor_analysis/disk4-5/predictor_data/post/tree_estimator")
    parser.add_argument("--number_of_iterations","-ni",type=int,default=50)
    parser.add_argument("--error_threshold","-et",type=int,default=30)
    parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int,
        help="""
            Choose the retrieval method:
                0:f2exp
                1:dirichlet
                2:pivoted
                3:bm25
        """)
    parser.add_argument("dest_dir")
    parser.add_argument("--metric_string","-ms",default="P_10")
    args=parser.parse_args()

    # if args.error_threshold >= 50:
    #     raise ValueError("Threshold cannot be greater than 50!")

    args.index_type = IndexType(args.index_type)
    args.query_part = QueryPart(args.query_part)
    eval_data = EvalData(args.index_type,args.metric_string)
    args.retrieval_method = RetrievalMethod(args.retrieval_method)
    result_dir = R_DIR[args.index_type][args.retrieval_method]
    print "result dir %s" %(result_dir)
    result_files = get_result_files(result_dir)
    query_data_file = os.path.join(args.tree_estimator_directory,args.index_type.name,args.retrieval_method.name)
    query_data_file = os.path.join(query_data_file,"data")
    print "get value pair %s" %(query_data_file)
    values = json.load(open(query_data_file))

    all_metrics = {}
    for day in values:
        all_metrics[day] =  eval_data.get_metric(result_files[day])

    #load silent day

    # create query_data
    query_data = []
    day = "10"
    for qid in values.values()[0].keys():
        # m = re.search("^(\d+)_",qid)
        # if m:
        #     q_num = int(m.group(1))
        #     if q_num > 650:
        #         continue
        # else:
        #     raise RuntimeError("Mal qid format %s" %(qid))
        day_qid = "10_%s" %(qid)
        if args.query_part.name not in qid:
            continue
        # print day_qid
        
        
        # print results[day]

        if qid in all_metrics[day]:
            
            day_query_metric = all_metrics[day][qid]
        else:
            print "WARNING: %s metric not found!" %(qid)
            day_query_metric = 0.0
        
        single_data = {}
        single_data["day_qid"] = day_qid
        single_data["metric"] = day_query_metric
        single_data["values"] = values[day][qid]
        query_data.append(single_data)

    # print metrics
    print "There are %d queries" %(len(query_data))
    kf = KFold(n_splits=4,shuffle=True)
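    # 4-fold cross-validation: each fold trains a Forest on ~75% of the
    # queries and scores the held-out fold with Kendall's tau; the taus
    # are averaged below before a final model is trained on all the data.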
    kt = []

    for training_index, test_index in kf.split(query_data):

        training_data = []
        testing_data = []
        metrics = {}
        # print "%d training %d testing" %(len(training_index),len(test_index))
        for i in training_index:
            training_data.append( deepcopy(query_data[i]))

        for j in test_index:
            testing_data.append( deepcopy(query_data[j]))
            day_qid = query_data[j]["day_qid"]
            metrics[day_qid] = query_data[j]["metric"]

        # print training_data
        forest = Forest(training_data,args.error_threshold,args.number_of_iterations)
        
        forest.start_training()

        predicted_values = forest.output_result(testing_data)

        # print predicted_values
        # print metrics
        single_kt = evaluate_kt(metrics,predicted_values)
        print single_kt
        # print single_kt[0]
        kt.append(single_kt[0])

    print "The average kendall's tau is %f" %(sum(kt)/(1.0*len(kt)))


    forest = Forest(query_data,args.error_threshold,args.number_of_iterations)
        
    forest.start_training()

    dest_file = os.path.join(args.dest_dir,args.query_part.name,args.retrieval_method.name+"_"+args.metric_string)

    print "Store to %s" %(dest_file)
    with open(dest_file, 'wb') as f:
        cPickle.dump(forest, f, protocol=cPickle.HIGHEST_PROTOCOL)
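        # The model written above can be reloaded later with:
        #     forest = cPickle.load(open(dest_file, 'rb'))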
Example #48
0
    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram()

    decoder = CYKDecoder(weights, lm)

    tot_bleu = Bleu()
    tot_score = 0.
    tot_time = 0.
    tot_len = tot_fnodes = tot_fedges = 0

    tot_lmedges = 0
    tot_lmnodes = 0
    if FLAGS.debuglevel > 0:
        print >>logs, "beam size = %d" % FLAGS.beam

    for i, forest in enumerate(Forest.load("-", is_tforest=True, lm=lm), 1):

        t = time.time()
        #decoding
        (score, trans, fv) = decoder.beam_search(forest, b=FLAGS.beam)

        re_fv = fv.__copy__()
        re_fv['lm'] = 0.0
        #print lm.word_prob(trans)
        rescore = weights.dot(re_fv)
        rescore += weights['lm'] * -lm.word_prob(trans)
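        # Sanity re-scoring: zero out the lm feature accumulated during beam
        # search, then add back weights['lm'] times the LM score of the final
        # string (word_prob is assumed to return a log-probability, hence the
        # minus sign).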
        
        t = time.time() - t
        tot_time += t

        print trans
Example #49
0
    flags.DEFINE_boolean("graph", False, "")
    flags.DEFINE_boolean("enumerate", False, "")
    flags.DEFINE_boolean("graph_node", False, "")
    flags.DEFINE_boolean("size", False, "")
    
    
    argv = FLAGS(sys.argv)

    assert sum(map(bool, (FLAGS.size, FLAGS.graph, FLAGS.enumerate, FLAGS.graph_node))) <= 1  # at most one mode flag

    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram()
    

    f = Forest.load("-", is_tforest=True, lm=None)
    for i, forest in enumerate(f, 1):
      if len(forest) < 20: continue
      words = set()
      for node in forest:
        for edge in node.edges:
          for sym in edge.rule.rhs:
            if is_lex(sym):
              words.add(sym)
      print "Words", len(words)

      print "Worst case", len(words) * len(words) * len(words)

      graph = NodeExtractor().extract(forest)
      
      if FLAGS.graph:
Example #50
0
class Stage:
    def __init__(self):
        self.mr = MountainRange(MOUNTAIN, 330, 330, 60, 0.25)
        self.bgd = MountainRange(BACKGROUND_TREES, 310, 310, 10, 0.5)
        self.bg_trees = Forest(BACKGROUND_TREES, 310, 10, 40, 40, 50, 0.5)
        self.lake = MountainRange(LAKE, 290, 290, 0, 0)
        self.mg = MountainRange(MIDGROUND_TREES, 60, 60, 10, 1.5)
        self.mg_trees = Forest(MIDGROUND_TREES, 60, 5, 40, 55, 75, 1.5)
        self.fg = MountainRange(FOREGROUND_TREES, 0, 0, 10, 2.5)
        self.fg_trees = Forest(FOREGROUND_TREES, 10, 30, 90, 90, 170, 2.5)
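        # The trailing constructor argument appears to be a parallax scroll
        # factor (an inference, not documented here): distant layers move
        # slowly (0.25-0.5), near layers fast (1.5-2.5) when update() shifts
        # everything by r_change.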
        self.side_checkpoint_marker = SideCheckPointMarker(0)

        #these could be done after construction of the Stage object
        self.bg_trees.load_tree_image('./resources/trees/small_trees_bg.png')
        self.mg_trees.load_tree_image('./resources/trees/med_trees_mg.png')
        self.fg_trees.load_tree_image('./resources/trees/large_trees_fg.png')

        #lane buoys could be processed by this class too

        self.bg = pygame.Surface((32, 32))
        self.bg.convert()
        self.bg.fill(pygame.Color("#FFE5C1"))
        self.bg.fill(pygame.Color("#dff1ff"))
        #self.bg.fill(pygame.Color("#dcc8ff"))

    def reset(self):
        self.side_checkpoint_marker = SideCheckPointMarker(0)

    def update(self, r_change):
        self.mr.shift_left(r_change)
        self.bgd.shift_left(r_change)
        self.bg_trees.shift_left(r_change)
        #no lake change
        self.mg.shift_left(r_change)
        self.mg_trees.shift_left(r_change)
        self.fg.shift_left(r_change)
        self.fg_trees.shift_left(r_change)

    def draw(self, screen, viewable_min_r, viewable_max_r, ghosts, player):
        # draw background
        for y in range(32):
            for x in range(64):
                screen.blit(self.bg, (x * 32, y * 32))

        self.mr.update()
        self.mr.draw(screen)
        self.bgd.update()
        self.bgd.draw(screen)
        self.bg_trees.update()
        self.bg_trees.draw(screen)
        self.lake.update()
        self.lake.draw(screen)
        checkpoint_r = self.side_checkpoint_marker.update(
            screen, viewable_min_r, viewable_max_r)

        for g in ghosts:
            g.blit(screen, viewable_min_r, viewable_max_r, SIDE)

        player.blit(screen, viewable_min_r, viewable_max_r, SIDE)

        self.mg.update()
        self.mg.draw(screen)
        self.mg_trees.update()
        self.mg_trees.draw(screen)
        self.fg.update()
        self.fg.draw(screen)
        self.fg_trees.update()
        self.fg_trees.draw(screen)
Example #51
0
        print >> logs, "Error: must specify pruning threshold by -p or ratio by -r" + str(FLAGS)
        sys.exit(1)

    weights = Model.cmdline_model()
    lm = Ngram.cmdline_ngram() # if FLAGS.lm is None then returns None
    if lm:
        weights["lm1"] = weights["lm"] * FLAGS.lmratio
    
    onebestscores = 0
    onebestbleus = Bleu()
    myscores = 0
    myoraclebleus = Bleu()    
    
    total_nodes = total_edges = old_nodes = old_edges = 0
    
    for i, forest in enumerate(Forest.load("-", lm=lm), 1):
        if forest is None:
            print
            continue
        
        prune(forest, weights, FLAGS.prob, FLAGS.ratio)

        score, hyp, fv = forest.root.bestres
        
        forest.bleu.rescore(hyp)
        onebestscores += score
        onebestbleus += forest.bleu.copy()

        if FLAGS.oracle: #new
            bleu, hyp, fv, edgelist = forest.compute_oracle(weights, 0, 1, store_oracle=True)
            ##print >> logs, forest.root.oracle_edge
Пример #52
0
 def do_forest(self):
     if self.flag == 0:
         start = time.time()
         forest_instance = Forest(self.train, None)
         forest_instance.build_forest()
         forest_instance.write_model(self.model_file)
         end = time.time()
         print 'Training Time :', (end - start) / 60, 'mins'
     else:
         start = time.time()
         forest_instance = Forest(None, self.test)
         forest_instance.load_model(self.model_file)
         test_output = forest_instance.test_forest(self.test,
                                                   self.output_file)
         print test_output['accuracy'], '%'
         end = time.time()
         print 'Testing Time :', (end - start) / 60, 'mins'
Example #53
0
if __name__ == "__main__":
	
	import optparse
	optparser = optparse.OptionParser(usage="usage: cat <forests> | %prog -g <GOLDFILE> [-s <suffix>]")
	optparser.add_option("-g", "--gold", dest="goldfile", \
						 help="gold file", metavar="FILE", default=None)
	optparser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="no dumping", default=False)
	optparser.add_option("-r", "--remove", dest="remove_sp", action="store_true", \
						 help="remove spurious", default=False)
	optparser.add_option("-s", "--suffix", dest="suffix", help="dump suffix (1.suffix)", metavar="SUF")

	(opts, args) = optparser.parse_args()

	if opts.goldfile is None:
		optparser.error("must specify gold file")
	else:
		goldtrees = readonebest(opts.goldfile)


	for i, forest in enumerate(Forest.load("-")):
		forest.goldtree = goldtrees.next()
		if opts.remove_sp:
			remove(forest)
		if opts.suffix is not None:
			forest.dump(open("%d.%s" % (i+1, opts.suffix), "wt"))
		elif not opts.quiet:
			forest.dump()

		
		
Example #54
0
                    if feat.is_edgelocal():
                        edgefvector += FVector.convert_fullname(feat.extract(node, forest.sent))
                        
                edge.fvector += edgefvector
                print "%s ---------\t%s" % (edge, edge.fvector)
    

if __name__ == "__main__":

    try:
        import psyco
        psyco.full()
    except:
        pass
    
    import optparse
    optparser = optparse.OptionParser(usage="usage: cat <forest> | %prog [options (-h for details)]")
    optparser.add_option("", "--id", dest="sentid", type=int, help="sentence id", metavar="ID", default=0)

    (opts, args) = optparser.parse_args()

    fclasses = prep_features(["word-1", "rule-1", "wordedges"])

    for forest in Forest.load("-"):
        local_feats(forest, fclasses)
    
##        break

##    forest.dump()

Example #55
0
def test2():
    f = Forest([5, 2, 3, 7])  # a forest of trees with layers 5, 2, 3, and 7
    f.draw()
Example #56
0
print(f"Bayesian Average Error: {np.round(model5._error.mean()[0] * 100, 2)}%")

print("---- PLS ----")
model4 = PLS(**config)
model4.roll(verbose=True)
print(f"PLS Average Error: {np.round(model4._error.mean()[0] * 100, 2)}%")

# produce a neural network rolling forecast
print("---- Neural Network ----")
model3 = MLP(**config)
model3.roll(verbose=True)
print(f"NNet Average Error: {np.round(model3._error.mean()[0] * 100, 2)}%")

# produce a random forest rolling forecast
print("---- Random Forest ----")
model2 = Forest(**config)
model2.roll(verbose=True)
print(f"Forest Average Error: {np.round(model2._error.mean()[0] * 100, 2)}%")

# produce a lasso regression rolling forecast
print("---- Lasso Regression ----")
model1 = Regression(**config)
model1.roll(verbose=True)
print(f"Lasso Average Error: {np.round(model1._error.mean()[0] * 100, 2)}%")

# produce a baseline rolling forecast (exponential smoothing)
print("---- Exponential Smoothing ----")
baseline_model = Forecasting(**config)
baseline_model.roll(verbose=True)
print(f"Baseline Average Error: {np.round(baseline_model._error.mean()[0] * 100, 2)}%")
Example #57
0
def test4():
    f = Forest([5, 2, 3, 7], widen=2, margin=10)
    f.rotate()
    f.draw(bg=".")
Example #58
0
class Manager:
    MAX_PATH_LENGTH = 5
    TOP_K = 50  # top-k paths we are interested in

    def __init__(self, graph):
        self._graph = graph
        self._orig_graph = graph.copy()  # preserved so restore() can rebuild the working graph
        self._forest = Forest()
        self._subgraph = {} # { category name: { category value: [ node id ] } }
        self._version = 0
        self._deleted_nodes = set()
        self._current_level = dict([(category, "root") for category in self._forest.get_categories()])
        self._constraints = {} # {include, not_include}

        self.setup_constraints()
        self.set_logging()
        self.build_subgraphs()

    def setup_constraints(self):
        self._constraints["include"] = {}
        self._constraints["include"]["id"] = []
        self._constraints["include"]["type"] = []
        self._constraints["not_include"] = {}
        self._constraints["not_include"]["id"] = []
        self._constraints["not_include"]["type"] = []

    def set_logging(self):
        logging.basicConfig(filename="result.log", format='%(asctime)-15s %(message)s')
        self._logger = logging.getLogger()

    def build_subgraphs(self):
        for node in self.get_nodes():
            for name, val in node.get_categories().items():
                if name not in self._subgraph:
                    self._subgraph[name] = {}
                if val not in self._subgraph[name]:
                    self._subgraph[name][val] = []

                self._subgraph[name][val].append(node.get_id())

    def shell(self):
        print "type 'help' to see a list of commands"
        while True:
            line = raw_input(SHELL_PROMPT)
            if line.strip() == "":
                continue
            cmd = line.split()[0]

            if cmd not in CMDS:
                print "Invalid command:", cmd
                continue

            eval("self." + cmd)(line.split()[1:])

    def quit(self, line):
        sys.exit(0)

    def help(self, _):
        for cmd in CMDS:
            print cmd

    def similarity(self, _):
        if not len(_) == 2:
            print "Wrong arguments for <similarity>: " + str(_)
            print "Should be: <similarity> <id1> <id2>"
            return
        [id1, id2] = _

        start = time.time()

        if id1 in self._deleted_nodes:
            print "Node %s doesn't exist in current sub-graph" % (id1)
            return

        if id2 in self._deleted_nodes:
            print "Node %s doesn't exist in current sub-graph" % (id2)
            return

        score = self.compute_similarity(id1, id2)
        print "score:", score
        self._logger.warning("Time taken for similarity search: %f" % (time.time() - start))
        self._logger.warning("Score: %s " %(score))

    def drill_down(self, _):
        if not len(_) == 2:
            print "Wrong arguments for <drill_down>: " + str(_)
            print "Should be: <drill_down> <name> <val>"
            return
        [name, val] = _

        start = time.time()

        for node in self.get_nodes():
            category = node.get_category(name)
            if category and not self._forest.is_member(name, val, category):
                self._deleted_nodes.add(node.get_id())

        self._logger.warning("Time taken for drill-down: %f" % (time.time() - start))
        self._current_level[name] = val
        self._version += 1

    def roll_up(self, _):
        if not len(_) == 2:
            print "Wrong arguments for <roll_up>: " + str(_)
            print "Should be: <roll_up> <name> <val>"
            return
        [name, val] = _

        start = time.time()

        for category_value in self._subgraph[name].keys():
            if not self._forest.is_member(name, val, category_value):
                self._logger.warning("%s %s %s" % (name, val, category_value))
                continue

            for node_id in self._subgraph[name][category_value]:
                if node_id in self._deleted_nodes:
                    self._deleted_nodes.remove(node_id)

        self._logger.warning("Time taken for roll-up: %f" % (time.time() - start))
        self._current_level[name] = val
        self._version += 1

    def restore(self, _):
        to_delete = []

        for node in self.get_nodes():
            to_delete.append(node)

        for node in to_delete:
            self._graph.delete(node)

        self._graph = self._orig_graph.copy()

    def print_node(self, _):
        if not len(_) == 1:
            print "Wrong arguments for <print_node>: " + str(_)
            print "Should be: <print_node> <node_id>"
            return
        [node_id] = _

        print self._graph.get_node(node_id)

    def print_nodes(self, _):
        for node in self.get_nodes():
            self._logger.warning(node)

    def print_num_nodes(self, _):
        print "Number of nodes: %d " % (len(self.get_nodes()))

    def print_neighbors(self, _):
        if not len(_) == 1:
            print "Wrong arguments for <print_neighbors>: " + str(_)
            print "Should be: <print_neighbors> <node_id>"
            return
        [node_id] = _
        node = self._graph.get_node(node_id)

        if node is None:
            print "Node %s doesn't exist" % (node)
        else:
            print "Node %s's neighbors" % (node)
            for neighbor in self.get_neighbors(node):
                print "--> %s" % (neighbor)

    def print_meta_paths(self, _):
        if not len(_) == 2:
            print "Wrong arguments for <print_meta_paths>: " + str(_)
            print "Should be: <print_meta_paths> <node_id1> <node_id2>"
            return
        [id1, id2] = _
        node1 = self._graph.get_node(id1)
        node2 = self._graph.get_node(id2)

        if id1 in self._deleted_nodes or node1 is None:
            print "Node %s doesn't exist" % (id1)
            return
        if id2 in self._deleted_nodes or node2 is None:
            print "Node %s doesn't exist" % (id2)
            return

        node1.print_meta_paths(id2)

    def print_compressed_meta_paths(self, _):
        if not len(_) == 2:
            print "Wrong arguments for <print_meta_paths>: " + str(_)
            print "Should be: <print_meta_paths> <node_id1> <node_id2>"
            return
        [id1, id2] = _
        node1 = self._graph.get_node(id1)
        node2 = self._graph.get_node(id2)

        if id1 in self._deleted_nodes or node1 is None:
            print "Node %s doesn't exist" % (id1)
            return
        if id2 in self._deleted_nodes or node2 is None:
            print "Node %s doesn't exist" % (id2)
            return

        node1.print_compressed_meta_paths(id2)


    def search_node(self, _):
        if not len(_) == 1:
            print "Wrong arguments for <search_node>: " + str(_)
            print "Should be: <search_node> <node_id>"
            return
        node_id = _[0]
        node = self._graph.get_node(node_id)

        if node_id in self._deleted_nodes or node is None:
            print "Node %s doesn't exist" % (node_id)
        else:
            print "Node %s exists" % (node)

    def print_network_statistics(self, _):
        def stddev(l):
            import math

            mean = float(sum(l)) / len(l)
            return math.sqrt(float(sum([(_ - mean)**2 for _ in l]))/len(l))

        def print_degree():
            degrees = []

            for node in self.get_nodes():
                degrees.append(len(self.get_neighbors(node)))

            self._logger.warning("- Degree of node avg: %d stddev: %f" % \
                (float(sum(degrees))/len(degrees), stddev(degrees)))

        def print_clustering_coeff():
            clustering_coeff = []

            for node in self.get_nodes():
                neighbors = self.get_neighbors(node)
                neighbors_id = set([node.get_id() for node in neighbors])
                count = 0
                num_neighbors = len(neighbors) if len(neighbors) else 1

                for neighbor in neighbors:
                    _neighbors = self.get_neighbors(neighbor)
                    for _neighbor in _neighbors:
                        if _neighbor.get_id() in neighbors_id:
                            count += 1

                clustering_coeff.append(float(count)/num_neighbors)

            self._logger.warning("- Clustering coefficient avg: %d stddev: %f" % \
                (float(sum(clustering_coeff))/len(clustering_coeff), stddev(clustering_coeff)))

        def print_avg_path_length():
            avg = self.get_avg_path_length()
            self._logger.warning("- Avg Path length: " + str(avg))
            print str(avg)
        #print_degree()
        # print_clustering_coeff()
        print_avg_path_length()

    def print_children(self, _):
        for category, name in self._current_level.items():
            print "Category: %s, name: %s" % (category, name)
            for child in self._forest.get_children(category, name):
                print "- " + child
            print ""

    def print_parent(self, _):
        for category, name in self._current_level.items():
            print "Category: %s, name: %s" % (category, name)
            print "- " + self._forest.get_parent(category, name)
            print ""

    def print_constraints(self, _):
        print self._constraints

    def add_constraint(self, _):
        if len(_) not in (3, 4):
            print "Wrong arguments for <add_constraint>: " + str(_)
            print "Should be: <add_constraint> <is_include> <is_specific> <id> or <add_constraint> <is_include> <is_specific> <type> <val>"
            return

        if _[1] == "id":
            [is_include, is_specific, id] = _
            constraint = id
        elif _[1] == "type":
            [is_include, is_specific, type, val] = _
            constraint = (type, val)
        else:
            print "Wrong value for <is_specific>: must be 'id' or 'type'"
            return

        if constraint in self._constraints[is_include][is_specific]:
            print "constraint %s has already been added" % (constraint)
            return

        self._constraints[is_include][is_specific].append(constraint)
        self._version += 1

    def delete_constraint(self, _):
        if len(_) not in (3, 4):
            print "Wrong arguments for <delete_constraint>: " + str(_)
            print "Should be: <delete_constraint> <is_include> <is_specific> <id> or <delete_constraint> <is_include> <is_specific> <type> <val>"
            return

        if _[1] == "id":
            [is_include, is_specific, id] = _
            constraint = id
        elif _[1] == "type":
            [is_include, is_specific, type, val] = _
            constraint = (type, val)
        else:
            print "Wrong value for <is_specific>: must be 'id' or 'type'"
            return

        if constraint not in self._constraints[is_include][is_specific]:
            print "constraint %s does not exist" % (constraint)
            return

        self._constraints[is_include][is_specific].remove(constraint)
        self._version += 1

    def test_nyt(self):
        self.print_network_statistics([])
        nodes = [node for node in self.get_nodes() if node.get_type() == "article"]

        for _ in range(100):
            self.drill_down(["loctype", "Eurasia"])
            self.drill_down(["loctype", "Jordan"])
            self.roll_up(["loctype", "Eurasia"])
            self.roll_up(["loctype", "root"])

        self.drill_down(["loctype", "Eurasia"])
        self.print_network_statistics([])
        nodes = [node for node in self.get_nodes() if node.get_type() == "article"]

        for _ in range(100):
            node1 = random.choice(nodes)
            node2 = random.choice(nodes)

            while node1 == node2:
                node2 = random.choice(nodes)

            self.similarity([node1.get_id(), node2.get_id()])

    def test_dblp(self):
        self.print_network_statistics([])
        nodes = [node for node in self.get_nodes() if node.get_type() == "author"]

        for _ in range(100):
            node1 = random.choice(nodes)
            node2 = random.choice(nodes)

            while node1 == node2:
                node2 = random.choice(nodes)

            self.similarity([node1.get_id(), node2.get_id()])

        for _ in range(100):
            self.drill_down(["area", "DB"])
            self.roll_up(["area", "root"])

        self.drill_down(["area", "DB"])
        self.print_network_statistics([])
        nodes = [node for node in self.get_nodes() if node.get_type() == "author"]

        for _ in range(100):
            node1 = random.choice(nodes)
            node2 = random.choice(nodes)

            while node1 == node2:
                node2 = random.choice(nodes)

            self.similarity([node1.get_id(), node2.get_id()])

    def test(self):
        print "Computing similarity..."
        self.test_nyt()
        # self.test_dblp()

    def check_constraints(self, path):
        for id in self._constraints["include"]["id"]:
            is_exist = False
            for node in path:
                if node.get_id() == id:
                    is_exist = True

            if not is_exist:
                return False

        for type, val in self._constraints["include"]["type"]:
            is_exist = False
            for node in path:
                if node.get_category(type) == val:
                    is_exist = True

            if not is_exist:
                return False

        for id in self._constraints["not_include"]["id"]:
            is_exist = False
            for node in path:
                if node.get_id() == id:
                    is_exist = True

            if is_exist:
                return False

        for type, val in self._constraints["not_include"]["type"]:
            is_exist = False
            for node in path:
                if node.get_category(type) == val:
                    is_exist = True

            if is_exist:
                return False

        return True

    def find_path(self, src, dst):
        queue = deque()
        paths = []

        queue.append([src])
        while len(queue) > 0:
            cur_path = queue.popleft()
            last_node = cur_path[-1]

            if len(cur_path) == self.MAX_PATH_LENGTH:
                continue

            if len(paths) == self.TOP_K:
                break

            for neighbor in self.get_neighbors(last_node):
                if neighbor.get_id() in self._deleted_nodes:
                    continue

                new_path = cur_path[:] + [neighbor]

                if neighbor == dst:
                    if not self.check_constraints(new_path):
                        continue
                    paths.append(new_path)
                elif neighbor in cur_path:
                    continue
                else:
                    queue.append(new_path)

        return paths
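    # Note: find_path enumerates simple paths of at most MAX_PATH_LENGTH
    # nodes and stops after TOP_K constraint-satisfying paths, so the
    # similarity below is computed over a truncated path set.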

    # TODO: Better weight assigning
    def compute_score(self, meta_paths):
        score = 0
        for path in meta_paths:
            score += float(1)/len(path)

        return score

    # bfs to the rescue
    def compute_similarity(self, id1, id2):
        node1 = self._graph.get_node(id1)
        node2 = self._graph.get_node(id2)

        node1_node1_path = node1.get_meta_paths(id1)
        node2_node2_path = node2.get_meta_paths(id2)
        node1_node2_path = node1.get_meta_paths(id2)

        if node1_node1_path is None or node1_node1_path[1] < self._version:
            paths = self.find_path(node1, node1)
            node1.add_meta_paths(id1, paths, self._version)

        if node2_node2_path is None or node2_node2_path[1] < self._version:
            paths = self.find_path(node2, node2)
            node2.add_meta_paths(id2, paths, self._version)

        if node1_node2_path is None or node1_node2_path[1] < self._version:
            paths = self.find_path(node1, node2)
            node1.add_meta_paths(id2, paths, self._version)
            node2.add_meta_paths(id1, paths, self._version)

        node1_node1_score = self.compute_score(node1.get_meta_paths(id1)[0])
        node2_node2_score = self.compute_score(node2.get_meta_paths(id2)[0])
        node1_node2_score = self.compute_score(node1.get_meta_paths(id2)[0])

        # print node1_node1_score, node2_node2_score, node1_node2_score
        score = 2 * node1_node2_score / (node1_node1_score + node2_node2_score)

        return score
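    # Worked example with hypothetical numbers: if node1 and node2 share
    # four 4-node paths, node1_node2_score = 4 * (1/4.0) = 1.0; with self
    # scores of 1.25 each, similarity = 2 * 1.0 / (1.25 + 1.25) = 0.8.
    # For id1 == id2 all three scores coincide and the measure is 1.0.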

    def get_neighbors(self, node):
        return [n for n in node.get_neighbors().values() if n.get_id() not in self._deleted_nodes]

    def get_nodes(self):
        return [n for n in self._graph.get_nodes() if n.get_id() not in self._deleted_nodes]

    def get_avg_path_length(self):
        def shortest_path(start, end):
            label = {}
            parent = {}
            for node in self._graph.get_nodes():
                label[node.get_id()] = 'UNEXPLORED'

            queue = deque([start])
            found = False

            while queue and not found:
                node = queue.popleft()
                label[node.get_id()] = 'VISITED'
                neighbors = self.get_neighbors(node)

                for neighbor in neighbors:
                    if found: break

                    if label[neighbor.get_id()] == 'UNEXPLORED':
                        label[neighbor.get_id()] = 'DISCOVERY'
                        parent[neighbor.get_id()] = node.get_id()
                        queue.append(neighbor)

                        if neighbor.get_id() == end.get_id():
                            found = True
                # endfor
            # endwhile
            if not found:
                return None

            distance = 0
            cur = end.get_id()
            while cur != start.get_id():
                distance += 1
                cur = parent[cur]
            return distance
        # end shortest path

        distances = 0
        n = 0

        sample = random.sample(self._graph.get_nodes(), 32)
        for start in sample:
            for end in sample:
                if start.get_id() == end.get_id(): continue

                path = shortest_path(start, end)
                print start, end, path
                if path:
                    distances += path
                    n += 1
            # endfor end
        # endfor start
        return float(distances)/float(n)
Example #59
0
	def load(self, filename):
		return Forest.load(filename)
Example #60
0
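# Tail of a tile renderer (the header is not part of this snippet); judging
# from the calls further down, the missing signature is roughly:
#     def forest2bmp(forest, filename):
# with `bmp` a PIL Image of size (forest.x * 32, forest.y * 32) and the
# 32x32 tile images (wood, bamboo, fire, soil, pool, road, mountain)
# loaded elsewhere.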
    for y in range(forest.y):
        for x in range(forest.x):
            cell = str(forest.get_cell(x, y))
            dx = x * 32
            dy = y * 32

            if cell == 'W':
                bmp.paste(wood, (dx, dy))
            elif cell == 'B':
                bmp.paste(bamboo, (dx, dy))
            elif cell == 'F':
                bmp.paste(fire, (dx, dy))
            elif cell == 'S':
                bmp.paste(soil, (dx, dy))
            elif cell == 'P':
                bmp.paste(pool, (dx, dy))
            elif cell == 'R':
                bmp.paste(road, (dx, dy))
            elif cell == 'M':
                bmp.paste(mountain, (dx, dy))

    bmp.save(filename, 'PNG')


f = Forest()
f.loads(open('default.txt', mode='r').read())
forest2bmp(f, 'r_0.png')
for t in range(24 * 7):
    f.next_generation()
    forest2bmp(f, 'r_%d.png' % (t + 1))