Пример #1
0
 def add_partition(self, drv_name, cyl_range, dev_flags=0, flags=0, dos_type=DosType.DOS0, boot_pri=0):
   """Create a new partition block, link it into the chain and register it.

   drv_name  -- drive name handed to PartitionBlock.create()
   cyl_range -- sequence whose first two items are the low and high cylinder
   dev_flags -- accepted for interface compatibility; not used in this method
   flags     -- forwarded to PartitionBlock.create()
   dos_type  -- DOS type stored in the partition's DOS environment
   boot_pri  -- boot priority stored in the partition's DOS environment

   Returns True on success, False if check_cyl_range() rejects the range or
   no free RDB block is left.
   """
   # cyl range is not free anymore or invalid
   if not self.check_cyl_range(*cyl_range):
     return False
   # no space left for partition block
   if not self._has_free_rdb_blocks(1):
     return False
   # allocate block for partition
   blk_num = self._alloc_rdb_blocks(1)[0]
   self.used_blks.append(blk_num)
   self._update_hi_blk()
   # create a new partition block
   pb = PartitionBlock(self.rawblk, blk_num)
   heads = self.rdb.phy_drv.heads
   blk_per_trk = self.rdb.phy_drv.secs
   dos_env = PartitionDosEnv(low_cyl=cyl_range[0], high_cyl=cyl_range[1], surfaces=heads,
                             blk_per_trk=blk_per_trk, dos_type=dos_type, boot_pri=boot_pri)
   pb.create(drv_name, dos_env, flags=flags)
   pb.write()
   # link block
   if len(self.parts) == 0:
     # first partition: the RDB points at it
     self.rdb.part_list = blk_num
   else:
     # otherwise chain it behind the current last partition block
     last_pb = self.parts[-1]
     last_pb.part_blk.next = blk_num
     last_pb.write()
   # always write RDB as allocated block is stored there, too
   self.rdb.write()
   # create partition object and add to partition list.
   # Partition takes the number of blocks per *cylinder* (open() hands it
   # log_drv.cyl_blks), so derive that value instead of passing the
   # blocks-per-track count.
   blk_per_cyl = blk_per_trk * heads
   p = Partition(self.rawblk, blk_num, len(self.parts), blk_per_cyl, self)
   p.read()
   self.parts.append(p)
   return True
Пример #2
0
    def get_log_info(self, log_file):
        """Parse a log file into per-line partitions and extend ``self.df_log``.

        Each line has runs of non-ASCII characters replaced by ``<NASCII>``, is
        matched against ``self.format_rex`` and has its ``Content`` group
        preprocessed and tokenised. Every line that parses yields a
        ``(token_count, Partition)`` tuple whose partition holds one
        ``LCSObject`` registered in its own prefix tree. Lines that fail to
        parse are skipped (best effort).

        :param log_file: path of the log file to read
        :return: list of ``(token_count, Partition)`` tuples, one per parsed line
        """
        log_messages = []
        structure_log_info = []
        # Line ids continue after the rows already stored in self.df_log.
        start = 0 if self.df_log.empty else self.df_log.shape[0]
        end = start
        count = 0
        now = datetime.now()
        with open(log_file, 'r') as fin:
            for line in fin:
                # Replace non-ASCII text with the <NASCII> tag
                line = re.sub(r'[^\x00-\x7F]+', '<NASCII>', line)
                try:
                    match = self.format_rex.search(line.strip())
                    # One named regex group per header
                    message = [match.group(header) for header in self.headers]
                    log_messages.append(message)
                    # Advance the id only once the row exists. Counting
                    # unparsable lines as well would make the LineId column
                    # longer than the frame built below.
                    end += 1

                    content = [
                        token for token in re.split(
                            r'[\s=:,]',
                            self._preprocess(match.group('Content')))
                        if token != ''
                    ]
                    lineId = end
                    length = len(content)
                    # Each parsed line starts out as its own partition
                    info = Partition(logClustL=[])
                    info.rootNode = Node()
                    info.logClustL.append(
                        LCSObject(logTemplate=content,
                                  logIDL=[lineId],
                                  constLogTemplate=content))
                    self._addSeqToPrefixTree(info.rootNode, info.logClustL[0])
                    structure_log_info.append((length, info))

                    count += 1
                    # Progress report every 100000 parsed lines
                    if count % 100000 == 0:
                        print(count)
                        print(datetime.now() - now)
                        now = datetime.now()
                except Exception:
                    # Deliberate best effort: a line that does not follow the
                    # expected format is skipped instead of aborting the run.
                    continue
        logdf = pd.DataFrame(log_messages, columns=self.headers)
        logdf.insert(0, 'LineId', None)
        logdf['LineId'] = [i + 1 for i in range(start, end)]
        if not self.df_log.empty:
            # DataFrame.append was removed in pandas 2.0; concat is equivalent.
            self.df_log = pd.concat([self.df_log, logdf], ignore_index=True)
        else:
            self.df_log = logdf
        return structure_log_info
Пример #3
0
    def __init__(self, path):
        """Load the non-empty lines of *path* and create the cleaning helpers.

        :param path: text file read line by line; every stripped, non-empty
            line is stored in ``self.Documents``
        """
        self.Documents = []
        # Characters kept by the cleaner: ASCII letters, a few punctuation
        # marks and digits. Written as literals so the Python-2-only
        # ``xrange`` is not needed.
        self.allowed = set(
            'abcdefghijklmnopqrstuvwxyz'
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            ',.?-! '
            '0123456789'
        )
        self.punctuation = [';', ':', '&', '?', "/"]
        self.P = Partition(self.punctuation)
        self.tagger = PatternTagger()
        self.sw = StopWords()
        with open(path, 'r') as f:
            for raw in f:
                text = raw.strip()
                if text:
                    self.Documents.append(text)
Пример #4
0
    def __init__(self, knowledge):
        """Start tracking with one partition that covers every goal.

        :param knowledge: object providing ``goal_actions``, ``goal_params``
            and ``goal_params_values``
        """
        self.knowledge = knowledge

        # Turn counters start at zero
        self.num_user_turns = 0
        self.num_dialog_turns = 0

        # Number of actions a goal may refer to; an utterance may also carry
        # no action at all, hence the extra slot
        self.num_goal_actions = len(knowledge.goal_actions)
        self.num_utterance_actions = self.num_goal_actions + 1

        # Maximum number of parameters an action can have
        self.max_goal_params = len(knowledge.goal_params)

        # Number of values each parameter can take
        self.num_goal_param_values = len(knowledge.goal_params_values)

        # The initial partition allows every action and, for each parameter
        # name, the full (shared) list of parameter values
        every_value = {
            param: knowledge.goal_params_values
            for param in ('patient', 'location', 'recipient')
        }
        self.partitions = [
            Partition(knowledge.goal_actions, every_value, 0.0)
        ]

        # Hypotheses currently being tracked
        self.hypothesis_beliefs = None
Пример #5
0
    def open(self):
        """Read the RDB and walk its partition and filesystem chains.

        Fills ``self.rdb``, ``self.used_blks``, ``self.parts`` and ``self.fs``;
        on success also sets ``self.max_blks``. ``self.valid`` mirrors the
        return value.

        :return: True when every block was read, False as soon as a read fails
        """
        # the RDB comes first; both chains start from it
        self.rdb = RDBlock(self.rawblk)
        if not self.rdb.read():
            self.valid = False
            return False
        # every block touched below is recorded as used
        self.used_blks = [self.rdb.blk_num]

        # follow the partition chain until the end marker
        next_blk = self.rdb.part_list
        self.parts = []
        index = 0
        while next_blk != Block.no_blk:
            part = Partition(self.rawblk, next_blk, index,
                             self.rdb.log_drv.cyl_blks, self)
            index += 1
            if not part.read():
                self.valid = False
                return False
            self.parts.append(part)
            self.used_blks.append(part.get_blk_num())
            next_blk = part.get_next_partition_blk()

        # follow the filesystem chain the same way
        next_blk = self.rdb.fs_list
        self.fs = []
        index = 0
        while next_blk != PartitionBlock.no_blk:
            file_sys = FileSystem(self.rawblk, next_blk, index)
            index += 1
            if not file_sys.read():
                self.valid = False
                return False
            self.fs.append(file_sys)
            # a filesystem may span several blocks
            self.used_blks += file_sys.get_blk_nums()
            next_blk = file_sys.get_next_fs_blk()

        # TODO: add bad block blocks

        self.valid = True
        self.max_blks = self.rdb.log_drv.rdb_blk_hi + 1
        return True
    def getDisks(self):
        """
        Enumerate the disks present on the system and sync ``self.cmdb.disks``.

        Every scanned disk gets a ``Disk`` entry (created or updated) carrying
        its name and size. When the scan lists partitions for a disk, that
        disk's ``partitions`` list is rebuilt from scratch. Entries for disks
        missing from the scan are removed, and ``'disks'`` is added to
        ``self.cmdb.dirtyProperties``.

        @return: the raw scan result of ``inventoryScan.getDisks()``
        """
        disks = j.cloud.cmdtools.inventoryScan.getDisks()
        currentAvailableDisks = set()
        # items() works on Python 2 and 3 (iteritems() is Python 2 only)
        for name, value in disks.items():
            # sizes reported in 'GB' are multiplied by 1024, others taken as-is
            size = float(value['size'])
            if value['unit'] == 'GB':
                size *= 1024
            size = int(size)
            partitions = value['partitions']
            currentAvailableDisks.add(name)
            if name in self.cmdb.disks.keys():
                self.cmdb.disks[name].name = name
                self.cmdb.disks[name].size = size
            else:
                disk = Disk()
                disk.name = name
                disk.size = size
                self.cmdb.disks[name] = disk
            if partitions:
                disk = self.cmdb.disks[name]
                disk.partitions = list()
                for part in partitions:
                    partition = Partition(
                        part['Type'], part['number'], part['start'],
                        part['end'],
                        # the last three characters of the size string are
                        # dropped before conversion (presumably a unit suffix)
                        int(float(part['size'][0:-3])),
                        part['mountpoint'] if 'mountpoint' in part else '',
                        part['used'] if 'used' in part else 0.0,
                        part['name'] if 'name' in part else '',
                        part['flag'] if 'flag' in part else '')
                    # RAID details are attached only when the scan reports devices
                    if 'devices' in part:
                        partition.raid = PartitionRaid(
                            part['level'], part['state'], part['devices'],
                            part['activeDevices'], part['failedDevices'],
                            part['totalDevices'], part['raidDevices'],
                            part['spareDevices'], part['backendsize'])
                    disk.partitions.append(partition)

        # Snapshot the keys first: deleting while iterating a live key view
        # raises RuntimeError on Python 3.
        for disk in list(self.cmdb.disks.keys()):
            if disk not in currentAvailableDisks:
                del self.cmdb.disks[disk]
        self.cmdb.dirtyProperties.add('disks')
        return disks
Пример #7
0
 def add_partition(self,
                   drv_name,
                   cyl_range,
                   dev_flags=0,
                   flags=0,
                   dos_type=DosType.DOS0,
                   boot_pri=0,
                   more_dos_env=None):
     """Create a partition block, link it into the chain and register it.

     drv_name     -- drive name handed to PartitionBlock.create()
     cyl_range    -- sequence whose first two items are low and high cylinder
     dev_flags    -- accepted but not used in this method
     flags        -- forwarded to PartitionBlock.create()
     dos_type     -- DOS type for the partition's DOS environment
     boot_pri     -- boot priority for the partition's DOS environment
     more_dos_env -- passed to _adjust_dos_env() together with the new
                     DOS environment

     Returns True on success, False if the cylinder range is rejected or no
     free RDB block is available.
     """
     # reject ranges that are invalid or no longer free
     if not self.check_cyl_range(*cyl_range):
         return False
     # the partition block itself needs one free RDB block
     if not self._has_free_rdb_blocks(1):
         return False
     new_blk = self._alloc_rdb_blocks(1)[0]
     self.used_blks.append(new_blk)
     self._update_hi_blk()
     # build and store the new partition block
     part_block = PartitionBlock(self.rawblk, new_blk)
     heads = self.rdb.phy_drv.heads
     secs = self.rdb.phy_drv.secs
     dos_env = PartitionDosEnv(low_cyl=cyl_range[0],
                               high_cyl=cyl_range[1],
                               surfaces=heads,
                               blk_per_trk=secs,
                               dos_type=dos_type,
                               boot_pri=boot_pri)
     self._adjust_dos_env(dos_env, more_dos_env)
     part_block.create(drv_name, dos_env, flags=flags)
     part_block.write()
     # link the block: behind the last partition, or into the RDB if first
     if self.parts:
         tail = self.parts[-1]
         tail.part_blk.next = new_blk
         tail.write()
     else:
         self.rdb.part_list = new_blk
     # always write RDB as allocated block is stored there, too
     self.rdb.write()
     # flush out all changes before we read again
     self.rawblk.flush()
     # blocks per cylinder = blocks per track * heads
     new_part = Partition(self.rawblk, new_blk, len(self.parts),
                          secs * heads, self)
     new_part.read()
     self.parts.append(new_part)
     return True
Пример #8
0
 def new_box(self):
     """Return a new grid (list of rows) with one Partition per (row, column).

     The grid has ``self.shape[0]`` rows of ``self.shape[1]`` entries; each
     Partition is constructed from its ``(row, column)`` index tuple.
     """
     return [
         [Partition((row, col)) for col in range(self.shape[1])]
         for row in range(self.shape[0])
     ]
Пример #9
0
 def open(self):
   """Load the RDB together with its partition and filesystem lists.

   Sets self.rdb, self.used_blks, self.parts and self.fs; on success also
   self.max_blks. self.valid is set to the value that is returned.

   Returns True if all blocks could be read, False on the first failed read.
   """
   # read RDB
   self.rdb = RDBlock(self.rawblk)
   if not self.rdb.read():
     self.valid = False
     return False
   # the RDB block itself is the first used block
   self.used_blks = [self.rdb.blk_num]

   # walk the partition list
   self.parts = []
   cur_blk = self.rdb.part_list
   part_no = 0
   while cur_blk != Block.no_blk:
     part = Partition(self.rawblk, cur_blk, part_no, self.rdb.log_drv.cyl_blks, self)
     part_no += 1
     if not part.read():
       self.valid = False
       return False
     self.parts.append(part)
     # remember the block and move on to the next partition
     self.used_blks.append(part.get_blk_num())
     cur_blk = part.get_next_partition_blk()

   # walk the filesystem list
   self.fs = []
   cur_blk = self.rdb.fs_list
   fs_no = 0
   while cur_blk != PartitionBlock.no_blk:
     file_sys = FileSystem(self.rawblk, cur_blk, fs_no)
     fs_no += 1
     if not file_sys.read():
       self.valid = False
       return False
     self.fs.append(file_sys)
     # remember all of its blocks and move on to the next filesystem
     self.used_blks += file_sys.get_blk_nums()
     cur_blk = file_sys.get_next_fs_blk()

   # TODO: add bad block blocks

   self.valid = True
   self.max_blks = self.rdb.log_drv.rdb_blk_hi + 1
   return True
Пример #10
0
def MST_Kruskal(g):
    """Compute a minimum spanning tree of a graph using Kruskal's algorithm.

    Return a list of edges that comprise the MST.
    The elements of the graph's edges are assumed to be weights.
    """
    tree = []
    pq = AdaptableHeapPriorityQueue()
    forest = Partition()

    # every vertex starts in a group of its own
    position = {v: forest.make_group(v) for v in g.vertices()}

    # queue all edges, keyed by their element (assumed to be the weight)
    for e in g.edges():
        pq.add(e.element(), e)

    edges_needed = g.vertex_count() - 1
    # keep going while the tree is not spanning and unprocessed edges remain
    while len(tree) != edges_needed and len(pq) != 0:
        # NOTE(review): item [1] of the removed entry is unpacked into a
        # (weight, edge) pair here -- confirm against the return shape of
        # AdaptableHeapPriorityQueue.remove_min().
        _, e = pq.remove_min()[1]
        u, v = e.endpoints()
        a = forest.find(position[u])
        b = forest.find(position[v])
        if a != b:
            # the edge joins two different groups, so it belongs to the tree
            forest.union(a, b)
            tree.append(e)
    return tree
Пример #11
0
    def getDisks(self):
        """
        Enumerate all the disks present on the system, updating the cmdb object accordingly

        A ``Disk`` entry is created or updated in ``self.cmdb.disks`` for each scanned disk;
        its partition list is rebuilt whenever the scan reports partitions. Entries that no
        longer appear in the scan are deleted and ``'disks'`` is marked as a dirty property.

        @return: the scan result returned by ``inventoryScan.getDisks()``
        """
        disks = j.cloud.cmdtools.inventoryScan.getDisks()
        seen = set()
        # items() instead of the Python-2-only iteritems()
        for name, value in disks.items():
            # 'GB' sizes are scaled by 1024; any other unit is used unchanged
            size = int(float(value['size']) * 1024) if value['unit'] == 'GB' else int(float(value['size']))
            partitions = value['partitions']
            seen.add(name)
            if name in self.cmdb.disks.keys():
                self.cmdb.disks[name].name = name
                self.cmdb.disks[name].size = size
            else:
                disk = Disk()
                disk.name = name
                disk.size = size
                self.cmdb.disks[name] = disk
            if not partitions:
                continue
            disk = self.cmdb.disks[name]
            disk.partitions = list()
            for part in partitions:
                partition = Partition(part['Type'],
                                      part['number'],
                                      part['start'],
                                      part['end'],
                                      # size string minus its last three characters
                                      int(float(part['size'][0:-3])),
                                      part['mountpoint'] if 'mountpoint' in part else '',
                                      part['used'] if 'used' in part else 0.0,
                                      part['name'] if 'name' in part else '',
                                      part['flag'] if 'flag' in part else '')
                if 'devices' in part:
                    partition.raid = PartitionRaid(part['level'], part['state'], part['devices'], part['activeDevices'],
                                                   part['failedDevices'], part['totalDevices'], part['raidDevices'],
                                                   part['spareDevices'], part['backendsize'])
                disk.partitions.append(partition)

        # Iterate over a copy of the keys so entries can be deleted safely
        # (a live key view must not change size during iteration on Python 3).
        for disk in list(self.cmdb.disks.keys()):
            if disk not in seen:
                del self.cmdb.disks[disk]
        self.cmdb.dirtyProperties.add('disks')
        return disks
Пример #12
0
    def __init__(self, shape = (0,0)) -> None:
        """Store *shape* and build a grid of Partition objects of that size.

        ``self.partitions`` becomes a list of ``shape[0]`` rows, each holding
        ``shape[1]`` Partition objects constructed from their (row, column)
        index tuple.
        """
        self.shape = shape
        self.partitions = [
            [Partition((row, col)) for col in range(shape[1])]
            for row in range(shape[0])
        ]
Пример #13
0
    def __init__(self, path):
        """Load the documents from *path* and create the helper objects.

        The text of each document is taken from the second tab-separated
        column of its line; a line without a tab raises ``IndexError``.
        Helper data is looked up relative to the directory containing *path*.

        :param path: tab-separated input file
        """
        data_home = os.path.dirname(path)
        self.Documents = []
        # ASCII letters, selected punctuation and digits. range() replaces the
        # Python-2-only xrange() and yields the same characters.
        letters = [chr(c) for c in range(ord('a'), ord('z') + 1)] + \
                  [chr(c) for c in range(ord('A'), ord('Z') + 1)]
        digits = [str(d) for d in range(10)]
        self.allowed = set(letters + [',', '.', '?', '-', '!', ' '] + digits)
        punctuation = [';', ':', '&', '?', "/"]

        self.tagger = PatternTagger()
        with open(path, 'r') as f:
            # stream the file instead of loading all lines at once
            for line in f:
                li = line.split("\t")[1].strip()
                if li:
                    self.Documents.append(li)
        data_Inter_path = os.path.join(data_home, "Intermediate")
        self.inter = data_Inter_path
        self.P = Partition(punctuation, data_Inter_path, data_home)
        self.sw = StopWords(data_home)
Пример #14
0
def MST_Kruskal(g):
    """
    Compute a minimum spanning tree of a graph using Kruskal's algorithm.

    Return the list of edges that make up the MST.
    Each edge's element is assumed to be its weight.
    """
    tree = []                 # edges accepted into the spanning tree
    pq = HeapPriorityQueue()  # edges of g, keyed by weight
    forest = Partition()      # tracks which cluster each vertex is in

    # map each vertex to its Partition entry
    position = {vertex: forest.make_group(vertex) for vertex in g.vertices()}

    for candidate in g.edges():
        pq.add(candidate.element(), candidate)

    edges_needed = g.vertex_count() - 1
    while True:
        # stop once the tree spans the graph or no edges are left
        if len(tree) == edges_needed or pq.is_empty():
            break
        _, edge = pq.remove_min()
        u, v = edge.endpoints()
        a = forest.find(position[u])
        b = forest.find(position[v])
        if a == b:
            # both endpoints already share a cluster; skip the edge
            continue
        tree.append(edge)
        forest.union(a, b)
    return tree
Пример #15
0
def MST_Kruskal(g):
    """Return the edges of a minimum spanning tree of *g* (Kruskal's algorithm).

    Edges are queued with ``e.element()`` as their key and taken in the order
    the priority queue's ``remove_min()`` hands them out.
    """
    tree = []
    pq = HeapPriorityQueue()
    forest = Partition()
    position = {}

    # one group per vertex to begin with
    for vertex in g.vertices():
        position[vertex] = forest.make_group(vertex)

    # queue every edge under its element
    for candidate in g.edges():
        pq.add(candidate.element(), candidate)

    size = g.vertex_count()

    # loop until the tree has size - 1 edges or the queue runs dry
    while not (len(tree) == size - 1 or pq.is_empty()):
        _weight, edge = pq.remove_min()
        first, second = edge.endpoints()
        group_a = forest.find(position[first])
        group_b = forest.find(position[second])

        # only an edge between two different groups is added
        if group_a != group_b:
            tree.append(edge)
            forest.union(group_a, group_b)

    return tree
Пример #16
0
    def __partitionMatch(self, logmessageL, logID):
        """
        Match ``logmessageL`` by LCS against the log clusters of its length group.

        :param logmessageL: token list of the log message to match
        :param logID: log id; when previously trained templates are loaded
            they carry no id, which is signalled by a non-positive value
        :return: the cluster that was created or whose template changed, or
            ``None`` when the matched cluster's template stayed the same
        """

        def make_cluster():
            # A non-positive logID means there is no id to record.
            ids = [logID] if logID > 0 else []
            return LCSObject(logmessageL, ids,
                             [s for s in logmessageL if s != '<*>'])

        length = len(logmessageL)

        # No group for this token count yet: open one with a fresh cluster.
        if length not in self.group:
            created = make_cluster()
            self.group[length] = Partition([created])
            return created

        # Otherwise look for a match inside the existing group.
        LCS, matchObject = self._LCSMatch(self.group[length].logClustL,
                                          logmessageL, self.max)

        # No match: the message becomes a new template of the group.
        if not LCS:
            created = make_cluster()
            self.group[length].logClustL.append(created)
            return created

        # Match: the LCS-derived template replaces the old one. The cluster
        # is withdrawn from the global list and prefix tree here; the merge
        # stage decides whether it is added again.
        template = self._getTemplate(LCS, logmessageL)
        if ' '.join(matchObject.logTemplate) == ' '.join(template):
            return None
        matchObject.logTemplate = template
        matchObject.constLogTemplate = [s for s in template if s != '<*>']
        if logID > 0:
            matchObject.logIDL.append(logID)
        if matchObject in self.logClustL:
            self.logClustL.remove(matchObject)
        self._removeSeqFromPrefixTree(self.rootNode, matchObject)
        return matchObject
Пример #17
0
 def __init__(self, path):
     """Read the non-empty lines of *path* and set up the cleaning helpers.

     :param path: input text file; each stripped, non-empty line becomes one
         entry of ``self.Documents``
     """
     self.Documents = []
     # Allowed characters: a-z, A-Z, some punctuation and 0-9.
     # range() is used because xrange() only exists on Python 2.
     lower = [chr(i) for i in range(ord('a'), ord('z') + 1)]
     upper = [chr(i) for i in range(ord('A'), ord('Z') + 1)]
     marks = [',', '.', '?', '-', '!', ' ']
     digits = [str(i) for i in range(10)]
     self.allowed = set(lower + upper + marks + digits)
     self.punctuation = [';',':','&', '?', "/"]
     self.P = Partition(self.punctuation)
     self.tagger = PatternTagger()
     self.sw = StopWords()
     with open(path,'r') as f:
         for line in f:
             line = line.strip()
             if line:
                 self.Documents.append(line)
Пример #18
0
	def generate_partitions(self, target_af):
		'''
		Build one Partition per utilization returned by gen_kato_utilizations.

		Args:
			- `target_af`: Total af of all partitions to reach.

		Returns:
			List of Partition objects, in the order of the generated values.
		'''
		# generate utilizations based on the number of tasks generated
		afs = self.gen_kato_utilizations(target_af, 0.1, 1)
		# NOTE(review): the original comment says "only generates regular
		# partitions", yet the second argument is drawn at random from 1 or 2
		# -- confirm what that argument means to Partition.
		return [Partition(af, random.randint(1, 2)) for af in afs]
Пример #19
0
    def generate_partitions(self, target_af):
        '''
        Build a mapping from index to Partition, one per generated utilization.

        Args:
            - `target_af`: Total af of all partitions to reach.

        Returns:
            Dict whose key ``i`` maps to ``Partition(i, afs[i])``.
        '''
        # generate utilizations based on the number of tasks generated
        afs = self.gen_kato_utilizations(target_af, 0, 1)
        # only generates regular partitions
        return {idx: Partition(idx, af) for idx, af in enumerate(afs)}
Пример #20
0
def system_init():
    """Load the CSV inputs and fill the module-level simulation state.

    Appends to ``node_list`` and ``taxi_list`` and (re)binds
    ``landmark_list``, ``partition_list`` and ``node_distance_matrix``.
    """
    global landmark_list, partition_list, taxi_list, node_distance_matrix

    print('type: {}'.format(type(osm_map)))
    print('System Initiating...')
    taxi_table = pd.read_csv('./data/taxi_info_list.csv')
    df = pd.read_csv('./data/node_list_with_cluster.csv')
    for row_label in df.index:
        row = df.loc[row_label]
        node_list.append(
            Node(row['real_id'], row['lon'], row['lat'],
                 int(row['cluster_id'])))

    # One (lon, lat, landmark_node_id) tuple per partition landmark; the list
    # index corresponds one-to-one to the index in partition_list.
    landmark_table = pd.read_csv('./data/landmark.csv')
    landmark_list = list(
        zip(*(landmark_table.loc[:, column]
              for column in ('lon', 'lat', 'landmark_node_id'))))

    # One slot per cluster id; a Partition is created the first time one of
    # its nodes is seen.
    partition_list = [None] * (max(df.loc[:, 'cluster_id']) + 1)
    for node_it in node_list:
        cid = node_it.cluster_id_belongto
        if partition_list[cid] is None:
            partition_list[cid] = Partition(cid, node_list=[], taxi_list=[])
        partition_list[cid].node_list.append(int(node_it.node_id))

    # Create every taxi and register it with the partition it starts in.
    for row_label in taxi_table.index:
        row = taxi_table.loc[row_label]
        home_partition = check_in_which_partition(
            row['init_lon'], row['init_lat'])
        taxi_list.append(
            Taxi(int(row['taxi_id']), row['init_lon'], row['init_lat'],
                 SYSTEM_INIT_TIME - TIME_OFFSET,
                 partition_id_belongto=home_partition, seat_left=3))
        partition_list[home_partition].taxi_list.append(int(row['taxi_id']))

    # Initialise the adjacency matrix from a shallow copy of the values
    node_distance_matrix = copy.copy(node_distance.values)
Пример #21
0
 def __init__(self, path):
     """Collect the non-empty lines of *path* and build the helper objects.

     :param path: input text file; every stripped, non-empty line is appended
         to ``self.Documents``
     """
     self.Documents = []
     # Characters that are kept: ASCII letters, six punctuation marks and
     # digits. Given as literals so the Python-2-only ``xrange`` is not used.
     self.allowed = set(
         "abcdefghijklmnopqrstuvwxyz"
         "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
         ",.?-! "
         "0123456789"
     )
     self.punctuation = [";", ":", "&", "?", "/"]
     self.P = Partition(self.punctuation)
     self.tagger = PatternTagger()
     self.sw = StopWords()
     with open(path, "r") as f:
         for line in f:
             line = line.strip()
             if line:
                 self.Documents.append(line)
Пример #22
0
def TestEOC(p, n_start, runs, a, b, appr):
    """Approximate a target function on refined uniform partitions and tabulate the error.

    p       -- if positive the target is x**p, otherwise -1/log(x/e)
    n_start -- first "n" handed to Partition.Uniform; it doubles every run
    runs    -- number of refinement runs
    a, b    -- passed to Partition.Uniform together with n
    appr    -- "Midpoint", "Linear" or "L2": the approximation to build

    Returns a DataFrame with the columns "N", "Error" and "EOC" taken from
    the ErrorManager. The last approximation and the EOC graph are plotted
    side by side.
    """
    dic = {
        "Midpoint" : MidpointApproximation,
        "Linear" : LinearContApproximation,
        "L2" : L2ConstantApproximation
    }

    # Define the target function
    f = (lambda x : x**p) if p > 0 else (lambda x : -1/(np.log(x/np.e)))

    # All tested "n" values: n_start, 2*n_start, 4*n_start, ...
    ns = [n_start * (2**k) for k in range(runs)]

    # The error manager uses the L2 norm for the L2 approximation and the
    # max norm for the others
    EM = ErrorManager(L2Norm if appr == "L2" else MaxNorm)

    interp = None
    for n in ns:
        u = Partition.Uniform(a, b, n)
        interp = dic[appr](u, f)  # build the requested approximation
        EM.PushError(interp, points = 50)  # let the manager record error and EOC

    # Put the error data in a table so it can be read
    d = PD.DataFrame({"N" : EM.dofs, "Error" : EM.errors, "EOC": EM.EOC})

    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(16,6))
    # Set axis limits
    ax1.set_xlim((-0.05, 1.05))
    ax1.set_ylim((-0.05, 1.05))

    interp.plot(ax1)  # plot the last approximation
    EM.PlotEOC(ax2)   # plot the EOC graph
    return d
Пример #23
0
def Kruskal(g: Graph):
    '''
    Compute a minimum spanning tree via Kruskal's algorithm.

    Return the list of edges that make up the MST.

    Edges are prioritised by ``edge.weight()``; vertices are tracked by
    their ``element()``.
    '''
    tree = []                           # edges accepted so far
    pq = AdaptableHeapPriorityQueue()   # edges of g keyed by weight
    forest = Partition()                # keeps track of forest clusters

    # map each vertex element to the vertex's Partition entry
    position = {
        vertex.element(): forest.make_group(vertex)
        for vertex in g.vertices()
    }

    for candidate in g.edges():
        pq.add(candidate.weight(), candidate)

    edges_needed = g.vertex_count() - 1

    # continue while the tree is not spanning and unprocessed edges remain
    while len(tree) != edges_needed and not pq.is_empty():
        _, edge = pq.remove_min()
        u, v = edge.endpoints()

        a = forest.find(position[u.element()])
        b = forest.find(position[v.element()])
        if a == b:
            # endpoints already connected; this edge is not taken
            continue

        tree.append(edge)
        forest.union(a, b)

    return tree
Пример #24
0
def solve_MST(graph):
    """Select spanning-tree edges from *graph*, print them and return them.

    All edges go into a Heap ordered by comparing ``edge.obj``; edges are
    popped one at a time and kept unless both endpoints are already in the
    partition and ``part.redundant()`` reports them as redundant.

    Returns the ``List`` of kept edges (its length and items are printed too).
    """
    heap = Heap(compare_fn=lambda e1, e2: (1 if e1.obj < e2.obj
                                           else (-1 if e1.obj > e2.obj else 0)))

    for candidate in graph.edges:
        heap.insert(candidate)

    part = Partition(value_fn=lambda v: v.obj, hash_fn=lambda v: hash(v.obj))

    final = List()

    # NOTE(review): the second clause keeps looping with an empty heap when
    # it is true -- confirm whether a termination test (and `and`) was meant.
    while len(heap) > 0 or (len(final) == len(graph.vertices)
                            and part.subsets == 1):
        edge = heap.pop()
        v1 = edge.v1
        v2 = edge.v2
        has_v1 = v1 in part
        has_v2 = v2 in part

        if has_v1 and has_v2:
            # both endpoints known: keep the edge only if it is not redundant
            if not part.redundant(v1, v2):
                final.insert(edge)
                part.set_union(v1, v2)
            continue

        # at least one endpoint is new: register it, then join and keep
        if not has_v1:
            part.add(v1)
        if not has_v2:
            part.add(v2)
        part.set_union(v1, v2)
        final.insert(edge)

    print(len(final))
    for kept in final:
        print(kept)

    return final
Пример #25
0
    def create_initial_policy(self) :
        """Build hand-written (state, action) examples and store them as a prior.

        Every combination of the summary-state feature values is enumerated;
        each retained combination becomes a ``SummaryState`` labelled with a
        summary action name. From these examples the method fills

        * ``self.D``  -- every (state, action) pair, for all actions in
          ``self.knowledge.summary_system_actions``
        * ``self.mu`` -- column matrix, 1.0 where the pair's action equals
          the example's label and 0.0 otherwise
        * ``self.C``  -- square matrix with 0.1 on the diagonal, 0 elsewhere

        NOTE: Python 2 only as written (``print`` statements, ``xrange``,
        ``sys.maxint``, ``range(...) + [...]``).
        """
        # Candidate values for each feature of the summary state
        probs = [x * 0.1 for x in xrange(0, 10)]
        num_goals = range(0, len(self.knowledge.goal_actions))
        num_uncertain_params = range(0, len(self.knowledge.goal_params)) + [sys.maxint]
        num_dialog_turns = range(0, 10)
        yes_no = ['yes', 'no']
        utterance_type = [None, 'inform_full', 'inform_param', 'affirm', 'deny']
        # Cartesian product of all feature values, in feature-vector order
        values = list(itertools.product(*[probs, probs, num_goals, num_uncertain_params, num_dialog_turns, yes_no, utterance_type]))

        # Warning: Make sure you have a default value for every param in 
        # Knowledge.goal_params
        default_param_values = dict()
        default_param_values['patient'] = ['alice']
        default_param_values['recipient'] = ['hamburger']
        default_param_values['location'] = ['l3_512']

        examples = list()
        # NOTE: the loop variables below reuse (and thereby rebind) the names
        # of the value lists defined above.
        for (top_prob, sec_prob, num_goals, num_uncertain_params, num_dialog_turns, match, utterance_type) in values :
            # Skip combinations that are filtered out: sys.maxint is only
            # accepted for the uncertain-parameter count when num_goals != 1,
            # and a missing utterance only when num_goals equals the number
            # of goal actions.
            if num_goals != 1 and num_uncertain_params != sys.maxint :
                continue
            elif num_goals == 1 and num_uncertain_params == sys.maxint :
                continue
            elif num_goals != len(self.knowledge.goal_actions) and utterance_type is None :
                continue
            s = SummaryState()
            s.knowledge = self.knowledge
            s.top_hypothesis_prob = top_prob
            s.second_hypothesis_prob = sec_prob
            s.num_dialog_turns = num_dialog_turns
        
            if num_goals != 1 :
                # Goal not narrowed down to one action: label 'repeat_goal'
                if utterance_type is not None :
                    utterance = Utterance(utterance_type)
                    s.top_hypothesis = (Partition(self.knowledge.goal_actions[0:num_goals]), utterance)
                    if match == 'yes' :
                        s.second_hypothesis = (Partition(self.knowledge.goal_actions[0:num_goals]), utterance)
            
                # Sanity check: the state must reproduce the features it was built from
                if s.get_feature_vector() != [top_prob, sec_prob, num_goals, num_uncertain_params, num_dialog_turns, match, utterance_type] :
                    print 'Problem!'
                    print s.get_feature_vector()
                    print (top_prob, sec_prob, num_goals, num_uncertain_params, num_dialog_turns, match, utterance_type)
                    print '\n'
                    
                examples.append((s, 'repeat_goal'))
            else :
                # Exactly one goal: the example is always built for 'remind'
                #for goal in self.knowledge.goal_actions :
                goal = 'remind'
                params = dict()
                
                param_order = self.knowledge.param_order[goal]
                if num_uncertain_params > len(param_order) :
                    continue
                num_certain_params = len(param_order) - num_uncertain_params  
                #print 'len(param_order) = ', len(param_order)
                #print 'num_uncertain_params = ', num_uncertain_params          
                #print 'num_certain_params = ', num_certain_params
                # The first num_certain_params parameters get their default
                # value; the remaining ones may take any value
                for (idx, param_name) in enumerate(param_order) :
                    if idx < num_certain_params :
                        params[param_name] = default_param_values[param_name]
                    else :
                        params[param_name] = self.knowledge.goal_params_values
                
                # A fresh state replaces the one created before the branch
                s = SummaryState()
                s.knowledge = self.knowledge
                s.top_hypothesis_prob = top_prob
                s.second_hypothesis_prob = sec_prob
                s.num_dialog_turns = num_dialog_turns        
                
                if utterance_type is not None :
                    utterance = Utterance(utterance_type)
                    s.top_hypothesis = (Partition([goal], params), utterance)
                    if match == 'yes' :
                        s.second_hypothesis = (Partition([goal], params), utterance)                    
            
                # Sanity check, as in the other branch
                if s.get_feature_vector() != [top_prob, sec_prob, num_goals, num_uncertain_params, num_dialog_turns, match, utterance_type] :
                    print 'Problem!'
                    print s.get_feature_vector()
                    print (top_prob, sec_prob, num_goals, num_uncertain_params, num_dialog_turns, match, utterance_type)
                    print s.top_hypothesis[0].possible_param_values
                    print '\n'
                                
                # With every parameter certain the label depends on the top
                # hypothesis probability; otherwise a parameter is requested
                if num_uncertain_params == 0 :
                    if top_prob < 0.3 :
                        action = 'request_missing_param'
                    elif top_prob < 0.9 :
                        action = 'confirm_action'
                    else :
                        action = 'take_action'
                else :
                    # NOTE(review): no branch assigns `action` for a negative
                    # count; the value lists above only yield counts >= 0
                    if num_uncertain_params > 0 :
                        action = 'request_missing_param'
                examples.append((s, action))
                    
        #print len(examples), 'examples'        
        # Pair every example state with every summary action; the mean is 1.0
        # for the action the example was labelled with and 0.0 for the rest
        D = list()
        mean = []
        cov = []
        actions = self.knowledge.summary_system_actions
        for (b, a) in examples :
            for a_prime in actions :
                D.append((b, a_prime))
                if a == a_prime :
                    mean.append(1.0)
                else :
                    mean.append(0.0)
        #print 'len(D) = ', len(D)
        # Diagonal covariance: 0.1 on the diagonal, zero elsewhere
        cov = np.matrix(np.zeros((len(D), len(D))))
        for i in range(0, len(D)) :
            cov[(i,i)] = 0.1
        self.D = D
        self.mu = np.matrix([[x] for x in mean])
        self.C = cov
Пример #26
0
class Clean:
    """Clean raw text documents and write POS-tagged sentence fragments.

    The documents are the non-blank lines of the file passed to the
    constructor.  ``clean_and_tag`` reduces each document to an allowed
    character set, breaks it into sentences and fragments, tags every
    fragment and writes the results to files under ``Intermediate/``.
    """

    def __init__(self, path):
        """Set up the helpers and load the documents stored in *path*.

        Every line of *path* is stripped; lines that are empty after
        stripping are skipped.
        """
        self.Documents = []
        # Characters that survive remove_things(); anything else is dropped.
        self.allowed = set(
            "abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "0123456789"
            ",.?-! ")
        self.punctuation = [';', ':', '&', '?', "/"]
        # presumably splits a sentence on the punctuation marks above;
        # confirm against the Partition implementation.
        self.P = Partition(self.punctuation)
        self.tagger = PatternTagger()
        self.sw = StopWords()
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if line:
                    self.Documents.append(line)

    def is_number(self, s):
        """Return True when ``float(s)`` accepts *s*, False on ValueError."""
        # NOTE: float() also accepts spellings such as "nan" and "inf".
        try:
            float(s)
        except ValueError:
            return False
        return True

    def remove_stopwords(self, words, pos):
        """Drop stop words from *words* together with their tags in *pos*.

        *words* and *pos* are parallel sequences.  Returns the pair
        ``(kept_words, kept_tags)``; if the sequences differ in length the
        surplus entries of the longer one are ignored.
        """
        kept_words = []
        kept_tags = []
        for word, tag in zip(words, pos):
            if not self.sw.isStopWord(word):
                kept_words.append(word)
                kept_tags.append(tag)
        return kept_words, kept_tags

    def replace_nums(self, s):
        """Return ``str(s)`` without a trailing period and numeric tokens.

        One trailing ``"."`` is removed, the text is split on whitespace,
        tokens accepted by ``is_number`` are discarded and the remaining
        tokens are joined with single spaces.  An empty input yields ``""``.
        """
        sent = str(s)
        # endswith() is safe on an empty string, unlike indexing the last
        # character, which raised IndexError.
        if sent.endswith("."):
            sent = sent[:-1]
        return " ".join(
            word for word in sent.split() if not self.is_number(word))

    def remove_things(self, string):
        """Normalise *string* and strip every character not in ``allowed``.

        Tabs become spaces and ``" and "`` becomes ``", and "`` before the
        character filter is applied.
        """
        string = string.replace("\t", " ")
        string = string.replace(" and ", ", and ")
        return "".join(char for char in string if char in self.allowed)

    def clean_and_tag(self):
        """Tag every fragment of every document and write the results.

        Four files are written, one line per fragment:

        * ``Intermediate/full_sentences.txt`` - ``doc:sentence:fragment:``
          key followed by the space-joined words,
        * ``Intermediate/full_pos.txt`` - the matching space-joined tags,
        * ``Intermediate/sentences.txt`` and ``Intermediate/pos.txt`` - the
          same two outputs after stop-word removal.
        """
        with open('Intermediate/full_sentences.txt', 'w') as f,\
                open('Intermediate/full_pos.txt', 'w') as g,\
                open('Intermediate/sentences.txt', 'w') as m,\
                open('Intermediate/pos.txt', 'w') as n:
            for i, doc in enumerate(self.Documents):
                if i % 10000 == 0 and i != 0:
                    # Single parenthesised argument: prints the same text
                    # with or without print_function.
                    print(str(i) + " documents processed.")
                blob = TextBlob(self.remove_things(doc))
                for j, sent in enumerate(blob.sentences):
                    split_sentence = self.P.split(self.replace_nums(sent))
                    for k, frag in enumerate(split_sentence):
                        sent_blob = TextBlob(frag, pos_tagger=self.tagger)
                        tagged = list(sent_blob.pos_tags)
                        words = [word for word, _ in tagged]
                        pos = [tag for _, tag in tagged]
                        # Key shared by both sentence files.
                        key = "%d:%d:%d:" % (i, j, k)
                        f.write(key + " ".join(words) + "\n")
                        g.write(" ".join(pos) + "\n")
                        no_stop_words, no_stop_pos = self.remove_stopwords(
                            words, pos)
                        m.write(key + " ".join(no_stop_words) + "\n")
                        n.write(" ".join(no_stop_pos) + "\n")
Пример #27
0
 def create_partitions_info(self):
     """Return a list with one Partition per (partition, usage) pair.

     The pairs come from ``self.disk_usage_for_partitions()`` and are kept
     in the order that call yields them.
     """
     infos = []
     for name, used in self.disk_usage_for_partitions():
         infos.append(Partition(name, used))
     return infos
Пример #28
0
class Clean:
    """Clean raw text documents and write POS-tagged sentence fragments.

    Documents are the non-blank lines of the file given to the constructor.
    ``clean_and_tag`` filters each document down to an allowed character
    set, splits it into sentences and fragments, tags each fragment and
    writes the output to files under ``Intermediate/``.
    """

    def __init__(self, path):
        """Create the helper objects and read the documents from *path*.

        Lines are stripped and lines that end up empty are ignored.
        """
        self.Documents = []
        # Only these characters are kept by remove_things().
        self.allowed = set(
            "abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "0123456789"
            ",.?-! ")
        self.punctuation = [';', ':', '&', '?', "/"]
        # presumably used to split sentences on the marks listed above;
        # confirm against the Partition implementation.
        self.P = Partition(self.punctuation)
        self.tagger = PatternTagger()
        self.sw = StopWords()
        with open(path, 'r') as f:
            for line in f:
                line = line.strip()
                if line:
                    self.Documents.append(line)

    def is_number(self, s):
        """Tell whether ``float(s)`` succeeds (False on ValueError)."""
        # NOTE: float() also accepts spellings such as "nan" and "inf".
        try:
            float(s)
        except ValueError:
            return False
        return True

    def remove_stopwords(self, words, pos):
        """Filter stop words out of *words* and the matching tags of *pos*.

        Both arguments are parallel sequences.  Returns
        ``(kept_words, kept_tags)``; when the lengths differ, the extra
        entries of the longer sequence are ignored.
        """
        new_sent = []
        new_pos = []
        for word, tag in zip(words, pos):
            if not self.sw.isStopWord(word):
                new_sent.append(word)
                new_pos.append(tag)
        return new_sent, new_pos

    def replace_nums(self, s):
        """Strip a trailing period and all numeric tokens from ``str(s)``.

        The text is split on whitespace, every token for which
        ``is_number`` is true is dropped, and the rest is joined with single
        spaces.  Empty input gives an empty string.
        """
        sent = str(s)
        # An empty sentence used to raise IndexError when its last character
        # was indexed; endswith() handles it.
        if sent.endswith("."):
            sent = sent[:-1]
        kept = [word for word in sent.split() if not self.is_number(word)]
        return " ".join(kept)

    def remove_things(self, string):
        """Replace tabs, rewrite ``" and "`` and drop disallowed characters.

        Tabs turn into spaces, ``" and "`` turns into ``", and "``, and then
        only characters contained in ``self.allowed`` are kept.
        """
        string = string.replace("\t", " ")
        string = string.replace(" and ", ", and ")
        return "".join(char for char in string if char in self.allowed)

    def clean_and_tag(self):
        """Process all documents and write four line-aligned output files.

        For each fragment of each sentence of each document one line goes to

        * ``Intermediate/full_sentences.txt`` - ``doc:sentence:fragment:``
          key plus the space-joined words,
        * ``Intermediate/full_pos.txt`` - the space-joined tags,
        * ``Intermediate/sentences.txt`` / ``Intermediate/pos.txt`` - the
          same after stop words have been removed.
        """
        with open('Intermediate/full_sentences.txt', 'w') as f,\
                open('Intermediate/full_pos.txt', 'w') as g,\
                open('Intermediate/sentences.txt', 'w') as m,\
                open('Intermediate/pos.txt', 'w') as n:
            for i, doc in enumerate(self.Documents):
                if i % 10000 == 0 and i != 0:
                    # One parenthesised argument keeps the output identical
                    # whether print is a statement or a function.
                    print(str(i) + " documents processed.")
                cleaned_doc = self.remove_things(doc)
                blob = TextBlob(cleaned_doc)
                for j, sent in enumerate(blob.sentences):
                    sent = self.replace_nums(sent)
                    for k, frag in enumerate(self.P.split(sent)):
                        sent_blob = TextBlob(frag, pos_tagger=self.tagger)
                        tagged = list(sent_blob.pos_tags)
                        words = [word for word, _ in tagged]
                        pos = [tag for _, tag in tagged]
                        # Same key for the full and the stop-word-free line.
                        key = "%d:%d:%d:" % (i, j, k)
                        f.write(key + " ".join(words) + "\n")
                        g.write(" ".join(pos) + "\n")
                        no_stop_words, no_stop_pos = self.remove_stopwords(
                            words, pos)
                        m.write(key + " ".join(no_stop_words) + "\n")
                        n.write(" ".join(no_stop_pos) + "\n")
Пример #29
0
    def partition_with_overlap(self, base_partition_weight, forward_overlap,
                               backward_overlap):
        """
        Partition the graph by weight, then grow each partition by an overlap.

        The graph is first split with ``self.partition``.  The partitions are
        re-keyed by position, a Partition object is built for each and linked
        to the partitions reached from its end nodes and leading into its
        start nodes, the overlap helpers are run in both directions, and
        finally the forward/backward node sets are made disjoint from the
        partition's own nodes (and forward from backward).  Weight and
        overlaps are given in minutes.

        Raises TypeError unless the arguments are a float, an int and an int.
        """
        if (not isinstance(base_partition_weight, float)
                or not isinstance(forward_overlap, int)
                or not isinstance(backward_overlap, int)):
            raise TypeError(
                "Please only call this function with float, int and int as the arguments"
            )

        self.partition(base_partition_weight)

        # Key every partition by its position and stamp that id onto it.
        self.partitions = dict(enumerate(self.partitions))
        for part_id in self.partitions:
            self.reset_partition_id(self.partitions[part_id], part_id)

        # Build one Partition object per id and record its neighbours.
        self.partition_graph = {}
        for part_id in self.partitions:
            members = self.partitions[part_id]
            current = Partition(part_id)
            current.add_nodes(members)
            tail_nodes = self.find_partition_edge_nodes(members, end=True)
            head_nodes = self.find_partition_edge_nodes(members, end=False)

            for node in tail_nodes:
                for successor in self.adjacency_dict[node].forward_nodes:
                    neighbour_id = self.adjacency_dict[successor].partition_id
                    current.add_forward_partition(neighbour_id)

            for node in head_nodes:
                for predecessor in self.adjacency_dict[node].backward_nodes:
                    neighbour_id = self.adjacency_dict[predecessor].partition_id
                    current.add_backward_partition(neighbour_id)

            self.partition_graph[part_id] = current

        # Extend every partition into its neighbours, forwards then backwards.
        for part_id in self.partition_graph:
            for neighbour in self.partition_graph[part_id].forward_partitions:
                self.forward_overlap_helper(part_id, neighbour, 0,
                                            forward_overlap)

            for neighbour in self.partition_graph[part_id].backward_partitions:
                self.backward_overlap_helper(part_id, neighbour, 0,
                                             backward_overlap)

        # Make the three node sets disjoint: own nodes win over backward
        # nodes, which win over forward nodes.
        for part_id in self.partition_graph:
            entry = self.partition_graph[part_id]
            core = entry.nodes
            ahead = entry.forward_nodes
            behind = entry.backward_nodes

            entry.forward_nodes = ahead - behind - core
            entry.backward_nodes = behind - core
Пример #30
0
        cluster = clusters[i]
        if len(cluster) == 0:
            continue
        color_val = scalar_map.to_rgba(i)
        for line in cluster:
            plt.arrow(line.a[0],
                      line.a[1],
                      line.vector[0],
                      line.vector[1],
                      color=color_val)


"""
Setup! Processes the first n lines of the csv to begin the clustering.
"""
# Build the partitioner and the clusterer from module-level tuning constants.
partitioner = Partition(LIKELIHOOD_THRES, MIN_VELOCITY)
clusterer = Cluster(EPSILON, MIN_LINES)
# NOTE(review): 0 and 4000 look like the range of csv rows to load from
# FILE_NAME -- confirm against Partition.pre_process.
partitioner.pre_process(FILE_NAME, 0, 4000)
partitions = partition(partitioner)
# Cluster the first two entries of `partitions` separately, then pool the
# results of the second run into the collection returned by the first.
clusters = clusterer.segment_cluster(partitions[0])
for cluster in clusterer.segment_cluster(partitions[1]):
    clusters.append(cluster)
# Show the reference image and hand the pooled clusters to plot_clusters.
fig = plt.figure()
cmap = plt.cm.jet
# plt.ion()
img = mpimg.imread("ref.png")
plt.imshow(img)
plot_clusters(clusters)
# plt.draw()
"""
Real time!