def calculate_non_overlapping_range_with(self, occupied):
    """Trim this block's occurrence ranges against ``occupied``.

    Each value from ``self.block_occurrences()`` contributes the range
    ``[occurrence, occurrence + self.minimum_block_length)``.

    Returns the untrimmed RangeSet when nothing intersects ``occupied``,
    ``None`` when no trimmed range could be built, and otherwise the
    trimmed RangeSet.

    Raises PartialOverlapException when the first contiguous slice of the
    trimmed result spans more than ``self.minimum_block_length`` positions.
    """
    candidate = RangeSet()
    for start in self.block_occurrences():
        candidate.add_range(start, start + self.minimum_block_length)

    overlap = candidate.intersection(occupied)
    if not overlap:
        # Nothing collides: the complete range is usable.
        return candidate

    trimmed = RangeSet()
    for span in candidate.contiguous():
        span_start = span[0]
        # First overlapping stretch that begins at or after this span.
        blocker = next(
            (hit for hit in overlap.contiguous() if hit[0] >= span_start),
            None)
        if blocker is None:
            continue
        blocker_start = blocker[0]
        if span_start != blocker_start:
            # Keep only the part in front of the overlapping stretch.
            trimmed.add_range(span_start, blocker_start)

    if not trimmed:
        # Nothing survived the trimming.
        return None

    # The first surviving slice must not be longer than a block.
    leading = next(trimmed.contiguous())
    if leading[-1] - leading[0] + 1 > self.minimum_block_length:
        raise PartialOverlapException()
    return trimmed
def calculate_non_overlapping_range_with(self, occupied):
    """Compute the part of this block's range not colliding with ``occupied``.

    Every occurrence reported by ``self.block_occurrences()`` is turned
    into the range ``[occurrence, occurrence + self.minimum_block_length)``.

    Returns:
        * the complete potential range when it does not intersect
          ``occupied``;
        * ``None`` when no non-overlapping range could be built;
        * otherwise a RangeSet in which each potential range is cut off at
          the start of the first overlapping stretch found at or after it.

    Raises:
        PartialOverlapException: when the first contiguous slice of the
            result spans more than ``self.minimum_block_length`` positions.
    """
    # convert block occurrences into ranges
    potential_block_range = RangeSet()
    for occurrence in self.block_occurrences():
        potential_block_range.add_range(
            occurrence, occurrence + self.minimum_block_length)

    # check the intersection with the already occupied ranges
    block_intersection = potential_block_range.intersection(occupied)
    if not block_intersection:
        # no overlap, return complete block_range
        return potential_block_range

    # There is overlap with the occupied range; cut the ranges down.
    real_block_range = RangeSet()
    for potential in potential_block_range.contiguous():
        lower = potential[0]
        # Find-first over the generator instead of building a full list.
        first_overlap = next(
            (x for x in block_intersection.contiguous() if x[0] >= lower),
            None)
        if first_overlap is None:
            continue
        upper = first_overlap[0]
        if lower != upper:
            real_block_range.add_range(lower, upper)

    if not real_block_range:
        # Nothing is left after cutting, so return None
        return None

    # Check that the first slice is not larger than the potential block
    # length. The builtin next() works on Python 2.6+ and Python 3, whereas
    # the generator method .next() exists on Python 2 only.
    first_range = next(real_block_range.contiguous())
    if first_range[-1] - first_range[0] + 1 > self.minimum_block_length:
        raise PartialOverlapException()
    return real_block_range
def _prepare_token_array(self):
    """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

    For every witness a RangeSet starting at ``self.counter`` and covering
    its tokens is stored under the witness sigil, each token is tagged with
    ``_sigil`` and ``_token_array_position``, and a ``$<idx>`` marker token
    is appended after the witness. The marker that follows the last witness
    is removed again at the end.
    """
    # TODO: the lazy init should move to somewhere else
    # Invalidate the cached suffix array.
    self.cached_suffix_array = None

    next_position = 0
    for idx, witness in enumerate(self.witnesses):
        sigil = witness.sigil
        # The original also reads the same token objects through repeated
        # tokens() calls, so one call is enough.
        tokens = witness.tokens()
        token_count = len(tokens)

        span = RangeSet()
        span.add_range(self.counter, self.counter + token_count)
        # One extra slot per witness for its marker token.
        self.counter += token_count + 1
        self.witness_ranges[sigil] = span

        for position, token in enumerate(tokens, next_position):
            token.token_data['_sigil'] = sigil
            token.token_data['_token_array_position'] = position
        self.token_array.extend(tokens)

        self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
        # Skip past the tokens plus the marker that was just appended.
        next_position += token_count + 1

    # Drop the marker that trails the final witness.
    self.token_array.pop()
def add_witness(self, witnessdata):
    """Wrap ``witnessdata`` in a Witness and register it.

    Appends the witness to ``self.witnesses``, stores a RangeSet starting
    at ``self.counter`` and covering its tokens under the witness sigil,
    and appends the witness content to ``self.combined_string``. When the
    combined string is not empty, `` $<n> `` is inserted first, where
    ``<n>`` is the zero-based index of the new witness.
    """
    # Invalidate the cached suffix array.
    self.cached_suffix_array = None

    witness = Witness(witnessdata)
    self.witnesses.append(witness)

    token_count = len(witness.tokens())
    span = RangeSet()
    span.add_range(self.counter, self.counter + token_count)
    # Two extra slots: the "$" and the number that follows it.
    self.counter += token_count + 2
    self.witness_ranges[witness.sigil] = span

    if self.combined_string != "":
        separator = " $" + str(len(self.witnesses) - 1) + " "
        self.combined_string += separator
    self.combined_string += witness.content
def _prepare_token_array(self):
    """Build ``self.token_array`` from the witnesses, separated by markers.

    For every witness a RangeSet starting at ``self.counter`` and covering
    its tokens is stored in ``self.witness_ranges`` under the witness
    sigil. Whenever ``self.token_array`` already holds something, a
    ``$<idx - 1>`` marker token is appended before the witness tokens.
    """
    # TODO: the lazy init should move to somewhere else
    # Invalidate the cached suffix array.
    self.cached_suffix_array = None

    for idx, witness in enumerate(self.witnesses):
        token_count = len(witness.tokens())

        span = RangeSet()
        span.add_range(self.counter, self.counter + token_count)
        # One extra slot per witness for the marker token.
        self.counter += token_count + 1
        self.witness_ranges[witness.sigil] = span

        if self.token_array:
            # Separate this witness from what is already in the array.
            marker = Token({"n": "$" + str(idx - 1)})
            self.token_array.append(marker)
        self.token_array.extend(witness.tokens())
def _prepare_token_array(self):
    """Lay all witness tokens out in ``self.token_array``.

    Each witness gets a RangeSet of ``len(tokens)`` positions starting at
    the running ``self.counter``, stored in ``self.witness_ranges`` by
    sigil. A marker token named ``$<idx - 1>`` is put in front of a witness
    when the token array is non-empty at that point.
    """
    # TODO: the lazy init should move to somewhere else
    self.cached_suffix_array = None  # cached suffix array is now stale

    for idx, witness in enumerate(self.witnesses):
        tokens = witness.tokens()
        first = self.counter
        last_exclusive = first + len(tokens)

        witness_span = RangeSet()
        witness_span.add_range(first, last_exclusive)
        # Advance past the tokens and one marker slot.
        self.counter = last_exclusive + 1
        self.witness_ranges[witness.sigil] = witness_span

        if self.token_array:
            self.token_array.append(Token({"n": "$" + str(idx - 1)}))
        self.token_array.extend(tokens)
def add_witness(self, witnessdata):
    """Wrap ``witnessdata`` in a Witness and register it.

    Appends the witness to ``self.witnesses``, stores a RangeSet starting
    at ``self.counter`` and covering its tokens under the witness sigil,
    and appends the ``token_string`` of every token to
    ``self.combined_tokens``. From the second witness on, the two entries
    ``'$'`` and the zero-based witness index (as a string) are added first.
    """
    # Invalidate the cached suffix array.
    self.cached_suffix_array = None

    witness = Witness(witnessdata)
    self.witnesses.append(witness)

    tokens = witness.tokens()
    span = RangeSet()
    span.add_range(self.counter, self.counter + len(tokens))
    # Two extra slots: the "$" and the number that follows it.
    self.counter += len(tokens) + 2
    self.witness_ranges[witness.sigil] = span

    witness_total = len(self.witnesses)
    if witness_total > 1:
        self.combined_tokens.extend(['$', str(witness_total - 1)])
    self.combined_tokens.extend(tk.token_string for tk in tokens)
def _prepare_token_array(self):
    """Fill ``self.token_array`` and ``self.witness_ranges`` from the witnesses.

    For every witness:
      * a RangeSet starting at ``self.counter`` and covering its tokens is
        stored in ``self.witness_ranges`` under the witness sigil;
      * each token gets ``_sigil`` and ``_token_array_position`` entries in
        its ``token_data``;
      * the tokens, followed by a ``$<idx>`` marker token, are appended to
        ``self.token_array``.

    The marker appended after the last witness is removed again. When
    there are no witnesses nothing is appended, so nothing is removed
    (previously this raised IndexError on an empty token array, or dropped
    an unrelated token from a non-empty one).
    """
    # TODO: the lazy init should move to somewhere else
    # clear the suffix array and LCP array cache
    self.cached_suffix_array = None
    token_array_position = 0
    marker_appended = False
    for idx, witness in enumerate(self.witnesses):
        # The original annotated the result of one tokens() call and
        # appended the result of another, so it already relied on tokens()
        # handing back the same token objects; fetch them once.
        tokens = witness.tokens()
        sigil = witness.sigil

        witness_range = RangeSet()
        witness_range.add_range(self.counter, self.counter + len(tokens))
        # the extra one is for the marker token
        self.counter += len(tokens) + 1
        self.witness_ranges[sigil] = witness_range

        for token in tokens:
            token.token_data['_sigil'] = sigil
            token.token_data['_token_array_position'] = token_array_position
            token_array_position += 1
        self.token_array.extend(tokens)

        # add marker token
        self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
        token_array_position += 1
        marker_appended = True

    if marker_appended:
        self.token_array.pop()  # remove last marker
def _get_non_overlapping_repeating_blocks(self):
    """Select blocks from the LCP intervals so that none of them overlap.

    All intervals from ``self.token_index.split_lcp_array_into_intervals()``
    are pushed on a priority queue and taken off one at a time; the order
    is whatever the intervals' own comparison defines. Positions already
    claimed are tracked in a RangeSet.

    * No overlap with claimed positions: the interval becomes a Block.
    * Complete overlap: the interval is skipped.
    * Partial overlap: the free pieces that start at an occurrence start
      are kept; if at least two remain they are all shortened to the
      shortest piece (never longer than ``item.length``) and become a Block.

    Returns the list of selected Block objects.
    """
    pending = PriorityQueue()
    for candidate in self.token_index.split_lcp_array_into_intervals():
        pending.put(candidate)

    occupied = RangeSet()
    real_blocks = []
    while not pending.empty():
        item = pending.get()
        candidate_range = item._as_range()
        overlap = candidate_range.intersection(occupied)

        if not overlap:
            # Completely free: take the interval as it is.
            occupied.union_update(candidate_range)
            real_blocks.append(Block(candidate_range))
            continue
        if overlap == candidate_range:
            # Completely covered already: nothing to gain.
            continue

        # Partial overlap. Left-hand overlap: only pieces that still begin
        # at an occurrence start are usable.
        free_parts = candidate_range.difference(overlap)
        start_positions = item.block_occurrences()
        kept = RangeSet()
        kept_count = 0
        for part in free_parts.contiguous():
            if part[0] in start_positions:
                kept.add_range(part[0], part[-1] + 1)
                kept_count += 1
        if kept_count < 2:
            continue

        # Right-hand overlap: every piece is cut to the shortest one.
        shortest = item.length
        for part in kept.contiguous():
            shortest = min(shortest, len(part))

        selected = RangeSet()
        for part in kept.contiguous():
            selected.add_range(part[0], part[0] + shortest)

        occupied.union_update(selected)
        real_blocks.append(Block(selected))
    return real_blocks
def _get_non_overlapping_repeating_blocks(self):
    """Turn the potential blocks (LCP intervals) into non-overlapping Blocks.

    The intervals returned by
    ``self.token_index.split_lcp_array_into_intervals()`` are processed in
    priority-queue order. An interval that does not touch any position
    claimed so far is accepted whole; one that is fully covered is dropped;
    one that is partially covered is reduced to its free pieces starting at
    an occurrence start, and accepted (with all pieces cut to a common
    length) only when at least two such pieces exist.

    Returns the accepted Block objects in the order they were accepted.
    """
    intervals = self.token_index.split_lcp_array_into_intervals()
    work_queue = PriorityQueue()
    for interval in intervals:
        work_queue.put(interval)

    occupied = RangeSet()
    real_blocks = []
    while not work_queue.empty():
        item = work_queue.get()
        block_range = item._as_range()
        taken = block_range.intersection(occupied)

        if not taken:
            occupied.union_update(block_range)
            real_blocks.append(Block(block_range))
        elif taken != block_range:
            # Partial overlap: look at what is still free.
            remainder = block_range.difference(taken)
            starts = item.block_occurrences()
            usable = [piece for piece in remainder.contiguous()
                      if piece[0] in starts]

            survivors = RangeSet()
            for piece in usable:
                survivors.add_range(piece[0], piece[-1] + 1)

            if len(usable) >= 2:
                # Common length: the shortest surviving piece, capped at
                # the interval's own length.
                common_length = min(
                    [item.length]
                    + [len(piece) for piece in survivors.contiguous()])

                partial = RangeSet()
                for piece in survivors.contiguous():
                    partial.add_range(piece[0], piece[0] + common_length)

                occupied.union_update(partial)
                real_blocks.append(Block(partial))
        # else: complete overlap, skip the interval
    return real_blocks
def testAddRange(self):
    """test RangeSet.add_range()"""

    def check(rset, size, text):
        # Length first, then string form, exactly as a pair.
        self.assertEqual(len(rset), size)
        self.assertEqual(str(rset), text)

    # Default RangeSet: a stepped range is listed element by element.
    r1 = RangeSet()
    r1.add_range(1, 100, 1)
    check(r1, 99, "1-99")
    r1.add_range(40, 101, 1)
    check(r1, 100, "1-100")
    r1.add_range(399, 423, 2)
    check(r1, 112,
          "1-100,399,401,403,405,407,409,411,413,415,417,419,421")

    # With autostep the same stepped range is folded into a/b form.
    r1 = RangeSet(autostep=3)
    r1.add_range(1, 100, 1)
    self.assertEqual(r1.autostep, 3)
    check(r1, 99, "1-99")
    r1.add_range(40, 101, 1)
    check(r1, 100, "1-100")
    r1.add_range(399, 423, 2)
    check(r1, 112, "1-100,399-421/2")

    # Bound checks
    r1 = RangeSet("1-30", autostep=2)
    check(r1, 30, "1-30")
    self.assertEqual(r1.autostep, 2)
    r1.add_range(32, 35, 1)
    check(r1, 33, "1-30,32-34")
    r1.add_range(31, 32, 1)
    check(r1, 34, "1-34")

    r1 = RangeSet("1-30/4")
    check(r1, 8, "1,5,9,13,17,21,25,29")
    r1.add_range(30, 32, 1)
    check(r1, 10, "1,5,9,13,17,21,25,29-31")
    r1.add_range(40, 65, 10)
    check(r1, 13, "1,5,9,13,17,21,25,29-31,40,50,60")

    r1 = RangeSet("1-30", autostep=3)
    r1.add_range(40, 65, 10)
    self.assertEqual(r1.autostep, 3)
    check(r1, 33, "1-29,30-60/10")

    # One element
    r1.add_range(103, 104)
    check(r1, 34, "1-29,30-60/10,103")

    # Zero elements: an empty range is rejected
    self.assertRaises(AssertionError, r1.add_range, 103, 103)
def _as_range(self):
    """Return a RangeSet covering every occurrence of this interval.

    Each value from ``self.block_occurrences()`` contributes the range
    ``[occurrence, occurrence + self.minimum_block_length)``.
    """
    block_length = self.minimum_block_length
    covered = RangeSet()
    for start in self.block_occurrences():
        covered.add_range(start, start + block_length)
    return covered
def select_nodes(self, profil, name, nb_nodes, host):
    '''Select nodes to spawn

    Picks node indexes for cluster ``name`` that do not collide with the
    names of the nodes returned by ``self.list_nodes(byhost=False)``, then
    adds one node per index to a new VirtualCluster on ``host``.

    Returns the list of nodes added to the cluster. When ``host`` is None,
    ``name`` is not a valid cluster name, or ``profil`` is not a known
    profile, the error is logged, sent on ``self.rep_sock``, and an empty
    list is returned.
    '''

    def reject(message):
        # Log the problem, report it on the reply socket, select nothing.
        _LOGGER.error(message)
        self.rep_sock.send(msgpack.packb(('', [message])))
        return []

    if host is None:
        return reject("Error: No host available\n")
    if not vc.VirtualCluster.valid_clustername(name):
        return reject(
            "Error: clustername '{}' is not a valid name\n".format(name))
    if profil not in self.profiles:
        return reject(
            "Error: Profil '{}' not found in configuration file\n".format(
                profil))

    # 1: names of the nodes that already exist
    existing = NodeSet.fromlist(
        [node.name for node in self.list_nodes(byhost=False)])

    # 2: start from indexes 0..nb_nodes-1 and move away from collisions
    pattern = name + '%s'
    base_range = RangeSet("%d-%d" % (0, nb_nodes - 1))
    base_nodeset = NodeSetBase(pattern, base_range)
    clashes = existing.intersection(base_nodeset)
    while len(clashes) > 0:
        indexes = [clustdock.VirtualNode.split_name(node)[1]
                   for node in clashes]
        for idx in indexes:
            _LOGGER.debug("Removing %d from rangeset %s", idx, base_range)
            base_range.remove(idx)
        base_nodeset.difference_update(clashes)
        _LOGGER.debug("Nodeset becomes '%s' after removing", base_nodeset)

        # Continue after the highest index seen so far.
        idx_min = max(indexes + list(base_range)) + 1
        idx_max = idx_min + max(len(indexes), nb_nodes - len(base_range))
        base_range.add_range(idx_min, idx_max)
        _LOGGER.debug("New rangeset: %s", base_range)
        replacement = NodeSetBase(
            pattern, RangeSet.fromlist([range(idx_min, idx_max)]))
        base_nodeset.update(replacement)
        _LOGGER.debug("New nodeset: %s", base_nodeset)
        clashes = existing.intersection(base_nodeset)

    _LOGGER.debug("final rangeset/nodeset: %s / %s", base_range, base_nodeset)

    # 3: create the cluster and one node per selected index
    cluster = vc.VirtualCluster(name, profil, self.profiles[profil])
    return [cluster.add_node(idx, host) for idx in base_range]
def testAddRange(self):
    """test RangeSet.add_range()"""

    def grow(rset, bounds, size, text):
        # Add one range, then verify the resulting length and string form.
        rset.add_range(*bounds)
        self.assertEqual(len(rset), size)
        self.assertEqual(str(rset), text)

    # Without autostep
    plain = RangeSet()
    grow(plain, (1, 100, 1), 99, "1-99")
    grow(plain, (40, 101, 1), 100, "1-100")
    grow(plain, (399, 423, 2), 112,
         "1-100,399,401,403,405,407,409,411,413,415,417,419,421")

    # With autostep...
    stepped = RangeSet(autostep=3)
    stepped.add_range(1, 100, 1)
    self.assertEqual(stepped.autostep, 3)
    self.assertEqual(len(stepped), 99)
    self.assertEqual(str(stepped), "1-99")
    grow(stepped, (40, 101, 1), 100, "1-100")
    grow(stepped, (399, 423, 2), 112, "1-100,399-421/2")

    # Bound checks
    bounded = RangeSet("1-30", autostep=2)
    self.assertEqual(len(bounded), 30)
    self.assertEqual(str(bounded), "1-30")
    self.assertEqual(bounded.autostep, 2)
    grow(bounded, (32, 35, 1), 33, "1-30,32-34")
    grow(bounded, (31, 32, 1), 34, "1-34")

    sparse = RangeSet("1-30/4")
    self.assertEqual(len(sparse), 8)
    self.assertEqual(str(sparse), "1,5,9,13,17,21,25,29")
    grow(sparse, (30, 32, 1), 10, "1,5,9,13,17,21,25,29-31")
    grow(sparse, (40, 65, 10), 13, "1,5,9,13,17,21,25,29-31,40,50,60")

    mixed = RangeSet("1-30", autostep=3)
    mixed.add_range(40, 65, 10)
    self.assertEqual(mixed.autostep, 3)
    self.assertEqual(len(mixed), 33)
    self.assertEqual(str(mixed), "1-29,30-60/10")

    # One
    grow(mixed, (103, 104), 34, "1-29,30-60/10,103")

    # Zero
    self.assertRaises(AssertionError, mixed.add_range, 103, 103)