Пример #1
0
def rangeQuery(api, start, end):
    """Return all records whose timestamp lies in [start, end].

    Records are bucketed by ``timestamp // SCALE``.  Buckets strictly inside
    the range are fetched whole via ``pointQuery``; the two partially-covered
    boundary buckets are filtered with a SortedList range query.

    Parameters
    ----------
    api : object
        Chain client exposing ``liststreamkeyitems``.
    start, end : int
        Inclusive timestamp bounds.

    Returns
    -------
    list
        Matching records (strings whose first space-separated field is the
        timestamp).
    """
    TIMESTAMPE = 'Timestamp'
    result = []
    # Fully-covered interior buckets: take their contents wholesale.
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMPE, str(ts))
        if temp:
            result += temp
    # Partially-covered boundary buckets: query each bucket once, even when
    # start and end fall in the same bucket (querying it twice, as the
    # original code did, duplicated its results).
    buckets = [start // SCALE]
    if end // SCALE != start // SCALE:
        buckets.append(end // SCALE)
    for bucket in buckets:
        temp = api.liststreamkeyitems(TIMESTAMPE, str(bucket))['result']
        if temp:
            data = getData(temp)
            # NOTE(review): keys compare lexicographically as strings --
            # assumes fixed-width timestamps; confirm.
            sl = SortedList(data, key=lambda a: a.split(" ")[0])
            result += list(sl.irange(str(start), str(end)))

    return result
Пример #2
0
    def _find_general_vector(self):
        """Generate a (non-unit) vector that is general relative to edges.

        Collects every edge frangle together with its three quarter-turn
        rotations, then returns a vector whose frangle lies strictly between
        the two smallest collected frangles, so it coincides with no edge
        direction or perpendicular.
        """
        frangles = SortedList()

        for half_edge in self.half_edges.values():
            frangle = half_edge.frangle
            # Insert the frangle rotated by 0, 90, 180 and 270 degrees so the
            # result is also general w.r.t. perpendiculars.
            for quarter_turns in range(4):
                frangles.add(
                    (frangle + quarter_turns * FRANGLE_90) % MAX_FRANGLE)

        if not frangles:
            # No edges at all: any direction works; pick the unit Y vector.
            return (Fraction(0), Fraction(1))

        # Smallest frangle, then the smallest one strictly greater than it.
        first = frangles[0]
        second = next(frangles.irange(first, None, inclusive=(False, True)))

        assert first != second

        # The midpoint of two consecutive distinct frangles is strictly
        # between them, hence general.
        return frangle_unit_square_vector((first + second) / 2)
Пример #3
0
def rangeQuery(api, start, end):
    """Return all records with timestamp in [start, end] from the
    multi-level bucket index.

    The coarsest level that contains at least one fully-covered bucket
    supplies those buckets whole; the two partially-covered boundary buckets
    of the top level are then filtered with a SortedList range query.
    """
    result = []
    # Scan levels from coarsest to finest; stop at the first level that has
    # buckets fully inside (start, end).
    for level in reversed(range(NLEVEL)):
        cStep = SCALE * STEP**level
        if start // cStep + 1 < end // cStep:
            for ts in range(start // cStep + 1, end // cStep):
                result += pointQuery(api, PREFIX + str(cStep), str(ts))
            break
    # Boundary buckets at the top level are only partially covered: filter
    # their records by timestamp.  Query each bucket once, even when start
    # and end share a bucket (the original queried it twice, duplicating
    # its results).
    cStep = SCALE * STEP**(NLEVEL - 1)
    buckets = [start // cStep]
    if end // cStep != start // cStep:
        buckets.append(end // cStep)
    for bucket in buckets:
        data = pointQuery(api, PREFIX + str(cStep), str(bucket))
        if data:
            # NOTE(review): lexicographic string comparison -- assumes
            # fixed-width timestamps; confirm.
            sl = SortedList(data, key=lambda a: a.split(" ")[0])
            result += list(sl.irange(str(start), str(end)))

    return result
Пример #4
0
def coincident_indices(list0, list1, delta):
    """Get indices of coincident times in both lists as dictionary.

    Parameters
    ----------
    list0 : list
        List of times
    list1 : list
        List of times (preferably longer than ``list0``)
    delta : float
        Time-delta slice

    Returns
    -------
    coincidents : dict
        {index_list0: index_list1}

    Note
    ----
    Indices refer to positions in the *sorted* copies of the inputs, since
    both lists are loaded into SortedLists before matching.
    """
    slist0 = SortedList(list0)
    slist1 = SortedList(list1)

    coincidents = {}

    for t0 in slist0:
        # Candidates within the +/- delta window around t0.
        times = list(slist1.irange(t0 - delta, t0 + delta))
        if times:
            # min() keeps the first minimum on ties, matching np.argmin.
            closest = min(times, key=lambda t1: abs(t0 - t1))
            coincidents[slist0.index(t0)] = slist1.index(closest)

    return coincidents
Пример #5
0
def rangeQuery(api, start, end):
    """Return decoded records whose timestamp key lies in [start, end].

    Lists every timestamp key of the stream, selects the in-range keys with
    a SortedList, fetches their first items in one batched ``liststreamkeys``
    call, and hex-decodes each payload.
    """
    stream = att_dict['T']
    timestamps = api.liststreamkeys(stream)["result"]
    sl = SortedList(int(key['key']) for key in timestamps)
    temp = api.liststreamkeys(stream, list(map(str, sl.irange(start, end))),
                              True)["result"]
    # Decode each hex-encoded payload back to text.
    return [
        bytes.fromhex(r["first"]["data"]).decode(ENCODE_FORMAT) for r in temp
    ]
Пример #6
0
def rangeQuery(api, start, end):
    """Return all records with timestamp in [start, end] from the
    multi-level bucket index.

    The coarsest level that contains at least one fully-covered bucket
    supplies those buckets whole; the two partially-covered boundary buckets
    of the top level are then filtered with a SortedList range query.
    """
    result = []
    # Scan levels from coarsest to finest; stop at the first level that has
    # buckets fully inside (start, end).
    for level in reversed(range(NLEVEL)):
        cStep = SCALE * STEP**level
        if start // cStep + 1 < end // cStep:
            for ts in range(start // cStep + 1, end // cStep):
                result += pointQuery(api, PREFIX + str(cStep), str(ts))
            break
    # Boundary buckets at the top level are only partially covered: filter
    # their records by timestamp.  Query each bucket once, even when start
    # and end share a bucket (the original queried it twice, duplicating
    # its results).
    cStep = SCALE * STEP**(NLEVEL - 1)
    buckets = [start // cStep]
    if end // cStep != start // cStep:
        buckets.append(end // cStep)
    for bucket in buckets:
        data = pointQuery(api, PREFIX + str(cStep), str(bucket))
        if data:
            # NOTE(review): lexicographic string comparison -- assumes
            # fixed-width timestamps; confirm.
            sl = SortedList(data, key=lambda a: a.split(DELIMITER)[0])
            result += list(sl.irange(str(start), str(end)))

    return result
Пример #7
0
def rangeQuery(api, start, end):
    """Collect records for every timestamp key within [start, end]."""
    stream = att_dict['T']
    keys = api.liststreamkeys(stream)["result"]
    # Sorted integer timestamps allow an O(log n) range scan.
    ordered = SortedList(int(entry['key']) for entry in keys)
    result = []
    for ts in ordered.irange(start, end):
        items = api.liststreamkeyitems(stream, str(ts))['result']
        result += getData(items)
    return result
Пример #8
0
def rangeQuery(api, start, end):
    """Return all records whose timestamp lies in [start, end].

    Records are bucketed by ``timestamp // SCALE``.  Buckets strictly inside
    the range are fetched whole via ``pointQuery``; the partially-covered
    boundary buckets are filtered with a SortedList range query.
    """
    TIMESTAMPE = 'Timestamp'
    result = []
    # Fully-covered interior buckets.
    for ts in range(start // SCALE + 1, end // SCALE):
        temp = pointQuery(api, TIMESTAMPE, str(ts))
        if temp:
            result += temp
    # Partially-covered boundary buckets: query each one once, even when
    # start and end fall in the same bucket (querying it twice, as the
    # original code did, duplicated its results).
    buckets = [start // SCALE]
    if end // SCALE != start // SCALE:
        buckets.append(end // SCALE)
    for bucket in buckets:
        temp = api.liststreamkeyitems(TIMESTAMPE, str(bucket))['result']
        if temp:
            data = getData(temp)
            # NOTE(review): lexicographic string comparison -- assumes
            # fixed-width timestamps; confirm.
            sl = SortedList(data, key=lambda a: a.split(" ")[0])
            result += list(sl.irange(str(start), str(end)))

    return result
Пример #9
0
 def busiestServers(self, k: int, arrival: List[int],
                    load: List[int]) -> List[int]:
     """Return indices of the servers that handled the most requests."""
     free = SortedList(range(k))   # ids of currently idle servers
     handled = [0] * k             # requests served per server
     busy = []                     # min-heap of [finish_time, server_id]
     for i, (start, duration) in enumerate(zip(arrival, load)):
         preferred = i % k
         # Release every server whose job finished by this arrival time.
         while busy and busy[0][0] <= start:
             _, freed = heappop(busy)
             free.add(freed)
         # First idle server at or after the preferred id, wrapping to the
         # beginning when none is available in [preferred, k - 1].
         chosen = next(free.irange(preferred, k - 1), None)
         if chosen is None:
             chosen = next(free.irange(0, k - 1), None)
         if chosen is not None:
             handled[chosen] += 1
             free.remove(chosen)
             heappush(busy, [start + duration, chosen])
     best = max(handled)
     return [i for i, c in enumerate(handled) if c == best]
Пример #10
0
 def containsNearbyAlmostDuplicate(self, nums, k, t):
     """Return True if two values within index distance k differ by <= t."""
     if not nums:
         return False
     window = SortedList()
     for i, value in enumerate(nums):
         # Nearest stored value <= value (floor) and >= value (ceiling);
         # sys.maxsize is a sentinel that can never satisfy the bound.
         floor = next(window.irange(maximum=value, reverse=True),
                      sys.maxsize)
         ceiling = next(window.irange(minimum=value), sys.maxsize)
         if abs(ceiling - value) <= t or abs(value - floor) <= t:
             return True
         window.add(value)
         # Keep only the last k values inside the sliding window.
         if len(window) > k:
             window.discard(nums[i - k])
     return False
Пример #11
0
class HitCounter:
    """Record integer timestamps and answer count queries over them."""

    # Sorted multiset of every recorded timestamp.
    records: SortedList

    def __init__(self):
        self.records = SortedList()

    def record(self, timestamp: int):
        """Record one hit at ``timestamp``."""
        self.records.add(timestamp)

    def total(self) -> int:
        """Return the total number of recorded hits."""
        return len(self.records)

    def range(self, lower, upper) -> int:
        """Return the number of hits with ``lower <= timestamp <= upper``.

        Two O(log n) bisections replace the original O(m) iteration over
        ``irange(lower, upper)``; both count the same inclusive range.
        """
        return self.records.bisect_right(upper) - self.records.bisect_left(lower)
def loadprocessedcsvdata(validcsvpath):
    """Load pkmn stats from csv data into the processed globals.

    Reads ``validcsvpath``, keeps rows whose score passes the threshold (or
    whose name is in ``seededchoices_names``), retains at most
    ``maxresultsize`` rows, and fills the ``processedcsv`` and ``pkmnstats``
    globals.  Exits the process if the file does not exist.
    """
    if not os.path.exists(validcsvpath):
        print('could not find ' + validcsvpath)
        sys.exit(1)

    scoreidx = processedheaders.index('Score')
    processedcsvdata = SortedList(key=lambda x: float(x[scoreidx]))  # ascending order

    csvheaders = dict()
    csvdata = []
    with open(validcsvpath, 'r') as csvdatafile:
        csvreader = csv.reader(csvdatafile, delimiter=',')
        for i, row in enumerate(csvreader):
            # Original used `i is 0` / manual counters; `is` on ints is an
            # identity check and must not be used for equality.
            if i == 0:
                # Header row: map column name -> column index.
                csvheaders = {col: j for j, col in enumerate(row)}
            else:
                csvdata.append(row)

    for pkmndata in csvdata:
        # Skip malformed/short rows (also replaces the `len(...) is len(...)`
        # identity comparison).
        if len(pkmndata) == len(csvheaders):
            if (float(pkmndata[scoreidx]) < pkmn_score_thresh
                    and pkmndata[csvheaders['Name']] not in seededchoices_names):
                continue

            processedcsvdata.add(pkmndata)
            if len(processedcsvdata) > maxresultsize:
                # pop() removes the highest-scoring row when over the cap.
                # NOTE(review): confirm that dropping the *largest* score is
                # intended (list is sorted ascending by score).
                processedcsvdata.pop()

    global pkmnstats
    global processedcsv
    processedcsv = [processedheaders]

    pkmnidx = 0
    hpidx = processedheaders.index('HP')
    for somerow in processedcsvdata.irange():
        processedcsv.append(somerow)
        # Numeric stat columns start at the 'HP' column.
        for i in range(len(processedheaders) - hpidx):
            pkmnstats[pkmnidx, i] = float(somerow[i + hpidx])
        pkmnidx += 1
Пример #13
0
 def getNumber(self, root: Optional[TreeNode], ops: List[List[int]]) -> int:
     """Count how many tree values end up colored after applying ops."""
     values = []

     def inorder(node):
         if node is None:
             return
         inorder(node.left)
         values.append(node.val)
         inorder(node.right)

     inorder(root)
     remaining = SortedList(values)  # sorted multiset of undecided values
     colored = 0
     # Walk the operations from last to first: the latest op touching a
     # value decides its final color, so count it on t == 1 and drop it.
     for t, x, y in reversed(ops):
         touched = list(remaining.irange(x, y))
         if t == 1:
             colored += len(touched)
         for value in touched:
             remaining.remove(value)
     return colored
Пример #14
0
 def find(self, left, right):
     """Merge [left, right] with every overlapping tracked interval.

     ``self.track`` maps interval start -> interval end.  Returns the merged
     interval (i, j) covering [left, right] together with all tracked
     intervals it touches, removing those intervals from the tracker;
     returns (left, right) unchanged when nothing overlaps.

     NOTE(review): an interval starting exactly at ``right`` appears not to
     be merged (bisect_left excludes it) -- confirm that boundary behavior
     is intended.
     """
     sl = SortedList(self.track.keys())
     l = sl.bisect_left(left)
     r = sl.bisect_left(right)
     if l > 0:
         # Step back to the interval starting before `left`, but keep it
         # only if it actually reaches `left` (its end >= left).
         l -= 1
         if self.track[sl[l]] < left:
             l += 1
     if l == r:
         # No tracked interval overlaps the query range.
         return (left, right)
     else:
         i = min(sl[l], left)
         if r == len(sl): r -= 1
         j = max(self.track[sl[r]], right)
         # Drop every merged interval from the tracker.
         for it in list(sl.irange(sl[l], sl[r])):
             self.track.pop(it)
         return (i, j)
Пример #15
0
 def avoidFlood(self, rains: List[int]) -> List[int]:
     """Pick lakes to dry on zero-days so no full lake is rained on."""
     result = [1] * len(rains)
     previous_rain = {}       # lake -> day it was last rained on
     filled = set()           # lakes that are currently full
     dry_days = SortedList()  # unused zero-days, in order
     for day, lake in enumerate(rains):
         if lake == 0:
             dry_days.add(day)
             continue
         if lake in filled:
             # Need an unused dry day strictly after the lake last filled.
             candidates = dry_days.irange(minimum=previous_rain[lake],
                                          inclusive=(False, True))
             chosen = next(candidates, None)
             if chosen is None:
                 return []
             result[chosen] = lake
             dry_days.remove(chosen)
         else:
             filled.add(lake)
         result[day] = -1
         previous_rain[lake] = day
     return result
Пример #16
0
def filter_maximal(itemsets):
    """filter maximal itemsets from a set of itemsets

    Parameters
    ----------
    itemsets: Iterator[frozenset]
        a set of itemsets

    Returns
    -------
    SortedList
    """
    maximals = SortedList(key=len)
    # Visit candidates from largest to smallest, so every possible superset
    # of the current candidate has already been stored.
    for candidate in sorted(itemsets, key=len, reverse=True):
        # Stored itemsets at least as large as the candidate (key is len).
        bigger = maximals.irange(candidate)
        if not any(stored > candidate for stored in bigger):
            maximals.add(candidate)  # O(log(len(maximals)))

    return maximals
Пример #17
0
def filter_minimal(itemsets):
    """filter minimal itemsets from a set of itemsets

    Parameters
    ----------
    itemsets: Iterator[frozenset]
        a set of itemsets

    Returns
    -------
    SortedList
    """
    minimals = SortedList(key=len)
    # Visit candidates from smallest to largest, so every possible subset
    # of the current candidate has already been stored.
    for candidate in sorted(itemsets, key=len):
        # Stored itemsets no larger than the candidate (key is len).
        smaller = minimals.irange(None, candidate)
        if not any(stored < candidate for stored in smaller):
            minimals.add(candidate)

    return minimals
Пример #18
0
class Timeline:
    """
    Ordered set of segments.

    A timeline can be seen as an ordered set of non-empty segments (Segment).
    Segments can overlap -- though adding an already exisiting segment to a
    timeline does nothing.

    Parameters
    ----------
    segments : Segment iterator, optional
        initial set of (non-empty) segments
    uri : string, optional
        name of segmented resource

    Returns
    -------
    timeline : Timeline
        New timeline
    """

    @classmethod
    def from_df(cls, df: pd.DataFrame, uri: Optional[str] = None) -> 'Timeline':
        """Build a timeline from the PYANNOTE_SEGMENT column of a dataframe."""
        segments = list(df[PYANNOTE_SEGMENT])
        timeline = cls(segments=segments, uri=uri)
        return timeline

    def __init__(self,
                 segments: Optional[Iterable[Segment]] = None,
                 uri: Optional[str] = None):
        """Initialize internal structures from an optional segment iterable.

        Raises
        ------
        ValueError
            If any provided segment is empty (falsy).
        """
        if segments is None:
            segments = ()

        # set of segments  (used for checking inclusion)
        segments_set = set(segments)

        if any(not segment for segment in segments_set):
            raise ValueError('Segments must not be empty.')

        self.segments_set_ = segments_set

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary for segment in segments_set for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        # path to (or any identifier of) segmented resource
        self.uri: Optional[str] = uri

    def __len__(self):
        """Number of segments

        >>> len(timeline)  # timeline contains three segments
        3
        """
        # The set and the sorted list always hold the same segments.
        return len(self.segments_set_)

    def __nonzero__(self):
        # Python 2 truthiness hook; delegates to __bool__ for compatibility.
        return self.__bool__()

    def __bool__(self):
        """Emptiness

        A timeline is truthy when it contains at least one segment.

        >>> if timeline:
        ...    # timeline is not empty
        ... else:
        ...    # timeline is empty
        """
        return len(self.segments_set_) > 0

    def __iter__(self) -> Iterable[Segment]:
        """Iterate over segments (in chronological order)

        >>> for segment in timeline:
        ...     # do something with the segment

        See also
        --------
        :class:`pyannote.core.Segment` describes how segments are sorted.
        """
        # The SortedList keeps segments in sorted (chronological) order.
        return iter(self.segments_list_)

    def __getitem__(self, k: int) -> Segment:
        """Get segment by index (in chronological order)

        >>> first_segment = timeline[0]
        >>> penultimate_segment = timeline[-2]
        """
        # Positional indexing on the sorted list; negative indices work too.
        return self.segments_list_[k]

    def __eq__(self, other: 'Timeline'):
        """Equality

        Two timelines are equal if and only if their segments are equal.

        >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)])
        >>> timeline2 = Timeline([Segment(2, 3), Segment(0, 1)])
        >>> timeline3 = Timeline([Segment(2, 3)])
        >>> timeline1 == timeline2
        True
        >>> timeline1 == timeline3
        False
        """
        # Set comparison: insertion order is irrelevant.
        return self.segments_set_ == other.segments_set_

    def __ne__(self, other: 'Timeline'):
        """Inequality (negation of set equality of segments)"""
        return self.segments_set_ != other.segments_set_

    def index(self, segment: Segment) -> int:
        """Get index of (existing) segment

        Parameters
        ----------
        segment : Segment
            Segment that is being looked for.

        Returns
        -------
        position : int
            Index of `segment` in timeline

        Raises
        ------
        ValueError if `segment` is not present.
        """
        # Chronological position within the sorted list of segments.
        return self.segments_list_.index(segment)

    def add(self, segment: Segment) -> 'Timeline':
        """Add a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being added

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline already contains this segment, it will not be added
        again, as a timeline is meant to be a **set** of segments (not a list).

        If the segment is empty, it will not be added either, as a timeline
        only contains non-empty segments.
        """
        # Ignore empty segments and duplicates.
        if not segment or segment in self.segments_set_:
            return self

        # Keep all three internal structures in sync.
        self.segments_set_.add(segment)
        self.segments_list_.add(segment)
        self.segments_boundaries_.add(segment.start)
        self.segments_boundaries_.add(segment.end)

        return self

    def remove(self, segment: Segment) -> 'Timeline':
        """Remove a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being removed

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline does not contain this segment, this does nothing
        """
        # Nothing to do when the segment is absent.
        if segment not in self.segments_set_:
            return self

        # Keep all three internal structures in sync.
        self.segments_set_.remove(segment)
        self.segments_list_.remove(segment)
        self.segments_boundaries_.remove(segment.start)
        self.segments_boundaries_.remove(segment.end)

        return self

    def discard(self, segment: Segment) -> 'Timeline':
        """Same as `remove`

        See also
        --------
        :func:`pyannote.core.Timeline.remove`
        """
        # `remove` is already a no-op on missing segments, so this alias is safe.
        return self.remove(segment)

    def __ior__(self, timeline: 'Timeline') -> 'Timeline':
        """In-place union: ``self |= timeline`` adds the other timeline's segments."""
        return self.update(timeline)

    def update(self, timeline: 'Timeline') -> 'Timeline':
        """Add every segments of an existing timeline (in place)

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        self : Timeline
            Updated timeline

        Note
        ----
        Only segments that do not already exist will be added, as a timeline is
        meant to be a **set** of segments (not a list).

        """

        segments_set = self.segments_set_

        segments_set |= timeline.segments_set_

        # Rebuild the derived structures wholesale rather than inserting
        # segment by segment.

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary for segment in segments_set for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        return self

    def __or__(self, timeline: 'Timeline') -> 'Timeline':
        """Union operator: ``self | timeline`` returns a new merged timeline."""
        return self.union(timeline)

    def union(self, timeline: 'Timeline') -> 'Timeline':
        """Create new timeline made of union of segments

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        union : Timeline
            New timeline containing the union of both timelines.

        Note
        ----
        This does the same as timeline.update(...) except it returns a new
        timeline, and the original one is not modified.
        """
        # Build a fresh timeline from the merged segment sets; `self` keeps
        # its own uri.
        combined = self.segments_set_ | timeline.segments_set_
        return Timeline(segments=combined, uri=self.uri)

    def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments

        >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)])
        >>> for segment1, segment2 in timeline1.co_iter(timeline2):
        ...     print(segment1, segment2)
        (<Segment(0, 2)>, <Segment(1, 3)>)
        (<Segment(1, 2)>, <Segment(1, 3)>)
        (<Segment(3, 4)>, <Segment(3, 5)>)

        Parameters
        ----------
        other : Timeline
            Second timeline

        Returns
        -------
        iterable : (Segment, Segment) iterable
            Yields pairs of intersecting segments in chronological order.
        """

        for segment in self.segments_list_:

            # iterate over segments that starts before 'segment' ends
            # NOTE(review): irange(maximum=temp) assumes Segment instances
            # sort by (start, end), so any candidate sorting after the
            # degenerate [end, end] segment cannot intersect -- confirm
            # against the Segment ordering.
            temp = Segment(start=segment.end, end=segment.end)
            for other_segment in other.segments_list_.irange(maximum=temp):
                if segment.intersects(other_segment):
                    yield segment, other_segment

    def crop_iter(self,
                  support: Support,
                  mode: CropMode = 'intersection',
                  returns_mapping: bool = False) \
            -> Iterator[Union[Tuple[Segment, Segment], Segment]]:
        """Like `crop` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.crop`
        """

        if mode not in {'loose', 'strict', 'intersection'}:
            raise ValueError("Mode must be one of 'loose', 'strict', or "
                             "'intersection'.")

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("Support must be a Segment or a Timeline.")

        # Normalize a Segment support into a (possibly empty) Timeline and
        # delegate to the Timeline code path below.
        if isinstance(support, Segment):
            # corner case where "support" is empty
            if support:
                segments = [support]
            else:
                segments = []

            support = Timeline(segments=segments, uri=self.uri)
            for yielded in self.crop_iter(support, mode=mode,
                                          returns_mapping=returns_mapping):
                yield yielded
            return

        # loose mode: yield any segment that intersects the support
        if mode == 'loose':
            for segment, _ in self.co_iter(support):
                yield segment
            return

        # strict mode: yield only segments fully included in the support
        if mode == 'strict':
            for segment, other_segment in self.co_iter(support):
                if segment in other_segment:
                    yield segment
            return

        # intersection mode: yield the actual intersections (optionally with
        # the original segment they came from)
        for segment, other_segment in self.co_iter(support):
            mapped_to = segment & other_segment
            if not mapped_to:
                continue
            if returns_mapping:
                yield segment, mapped_to
            else:
                yield mapped_to

    def crop(self,
             support: Support,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]:
        """Crop timeline to new support

        Parameters
        ----------
        support : Segment or Timeline
            If `support` is a `Timeline`, its support is used.
        mode : {'strict', 'loose', 'intersection'}, optional
            Controls how segments that are not fully included in `support` are
            handled. 'strict' mode only keeps fully included segments. 'loose'
            mode keeps any intersecting segment. 'intersection' mode keeps any
            intersecting segment but replace them by their actual intersection.
        returns_mapping : bool, optional
            In 'intersection' mode, return a dictionary whose keys are segments
            of the cropped timeline, and values are list of the original
            segments that were cropped. Defaults to False.

        Returns
        -------
        cropped : Timeline
            Cropped timeline
        mapping : dict
            When 'returns_mapping' is True, dictionary whose keys are segments
            of 'cropped', and values are lists of corresponding original
            segments.

        Examples
        --------

        >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline.crop(Segment(1, 3))
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='loose')
        <Timeline(uri=None, segments=[<Segment(0, 2)>, <Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='strict')
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True)
        >>> print(mapping)
        {<Segment(1, 2)>: [<Segment(0, 2)>, <Segment(1, 2)>]}

        """

        # Mapping is only produced in 'intersection' mode; gather the
        # (cropped -> original segments) association while collecting.
        if mode == 'intersection' and returns_mapping:
            segments, mapping = [], {}
            for segment, mapped_to in self.crop_iter(support,
                                                     mode='intersection',
                                                     returns_mapping=True):
                segments.append(mapped_to)
                # Several original segments may crop to the same segment.
                mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment]
            return Timeline(segments=segments, uri=self.uri), mapping

        return Timeline(segments=self.crop_iter(support, mode=mode),
                        uri=self.uri)

    def overlapping(self, t: float) -> List[Segment]:
        """Get list of segments overlapping `t`

        Parameters
        ----------
        t : float
            Timestamp, in seconds.

        Returns
        -------
        segments : list
            List of all segments of timeline containing time t
        """
        # Materialize the lazy iterator version.
        return list(self.overlapping_iter(t))

    def overlapping_iter(self, t: float) -> Iterator[Segment]:
        """Like `overlapping` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.overlapping`
        """
        # Only segments sorting no later than the degenerate segment [t, t]
        # can contain t; test each of those explicitly.  (The original
        # reused the name `segment` for both the probe and the loop
        # variable; renamed for clarity.)
        probe = Segment(start=t, end=t)
        for candidate in self.segments_list_.irange(maximum=probe):
            if candidate.overlaps(t):
                yield candidate

    def __str__(self):
        """Human-readable representation

        >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> print(timeline)
        [[ 00:00:00.000 -->  00:00:10.000]
         [ 00:00:01.000 -->  00:00:13.370]]

        """
        # One segment per line; continuation lines are indented by a single
        # space so they line up inside the surrounding brackets.
        body = "\n ".join(str(segment) for segment in self.segments_list_)
        return "[" + body + "]"

    def __repr__(self):
        """Computer-readable representation

        >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        <Timeline(uri=None, segments=[<Segment(0, 10)>, <Segment(1, 13.37)>])>

        """
        # Materialize the sorted segments so they print in order.
        segments = list(self.segments_list_)
        return "<Timeline(uri=%s, segments=%s)>" % (self.uri, segments)

    def __contains__(self, included: Union[Segment, 'Timeline']):
        """Inclusion

        Check whether every segment of `included` does exist in timeline.

        Parameters
        ----------
        included : Segment or Timeline
            Segment or timeline being checked for inclusion

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in timeline,
            False otherwise

        Examples
        --------
        >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> timeline2 = Timeline(segments=[Segment(0, 10)])
        >>> timeline1 in timeline2
        False
        >>> timeline2 in timeline1
        True
        >>> Segment(1, 13.37) in timeline1
        True

        """

        if isinstance(included, Segment):
            # Single segment: O(1) set membership.
            return included in self.segments_set_

        elif isinstance(included, Timeline):
            # Timeline: every one of its segments must be present.
            return self.segments_set_.issuperset(included.segments_set_)

        else:
            raise TypeError(
                'Checking for inclusion only supports Segment and '
                'Timeline instances')

    def empty(self) -> 'Timeline':
        """Return an empty copy

        Returns
        -------
        empty : Timeline
            Empty timeline using the same 'uri' attribute.

        """
        # Only the uri is carried over; no segments.
        return Timeline(uri=self.uri)

    def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \
            -> 'Timeline':
        """Get a copy of the timeline

        If `segment_func` is provided, it is applied to each segment first.

        Parameters
        ----------
        segment_func : callable, optional
            Callable that takes a segment as input, and returns a segment.
            Defaults to identity function (segment_func(segment) = segment)

        Returns
        -------
        timeline : Timeline
            Copy of the timeline

        """
        # Without a transform, duplicate the segments as-is.
        if segment_func is None:
            return Timeline(segments=self.segments_list_, uri=self.uri)

        # Otherwise apply the transform to every segment before copying.
        transformed = [segment_func(segment)
                       for segment in self.segments_list_]
        return Timeline(segments=transformed, uri=self.uri)

    def extent(self) -> Segment:
        """Extent

        The extent of a timeline is the segment of minimum duration that
        contains every segments of the timeline. It is unique, by definition.
        The extent of an empty timeline is an empty segment.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.extent()
            |--------------------------------|

        Returns
        -------
        extent : Segment
            Timeline extent

        Examples
        --------
        >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)])
        >>> timeline.extent()
        <Segment(0, 10)>

        """
        if self.segments_set_:
            # Boundaries are kept sorted, so the global minimum and maximum
            # are the first and last entries.
            segments_boundaries_ = self.segments_boundaries_
            start = segments_boundaries_[0]
            end = segments_boundaries_[-1]
            return Segment(start=start, end=end)
        else:
            # Empty timeline: (+inf, -inf) is an empty segment by convention.
            import numpy as np
            return Segment(start=np.inf, end=-np.inf)

    def support_iter(self) -> Iterator[Segment]:
        """Like `support` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.support`
        """

        # The support of an empty timeline is an empty timeline.
        if not self:
            return

        # Principle:
        #   * gather all segments with no gap between them
        #   * add one segment per resulting group (their union |)
        # Note:
        #   Since segments are kept sorted internally,
        #   there is no need to perform an exhaustive segment clustering.
        #   We just have to consider them in their natural order.

        # Initialize new support segment
        # as very first segment of the timeline
        new_segment = self.segments_list_[0]

        for segment in self:

            # If there is no gap between new support segment and next segment,
            # NOTE(review): `segment ^ new_segment` is assumed to yield the
            # gap between the two segments (falsy when they touch/overlap)
            # -- confirm against Segment.__xor__.
            if not (segment ^ new_segment):
                # Extend new support segment using next segment
                new_segment |= segment

            # If there actually is a gap,
            else:
                yield new_segment

                # Initialize new support segment as next segment
                # (right after the gap)
                new_segment = segment

        # Add new segment to the timeline support
        yield new_segment

    def support(self) -> 'Timeline':
        """Timeline support

        The support of a timeline is the timeline with the minimum number of
        segments with exactly the same time span as the original timeline. It
        is (by definition) unique and does not contain any overlapping
        segments.

        Returns
        -------
        support : Timeline
            Timeline support
        """
        # support_iter() yields the merged, non-overlapping segments;
        # wrap them into a fresh timeline carrying the same uri.
        merged_segments = self.support_iter()
        return Timeline(segments=merged_segments, uri=self.uri)

    def duration(self) -> float:
        """Timeline duration

        The timeline duration is the sum of the durations of the segments
        in the timeline support.

        Returns
        -------
        duration : float
            Duration of timeline support, in seconds.
        """

        # Accumulate the duration of each non-overlapping support segment,
        # so overlapping regions are only counted once.
        total = 0
        for segment in self.support_iter():
            total += segment.duration
        return total

    def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]:
        """Like `gaps` but returns a segment generator instead

        Parameters
        ----------
        support : None, Segment or Timeline, optional
            Support in which gaps are looked for. Defaults to timeline extent.

        Yields
        ------
        gap : Segment
            Non-empty gaps, in chronological order.

        Raises
        ------
        TypeError
            If `support` is neither a Segment nor a Timeline.

        See also
        --------
        :func:`pyannote.core.Timeline.gaps`

        """

        if support is None:
            support = self.extent()

        if not isinstance(support, (Segment, Timeline)):
            # FIX: operator quoting in the message was broken ("-'" instead
            # of "'-'") and the two literals were fused without a space.
            raise TypeError("unsupported operand type(s) for '-': "
                            "%s and Timeline." % type(support).__name__)

        # segment support
        if isinstance(support, Segment):

            # `end` is meant to store the end time of former segment
            # initialize it with beginning of provided segment `support`
            end = support.start

            # support on the intersection of timeline and provided segment
            for segment in self.crop(support, mode='intersection').support():

                # add gap between each pair of consecutive segments
                # if there is no gap, segment is empty, therefore not added
                gap = Segment(start=end, end=segment.start)
                if gap:
                    yield gap

                # keep track of the end of former segment
                end = segment.end

            # add final gap (if not empty)
            gap = Segment(start=end, end=support.end)
            if gap:
                yield gap

        # timeline support
        elif isinstance(support, Timeline):

            # yield gaps for every segment in support of provided timeline
            for segment in support.support():
                yield from self.gaps_iter(support=segment)

    def gaps(self, support: Optional[Support] = None) -> 'Timeline':
        """Gaps

        Build the timeline made of the gaps of this timeline within `support`.

        Parameters
        ----------
        support : None, Segment or Timeline
            Support in which gaps are looked for. Defaults to timeline extent

        Returns
        -------
        gaps : Timeline
            Timeline made of all gaps from original timeline, and delimited
            by provided support

        See also
        --------
        :func:`pyannote.core.Timeline.extent`

        """
        # Delegate the actual computation to the generator version.
        gap_segments = self.gaps_iter(support=support)
        return Timeline(segments=gap_segments, uri=self.uri)

    def segmentation(self) -> 'Timeline':
        """Segmentation

        Create the unique timeline with same support and same set of segment
        boundaries as original timeline, but with no overlapping segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.segmentation()
            |-|--|-|  |-|---|--|  |--|----|--|

        Returns
        -------
        timeline : Timeline
            (unique) timeline with same support and same set of segment
            boundaries as original timeline, but with no overlapping segments.
        """
        # COMPLEXITY: O(n)
        support = self.support()

        # COMPLEXITY: O(n.log n)
        # Gather every distinct segment boundary (start or end), sorted.
        # |------|    |------|     |----|
        #   |--|    |-----|     |----------|
        # becomes
        # | |  | |  | |   |  |  |  |    |  |
        timestamps = set()
        for (start, end) in self:
            timestamps.add(start)
            timestamps.add(end)
        timestamps = sorted(timestamps)

        # Empty timeline ==> empty segmentation
        # (FIX: removed a dead `timeline = Timeline(uri=self.uri)` local
        # that was created and never used)
        if not timestamps:
            return Timeline(uri=self.uri)

        # Build one segment per pair of consecutive boundaries, keeping only
        # those covered by the original timeline (tested via their middle).
        # | |  | |  | |   |  |  |  |    |  |
        # becomes
        # |-|--|-|  |-|---|--|  |--|----|--|
        segments = []
        start = timestamps[0]
        for end in timestamps[1:]:
            segment = Segment(start=start, end=end)
            if segment and support.overlapping(segment.middle):
                segments.append(segment)
            # next segment...
            start = end

        return Timeline(segments=segments, uri=self.uri)

    def to_annotation(self,
                      generator: Union[str, Iterable[Label], None] = 'string',
                      modality: Optional[str] = None) \
            -> 'Annotation':
        """Turn timeline into an annotation

        Each segment is labeled by a unique label.

        Parameters
        ----------
        generator : 'string', 'int', or iterable, optional
            If 'string' (default) generate string labels. If 'int', generate
            integer labels. If iterable, use it to generate labels; it must
            yield at least as many labels as there are segments.
        modality : str, optional
            Annotation modality.

        Returns
        -------
        annotation : Annotation
            Annotation with one unique label per segment.
        """
        # FIX: the `generator` annotation had a duplicated `None` member
        # (`Union[str, Iterable[Label], None, None]`).

        from .annotation import Annotation
        annotation = Annotation(uri=self.uri, modality=modality)

        # Resolve the string shortcuts into actual label generators.
        if generator == 'string':
            from .utils.generators import string_generator
            generator = string_generator()
        elif generator == 'int':
            from .utils.generators import int_generator
            generator = int_generator()

        # Assign one freshly generated label per segment.
        for segment in self:
            annotation[segment] = next(generator)

        return annotation

    def write_uem(self, file: TextIO):
        """Dump timeline to file using UEM format

        Parameters
        ----------
        file : file object
            Open, writable text file.

        Usage
        -----
        >>> with open('file.uem', 'w') as file:
        ...    timeline.write_uem(file)
        """

        # UEM uses "<NA>" as placeholder when no uri is available.
        uri = self.uri if self.uri else "<NA>"

        # One "<uri> 1 <start> <end>" line per segment, millisecond precision.
        lines = (f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n"
                 for segment in self)
        file.writelines(lines)

    def for_json(self):
        """Serialization

        See also
        --------
        :mod:`pyannote.core.json`
        """

        # Class name + serialized segments, in that (stable) key order.
        serialized = {
            PYANNOTE_JSON: self.__class__.__name__,
            PYANNOTE_JSON_CONTENT: [segment.for_json() for segment in self],
        }

        # Only include the uri when one is set.
        if self.uri:
            serialized[PYANNOTE_URI] = self.uri

        return serialized

    @classmethod
    def from_json(cls, data):
        """Deserialization

        See also
        --------
        :mod:`pyannote.core.json`
        """

        # Rebuild each segment from its serialized form; uri is optional.
        return cls(
            segments=[Segment.from_json(s)
                      for s in data[PYANNOTE_JSON_CONTENT]],
            uri=data.get(PYANNOTE_URI, None),
        )

    def _repr_png_(self):
        """IPython notebook support

        See also
        --------
        :mod:`pyannote.core.notebook`
        """

        # Imported lazily so that notebook-only dependencies are not
        # required outside of IPython.
        from .notebook import repr_timeline as _repr_timeline
        return _repr_timeline(self)
Пример #19
0
def test_irange():
    """Exhaustively exercise SortedList.irange bounds and inclusivity."""
    lst = SortedList()
    lst._reset(7)

    # An empty list yields an empty range.
    assert list(lst.irange()) == []

    expected = list(range(53))
    lst.update(expected)

    # Default inclusivity: both endpoints included; also check reverse.
    for lo in range(53):
        for hi in range(lo, 53):
            assert list(lst.irange(lo, hi)) == expected[lo:(hi + 1)]
            assert list(lst.irange(lo, hi, reverse=True)) == expected[lo:(hi + 1)][::-1]

    # Half-open [lo, hi)
    for lo in range(53):
        for hi in range(lo, 53):
            assert list(lst.irange(lo, hi, (True, False))) == list(range(lo, hi))

    # Half-open (lo, hi]
    for lo in range(53):
        for hi in range(lo, 53):
            assert list(lst.irange(lo, hi, (False, True))) == list(range(lo + 1, hi + 1))

    # Open (lo, hi)
    for lo in range(53):
        for hi in range(lo, 53):
            assert list(lst.irange(lo, hi, (False, False))) == list(range(lo + 1, hi))

    # Minimum only.
    for lo in range(53):
        assert list(lst.irange(lo)) == list(range(lo, 53))

    # Maximum only, exclusive.
    for hi in range(53):
        assert list(lst.irange(None, hi, (True, False))) == list(range(0, hi))

    # Exclusive bounds with no endpoints given still include everything.
    assert list(lst.irange(inclusive=(False, False))) == expected

    # Out-of-range bounds.
    assert list(lst.irange(53)) == []
    assert list(lst.irange(None, 53, (True, False))) == expected
Пример #20
0
def _all_segment_intersections_no_horizontal(segments):  # noqa
    """Yield every intersection among `segments` as ``(point, segments)``.

    Sweep-line over the Y axis; whenever two or more segments meet at an
    event point ``pt``, the pair ``(pt, tuple_of_segments)`` is yielded.

    NOTE(review): as the name says, this presumes there are no horizontal
    segments — only uniqueness and non-degeneracy are asserted here, so
    confirm callers filter horizontal segments out.
    """
    # Input segments must be unique...
    assert len(set(segments)) == len(segments)
    segments = list(segments)

    # ...and must not be degenerate (zero length)
    for segment in segments:
        assert segment[0] != segment[1]

    # Use the convention from the book: sweep on Y axis
    # (events are ordered by (y, x))
    def event_key(pt):
        return (pt[1], pt[0])

    # From point to list of segments: segments *starting* at the point carry
    # a _SweepKey; end points / intersection points contribute an empty list.
    event_queue = SortedDict(event_key)

    def add_event(pt, segment_key=None):
        # Register an event point, optionally attaching a starting segment.
        if pt not in event_queue:
            event_queue[pt] = []
        if segment_key is not None:
            event_queue[pt].append(segment_key)

    # Seed the queue: the endpoint that comes first in sweep order is the
    # "start" event (with the segment's _SweepKey), the other is the "end".
    for i, segment in enumerate(segments):
        if event_key(segment[0]) < event_key(segment[1]):
            add_event(segment[0], _SweepKey(segment, segment[0]))
            add_event(segment[1], None)
        else:
            add_event(segment[0], None)
            add_event(segment[1], _SweepKey(segment, segment[1]))

    # Sweep status: segments currently crossing the sweep line,
    # ordered by their _SweepKey.
    active = SortedList()

    y = -math.inf

    while len(event_queue) > 0:
        # Pop the lowest remaining event point.
        v = event_queue.popitem(0)
        pt, segstarts = v

        # Can't be > since while there are no horizontal segments,
        # there can still be points in horizontal relation to one another
        assert pt[1] >= y
        y = pt[1]

        # Find all segments within the event point

        # Probe key: a fake vertical segment through pt, used to split the
        # active list around x = pt[0].
        fake_segment = ((pt[0], pt[1]), (pt[0], pt[1] + 1))
        fake_key = _SweepKey(fake_segment, pt)

        # Active segments passing exactly through pt (at_y(y) == pt[0]).
        touches = []

        # The next lower / higher keys, respectively, to enter new events for
        neighbours = []

        if _extra_checks:
            _assert_fully_sorted(list(active), y)
        # Iterate on both sides
        for it in (
                active.irange(None,
                              fake_key,
                              inclusive=(True, True),
                              reverse=True),
                active.irange(fake_key, None, inclusive=(False, True)),
        ):
            neighbour = None
            for sweep_key in it:
                # First active segment NOT through pt is the neighbour
                # on this side; everything before it is a "touch".
                if sweep_key.at_y(y) != pt[0]:
                    neighbour = sweep_key
                    break
                touches.append(sweep_key)
            neighbours.append(neighbour)

        # Remove the old sweep keys (touching segments get re-inserted
        # below with keys recomputed at pt).
        for touch in touches:
            active.remove(touch)

        segments_at_pt = [
            sweep_key.segment for sweep_key in touches + segstarts
        ]
        # Two or more segments meeting at pt: report the intersection.
        if len(segments_at_pt) > 1:
            yield (pt, tuple(segments_at_pt))

        # Create new _SweepKeys, automatically sorts
        # according to order after point
        sweep_keys = []
        for segment in segments_at_pt:
            # Is this segment still relevant?
            # (drop segments whose upper endpoint is at or below pt)
            if max(segment[0][1], segment[1][1]) <= pt[1]:
                continue
            sweep_keys.append(_SweepKey(segment, pt))

        sweep_keys = list(sorted(sweep_keys))

        # Add new events for neighbours
        if len(sweep_keys) == 0:
            # If we just removed stuff, the neighbours might now meet...
            if neighbours[0] is not None and neighbours[1] is not None:
                ipt = _nonparallel_intersection_point(neighbours[0].segment,
                                                      neighbours[1].segment)
                # Only schedule events strictly above the current y.
                if ipt and ipt[1] > pt[1]:
                    add_event(ipt)

            continue

        # Leftmost continuing segment may next intersect the left neighbour.
        if neighbours[0] is not None:
            ipt = _nonparallel_intersection_point(sweep_keys[0].segment,
                                                  neighbours[0].segment)
            # hyp.note(fstr('IPTL', ipt, pt))
            if ipt and ipt[1] > pt[1]:
                add_event(ipt)

        # Rightmost continuing segment may next intersect the right neighbour.
        if neighbours[1] is not None:
            ipt = _nonparallel_intersection_point(sweep_keys[-1].segment,
                                                  neighbours[1].segment)
            # hyp.note(fstr('IPTR', ipt, pt))
            if ipt and ipt[1] > pt[1]:
                add_event(ipt)

        # Add them in and continue
        for sweep_key in sweep_keys:
            active.add(sweep_key)
Пример #21
0
class PeakDB:
    """Queryable database of metabolite peaks.

    Peaks are stored in a SortedList so that range queries (``q ± tolerance``)
    can be answered with ``irange``.

    NOTE(review): assumes ``MetabolitePeak`` sorts by its second constructor
    argument (the position value compared against query values) — confirm
    against the MetabolitePeak definition.
    """
    def __init__(self, peak_list: List[MetabolitePeak]):
        # All peaks, kept sorted (by MetabolitePeak's natural ordering).
        self._peaks = SortedList(peak_list)

        # Fast lookup of a peak by its id.
        self.peak_dict = {p.id: p for p in peak_list}

        # Group peaks by the metabolite they belong to.
        metabolite_peaks = {}
        for peak in peak_list:
            if peak.metabolite_id not in metabolite_peaks:
                metabolite_peaks[peak.metabolite_id] = []

            metabolite_peaks[peak.metabolite_id].append(peak)

        self._metabolite_peaks = metabolite_peaks

    @property
    def metabolite_peaks(self) -> Dict[str, List[MetabolitePeak]]:
        """Mapping from metabolite id to the list of its peaks."""
        return self._metabolite_peaks

    def query_n(self, qu: List[float], tolerance=0.0075, missing_thresh=0.8):
        """Match query values to peaks via per-metabolite bipartite matching.

        For every query value ``q``, build (per metabolite) a bipartite graph
        between query nodes (``l_<q>``, bipartite=0) and the peaks found in
        ``[q - tolerance, q + tolerance]`` (bipartite=1); a maximum matching
        per connected component then decides which query matches which peak.

        Returns a list of dicts with keys ``metabolite_id``, ``matches``
        (query node -> peak id), ``score`` (matched fraction of the
        metabolite's peaks) and ``missing`` (ids of unmatched peaks whose
        amplitude rivals a matched one, see inline comment).
        """
        graphs = {}
        for q in qu:
            # Probe peaks used purely as range bounds for irange.
            lower = MetabolitePeak(None, q - tolerance, None)
            upper = MetabolitePeak(None, q + tolerance, None)

            for retrieved_peak in self._peaks.irange(lower, upper):
                if retrieved_peak.metabolite_id not in graphs:
                    graphs[retrieved_peak.metabolite_id] = nx.Graph()

                # Query-side node, one per query value.
                if f'l_{q:.5f}' not in graphs[retrieved_peak.metabolite_id]:
                    graphs[retrieved_peak.metabolite_id].add_node(f'l_{q:.5f}', bipartite=0)

                # Peak-side node.
                if retrieved_peak.id not in graphs[retrieved_peak.metabolite_id]:
                    graphs[retrieved_peak.metabolite_id].add_node(retrieved_peak.id, bipartite=1)

                graphs[retrieved_peak.metabolite_id].add_edge(f'l_{q:.5f}', retrieved_peak.id)

        result_map = []
        for met, graph in graphs.items():
            result = {
                'metabolite_id': met,
                'matches': {}
            }

            # Maximum matching is computed per connected component; the
            # matching dict contains both directions, keep query->peak only.
            for i in nx.components.connected_components(graph):
                matching = nx.bipartite.maximum_matching(graph.subgraph(i))
                result['matches'].update({k: v for k, v in matching.items() if k.startswith("l_")})

            # Fraction of this metabolite's peaks that got matched.
            result['score'] = len(result['matches']) / len(self.metabolite_peaks[met])

            result['missing'] = []
            matched_peaks = [self.peak_dict[peak_id] for peak_id in result['matches'].values()]
            for peak in self.metabolite_peaks[met]:
                if peak not in matched_peaks and any([p.amp * missing_thresh <= peak.amp for p in matched_peaks]):
                    # if there is any peak larger than any seen peak
                    # this peak is considered missing
                    result['missing'].append(peak.id)

            result_map.append(result)

        return result_map

    def query(self, qu: List[float], tolerance=0.0075):
        """Simple overlap query.

        Count, per metabolite, how many query values hit at least one of its
        peaks (each query value credited at most once per metabolite), then
        score each metabolite by ``overlap / number of its peaks``.

        Returns the list of ``{"metabolite_id", "score", "overlap"}`` dicts
        sorted by descending score.
        """
        result_map = {}
        for q in qu:
            # Metabolites already credited for this query value.
            matched = set()
            lower = MetabolitePeak(None, q - tolerance, None)
            upper = MetabolitePeak(None, q + tolerance, None)

            for retrieved_peak in self._peaks.irange(lower, upper):
                if retrieved_peak.metabolite_id not in result_map:
                    result_map[retrieved_peak.metabolite_id] = 0

                if retrieved_peak.metabolite_id not in matched:
                    matched.add(retrieved_peak.metabolite_id)
                    result_map[retrieved_peak.metabolite_id] += 1

        metabolite_list = []
        for met_id, overlap in result_map.items():
            # NOTE(review): named "jaccard" but it is overlap divided by the
            # metabolite's peak count, not a true Jaccard index.
            jaccard_score = overlap / len(self.metabolite_peaks[met_id])
            metabolite_list.append({
                "metabolite_id": met_id,
                "score": jaccard_score,
                "overlap": overlap
            })

        return sorted(metabolite_list, key=lambda i: i['score'], reverse=True)
# %% SortedList
# Demo of the sortedcontainers API: SortedList keeps its elements in
# sorted order through every mutation.
from sortedcontainers import SortedList, SortedDict, SortedSet

sl = SortedList([100, 7, 10, 11, 13, 14])
print(sl)
sl.add(99)     # insert a single element, keeping order
sl.remove(7)   # remove one occurrence of the value
print(sl)
sl.update([1, 0, 2])  # add the entire list
print(sl)

# irange(minimum=None, maximum=None, inclusive=(True, True), reverse=False)
# iterates the values in [minimum, maximum]; `inclusive` toggles each endpoint
rangeList = list(sl.irange(10, 14, inclusive=[True, False]))
print(rangeList)

print(sl.index(10))  # 3
# print(sl.index(-99)) # would raise ValueError (value not present)

s2 = SortedList([1, 7, 7, 7, 7, 10, 11, 13, 14])
print(f"left most idx: {s2.bisect_left(7)}")
print(f"right most idx: {s2.bisect_right(7)}")
print(f"out of boundary < min, idx={s2.bisect_left(-100)}")
print(f"out of boundary > max, len={len(s2)}, idx={s2.bisect_left(100)}")

# %% SortedDict
# SortedDict keeps its keys in sorted order.
sd = SortedDict()
sd["c"] = 3
sd["a"] = 1
sd["b"] = 2
del sd["a"]
print(sd)
def test_irange():
    """Exhaustively exercise SortedList.irange bounds and inclusivity."""
    # FIX: `SortedList(load=7)` relies on the pre-2.0 sortedcontainers API;
    # the `load` keyword was removed in v2. Set the load factor the supported
    # way instead (same as the other copy of this test in this file).
    sl = SortedList()
    sl._reset(7)

    assert [] == list(sl.irange())

    values = list(range(53))
    sl.update(values)

    # Default inclusivity: both endpoints included; also check reverse.
    for start in range(53):
        for end in range(start, 53):
            assert list(sl.irange(start, end)) == values[start:(end + 1)]
            assert list(sl.irange(start, end, reverse=True)) == values[start:(end + 1)][::-1]

    # Half-open [start, end)
    for start in range(53):
        for end in range(start, 53):
            assert list(range(start, end)) == list(sl.irange(start, end, (True, False)))

    # Half-open (start, end]
    for start in range(53):
        for end in range(start, 53):
            assert list(range(start + 1, end + 1)) == list(sl.irange(start, end, (False, True)))

    # Open (start, end)
    for start in range(53):
        for end in range(start, 53):
            assert list(range(start + 1, end)) == list(sl.irange(start, end, (False, False)))

    # Minimum only.
    for start in range(53):
        assert list(range(start, 53)) == list(sl.irange(start))

    # Maximum only, exclusive.
    for end in range(53):
        assert list(range(0, end)) == list(sl.irange(None, end, (True, False)))

    # Exclusive bounds with no endpoints given still include everything.
    assert values == list(sl.irange(inclusive=(False, False)))

    # Out-of-range bounds.
    assert [] == list(sl.irange(53))
    assert values == list(sl.irange(None, 53, (True, False)))
Пример #24
0
class _Vertex:
    """A vertex of a planar subdivision with its outgoing half-edges.

    Half-edges are kept sorted by the angle ("frangle", computed by
    ``vector_frangle``) of their direction vector around the vertex, so
    angular-neighbour queries reduce to ``irange`` lookups.
    """
    # Position of this vertex.
    coordinates: Point
    # Outgoing half-edges, ordered by the frangle of their direction.
    half_edges_by_order: SortedList

    def _key(self, half_edge):
        """Sort key: frangle of the half-edge's direction vector."""
        # Every half-edge stored here must start at this vertex.
        assert half_edge[0] == self.coordinates
        return vector_frangle(
            np.array(half_edge[1], dtype=object) - half_edge[0])

    def __init__(self, coordinates):
        self.coordinates = coordinates

        self.half_edges_by_order = SortedList(key=self._key)

    def add_half_edge(self, half_edge):
        """Insert an outgoing half-edge.

        Asserts that no half-edge with the same frangle is already present
        (an irange over the equal-key window must come back empty).
        """
        existing = list(
            self.half_edges_by_order.irange(half_edge,
                                            half_edge,
                                            inclusive=(True, True)))
        assert len(existing) == 0, fstr(half_edge, existing, approx=True)

        self.half_edges_by_order.add(half_edge)

    def get_next_cw(self, half_edge):
        """Return the clockwise neighbour (per the method name) of
        `half_edge`: the successor in frangle order, wrapping around to the
        smallest frangle when `half_edge` has the largest one."""
        assert half_edge[0] == self.coordinates

        # First half-edge with a strictly greater frangle...
        nxt = list(
            islice(
                self.half_edges_by_order.irange(half_edge,
                                                None,
                                                inclusive=(False, True)), 1))
        if len(nxt) == 0:
            # ...or wrap around to the first (smallest-frangle) half-edge.
            nxt = list(
                islice(
                    self.half_edges_by_order.irange(None,
                                                    None,
                                                    inclusive=(True, True)),
                    1))

        assert len(nxt) == 1

        return nxt[0]

    def get_next_ccw(self, half_edge):
        """Return the counter-clockwise neighbour (per the method name) of
        `half_edge`: the predecessor in frangle order, wrapping around to the
        largest frangle when `half_edge` has the smallest one."""
        assert half_edge[0] == self.coordinates

        # First half-edge with a strictly smaller frangle (reverse scan)...
        nxt = list(
            islice(
                self.half_edges_by_order.irange(None,
                                                half_edge,
                                                inclusive=(True, False),
                                                reverse=True), 1))
        if len(nxt) == 0:
            # ...or wrap around to the last (largest-frangle) half-edge.
            nxt = list(
                islice(
                    self.half_edges_by_order.irange(None,
                                                    None,
                                                    inclusive=(True, True),
                                                    reverse=True), 1))

        assert len(nxt) == 1, (self.half_edges_by_order, nxt)

        # Consistency check: cw(ccw(e)) must give e back.
        prev = self.get_next_cw(nxt[0])
        assert prev == half_edge, fstr(half_edge, nxt[0], prev,
                                       self._key(half_edge), self._key(nxt[0]),
                                       self._key(prev))

        return nxt[0]
def processthread(trange):
    """Worker: evaluate a range of team combinations, keep the best ones.

    Parameters
    ----------
    trange : tuple
        ``(start_index, end_index, do_printing, logfile_name)`` — the
        half-open range of combination indices this worker owns, whether it
        prints progress, and the name of its per-thread resume log file.

    Returns
    -------
    list
        Single-element list wrapping the kept result rows, best score first.

    NOTE(review): relies on many module-level globals (teamstatsheaders,
    setsize, currentdir, resumethreads, processedcsv, seededchoices,
    maxresultsize, nprintsplit, filter/stat helpers, ...) — confirm they are
    initialized before this is called.
    """

    teamstatsScoreidx = teamstatsheaders.index('Score')
    # Position of the Score column inside a combined [set..., stats...] row.
    sortedScoreidx = setsize + teamstatsScoreidx
    # Results sorted by descending score (negated key).
    threadSortedResults = SortedList(key=lambda x: -x[sortedScoreidx])
    # todo check this index
    minScore = 0

    istart = trange[0]
    isize = trange[1] - trange[0]
    iend = trange[1]
    doprinting = trange[2]
    logfilepath = os.path.join(currentdir, trange[3])
    resumeset = istart
    # Optionally resume from a previous run's pickled checkpoint:
    # the log file stores the last set index followed by the result rows.
    if resumethreads and os.path.exists(logfilepath):
        with open(logfilepath, 'rb') as logfile:
            resumeset = pickle.load(logfile)
            if resumeset < istart or resumeset >= iend - 1:
                print('cant resume set ' + logfilepath)
                resumeset = istart
            else:
                print( 'resuming ' + os.path.basename(logfilepath) + ' from ' + str(resumeset))
                for someset in pickle.load(logfile):
                    threadSortedResults.add(someset)


    map_subpool_to_csv = list(range(len(processedcsv) - 1)) #indexes of all the pokemon in csvdata and pkmnstats
    #reverse to generate 'reverse colexigraphical' order

    for somesetidx in seededchoices: #remove the seeded choices if any
        map_subpool_to_csv.remove(somesetidx)

    subsetpool = list(range(len(processedcsv) - 1 - len(seededchoices)))[::-1] #make a separate index. ex: pkmnstats[ 1 + map_subpool_to_csv[setidx]]
    if len(map_subpool_to_csv) != len(subsetpool):
        print('error in map_subpool_to_csv')
        exit()

    # Number of team slots left to fill after the seeded (forced) picks.
    subsetsize = setsize - len(seededchoices)

    threadcombinator = IndexedCombination( subsetsize, subsetpool)
    threadtimerstart = timeit.default_timer()
    skippedrange = 0
    #for setindex in range( resumeset, iend):
    setindex = resumeset
    while setindex < iend:
        # Inner loop: evaluate nprintsplit combinations between progress
        # prints / checkpoints.
        for iprintsplit in range(nprintsplit):
            parentset = threadcombinator.get_nth_combination(setindex)

            if len(seededchoices) > 0:
                fullset = seededchoices + parentpool_to_csvdata(map_subpool_to_csv,parentset)
            else:
                fullset = parentpool_to_csvdata(map_subpool_to_csv,parentset)
            
            if len(threadSortedResults) < maxresultsize:
                # Result list not full yet: keep anything passing the filters.
                if filtersetbyweakness(fullset) and filtersetbyattack(fullset):
                    teamcompare = list(fullset) + teamstats(fullset)
                    threadSortedResults.add(teamcompare)
            else:
                teamcompare = list(fullset) + teamstats(fullset)
                if teamcompare[sortedScoreidx] > minScore: #todo check this index
                    if filtersetbyweakness(fullset) and filtersetbyattack(fullset):
                        # Insert, then drop the worst row to keep the bound.
                        threadSortedResults.add(teamcompare)
                        threadSortedResults.pop()
                        minScore = threadSortedResults[-1][sortedScoreidx]
                else:
                    #skip to next combination where its possible for the stats to be high enough.
                    nextset = threadcombinator.skiptonextmaxima(parentset)
                    if len(nextset) == 0:
                        new_setindex = iend
                    else:
                        new_setindex = threadcombinator.get_n_from_combination(nextset) - 1
                    skippedrange += (new_setindex + 1 - setindex)
                    setindex = new_setindex
            setindex += 1
            if setindex >= iend:
                break
        
        if doprinting:
            threadtimernow = timeit.default_timer()
            combinationspersecond = (setindex - istart)/(threadtimernow - threadtimerstart)
            print("{0:.6f}".format((setindex - istart)/isize * 100), '%' + ' with ' + str(skippedrange) + ' skipped of ' + str(setindex - istart) + ' and SortedResults has ' + str(len(threadSortedResults)) + '/' + str(maxresultsize) + ' at ' + '{0:.1f}'.format(combinationspersecond) + ' combinations/s', end='\r')
        if resumethreads:
            # Checkpoint: current set index + current results, for resuming.
            with open(logfilepath, 'wb') as logfile:
                pickle.dump(setindex, logfile, protocol=pickle.HIGHEST_PROTOCOL)
                pickle.dump(list(threadSortedResults.irange()), logfile, protocol=pickle.HIGHEST_PROTOCOL)
    if doprinting:
        print('')
    if resumethreads:
        # Final checkpoint after the range is exhausted.
        with open(logfilepath, 'wb') as logfile:
            pickle.dump(setindex, logfile, protocol=pickle.HIGHEST_PROTOCOL)
            pickle.dump(list(threadSortedResults.irange()), logfile, protocol=pickle.HIGHEST_PROTOCOL)

    return [list(threadSortedResults.irange())]
Пример #26
0
class ListWithAdjustments(object):
    """
    To prepare inserts, we adjust elements to be inserted and elements in the
    underlying list. We don't want to actually touch the underlying list, but
    we need to remember the adjustments, because later adjustments may depend
    on and readjust earlier ones.

    NOTE(review): uses `xrange`, i.e. this is Python 2 code.
    """
    def __init__(self, orig_list):
        """
        Orig_list must be a a SortedListWithKey.
        """
        self._orig_list = orig_list
        self._key = orig_list._key

        # Stores pairs (i, new_key) where i is an index into orig_list.
        #   Note that adjustments don't affect the order in the original list, so the list is sorted
        #   both on keys an on indices; and a missing index i means that (i, orig_key) fits into the
        #   adjustments list both by key and by index.
        self._adjustments = SortedListWithKey(key=lambda pair: pair[1])

        # Stores keys for new insertions.
        self._insertions = SortedList()

    def get_insertions(self):
        """Return the SortedList of keys pending insertion."""
        return self._insertions

    def get_adjustments(self):
        """Return the SortedListWithKey of (index, new_key) adjustments."""
        return self._adjustments

    def _adj_bisect_key_left(self, key):
        """
    Works as bisect_key_left(key) on the orig_list as if all adjustments have been applied.
    """
        adj_index = self._adjustments.bisect_key_left(key)
        # Original-list index of the next adjusted element at or after `key`
        # (or len(orig_list) when there is none).
        adj_next = (self._adjustments[adj_index][0]
                    if adj_index < len(self._adjustments) else len(
                        self._orig_list))
        # Original-list index of the last adjusted element before `key`.
        adj_prev = self._adjustments[adj_index - 1][0] if adj_index > 0 else -1
        orig_index = self._orig_list.bisect_key_left(key)
        # If the unadjusted answer falls strictly between the surrounding
        # adjustments it is still valid; otherwise the next adjustment wins.
        if adj_prev < orig_index and orig_index < adj_next:
            return orig_index
        return adj_next

    def _adj_get_key(self, index):
        """
    Returns the key corresponding to the given index into orig_list as if all adjustments have
    been applied.
    """
        # Look for an adjustment recorded for exactly this index.
        i = bisect.bisect_left(self._adjustments, (index, float('-inf')))
        if i < len(self._adjustments) and self._adjustments[i][0] == index:
            return self._adjustments[i][1]
        # No adjustment: fall back to the element's original key.
        return self._key(self._orig_list[index])

    def count_range(self, begin, end):
        """
    Returns the number of elements with keys in the half-open interval [begin, end).
    """
        # Count adjusted originals and pending insertions separately.
        adj_begin = self._adj_bisect_key_left(begin)
        adj_end = self._adj_bisect_key_left(end)
        ins_begin = self._insertions.bisect_left(begin)
        ins_end = self._insertions.bisect_left(end)
        return (adj_end - adj_begin) + (ins_end - ins_begin)

    def _adjust_range(self, begin, end):
        """
    Make changes to stored adjustments and insertions to distribute them equally in the half-open
    interval of keys [begin, end).
    """
        adj_begin = self._adj_bisect_key_left(begin)
        adj_end = self._adj_bisect_key_left(end)
        ins_begin = self._insertions.bisect_left(begin)
        ins_end = self._insertions.bisect_left(end)
        self._do_adjust_range(adj_begin, adj_end, ins_begin, ins_end, begin,
                              end)

    def _adjust_all(self):
        """
    Renumber everything to be equally distributed in the open interval (new_begin, new_end).
    """
        orig_len = len(self._orig_list)
        ins_len = len(self._insertions)
        self._do_adjust_range(0, orig_len, 0, ins_len, 0.0,
                              orig_len + ins_len + 1.0)

    def _do_adjust_range(self, adj_begin, adj_end, ins_begin, ins_end,
                         new_begin_key, new_end_key):
        """
    Implements renumbering as used by _adjust_range() and _adjust_all().
    """
        count = (adj_end - adj_begin) + (ins_end - ins_begin)

        # Collect the affected (key, is_insert, index) triples, sorted by
        # key, and map them onto evenly spaced new keys.
        prev_keys = ([(self._adj_get_key(i), False, i)
                      for i in xrange(adj_begin, adj_end)] +
                     [(self._insertions[i], True, i)
                      for i in xrange(ins_begin, ins_end)])
        prev_keys.sort()
        new_keys = get_range(new_begin_key, new_end_key, count)

        for (old_key, is_insert, i), new_key in zip(prev_keys, new_keys):
            if is_insert:
                self._insertions.remove(old_key)
                self._insertions.add(new_key)
            else:
                # (i, old_key) pair may not be among _adjustments, so we discard() rather than remove().
                self._adjustments.discard((i, old_key))
                self._adjustments.add((i, new_key))

    def prep_inserts_at_index(self, index, count):
        """Reserve `count` insertion keys before position `index`,
        redistributing neighbouring keys when the local interval is too
        dense (see the algorithm notes below)."""
        # This is the crux of the algorithm, inspired by the [Bender] paper (cited above).
        # Here's a brief summary of the algorithm, and of our departures from it.
        # - The algorithm inserts keys while it is able. When there isn't enough space, it walks
        #   enclosing intervals around the key it wants to insert, doubling the interval each time,
        #   until it finds an interval that doesn't overflow. The overflow threshold is calculated in
        #   such a way that the bigger the interval, the smaller the density it seeks.
        # - The algorithm uses integers, picking the number of bits to work for list length between
        #   n/2 and 2n, and rebuilding from scratch any time length moves out of this range. We don't
        #   rebuild anything, don't change number of bits, and use floats. This breaks some of the
        #   theoretical results, and thinking about floats is much harder than about integers. So we
        #   are not on particularly solid ground with these changes (but it seems to work).
        # - We try different thresholds, which seems to perform better. This is mentioned in "Variable
        #   T" section of [Bender] paper, but our approach isn't quite the same. So it's also on shaky
        #   theoretical ground.
        assert count > 0
        begin = self._adj_get_key(index - 1) if index > 0 else 0.0
        end = self._adj_get_key(index) if index < len(
            self._orig_list) else begin + count + 1
        if begin < 0 or end <= 0 or math.isinf(max(begin, end)):
            # This should only happen if we have some invalid positions (e.g. from before we started
            # using this logic). In this case, just renumber everything 1 through n (leaving space so
            # that the count insertions take the first count integers).
            self._insertions.update([begin if index > 0 else float('-inf')] *
                                    count)
            self._adjust_all()
            return

        self._insertions.update(get_range(begin, end, count))
        if not is_valid_range(begin, self._insertions.irange(begin, end), end):
            # Too dense: find an enclosing interval sparse enough to spread
            # the keys out, then redistribute within it.
            assert self.count_range(begin, end) > 0
            min_key, max_key = self._find_sparse_enough_range(begin, end)
            self._adjust_range(min_key, max_key)
            assert is_valid_range(begin, self._insertions.irange(begin, end),
                                  end)

    def _find_sparse_enough_range(self, begin, end):
        """Return an interval around [begin, end) whose element count is
        under the density threshold for its size; raises when none of the
        doubled intervals qualifies."""
        # frac is a parameter used for relabeling, corresponding to 2/T in [Bender]. Its
        # interpretation is that frac^i is the overflow limit for intervals of size 2^i.
        for frac in (1.14, 1.3):
            thresh = 1
            for i in xrange(64):
                rbegin, rend = range_around_float(begin, i)
                assert self.count_range(rbegin, rend) > 0
                if end <= rend and self.count_range(rbegin, rend) < thresh:
                    return (rbegin, rend)
                thresh *= frac
        raise ValueError("This isn't expected")
def preprocesscsvdata(validcsvpath, csvsavepath):
    """Load raw pkmn stats from ``validcsvpath``, score each row, and write
    the processed rows (sorted ascending by Score) to ``csvsavepath``.

    Side effects: rebuilds the module-level ``processedcsv`` list and fills
    the ``pkmnstats`` matrix with the numeric columns (HP onward) of every
    kept row. Exits the process if ``validcsvpath`` does not exist.
    """
    # Load pkmn stats from csv data
    if not os.path.exists(validcsvpath):
        print('could not find ' + validcsvpath)
        sys.exit(1)
    global processedheaders

    scoreidx = processedheaders.index('Score')
    processedcsvdata = SortedList(key=lambda x: float(x[scoreidx]))  # ascending order

    csvheaders = dict()
    # Fixed: was initialized to [[]], seeding a bogus empty row that the
    # length check below then had to filter out.
    csvdata = []
    # newline='' is the documented way to open files for the csv module.
    with open(validcsvpath, 'r', newline='') as csvdatafile:
        csvreader = csv.reader(csvdatafile, delimiter=',')
        for i, row in enumerate(csvreader):
            if i == 0:  # fixed: `i is 0` relied on CPython int identity
                for j, col in enumerate(row):
                    csvheaders[col] = j
            else:
                csvdata.append(row)

    #expectedcsvheaders = ['Name','Type1','Type2', 'HP','Atk','Def','SpecialAtk','SpecialDef','Speed', 'Ability1','Ability2','HiddenAbility']

    for pkmndata in csvdata:
        # Skip malformed/short rows (fixed: `is` -> `!=` for the length test).
        if len(pkmndata) != len(csvheaders):
            continue

        processedpkmndata = [pkmndata[csvheaders[col]]
                             for col in ('Name', 'Type1', 'Type2')]

        abilityandres = calcpkmnresistances(pkmndata, csvheaders)
        ability = abilityandres[0]
        processedpkmndata.append(ability)

        thispkmnstats = calcStats(pkmndata, csvheaders, ability)
        processedpkmndata += [str(s) for s in thispkmnstats]
        processedpkmndata += [str(r) for r in abilityandres[1]]  # Resistances

        # Drop low-scoring pkmn unless they were explicitly seeded by name.
        if (float(thispkmnstats[-1]) < pkmn_score_thresh
                and pkmndata[csvheaders['Name']] not in seededchoices_names):
            continue

        processedcsvdata.add(processedpkmndata)

    if generation >= 3:
        print('To adjust ability assumptions, edit supported abilities by each pkmn in the unprocessed input csv') # or disable unwanted abilities via the command line')
    global pkmnstats
    global processedcsv
    processedcsv = [processedheaders]

    with open(csvsavepath, 'w', newline='') as csvdatafile:
        csvwriter = csv.writer(csvdatafile, delimiter=',')
        csvwriter.writerow(processedheaders)

        hpidx = processedheaders.index('HP')
        # SortedList iterates in key (Score) order, so rows come out sorted.
        for pkmnidx, somerow in enumerate(processedcsvdata):
            processedcsv.append(somerow)
            csvwriter.writerow(somerow)
            # Numeric columns (HP onward) feed the global stats matrix.
            for i in range(len(processedheaders) - hpidx):
                pkmnstats[pkmnidx, i] = float(somerow[i + hpidx])
Пример #28
0
class SCEngine:
    '''
    Fast tree-based implementation for indexing, using the
    ``sortedcontainers`` package.

    Parameters
    ----------
    data : Table
        Sorted columns of the original table
    row_index : Column object
        Row numbers corresponding to data columns
    unique : bool (defaults to False)
        Whether the values of the index must be unique
    '''
    def __init__(self, data, row_index, unique=False):
        # One Node per row: key is the tuple of column values for that row,
        # value is the row number.
        node_keys = map(tuple, data)
        self._nodes = SortedList(starmap(Node, zip(node_keys, row_index)))
        self._unique = unique

    def add(self, key, value):
        '''
        Add a key, value pair.

        Raises ValueError if the index is unique and the key already exists.
        '''
        if self._unique and (key in self._nodes):
            # Fixed: '{0:!r}' is an invalid format spec (the !r conversion
            # must precede the colon) and raised "Invalid format specifier"
            # instead of the intended message.
            message = 'duplicate {0!r} in unique index'.format(key)
            raise ValueError(message)
        self._nodes.add(Node(key, value))

    def find(self, key):
        '''
        Find rows corresponding to the given key.
        '''
        # irange(key, key) yields every node whose key equals `key`.
        return [node.value for node in self._nodes.irange(key, key)]

    def remove(self, key, data=None):
        '''
        Remove data from the given key.

        Returns True if anything was removed, False otherwise.
        '''
        if data is not None:
            item = Node(key, data)
            try:
                self._nodes.remove(item)
            except ValueError:
                return False
            return True
        # No specific value given: remove every node with this key.
        items = list(self._nodes.irange(key, key))
        for item in items:
            self._nodes.remove(item)
        return bool(items)

    def shift_left(self, row):
        '''
        Decrement rows larger than the given row.
        '''
        # Mutating node.value is safe: the SortedList is ordered by key.
        for node in self._nodes:
            if node.value > row:
                node.value -= 1

    def shift_right(self, row):
        '''
        Increment rows greater than or equal to the given row.
        '''
        for node in self._nodes:
            if node.value >= row:
                node.value += 1

    def items(self):
        '''
        Return a list of key, data tuples.
        '''
        # Group row values by key, preserving sorted key order.
        result = OrderedDict()
        for node in self._nodes:
            if node.key in result:
                result[node.key].append(node.value)
            else:
                result[node.key] = [node.value]
        return result.items()

    def sort(self):
        '''
        Make row order align with key order.
        '''
        for index, node in enumerate(self._nodes):
            node.value = index

    def sorted_data(self):
        '''
        Return a list of rows in order sorted by key.
        '''
        return [node.value for node in self._nodes]

    def range(self, lower, upper, bounds=(True, True)):
        '''
        Return row values in the given range.

        ``bounds`` maps to SortedList.irange's ``inclusive`` pair.
        '''
        iterator = self._nodes.irange(lower, upper, bounds)
        return [node.value for node in iterator]

    def replace_rows(self, row_map):
        '''
        Replace rows with the values in row_map, dropping rows not in the map.
        '''
        nodes = [node for node in self._nodes if node.value in row_map]
        for node in nodes:
            node.value = row_map[node.value]
        self._nodes.clear()
        self._nodes.update(nodes)

    def __repr__(self):
        return '{0!r}'.format(list(self._nodes))
Пример #29
0
class SCEngine:
    '''
    Fast tree-based implementation for indexing, using the
    ``sortedcontainers`` package.

    Parameters
    ----------
    data : Table
        Sorted columns of the original table
    row_index : Column object
        Row numbers corresponding to data columns
    unique : bool (defaults to False)
        Whether the values of the index must be unique
    '''
    def __init__(self, data, row_index, unique=False):
        # One Node per row: key is the tuple of column values for that row,
        # value is the row number.
        node_keys = map(tuple, data)
        self._nodes = SortedList(starmap(Node, zip(node_keys, row_index)))
        self._unique = unique

    def add(self, key, value):
        '''
        Add a key, value pair.

        Raises ValueError if the index is unique and the key already exists.
        '''
        if self._unique and (key in self._nodes):
            # Fixed: f'{key:!r}' is an invalid format spec (the !r conversion
            # must precede the colon) and raised "Invalid format specifier"
            # at runtime instead of the intended message.
            message = f'duplicate {key!r} in unique index'
            raise ValueError(message)
        self._nodes.add(Node(key, value))

    def find(self, key):
        '''
        Find rows corresponding to the given key.
        '''
        # irange(key, key) yields every node whose key equals `key`.
        return [node.value for node in self._nodes.irange(key, key)]

    def remove(self, key, data=None):
        '''
        Remove data from the given key.

        Returns True if anything was removed, False otherwise.
        '''
        if data is not None:
            item = Node(key, data)
            try:
                self._nodes.remove(item)
            except ValueError:
                return False
            return True
        # No specific value given: remove every node with this key.
        items = list(self._nodes.irange(key, key))
        for item in items:
            self._nodes.remove(item)
        return bool(items)

    def shift_left(self, row):
        '''
        Decrement rows larger than the given row.
        '''
        # Mutating node.value is safe: the SortedList is ordered by key.
        for node in self._nodes:
            if node.value > row:
                node.value -= 1

    def shift_right(self, row):
        '''
        Increment rows greater than or equal to the given row.
        '''
        for node in self._nodes:
            if node.value >= row:
                node.value += 1

    def items(self):
        '''
        Return a list of key, data tuples.
        '''
        # Group row values by key, preserving sorted key order.
        result = OrderedDict()
        for node in self._nodes:
            if node.key in result:
                result[node.key].append(node.value)
            else:
                result[node.key] = [node.value]
        return result.items()

    def sort(self):
        '''
        Make row order align with key order.
        '''
        for index, node in enumerate(self._nodes):
            node.value = index

    def sorted_data(self):
        '''
        Return a list of rows in order sorted by key.
        '''
        return [node.value for node in self._nodes]

    def range(self, lower, upper, bounds=(True, True)):
        '''
        Return row values in the given range.

        ``bounds`` maps to SortedList.irange's ``inclusive`` pair.
        '''
        iterator = self._nodes.irange(lower, upper, bounds)
        return [node.value for node in iterator]

    def replace_rows(self, row_map):
        '''
        Replace rows with the values in row_map, dropping rows not in the map.
        '''
        nodes = [node for node in self._nodes if node.value in row_map]
        for node in nodes:
            node.value = row_map[node.value]
        self._nodes.clear()
        self._nodes.update(nodes)

    def __repr__(self):
        # Abbreviate long node lists to keep the repr readable.
        if len(self._nodes) > 6:
            nodes = list(self._nodes[:3]) + ['...'] + list(self._nodes[-3:])
        else:
            nodes = self._nodes
        nodes_str = ', '.join(str(node) for node in nodes)
        return f'<{self.__class__.__name__} nodes={nodes_str}>'