Example #1
def signal_crosses(short_moving_averages, long_moving_averages):
    short_moving_averages = SortedDict(short_moving_averages)
    long_moving_averages = SortedDict(long_moving_averages)

    short_len = len(short_moving_averages.values())
    long_len  = len(long_moving_averages.values())

    if(short_len != long_len):
        print "[Error] signal_crosses: inputs must be same size"
        return {}

    signal_crosses = {}
    last_diff_dir = 0
    for date, short_average in short_moving_averages.items():
        long_average = long_moving_averages[date]
        diff = short_average - long_average

        if(last_diff_dir == 0):
            signal_crosses[date] = HOLD
            if(diff != 0):
                last_diff_dir = sign(diff)
            continue

        if(sign(diff) != last_diff_dir):
            signal_crosses[date] = BUY if last_diff_dir < 0 else SELL
            last_diff_dir = -last_diff_dir
        else:
            signal_crosses[date] = HOLD

    return SortedDict(signal_crosses)
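A minimal driver for the cross-over detector above, assuming signal_crosses is in scope; the BUY/SELL/HOLD constants, the sign() helper and the sample averages are hypothetical stand-ins for names the snippet does not define.

BUY, SELL, HOLD = 'buy', 'sell', 'hold'   # assumed constants

def sign(x):
    # assumed helper: -1, 0 or +1 depending on the sign of x
    return (x > 0) - (x < 0)

short_ma = {'2024-01-01': 10.0, '2024-01-02': 12.0, '2024-01-03': 9.0}
long_ma  = {'2024-01-01': 11.0, '2024-01-02': 11.0, '2024-01-03': 11.0}

# The short average starts below the long one, crosses above it on day 2
# and back below on day 3, so the expected signals are HOLD, BUY, SELL.
print(dict(signal_crosses(short_ma, long_ma)))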
Example #2
 def get_sort_vector( self, col, order, key_func = None, filter_func = None ) :
     if filter_func : # is not None,
         # create a sort vector from scratch, filtered
         getter_func = self._make_key_getter( col )
         sorted_dict = SortedDict( key_func )
         for j in range( len( self.vocab ) ) :
             if filter_func( self.vocab_kview[j], self.vocab_vview[j][1] ) :
                 k = getter_func( j )
                 sorted_dict[ k ] = j
         vector = sorted_dict.values()
         if order != Qt.AscendingOrder :
             vector = [j for j in reversed( vector ) ]
     else : # no filter_func, try to reuse a cached vector
         vector = self.sort_up_vectors[ col ]
         if not vector or key_func is not self.sort_key_funcs[ col ] :
             # there is no ascending vector for this column, or there
             # is one but it was made with a different key_func.
             getter_func = self._make_key_getter( col )
             sorted_dict = SortedDict( key_func )
             for j in range( len( self.vocab ) ) :
                 k = getter_func( j )
                 sorted_dict[ k ] = j
             vector = self.sort_up_vectors[ col ] = sorted_dict.values()
             self.sort_key_funcs[ col ] = key_func
         if order != Qt.AscendingOrder :
             # what is wanted is a descending order vector, do we have one?
             if self.sort_down_vectors[ col ] is None :
                 # no, so create one from the asc. vector we now have
                 self.sort_down_vectors[ col ] = [ j for j in reversed( vector ) ]
             # yes we do (now)
             vector = self.sort_down_vectors[ col ]
     # one way or another, vector is a sort vector
     # note the actual word count available through that vector
     self.active_word_count = len(vector)
     return vector
Example #3
 def sort( self, col, order ) :
     self.active_sort_vector = []
     if 0 == len(self.message_tuples) : # nothing to display
         return
     self.layoutAboutToBeChanged.emit([],QAbstractItemModel.VerticalSortHint)
     # treat columns 0 and 1 the same
     if col : # is 1 or 2
         col -= 1 # make it 0 or 1
     # we need an ascending vector in all cases.
     vector = self.sort_vectors_ascending[ col ]
     if vector is None : # we need to create the ascending vector
         sorted_dict = SortedDict()
         for j in range( len( self.message_tuples ) ) :
             line_col_msg_tuple = self.message_tuples[ j ]
             if col : # is 1, meaning sort on messages
                 key = line_col_msg_tuple[2]+line_col_msg_tuple[0]
             else : # col is 0, sort on line#+col#
                 key = line_col_msg_tuple[0]+line_col_msg_tuple[1]
             key += str(j) # ensure uniqueness
             sorted_dict[key] = j
         vector = self.sort_vectors_ascending[ col ] = sorted_dict.values()
     # vector now has an ascending sort vector which is cached..
     if order == Qt.DescendingOrder : # ..but we need the descending one
         if self.sort_vectors_descending[ col ] is None : # we need to make it
             self.sort_vectors_descending[ col ] = [ j for j in reversed( vector ) ]
         vector = self.sort_vectors_descending[ col ]
     self.active_sort_vector = vector
     self.layoutChanged.emit([],QAbstractItemModel.VerticalSortHint)
Example #4
class ProductReport(object):

    """Read overview page of one job group and generate a report for the product."""

    def __init__(self, browser, job_group_url, root_url, args):
        """Construct a product report object with options."""
        self.args = args
        self.job_group_url = job_group_url
        self.group = job_group_url.split('/')[-1]
        current_url, previous_url = get_build_urls_to_compare(browser, job_group_url, args.builds, args.against_reviewed, args.running_threshold)
        # read last finished
        current_details = browser.get_soup(current_url)
        previous_details = browser.get_soup(previous_url)
        for details in current_details, previous_details:
            assert sum(int(badge.text) for badge in details.find_all(class_='badge')) > 0, \
                "invalid page with no test results found, make sure you specified valid builds (leading zero missing?)"
        current_summary = parse_summary(current_details)
        previous_summary = parse_summary(previous_details)

        changes = {k: v - previous_summary.get(k, 0) for k, v in iteritems(current_summary) if k != 'none' and k != 'incomplete'}
        log.info("Changes since last build:\n\t%s" % '\n\t'.join("%s: %s" % (k, v) for k, v in iteritems(changes)))

        self.build = get_build_nr(current_url)
        self.ref_build = get_build_nr(previous_url)

        # for each architecture iterate over all
        cur_archs, prev_archs = (set(arch.text for arch in details.find_all('th', id=re.compile('flavor_'))) for details in [current_details, previous_details])
        archs = cur_archs
        if args.arch:
            assert args.arch in cur_archs, "Selected arch {} was not found in test results {}".format(args.arch, cur_archs)
            archs = [args.arch]
        self.missing_archs = sorted(prev_archs - cur_archs)
        if self.missing_archs:
            log.info("%s missing completely from current run: %s" %
                     (pluralize(len(self.missing_archs), "architecture is", "architectures are"), ', '.join(self.missing_archs)))

        # create arch reports
        self.reports = SortedDict()
        progress_browser = progress_browser_factory(args) if args.query_issue_status else None
        bugzilla_browser = bugzilla_browser_factory(args) if args.query_issue_status else None
        for arch in sorted(archs):
            results = get_arch_state_results(arch, current_details, previous_details, args.output_state_results)
            self.reports[arch] = ArchReport(arch, results, args, root_url, progress_browser, bugzilla_browser, browser)

    def __str__(self):
        """Return report for product."""
        now_str = datetime.datetime.now().strftime('%Y-%m-%d - %H:%M')
        missing_archs_str = '\n * **Missing architectures**: %s' % ', '.join(self.missing_archs) if self.missing_archs else ''

        build_str = self.build
        if self.args.verbose_test and self.args.verbose_test > 1:
            build_str += ' (reference %s)' % self.ref_build

        openqa_review_report_product = openqa_review_report_product_template.substitute({
            'now': now_str,
            'build': build_str,
            'common_issues': common_issues(missing_archs_str, self.args.show_empty),
            'arch_report': '<hr>'.join(map(str, self.reports.values()))
        })
        return openqa_review_report_product
Example #5
def simulation(prices, signal_crosses, budget):
    simulation = {}
    prices = SortedDict(prices)
    cash_on_hand = budget
    shares = 0
    for date, price in prices.items():
        signal = signal_crosses[date]
        if(signal == SELL):
            shares += cash_on_hand / price
            cash_on_hand = 0
        elif(signal == BUY):
            cash_on_hand += shares * price
            shares = 0
        simulation[date] = { 'shares': shares, 'cash_on_hand': cash_on_hand }
    final_value = max(cash_on_hand, shares * prices.values()[-1])
    earnings = final_value - budget
    return simulation, earnings
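The final-value line above relies on SortedDict's values() view being indexable, so prices.values()[-1] is the price on the latest date. A small standalone sketch of that behaviour, with made-up prices:

from sortedcontainers import SortedDict

prices = SortedDict({'2024-01-02': 101.0, '2024-01-01': 100.0, '2024-01-03': 99.5})
# Keys are kept in sorted (here: chronological) order, and the values()
# view supports positional indexing, so [-1] is the most recent price.
assert list(prices.keys()) == ['2024-01-01', '2024-01-02', '2024-01-03']
assert prices.values()[-1] == 99.5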
Example #6
class ImageFlow(QtCore.QObject):
    # _dataPosChanged = QtCore.pyqtSignal(int)

    def __init__(self):
        super().__init__()  # QObject subclasses must initialize the base class
        self.processors = SortedDict()


    def add_processor(self,processor):
        self.processors[len(self.processors)] = processor

    def apply(self,data):
        if len(self.processors) == 0:
            return data

        data = data.copy()
        for p in self.processors.values():
            data = p.apply(data)

        return data
Example #7
def test_valuesview():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()

    assert len(values) == 13
    assert 0 in values
    assert list(values) == [pos for val, pos in mapping[:13]]
    assert values[0] == 0
    assert values[-3:] == [10, 11, 12]
    assert list(reversed(values)) == list(reversed(range(13)))
    assert values.index(5) == 5
    assert values.count(10) == 1

    temp.update(mapping[13:])

    assert len(values) == 26
    assert 25 in values
    assert list(values) == [pos for val, pos in mapping]

    values = SortedDict(mapping[:2]).values()
    assert repr(values) == "SortedValuesView(SortedDict({'a': 0, 'b': 1}))"
Example #8
def plotWidth(dwdictX,fname,nameX,mX,cuts):
   sorted_dwdictX = SortedDict(dwdictX)
   n = len(sorted_dwdictX)-1
   x = array('d',sorted_dwdictX.keys())
   y = array('d',sorted_dwdictX.values())
   gwX = TGraph(n,x,y)
   gwX.SetName("gwX")
   gwX.SetTitle("")
   gwX.GetXaxis().SetTitle("tan#beta")
   gwX.GetYaxis().SetTitle("#Gamma_{#it{"+nameX+"}}/#it{m}_{#it{"+nameX+"}} [%]")
   gwX.SetLineColor(ROOT.kBlack)
   gwX.SetMarkerColor(ROOT.kBlack)
   gwX.SetMarkerStyle(20)
   gwX.SetMarkerSize(0.5)

   ptxt = TPaveText(0.62,0.70,0.87,0.87,"NDC")
   ptxt.SetFillStyle(4000) #will be transparent
   ptxt.SetFillColor(0)
   ptxt.SetTextFont(42)
   ptxt.SetBorderSize(0)
   ptxt.AddText("sin(#beta-#alpha)=1")
   ptxt.AddText("#it{m}_{#it{"+nameX+"}}="+str(mX)+" GeV")

   c = TCanvas("c","c",600,600)
   c.cd()
   c.SetLogx()
   c.SetLogy()
   c.SetGridx()
   c.SetGridy()
   c.SetTicks(1,1)
   c.Draw()
   # gwX.Draw("p")
   gwX.Draw()
   ptxt.Draw("same")
   c.Modified()
   c.Update()
   c.SaveAs(fname)
Example #9
class ProductReport(object):
    """Read overview page of one job group and generate a report for the product."""
    def __init__(self, browser, job_group_url, root_url, args):
        """Construct a product report object with options."""
        self.args = args
        self.job_group_url = job_group_url
        self.group = job_group_url.split('/')[-1]
        current_url, previous_url = get_build_urls_to_compare(
            browser, job_group_url, args.builds, args.against_reviewed,
            args.running_threshold)
        # read last finished
        current_details = browser.get_soup(current_url)
        previous_details = browser.get_soup(previous_url)
        for details in current_details, previous_details:
            assert sum(int(badge.text) for badge in details.find_all(class_='badge')) > 0, \
                "invalid page with no test results found, make sure you specified valid builds (leading zero missing?)"
        current_summary = parse_summary(current_details)
        previous_summary = parse_summary(previous_details)

        changes = {
            k: v - previous_summary.get(k, 0)
            for k, v in iteritems(current_summary)
            if k != 'none' and k != 'incomplete'
        }
        log.info("Changes since last build:\n\t%s" %
                 '\n\t'.join("%s: %s" % (k, v) for k, v in iteritems(changes)))

        self.build = get_build_nr(current_url)
        self.ref_build = get_build_nr(previous_url)

        # for each architecture iterate over all
        cur_archs, prev_archs = (
            set(arch.text
                for arch in details.find_all('th', id=re.compile('flavor_')))
            for details in [current_details, previous_details])
        archs = cur_archs
        if args.arch:
            assert args.arch in cur_archs, "Selected arch {} was not found in test results {}".format(
                args.arch, cur_archs)
            archs = [args.arch]
        self.missing_archs = sorted(prev_archs - cur_archs)
        if self.missing_archs:
            log.info("%s missing completely from current run: %s" % (pluralize(
                len(self.missing_archs), "architecture is",
                "architectures are"), ', '.join(self.missing_archs)))

        # create arch reports
        self.reports = SortedDict()
        progress_browser = progress_browser_factory(
            args) if args.query_issue_status else None
        bugzilla_browser = bugzilla_browser_factory(
            args) if args.query_issue_status else None
        for arch in sorted(archs):
            results = get_arch_state_results(arch, current_details,
                                             previous_details,
                                             args.output_state_results)
            self.reports[arch] = ArchReport(arch, results, args, root_url,
                                            progress_browser, bugzilla_browser,
                                            browser)

    def __str__(self):
        """Return report for product."""
        now_str = datetime.datetime.now().strftime('%Y-%m-%d - %H:%M')
        missing_archs_str = '\n * **Missing architectures**: %s' % ', '.join(
            self.missing_archs) if self.missing_archs else ''

        build_str = self.build
        if self.args.verbose_test and self.args.verbose_test > 1:
            build_str += ' (reference %s)' % self.ref_build

        openqa_review_report_product = openqa_review_report_product_template.substitute(
            {
                'now':
                now_str,
                'build':
                build_str,
                'common_issues':
                common_issues(missing_archs_str, self.args.show_empty),
                'arch_report':
                '<hr>'.join(map(str, self.reports.values()))
            })
        return openqa_review_report_product
Example #10
class FederationRemoteSendQueue(object):
    """A drop in replacement for FederationSender"""

    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> list[user_id]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = SortedDict()

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,),
                       "", [], lambda: len(queue))

        for queue_name in [
            "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
            "edus", "device_messages", "pos_time", "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = set(
                user_id
                for uids in self.presence_changed.values()
                for user_id in uids
            )

            keys = self.presence_destinations.keys()
            i = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_destinations[key]

            user_ids.update(
                user_id for user_id, _ in self.presence_destinations.values()
            )

            to_del = [
                user_id for user_id in self.presence_map if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            to_del = [edu_key for edu_key in self.keyed_edu if edu_key not in live_keys]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def build_and_send_edu(self, destination, edu_type, content, key=None):
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_read_receipt(self, receipt):
        """As per FederationSender

        Args:
            receipt (synapse.types.ReadReceipt):
        """
        # nothing to do here: the replication listener will handle it.
        pass

    def send_presence(self, states):
        """As per FederationSender

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so lets always just
        # filter here just in case.
        local_states = list(filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update({state.user_id: state for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_presence_to_destinations(self, states, destinations):
        """As per FederationSender

        Args:
            states (list[UserPresenceState])
            destinations (list[str])
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map.update({state.user_id: state for state in states})
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per FederationSender"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, token):
        self._clear_queue_before_pos(token)

    def get_replication_rows(self, from_token, to_token, limit, federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so lets delete everything its
        # acknowledged its seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(
                state=self.presence_map[user_id],
            )))

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((pos, PresenceDestinationsRow(
                state=self.presence_map[user_id],
                destinations=list(dests),
            )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos, KeyedEduRow(
                key=edu_key,
                edu=self.keyed_edu[(destination, edu_key)],
            )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(
                destination=destination,
            )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
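The pruning pattern used throughout _clear_queue_before_pos, isolated into a runnable sketch (stream positions and payloads are made up): bisect_left finds how many keys fall before the cut-off, and slicing the keys view yields a plain list that is safe to iterate while deleting.

from sortedcontainers import SortedDict

queue = SortedDict({1: 'a', 3: 'b', 7: 'c', 9: 'd'})   # stream position -> payload
position_to_delete = 7

keys = queue.keys()
i = queue.bisect_left(position_to_delete)
for key in keys[:i]:            # the slice is a list copy, so deletion is safe
    del queue[key]

assert list(queue.items()) == [(7, 'c'), (9, 'd')]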
Example #11
def save_polygon(polygon, all_metadata):
    d = SortedDict([(m,'') for m in all_metadata])
    d.update(polygon['properties'])
    return d.values()
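What save_polygon relies on: seeding a SortedDict with empty strings gives every row the same key-sorted column order, regardless of which properties a particular polygon actually has. A sketch with hypothetical metadata field names:

from sortedcontainers import SortedDict

all_metadata = ['name', 'area', 'id']          # hypothetical field names
d = SortedDict([(m, '') for m in all_metadata])
d.update({'id': 42, 'name': 'lake'})           # this polygon has no 'area'

assert list(d.keys()) == ['area', 'id', 'name']
assert list(d.values()) == ['', 42, 'lake']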
Example #12
    def validate(self,
                 protocol: str,
                 subset: str = 'development',
                 every: int = 1,
                 start: Union[int, Literal['last']] = 1,
                 end: Union[int, Literal['last']] = 100,
                 chronological: bool = False,
                 device: Optional[torch.device] = None,
                 batch_size: int = 32,
                 n_jobs: int = 1,
                 **kwargs):

        # use last available epoch as starting point
        if start == 'last':
            start = self.get_number_of_epochs() - 1

        # use last available epoch as end point
        if end == 'last':
            end = self.get_number_of_epochs() - 1

        criterion = self.validation_criterion(protocol, **kwargs)

        validate_dir = Path(
            self.VALIDATE_DIR.format(
                train_dir=self.train_dir_,
                _criterion=f'_{criterion}' if criterion is not None else '',
                protocol=protocol,
                subset=subset))

        params_yml = validate_dir / 'params.yml'

        validate_dir.mkdir(parents=True, exist_ok=True)
        writer = SummaryWriter(log_dir=str(validate_dir), purge_step=start)

        self.validate_dir_ = validate_dir

        validation_data = self.validate_init(protocol, subset=subset)

        if n_jobs > 1:
            self.pool_ = multiprocessing.Pool(n_jobs)

        progress_bar = tqdm(unit='iteration')

        for i, epoch in enumerate(
                self.validate_iter(start=start,
                                   end=end,
                                   step=every,
                                   chronological=chronological)):

            # {'metric': 'detection_error_rate',
            #  'minimize': True,
            #  'value': 0.9,
            #  'pipeline': ...}
            details = self.validate_epoch(epoch,
                                          validation_data,
                                          protocol=protocol,
                                          subset=subset,
                                          device=device,
                                          batch_size=batch_size,
                                          n_jobs=n_jobs,
                                          **kwargs)

            # initialize
            if i == 0:
                # what is the name of the metric?
                metric = details['metric']
                # should the metric be minimized?
                minimize = details['minimize']
                # epoch -> value dictionary
                values = SortedDict()

                # load best epoch and value from past executions
                if params_yml.exists():
                    with open(params_yml, 'r') as fp:
                        params = yaml.load(fp, Loader=yaml.SafeLoader)
                    best_epoch = params['epoch']
                    best_value = params[metric]
                    values[best_epoch] = best_value

            # metric value for current epoch
            values[epoch] = details['value']

            # send value to tensorboard
            writer.add_scalar(f'validate/{protocol}.{subset}/{metric}',
                              values[epoch],
                              global_step=epoch)

            # keep track of best value so far
            if minimize:
                best_epoch = values.iloc[np.argmin(values.values())]
                best_value = values[best_epoch]

            else:
                best_epoch = values.iloc[np.argmax(values.values())]
                best_value = values[best_epoch]

            # if current epoch leads to the best metric so far
            # store both epoch number and best pipeline parameter to disk
            if best_epoch == epoch:

                best = {
                    metric: best_value,
                    'epoch': epoch,
                }
                if 'pipeline' in details:
                    pipeline = details['pipeline']
                    best['params'] = pipeline.parameters(instantiated=True)
                with open(params_yml, mode='w') as fp:
                    fp.write(yaml.dump(best, default_flow_style=False))

                # create/update zip file for later upload to torch.hub
                hub_zip = create_zip(validate_dir)

            # progress bar
            desc = (f'{metric} | '
                    f'Epoch #{best_epoch} = {100 * best_value:g}% (best) | '
                    f'Epoch #{epoch} = {100 * details["value"]:g}%')
            progress_bar.set_description(desc=desc)
            progress_bar.update(1)
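The best-epoch bookkeeping above uses values.iloc[...], an older alias for the keys view; the same lookup with the non-deprecated spelling, on made-up metric values:

import numpy as np
from sortedcontainers import SortedDict

values = SortedDict({1: 0.30, 2: 0.25, 3: 0.40})   # epoch -> metric value
best_epoch = values.keys()[int(np.argmin(values.values()))]
assert best_epoch == 2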
Example #13
class ReadCounter(object):
    def __init__(self):
        from sortedcontainers import SortedDict
        self.reads = SortedDict()
        self.reads[b''] = [0, 0]

        self.read_counts = {}
        self.hit_count=0

    def process(self, transaction_info):
        for get in transaction_info.gets:
            self._insert_read(get.key, None)
        for get_range in transaction_info.get_ranges:
            self._insert_read(get_range.key_range.start_key, get_range.key_range.end_key)

    def _insert_read(self, start_key, end_key):
        self.read_counts.setdefault((start_key, end_key), 0)
        self.read_counts[(start_key, end_key)] += 1

        self.reads.setdefault(start_key, [0, 0])[0] += 1
        if end_key is not None:
            self.reads.setdefault(end_key, [0, 0])[1] += 1
        else:
            self.reads.setdefault(start_key+b'\x00', [0, 0])[1] += 1

    def get_total_reads(self):
        return sum([v for v in self.read_counts.values()])

    @staticmethod
    def matches_filter(addresses, required_addresses):
        for addr in required_addresses:
            if addr not in addresses:
                return False
        return True

    def get_top_k_reads(self, num, filter_addresses, shard_finder=None):
        count_pairs = sorted([(v, k) for (k, v) in self.read_counts.items()], reverse=True, key=lambda item: item[0])
        if not filter_addresses:
            count_pairs = count_pairs[0:num]

        if shard_finder:
            results = []
            for (count, (start, end)) in count_pairs:
                results.append((start, end, count, shard_finder.get_addresses_for_key(start)))

            shard_finder.wait_for_shard_addresses(results, 0, 3)

            if filter_addresses:
                filter_addresses = set(filter_addresses)
                results = [r for r in results if filter_addresses.issubset(set(r[3]))][0:num]
        else:
            results = [(start, end, count) for (count, (start, end)) in count_pairs[0:num]]

        return results

    def get_range_boundaries(self, num_buckets, shard_finder=None):
        total = sum([start_count for (start_count, end_count) in self.reads.values()])
        range_size = total // num_buckets
        output_range_counts = []

        if total == 0:
            return output_range_counts

        def add_boundary(start, end, started_count, total_count):
            if shard_finder:
                shard_count = shard_finder.get_shard_count(start, end)
                if shard_count == 1:
                    addresses = shard_finder.get_addresses_for_key(start)
                else:
                    addresses = None
                output_range_counts.append((start, end, started_count, total_count, shard_count, addresses))
            else:
                output_range_counts.append((start, end, started_count, total_count, None, None))

        this_range_start_key = None
        last_end = None
        open_count = 0
        opened_this_range = 0
        count_this_range = 0

        for (start_key, (start_count, end_count)) in self.reads.items():
            open_count -= end_count

            if opened_this_range >= range_size:
                add_boundary(this_range_start_key, start_key, opened_this_range, count_this_range)
                count_this_range = open_count
                opened_this_range = 0
                this_range_start_key = None

            count_this_range += start_count
            opened_this_range += start_count
            open_count += start_count

            if count_this_range > 0 and this_range_start_key is None:
                this_range_start_key = start_key

            if end_count > 0:
                last_end = start_key

        if last_end is None:
            last_end = b'\xff'
        if count_this_range > 0:
            add_boundary(this_range_start_key, last_end, opened_this_range, count_this_range)

        shard_finder.wait_for_shard_addresses(output_range_counts, 0, 5)
        return output_range_counts
Example #14
class Node(BaseNode):
    def __init__(self, *args, **kwargs):
        self.rest = None
        self.offset = None

        super().__init__(*args, **kwargs)

    def _select(self, key):
        """
        Selects the bucket the key should belong to.
        """

        # If the key is smaller than the min or larger than the max, immediately return.
        if key < min(self.bucket):
            return self.rest

        elif key >= max(self.bucket):
            return self.bucket.values()[-1]

        # Else find the correct node
        for k, v in reversed(list(self.bucket.items())):
            if k <= key:
                return v

        return self.rest

    def _insert(self, key, value):
        """
        Recursively inserts the key and value by selecting the bucket the key
        should belong to, and inserting the key and value into that back. If the
        node has been split, it inserts the key of the newly created node into
        the bucket of this node.
        """

        result = self._select(key)._insert(key, value)
        self.changed = True

        if result is None:
            return

        key, other = result
        return super()._insert(key, other)

    def _split(self):
        other = LazyNode(node=Node(tree=self.tree, changed=True),
            tree=self.tree)
        #other = Node(self.tree)

        values = self.bucket.items()
        self.bucket = SortedDict(values[:len(values) // 2])
        other.bucket = SortedDict(values[len(values) // 2:])

        key, value = other.bucket.popitem(last=False)
        other.rest = value

        return (key, other)

    def _commit(self):
        self.rest._commit()

        for child in self.bucket.values():
            child._commit()

        data = packb({
           'rest': self.rest.offset,
           'values': {k: v.offset for k, v in self.bucket.items()}
        })

        return self.tree.store.write(data)

    def __getitem__(self, key):
        return self._select(key)[key]

    def __len__(self):

        print(len(self.rest))
        print (self.bucket.values())

        return sum([len(child) for child in self.bucket.values()]) + len(self.rest)

    def __iter__(self):
        for key in self.rest:
            yield key

        for child in self.bucket.values():
            for key in child:
                yield key
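The split step in _split above, in isolation: SortedDict.items() returns an indexable, sliceable view, so the bucket can be cut in half by position (toy keys below). As an aside, popitem(last=False) in the snippet is the older sortedcontainers signature; newer releases take an index argument instead.

from sortedcontainers import SortedDict

bucket = SortedDict({'a': 1, 'b': 2, 'c': 3, 'd': 4})
items = bucket.items()                      # indexable, sliceable view
left  = SortedDict(items[:len(items) // 2])
right = SortedDict(items[len(items) // 2:])

assert list(left) == ['a', 'b'] and list(right) == ['c', 'd']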
Example #15
class Topics:
    """
A class that manages a collection of `Topic`s.

    """
    def __init__(self):
        self.logger = getLogger('topics')
        self.logger.info('started session')
        self.clear()

    def clear(self):
        self.logger.info('Cleared all topics and received data')
        self.topic_list = SortedDict()
        self.transfers = dict()

    def create(self, topic, source='remote'):
        # Create the topic if it doesn't exist already
        if not topic in self.topic_list:
            self.topic_list[topic] = Topic(topic,source=source)
            self.logger.info('new:topic ' + topic)

    def process(self, topic, payload, options=None):
        # Create the topic if it doesn't exist already
        self.create(topic)

        # Add the new sample
        self.topic_list[topic].new_sample(payload,options)

        # logging
        if options:
            self.logger.debug('new sample | {0} [{1}] {2}'.format(topic, options['index'], payload))
        else:
            self.logger.debug('new sample | {0} {1}'.format(topic, payload))

        # If there is an active transfer, transfer received data to the queue
        if topic in self.transfers:
            # If transfer requires indexed data, check there is an index
            if self.transfers[topic]['type'] == 'indexed' and options is not None:
                x = options['index']
                self.transfers[topic]['queue'].put([x, payload])
            # For linear data, provide sample id for x and payload for y
            elif self.transfers[topic]['type'] == 'linear':
                x = self.transfers[topic]['lastindex']
                self.transfers[topic]['queue'].put([x, payload])
                self.transfers[topic]['lastindex'] += 1

    def ls(self,source='remote'):
        if source is None:
            return sorted(self.topic_list.keys())  # keys are the topic names themselves
        else:
            return sorted([t.name for t in self.topic_list.values() if t.source == source])

    def samples(self,topic,amount=1):
        if not topic in self.topic_list:
            return None

        if amount == 0 or amount is None:
            return self.topic_list[topic].raw

        return self.topic_list[topic].raw[-amount:]

    def count(self,topic):
        if not topic in self.topic_list:
            return 0

        return len(self.topic_list[topic].raw)

    def exists(self,topic):
        return topic in self.topic_list

    def transfer(self, topic, queue, transfer_type = "linear"):
        # If the topic data is not already transfered to some queue
        if not topic in self.transfers:
            self.transfers[topic] = dict()
            self.transfers[topic]['queue'] = queue
            self.transfers[topic]['lastindex'] = 0
            self.transfers[topic]['type'] = transfer_type

            self.logger.info('start transfer | {0}'.format(topic))

            # If there is already existing data under the topic
            if topic in self.topic_list:
                if transfer_type == 'indexed':
                    for key, value in self.topic_list[topic].indexes.items():
                        queue.put([key, value])
                elif transfer_type == 'linear':
                    for item in self.topic_list[topic].raw:
                        queue.put([self.transfers[topic]['lastindex'], item])
                        self.transfers[topic]['lastindex'] += 1

    def untransfer(self,topic):
        # If the topic data is already transfered to some queue
        if topic in self.transfers:
            # Remove it from the transfer list
            del self.transfers[topic]
            self.logger.info('stop transfer | {0}'.format(topic))

    def intransfer(self,topic):
        return topic in self.transfers

    def has_indexed_data(self,topic):
        return self.topic_list[topic].has_indexed_data()
Example #16
def test6():
    """
    Ordered map: SortedDict
    Docs: http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
    """
    from sortedcontainers import SortedDict
    sd = SortedDict()
    # insert elements
    sd["wxx"] = 21
    sd["hh"] = 18
    sd["other"] = 20
    print(sd)  # SortedDict({'hh': 18, 'other': 20, 'wxx': 21})
    print(sd["wxx"])  # subscripting a missing key would raise KeyError
    print(sd.get("c"))  # get() on a missing key returns None     None
    # convert SortedDict to dict
    print(dict(sd))  # {'hh': 18, 'other': 20, 'wxx': 21}
    # return the first and the last element
    print(sd.peekitem(0))  # tuple, the first element    ('hh', 18)
    print(sd.peekitem())  # tuple, the last element    ('wxx', 21)
    # iteration
    for k, v in sd.items():
        print(k, ':', v, sep="", end=", ")  # sep="" drops the spaces between the printed items
    print()
    for k in sd:  # iterate over keys k, equivalent to: for k in sd.keys():
        print(str(k) + ":" + str(sd[k]), end=", ")
    print()
    for v in sd.values():  # iterate over values v
        print(v, end=", ")
    print()
    # return a key of the map (here: the last one)
    print(sd.peekitem()[0])
    # return a value of the map (here: the last one)
    print(sd.peekitem()[1])
    # check whether a key exists
    print("wxx" in sd)  # True
    # bisect_left() / bisect_right()
    sd["a"] = 1
    sd["c1"] = 2
    sd["c2"] = 4
    print(sd)  # SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})
    print(sd.bisect_left("c1"))  # index of the smallest key >= "c1"    1
    print(sd.bisect_right("c1"))  # index of the smallest key > "c1"    2
    # clear
    sd.clear()
    print(len(sd))  # 0
    print(len(sd) == 0)  # True
    """
    Unordered map: dict
    """
    print("---------------------------------------")
    d = {"c1": 2, "c2": 4, "hh": 18, "wxx": 21, 13: 14, 1: 0}
    print(d["wxx"])  # 21
    print(d[13])  # 14
    d[13] += 1
    print(d[13])  # 15
    d["future"] = "wonderful"  # add a key-value pair to the dict
    del d[1]  # remove the entry for key 1 from dict d
    print("wxx" in d)  # True if key "wxx" is in dict d, otherwise False
    print(d.keys())  # all keys of d  dict_keys(['c1', 'c2', 'hh', 'wxx', 13, 'future'])
    print(d.values())  # all values of d  dict_values([2, 4, 18, 21, 15, 'wonderful'])
    print(d.items())  # dict_items([('c1', 2), ('c2', 4), ('hh', 18), ('wxx', 21), (13, 15), ('future', 'wonderful')])
    for k, v in d.items():  # iterate over k, v
        print(k, ':', v)
    for k in d:  # iterate over keys k, equivalent to: for k in d.keys():
        print(str(k) + ":" + str(d[k]), end=", ")
    print()
    for v in d.values():  # iterate over values v
        print(v, end=", ")
    print()
    # dict functions and methods
    print("---------------------------------------")
    d = {"中国": "北京", "美国": "华盛顿", "法国": "巴黎"}
    print(len(d))  # number of items in dict d  3
    print(d.get("中国", "不存在"))  # value for the key if present, else the default  北京
    print(d.get("中", "不存在"))  # 不存在
    print(d.get("中"))  # None
    d["美国"] = "Washington"  # change the value for a key
    print(d.pop("美国"))  # value for the key if present, removing it from the dict
    print(d.popitem())  # remove and return one key-value pair as a tuple (last inserted in CPython 3.7+)
    d.clear()  # remove all key-value pairs
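Complementing the bisect_left/bisect_right calls above, a short sketch (using the same keys as the example) of two other SortedDict features: irange for key-range iteration and the indexable key/item views.

from sortedcontainers import SortedDict

sd = SortedDict({'a': 1, 'c1': 2, 'c2': 4, 'hh': 18, 'other': 20, 'wxx': 21})

# Iterate over a range of keys (inclusive on both ends by default).
assert list(sd.irange('c1', 'hh')) == ['c1', 'c2', 'hh']

# The views support positional indexing, matching the bisect results above.
assert sd.keys()[0] == 'a'
assert sd.items()[-1] == ('wxx', 21)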
Example #17
class PageWidget(QWidget):
    move_drop_event = pyqtSignal(object, int, int)
    copy_drop_event = pyqtSignal(object, int, int)

    DRAG_MAGIC = 'LiSP_Drag&Drop'

    def __init__(self, rows, columns, *args):
        super().__init__(*args)
        self.setAcceptDrops(True)

        self.__rows = rows
        self.__columns = columns
        self.__widgets = SortedDict()

        self.setLayout(QGridLayout())
        self.layout().setContentsMargins(4, 4, 4, 4)
        self.init_layout()

    def init_layout(self):
        for row in range(0, self.__rows):
            self.layout().setRowStretch(row, 1)
            # item = QSpacerItem(0, 0, QSizePolicy.Minimum, QSizePolicy.Expanding)
            # self.layout().addItem(item, row, 0)

        for column in range(0, self.__columns):
            self.layout().setColumnStretch(column, 1)
            # item = QSpacerItem(0, 0, QSizePolicy.Expanding, QSizePolicy.Minimum)
            # self.layout().addItem(item, 0, column)

    def add_widget(self, widget, row, column):
        self._check_index(row, column)
        if (row, column) not in self.__widgets:
            widget.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Ignored)
            self.__widgets[(row, column)] = widget
            self.layout().addWidget(widget, row, column)
            widget.show()
        else:
            raise IndexError('cell {} already used'.format((row, column)))

    def take_widget(self, row, column):
        self._check_index(row, column)
        if (row, column) in self.__widgets:
            widget = self.__widgets.pop((row, column))
            widget.hide()
            self.layout().removeWidget(widget)
            return widget
        else:
            raise IndexError('cell {} is empty'.format((row, column)))

    def move_widget(self, o_row, o_column, n_row, n_column):
        widget = self.take_widget(o_row, o_column)
        self.add_widget(widget, n_row, n_column)

    def widget(self, row, column):
        self._check_index(row, column)
        return self.__widgets.get((row, column))

    def index(self, widget):
        for index, f_widget in self.__widgets.items():
            if widget is f_widget:
                return index

        return -1, -1

    def widgets(self):
        return iter(self.__widgets.values())

    def reset(self):
        self.__widgets.clear()

    def dragEnterEvent(self, event):
        if event.mimeData().hasText():
            if event.mimeData().text() == PageWidget.DRAG_MAGIC:
                event.accept()
            else:
                event.ignore()
        else:
            event.ignore()

    def dragLeaveEvent(self, event):
        event.ignore()

    def dropEvent(self, event):
        row, column = self._event_index(event)
        if self.layout().itemAtPosition(row, column) is None:
            if qApp.keyboardModifiers() == Qt.ControlModifier:
                event.setDropAction(Qt.MoveAction)
                event.accept()
                self.move_drop_event.emit(event.source(), row, column)
            elif qApp.keyboardModifiers() == Qt.ShiftModifier:
                event.setDropAction(Qt.CopyAction)
                self.copy_drop_event.emit(event.source(), row, column)
                event.accept()

        event.ignore()

    def dragMoveEvent(self, event):
        row, column = self._event_index(event)
        if self.layout().itemAtPosition(row, column) is None:
            event.accept()
        else:
            event.ignore()

    def _check_index(self, row, column):
        if not isinstance(row, int):
            raise TypeError('rows index must be integers, not {}'.format(
                row.__class__.__name__))
        if not isinstance(column, int):
            raise TypeError('columns index must be integers, not {}'.format(
                column.__class__.__name__))

        if not 0 <= row < self.__rows or not 0 <= column < self.__columns:
            raise IndexError('index out of bound {}'.format((row, column)))

    def _event_index(self, event):
        # Margins and spacings are equals
        space = self.layout().horizontalSpacing()
        margin = self.layout().contentsMargins().right()

        r_size = (self.height() + margin * 2) // self.__rows + space
        c_size = (self.width() + margin * 2) // self.__columns + space

        row = math.ceil(event.pos().y() / r_size) - 1
        column = math.ceil(event.pos().x() / c_size) - 1

        return row, column
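Why PageWidget keys its SortedDict by (row, column) tuples: tuple keys sort row-major, so widgets() hands widgets back in reading order. A toy sketch:

from sortedcontainers import SortedDict

cells = SortedDict()
cells[(1, 0)] = 'third'
cells[(0, 1)] = 'second'
cells[(0, 0)] = 'first'

# (0, 0) < (0, 1) < (1, 0), so iteration is row-major.
assert list(cells.values()) == ['first', 'second', 'third']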
Example #18
class CacheStore(object):
    class CacheItem(object):
        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            item = self.store[key] if key in self.store else self.CacheItem()
            item.data = data
            item.valid.set()

            if key not in self.store:
                self.store[key] = item
                return True

            return False

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        with self.lock:
            if key in self.store:
                del self.store[key]
                return True

            return False

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        return wrap(list(self.validvalues())).query(*filter, **params)
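CacheStore passes its optional key argument straight to SortedDict, which accepts a key function much like sorted() does; a sketch ordering string keys case-insensitively:

from sortedcontainers import SortedDict

store = SortedDict(str.lower)   # key function, as in SortedDict(key)
store['B'] = 2
store['a'] = 1

assert list(store.keys()) == ['a', 'B']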
Example #19
    if isPhoto(file) :
      try :
        exif = getExif(os.path.join(subdir, file))
        if not cameraIsValid(exif) :
          continue
        # get focal length and convert from rational data type to float
        focalLength = exif[FOCALLENGTH_TAG][0] / exif[FOCALLENGTH_TAG][1]
        # count every focal length occurence in dictionary
        if (focalLength in occurences) :
          occurences[focalLength] = occurences[focalLength] + 1
        else:   # find nearest
          index = occurences.bisect(focalLength)
          greater = occurences.iloc[index]
          smaller = occurences.iloc[index - 1]
          nearestFL = greater if (greater - focalLength < focalLength - smaller) else smaller
          occurences[nearestFL] = occurences[nearestFL] + 1
      except (KeyError, TypeError, IndexError) :
        # there is no focal length info in image exif data (Key/Type/IndexError)
        pass

# plot the graph
position = arange(len(focalLengths)) + .5
barh(position, occurences.values(), align='center', color='#FF0000')
yticks(position, occurences.keys())
xlabel('Occurrences')
ylabel('Focal length')
title('Focal length usage analysis')
grid(True)
show()
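The nearest-focal-length lookup above indexes occurences.iloc[index] and iloc[index - 1] without guarding the ends of the key list; below is a guarded variant of the same idea with made-up focal lengths, using the keys view instead of the deprecated iloc alias.

from sortedcontainers import SortedDict

def nearest_key(sd, value):
    keys = sd.keys()
    idx = sd.bisect_right(value)
    if idx == 0:                 # value below the smallest key
        return keys[0]
    if idx == len(keys):         # value above the largest key
        return keys[-1]
    greater, smaller = keys[idx], keys[idx - 1]
    return greater if greater - value < value - smaller else smaller

occurrences = SortedDict({18.0: 3, 35.0: 5, 50.0: 2})   # focal length -> count
assert nearest_key(occurrences, 40.0) == 35.0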
Example #20
class DotMap(MutableMapping):

    def __init__(self, *args, **kwargs):
        self._map = SortedDict()
        if args:
            d = args[0]
            if type(d) is dict:
                for k, v in self.__call_items(d):
                    if type(v) is dict:
                        v = DotMap(v)
                    self._map[k] = v
        if kwargs:
            for k, v in self.__call_items(kwargs):
                self._map[k] = v

    @staticmethod
    def __call_items(obj):
        if hasattr(obj, 'iteritems') and ismethod(getattr(obj, 'iteritems')):
            return obj.iteritems()
        else:
            return obj.items()

    def items(self):
        return self.iteritems()

    def iteritems(self):
        return self.__call_items(self._map)

    def __iter__(self):
        return self._map.__iter__()

    def __setitem__(self, k, v):
        self._map[k] = v

    def __getitem__(self, k):
        if k not in self._map:
            # automatically extend to new DotMap
            self[k] = DotMap()
        return self._map[k]

    def __setattr__(self, k, v):
        if k == '_map':
            super(DotMap, self).__setattr__(k, v)
        else:
            self[k] = v

    def __getattr__(self, k):
        if k == '_map':
            return self._map
        else:
            return self[k]

    def __delattr__(self, key):
        return self._map.__delitem__(key)

    def __contains__(self, k):
        return self._map.__contains__(k)

    def __str__(self):
        items = []
        for k, v in self.__call_items(self._map):
            items.append('{0}={1}'.format(k, repr(v)))
        out = 'DotMap({0})'.format(', '.join(items))
        return out

    def __repr__(self):
        return str(self)

    def to_dict(self):
        d = {}
        for k, v in self.items():
            if type(v) is DotMap:
                v = v.to_dict()
            d[k] = v
        return d

    def pprint(self):
        pprint(self.to_dict())

    # proper dict subclassing
    def values(self):
        return self._map.values()

    @staticmethod
    def parse_other(other):
        if type(other) is DotMap:
            return other._map
        else:
            return other

    def __cmp__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__cmp__(other)

    def __eq__(self, other):
        other = DotMap.parse_other(other)
        if not isinstance(other, dict):
            return False
        return self._map.__eq__(other)

    def __ge__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__ge__(other)

    def __gt__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__gt__(other)

    def __le__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__le__(other)

    def __lt__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__lt__(other)

    def __ne__(self, other):
        other = DotMap.parse_other(other)
        return self._map.__ne__(other)

    def __delitem__(self, key):
        return self._map.__delitem__(key)

    def __len__(self):
        return self._map.__len__()

    def copy(self):
        return self

    def get(self, key, default=None):
        return self._map.get(key, default)

    def has_key(self, key):
        return key in self._map

    def iterkeys(self):
        return self._map.iterkeys()

    def itervalues(self):
        return self._map.itervalues()

    def keys(self):
        return self._map.keys()

    def pop(self, key, default=None):
        return self._map.pop(key, default)

    def setdefault(self, key, default=None):
        return self._map.setdefault(key, default)

    def viewitems(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewitems()
        else:
            return self._map.items()

    def viewkeys(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewkeys()
        else:
            return self._map.keys()

    def viewvalues(self):
        if version_info.major == 2 and version_info.minor >= 7:
            return self._map.viewvalues()
        else:
            return self._map.values()

    @classmethod
    def fromkeys(cls, seq, value=None):
        d = DotMap()
        d._map = SortedDict.fromkeys(seq, value)
        return d
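A brief usage sketch of the DotMap above (the class itself must be in scope); keys come back sorted because the backing store is a SortedDict, and missing attributes auto-extend to nested DotMaps:

m = DotMap({'b': 2, 'a': 1})
m.c = 3                    # attribute assignment stores a key
m.nested.x = 1             # auto-vivified nested DotMap

assert list(m.keys()) == ['a', 'b', 'c', 'nested']
assert m.to_dict() == {'a': 1, 'b': 2, 'c': 3, 'nested': {'x': 1}}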
Example #21
class Model(object):
  '''
  The model of a Strandbeest. The Model consists of a set of nodes, edges and boundary
  conditions. Each node has a unique name and an x and y position which may change
  whenever the simulation is incremented. Each node introduces two degrees of freedom.
  The edges are specified by the nodes they are connecting. The edges are the push/pull
  rods which connect the nodes with one another. An edge keeps the distance between
  two nodes constant and therefore constrains exactly one degree of freedom in the system.
  '''

  def __init__(self):
    '''
    Constructor
    '''
    self._nodes = SortedDict()
    self._edges = defaultdict(set)

  def addNode(self,name,x,y):
    if not isinstance(name,str  ): raise Exception("The 1st argument must be the node's name as str.")
    if not isinstance(x   ,float): raise Exception("The 2nd argument must be the node's x position as float.")
    if not isinstance(y   ,float): raise Exception("The 3rd argument must be the node's y position as float.")
    if name in self._nodes: raise Exception( 'There already exists a node by the name of "%(name)s"' % locals() )
    self._nodes[name] = x,y
    self.__t = 0.0
    for listener in self.onNodeAddListeners:
      listener(name,x,y)

  def addEdge(self,node1,node2):
    if node1 == node2:
      raise Exception('"node1" cannot be equal to "node2".')
    self._edges[node1].add(node2)
    self._edges[node2].add(node1)
    for listener in self.onEdgeAddListeners:
      listener( min(node1,node2), max(node1,node2) )

  def pos(self,name):
    return self._nodes[name]

  def move(self,name,x,y):
    self._nodes[name] = x,y
    for listener in self.onNodeMoveListeners:
      listener(name,x,y)

  def state(self):
    return fromiter( chain.from_iterable( self._nodes.values() ), float )

  def setState(self,state):
    for i,(x,y) in enumerate( zip(state[::2],state[1::2]) ):
      self.move(self._nodes.keys()[i],x,y)

  @property
  def t(self):
    return self.__t

  def increment(self,dt):
    v = self.v
    t0 = self.__t
    x0 = self.state()
    # https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta_methods#The_Runge.E2.80.93Kutta_method
    k0 = v(x0,           t0)
    k1 = v(x0+k0*(dt/2), t0+dt/2)
    k2 = v(x0+k1*(dt/2), t0+dt/2)
    k3 = v(x0+k2*(dt),   t0+dt)
    self.setState( x0 + dt/6 * (k0+k1+k2+k3) )
    self.__t += dt

  def v(self,x,t):
    lhs = zeros( 2*[len(x)] )
    rhs = zeros( len(x) )
    iRows = iter( range( len(x) ) )
    for start,end in self.edges():
      iStart = 2*self._nodes.index(start)
      iEnd   = 2*self._nodes.index(end)
      iRow = next(iRows)
      dx = x[iEnd+0] - x[iStart+0] 
      dy = x[iEnd+1] - x[iStart+1]
      lhs[iRow,iStart+0] = dx; lhs[iRow,iEnd+0] = -dx
      lhs[iRow,iStart+1] = dy; lhs[iRow,iEnd+1] = -dy
      rhs[iRow] = 0
    for bc in self.bcs:
      bc.addEquations(x,t,iRows,lhs,rhs)
    return linalg.solve(lhs,rhs)

  def nodes(self):
    return self._nodes.iteritems()

  def edges(self):
    for node1,neighbors in self._edges.items():
      for node2 in neighbors:
        if node1 < node2:
          yield node1,node2

  bcs = []

  onEdgeAddListeners = set() # <- FIXME should be a multiset
  onNodeAddListeners = set() # <- FIXME should be a multiset
  onNodeMoveListeners = set() # <- FIXME should be a multiset
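# Usage sketch (added for illustration): building a tiny linkage with the Model
# class above. Node names and coordinates here are invented; positions must be
# floats and edges are named by their endpoint nodes. This only exercises the
# topology API; calling increment() additionally requires the numpy imports and
# boundary conditions assumed elsewhere in the module.
model = Model()
model.addNode('A', 0.0, 0.0)
model.addNode('B', 1.0, 0.0)
model.addNode('C', 0.5, 1.0)
model.addEdge('A', 'B')
model.addEdge('B', 'C')
assert model.pos('B') == (1.0, 0.0)
assert set(model.edges()) == {('A', 'B'), ('B', 'C')}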
Пример #22
0
class FileTable(object):
    """docstring for FileTable"""
    def __init__(self, myip, server):
        super(FileTable, self).__init__()
        self.ring = SortedDict()
        self.hasher = hashlib.sha224
        self.myhash = self.hash(myip)
        self.add_node(myip)

        self.server = server

    def hash(self, key):
        return self.hasher(key).hexdigest()[:-10]

    def hash_at(self, idx):
        idx %= len(self.ring)
        hash = self.ring.iloc[idx]
        return hash

    def add_node(self, ip):
        hash = self.hash(ip)
        self.ring[hash] = {'ip': ip, 'files': []}

        SDFS_LOGGER.info('After adding %s - %s' % (ip, repr(self.ring)))

    def remove_node(self, failed_list):
        start_time = time.time()
        # this is for debug
        flag  = False
            
        # deep copy failed list because it will be reset soon
        ip_list = list(failed_list)

        # change the order of failed node
        # make sure the smaller id node be handled first
        if len(ip_list) == 2:
            if self.ring.index(self.hash(ip_list[0])) == 0 and self.ring.index(self.hash(ip_list[1])) == len(self.ring) - 1:
                ip_list[0], ip_list[1] = ip_list[1], ip_list[0]
            elif self.ring.index(self.hash(ip_list[0])) == self.ring.index(self.hash(ip_list[1])) + 1:
                ip_list[0], ip_list[1] = ip_list[1], ip_list[0]

        for ip in ip_list:
            hash = self.hash(ip)
            idx = self.ring.index(hash)

            # if the node is not the direct successor of the failed node, do nothing
            if len(ip_list) == 2 and ip == ip_list[1] and self.hash_at((idx + 2) % len(self.ring)) == self.myhash:
                continue

            if self.hash_at((idx + 1) % len(self.ring)) == self.myhash or (self.hash_at((idx + 2) % len(self.ring)) == self.myhash and len(ip_list) == 2):
                # this is for debug
                flag = True

                heritage = set(self.ring[hash]['files'])
                my_files = set(self.ring[self.myhash]['files'])
                next_files = set(self.ring[self.hash_at(idx + 2)]['files'])

                # determine which files go to me, to the next node, and to the one after that
                to_me = heritage - my_files
                to_next = (heritage & my_files) - next_files
                to_next_next = heritage & my_files & next_files
                replica_list = [list(to_me), list(to_next), list(to_next_next)]
                
                self.ring[self.myhash]['files'].extend(to_me)

                # handle replica
                dest_ip_to_me = self.ring[self.hash_at(self.ring.index(hash) - 1)]['ip']
                dest_ip_to_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 1)]['ip']
                dest_ip_to_next_next = self.ring[self.hash_at(self.ring.index(self.myhash) + 2)]['ip']
                dest_ip_list = [dest_ip_to_me, dest_ip_to_next, dest_ip_to_next_next]
                
                del self.ring[hash]

                self.server.handle_replica(replica_list, dest_ip_list, ip_list)
            
            else:
                del self.ring[hash]
            
            elapsed_time = time.time() - start_time
            if flag:
                print "It takes", elapsed_time, "to handle replica"

    def lookup(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        ip_list = [self.ring[self.hash_at(idx + i)]['ip'] for i in xrange(3)]
        return ip_list

    def insert(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        for i in xrange(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].append(sdfs_filename)
            
            SDFS_LOGGER.info('Inserted %s to %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def delete(self, sdfs_filename):
        hash = self.hash(sdfs_filename)
        idx = self.ring.bisect_left(hash) if self.ring.bisect_left(hash) < len(self.ring) else 0
        for i in xrange(3):
            node_hash = self.hash_at(idx + i)
            self.ring[node_hash]['files'].remove(sdfs_filename)
            
            SDFS_LOGGER.info('Deleted %s from %s' % (sdfs_filename, self.ring[node_hash]['ip']))

    def update_replica(self, replica_list, dest_ip_list):
        for i in xrange(3):
            self.ring[self.hash(dest_ip_list[i])]['files'] = list(set(self.ring[self.hash(dest_ip_list[i])]['files'] + replica_list[i]))


    def list_my_store(self):
        print '-' * 5 + 'my files are:'
        for f in self.ring[self.myhash]['files']:
            print f,
        print
        print '-' * 5 + 'that is all'

    def list_file_location(self):
        all_files = set()
        for value in self.ring.values():
            all_files.update(set(value['files']))

        for f in all_files:
            print f + ' is stored at ',
            for value in self.ring.values():
                if f in value['files']:
                    print value['ip'],
            print 
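# Standalone sketch (added for illustration): the ring-lookup pattern used by
# FileTable above, restated with a plain SortedDict. A file name is hashed,
# bisect_left finds the first node hash at or after it (wrapping around the
# ring), and the next three ring positions are the replica holders. The hash
# prefix length and the example IPs are arbitrary assumptions.
import hashlib
from sortedcontainers import SortedDict

def _node_hash(key):
    return hashlib.sha224(key.encode('utf-8')).hexdigest()[:16]

ring = SortedDict((_node_hash(ip), ip) for ip in ['10.0.0.1', '10.0.0.2', '10.0.0.3', '10.0.0.4'])

def replica_ips(filename, replicas=3):
    idx = ring.bisect_left(_node_hash(filename))
    # peekitem(i) returns the (hash, node) pair at sorted position i
    return [ring.peekitem((idx + i) % len(ring))[1] for i in range(replicas)]

owners = replica_ips('report.txt')
assert len(owners) == 3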
Пример #23
0
class TreePage(BasePage):
    """
    Page object, implemented with a sorted dict. Who knows what's underneath!
    """

    def __init__(self, *args, **kwargs):
        storage = kwargs.pop("storage", None)
        super(TreePage, self).__init__(*args, **kwargs)
        self._storage = SortedDict() if storage is None else storage

    def keys(self):
        if len(self._storage) == 0:
            return set()
        else:
            return set.union(*(set(range(*self._resolve_range(mo))) for mo in self._storage.values()))

    def replace_mo(self, state, old_mo, new_mo):
        start, end = self._resolve_range(old_mo)
        for key in self._storage.irange(start, end-1):
            val = self._storage[key]
            if val is old_mo:
                #assert new_mo.includes(a)
                self._storage[key] = new_mo

    def store_overwrite(self, state, new_mo, start, end):
        # iterate over each item we might overwrite
        # track our mutations separately since we're in the process of iterating
        deletes = []
        updates = { start: new_mo }

        for key in self._storage.irange(maximum=end-1, reverse=True):
            old_mo = self._storage[key]

            # make sure we aren't overwriting all of an item that overlaps the end boundary
            if end < self._page_addr + self._page_size and end not in updates and old_mo.includes(end):
                updates[end] = old_mo

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if key < start:
                break

            # delete any key that falls within the range
            deletes.append(key)

        #assert all(m.includes(i) for i,m in updates.items())

        # perform mutations
        for key in deletes:
            del self._storage[key]

        self._storage.update(updates)

    def store_underwrite(self, state, new_mo, start, end):
        # track the point that we need to write up to
        last_missing = end - 1
        # track also updates since we can't update while iterating
        updates = {}

        for key in self._storage.irange(maximum=end-1, reverse=True):
            mo = self._storage[key]

            # if the mo stops short of last_missing, fill the uncovered gap after it with new_mo
            if mo.base <= last_missing and not mo.includes(last_missing):
                updates[max(mo.last_addr+1, start)] = new_mo
            last_missing = mo.base - 1

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if last_missing < start:
                break

        # if there are no memory objects <= start, we won't have filled start yet
        if last_missing >= start:
            updates[start] = new_mo

        #assert all(m.includes(i) for i,m in updates.items())

        self._storage.update(updates)

    def load_mo(self, state, page_idx):
        """
        Loads a memory object from memory.

        :param page_idx: the index into the page
        :returns: the memory object covering page_idx, or None if no object covers it
        """

        try:
            key = next(self._storage.irange(maximum=page_idx, reverse=True))
        except StopIteration:
            return None
        else:
            return self._storage[key]

    def load_slice(self, state, start, end):
        """
        Return the memory objects overlapping with the provided slice.

        :param start: the start address
        :param end: the end address (non-inclusive)
        :returns: tuples of (starting_addr, memory_object)
        """
        keys = list(self._storage.irange(start, end-1))
        if not keys or keys[0] != start:
            try:
                key = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                pass
            else:
                if self._storage[key].includes(start):
                    keys.insert(0, key)
        return [(max(start, key), self._storage[key]) for key in keys]

    def _copy_args(self):
        return { 'storage': self._storage.copy() }
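# Standalone sketch (added for illustration): the lookup idea behind load_mo /
# load_slice above, using a SortedDict keyed by start address. The stored
# memory objects are stood in for by plain (start, end) tuples; the real
# memory-object API is not reproduced here.
from sortedcontainers import SortedDict

storage = SortedDict({0x00: (0x00, 0x10), 0x10: (0x10, 0x18), 0x20: (0x20, 0x30)})

def object_at(addr):
    # last object starting at or before addr, kept only if it actually covers addr
    try:
        key = next(storage.irange(maximum=addr, reverse=True))
    except StopIteration:
        return None
    start, end = storage[key]
    return storage[key] if start <= addr < end else None

assert object_at(0x15) == (0x10, 0x18)
assert object_at(0x1c) is None   # falls in the gap between 0x18 and 0x20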
Пример #24
0
class TradesView(object):
    def __init__(self):
        self.pending_offer_by_id = {}
        self.trade_by_id = {}
        self._trades = SortedDict()

    def add_pending(self, offer):
        self.pending_offer_by_id[offer.offer_id] = offer

    def report_completed(self, offer_id, completed_timestamp):
        offer = self.pending_offer_by_id.get(offer_id)
        if offer is None:
            return False

        del self.pending_offer_by_id[offer_id]

        assert isinstance(offer, BasicOffer)
        trade = Trade(offer, completed_timestamp)

        self._trades[(trade.timestamp, offer.offer_id)] = trade
        # inserts in the dict for retrieval by offer_id
        self.trade_by_id[offer.offer_id] = trade
        return offer.offer_id

    def get_trade_by_id(self, offer_id):
        return self.trade_by_id.get(offer_id)

    def get_pending_by_id(self, offer_id):
        return self.pending_offer_by_id.get(offer_id)

    def __len__(self):
        return len(self._trades)

    def __iter__(self):
        return iter(self._trades)

    def trades(self, from_timestamp=None, to_timestamp=None):
        """
        :param from_timestamp: first timestamp to include in result
        :param to_timestamp: first timestamp to exclude from result
        :return: list
        """
        if from_timestamp is None and to_timestamp is None:
            return self._trades.values()

        min_key, max_key = None, None
        if from_timestamp is not None:
            min_key = (from_timestamp, 0)
        if to_timestamp is not None:
            max_key = (to_timestamp, 0)

        # FIXME prevent modifying (from report_completed()) while iterating
        trades = [
            self._trades[key] for key in self._trades.irange(
                minimum=min_key, maximum=max_key, inclusive=(True, False))
        ]
        return list(trades)

    def values(self):
        # returns sorted list of all values
        return self._trades.values()
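# Standalone sketch (added for illustration): the range query used by trades()
# above. Keys are (timestamp, offer_id) tuples, so (t, 0) is the smallest key
# for timestamp t, and inclusive=(True, False) gives a half-open [from, to)
# interval. Timestamps and ids here are made up.
from sortedcontainers import SortedDict

trade_index = SortedDict({(100, 1): 'a', (105, 2): 'b', (110, 0): 'c'})
selected = [trade_index[k]
            for k in trade_index.irange((100, 0), (110, 0), inclusive=(True, False))]
assert selected == ['a', 'b']   # the trade at to_timestamp 110 is excluded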
Пример #25
0
def parallel_for(f, l, *, threads=int(cpu_count()/2), return_=False, return_ordered=True):
    """Applies f to each element of l, in parallel over the specified number of threads

    :param f: The function to apply
    :param l: The iterable to process
    :param threads: The number of threads
    :param return_: True if this is a 'map'-like operation that returns results
    :param return_ordered: True if the order of the results should match the order of the iterable
    :return: Optionally returns the f(l) result, if return_=True
    """
    if threads > 1:
        iteratorlock = threading.Lock()
        exceptions = []
        if return_:
            if return_ordered:
                d = SortedDict()
                i = zip(count(), l.__iter__())
            else:
                d = list()
                i = l.__iter__()
        else:
            i = l.__iter__()

        def runall():
            while True:
                iteratorlock.acquire()
                try:
                    try:
                        if exceptions:
                            return
                        v = next(i)
                    finally:
                        iteratorlock.release()
                except StopIteration:
                    return
                try:
                    if return_:
                        if return_ordered:
                            n, x = v
                            d[n] = f(x)
                        else:
                            d.append(f(v))
                    else:
                        f(v)
                except:
                    e = sys.exc_info()
                    iteratorlock.acquire()
                    try:
                        exceptions.append(e)
                    finally:
                        iteratorlock.release()
        
        threadlist = [threading.Thread(target=runall) for j in range(threads)]
        for t in threadlist:
            t.start()
        for t in threadlist:
            t.join()
        if exceptions:
            a, b, c = exceptions[0]
            raise a(b).with_traceback(c)
        if return_:
            if return_ordered:
                return d.values()
            else:
                return d
    else:
        if return_:
            return [f(v) for v in l]
        else:
            for v in l:
                f(v)
            return
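# Usage sketch (added for illustration): a 'map'-like call of parallel_for
# above. With return_=True and return_ordered=True the results come back in
# input order, because they are collected in a SortedDict keyed by the input
# index. Assumes the function and its imports (threading, itertools.count,
# cpu_count) are in scope; the thread count is an arbitrary choice.
squares = list(parallel_for(lambda x: x * x, range(8), threads=4, return_=True))
assert squares == [0, 1, 4, 9, 16, 25, 36, 49]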
Пример #26
0
class NetworkEventDataset:
    """Collection of 3-channel ZNE streams with traces aligned to a fixed time window about
     seismic P-wave arrival events, *for a given network*.

     Two indexes are provided. One indexes hierarchically by station code and
     event ID, yielding a 3-channel ZNE stream per event, so that you can easily gather all
     traces for a given station by iterating over events.

     The other index indexes hierarchically by event ID and station code, yielding a
     3-channel ZNE stream per station. Using this index you can easily gather all traces
     for a given event across multiple stations.

     Preferably each input trace will already have an 'event_id' attribute in its stats. If
     not, an event ID will be invented based on station identifiers and time window.
    """
    def __init__(self,
                 stream_src,
                 network=None,
                 station=None,
                 location='',
                 ordering='ZNE'):
        """
        Initialize from data source (file or obspy.Stream). Traces are COPIED into
        the dataset in order to leave input object intact, since many obspy functions
        mutate traces in-place.

        All streams in the input data source stream_src are expected to belong to the same network.
        This is checked as the data is ingested. A discrepant network code is an error condition.

        :param stream_src: Source of input streams. May be a file name or an Obspy Stream
        :type stream_src: str, pathlib.Path or obspy.Stream
        :param network: Network code of streams to load. If stream_src is an Obspy Stream, the \
            streams will be filtered to match this network code.
        :type network: str
        :param station: Station code of streams to load. If stream_src is an Obspy Stream, the \
            streams will be filtered to match this station code.
        :type station: str
        :param location: [OPTIONAL] Location code of streams to load. Leave as default (empty string) \
            if location code is empty in the data source.
        :type location: str
        :param ordering: Channel ordering to be applied to the data after loading. The channel labelling \
            must be consistent with the requested ordering - rotation to the coordinate system implied \
            by the ordering is *NOT* applied.
        :type ordering: str
        :raises AssertionError: If discrepant network code is found in input data
        """
        if isinstance(stream_src, obspy.Stream):
            net = network
            sta = station
            loc = location or None
            if net or sta or loc:
                data_src = stream_src.select(net, sta, loc)
            else:
                data_src = stream_src
            # end if
        elif os.path.isfile(stream_src):
            data_src = read_h5_stream(stream_src, network, station, location)
        else:
            assert False, "Unknown data source {}".format(type(stream_src))
        # end if

        self.network = network

        # Data in data_src collects all traces together under a single Stream object.
        # In order to get control over data slicing and traceability in processing, we
        # break it down into one Stream per ZNE channel triplet of a given event.
        self.db_sta = SortedDict()
        for tr in data_src:
            net, sta, loc, _ = tr.id.split('.')
            if self.network:
                assert net == self.network
            else:
                self.network = net
            # end if
            # Create single copy of the trace to be shared by both dicts.
            dupe_trace = tr.copy()
            try:
                event_id = tr.stats.event_id
            except AttributeError:
                event_id = '.'.join([
                    net, sta, loc,
                    '_'.join([str(tr.stats.starttime),
                              str(tr.stats.endtime)])
                ])
            # end try
            self.db_sta.setdefault(sta, SortedDict()).setdefault(
                event_id, obspy.Stream()).append(dupe_trace)
        # end for

        # Index same obspy.Stream instances in event dict. This way, any changes
        # to a given event stream will be seen by both indexes.
        self.db_evid = SortedDict()
        for sta, ev_db in self.db_sta.items():
            for evid, stream in ev_db.items():
                self.db_evid.setdefault(evid, SortedDict())[sta] = stream
            # end for
        # end for

        # Sort each stream into specific order.
        if ordering.upper() == 'ZNE':
            ordinal = zne_order
        elif ordering.upper() == 'ZRT':
            ordinal = zrt_order
        else:
            ordinal = None
        # end if

        if ordinal is not None:
            self.apply(lambda x: x.traces.sort(key=ordinal))
        # end if

    # end func

    def __iter__(self):
        """
        Flat iterator. Loops over self.db_sta depth first and returns tuple of keys and matching stream.
        Equivalent to::

        ```Python
          for sta, ev_db in self.db_sta.items():
              for evid, stream in ev_db.items():
                  yield (sta, evid, stream)
        ```
        """
        return ((sta, evid, stream) for sta, ev_db in self.db_sta.items()
                for evid, stream in ev_db.items())

    # end func

    def __len__(self):
        """Returns number of streams"""
        return sum((len(x) for x in self.db_sta.values()))

    # end func

    def __repr__(self):
        """Displays summary string for all streams"""
        return '\n'.join(
            (evid + ', ' + str(stream) for _, evid, stream in iter(self)))

    # end func

    def num_stations(self):
        """
        Get number of stations in the dataset.

        :return: Number of stations
        :rtype: int
        """
        return len(self.db_sta)

    # end func

    def station(self, station_code):
        """
        Accessor for events for a given station.

        :param station_code: Station to get
        :type station_code: str
        :return: Event index for station, if station is found
        :rtype: SortedDict
        """
        return self.db_sta.get(station_code)

    # end func

    def num_events(self):
        """
        Get number of events in the dataset.

        :return: Number of events
        :rtype: int
        """
        return len(self.db_evid)

    # end func

    def event(self, event_id):
        """
        Accessor for stations for a given event.

        :param event_id: ID of event to look up
        :type event_id: str
        :return: Station index for given event, if event ID is found, otherwise None
        :rtype: SortedDict or NoneType
        """
        return self.db_evid.get(event_id)

    # end func

    def curate(self, curator):
        """
        Curate the dataset according to a callable curator. Modifies collection in-place to remove
        streams that do not satisfy the curation criteria of the callable.
        Curator call signature must be consistent with::

            callable(station_code, event_id, stream) -> bool

        The callable returns a boolean indicating whether to keep the Stream or not.

        :param curator: Function or callable delegate to adjudicate whether to keep each given stream.
        :type curator: Callable
        :return: None
        """
        # Only need to loop over one db, since they both reference the same underlying Stream instances.
        PY2 = (sys.version_info[0] == 2)

        if PY2:
            from itertools import ifilterfalse as filterfalse  # pylint: disable=no-name-in-module, import-outside-toplevel
        else:
            from itertools import filterfalse  # pylint: disable=import-outside-toplevel
        # end if

        discard_items = [
            (x[0], x[1])
            for x in filterfalse(lambda rec: curator(*rec), iter(self))
        ]

        self.prune(discard_items)

    # end func

    def apply(self, _callable):
        """Apply a callable across all streams. Use to apply uniform processing steps to the whole dataset.

        :param _callable: Callable object that takes an obspy Stream as input and applies itself to that Stream. \
            Expect that stream may be mutated in-place by the callable.
        :type _callable: Any Callable compatible with the call signature.
        :return: None
        """
        for _1, _2, stream in iter(self):
            _callable(stream)

    # end func

    def by_station(self):
        """
        Iterate over station sub-dictionaries.

        :return: Iterable over the stations, each element consisting of pair containing \
            (station code, event dict).
        :rtype: Iterable(tuple)
        """
        return iter(self.db_sta.items())

    # end func

    def by_event(self):
        """
        Iterate over event sub-dictionaries.

        :return: Iterable over the discrete events, each element consisting of pair containing \
            (event id, station dict).
        :rtype: Iterable(tuple)
        """
        return iter(self.db_evid.items())

    # end func

    def prune(self, items, cull=True):
        """
        Remove a given sequence of (station, event) pairs from the dataset.

        :param items: Iterable of (station, event) pairs
        :type items: Iterable(tuple)
        :param cull: If True, then empty entries in the top level index will be removed.
        :type cull: boolean
        :return: None
        """
        for station, event_id in items:
            self.db_sta[station].pop(event_id)
            self.db_evid[event_id].pop(station)
            if cull:
                if not self.db_sta[station]:
                    self.db_sta.pop(station)
                # end if
                if not self.db_evid[event_id]:
                    self.db_evid.pop(event_id)
                # end if
            # end if
        # end for

    # end func

    def write(self, output_h5_filename, index_format='event'):
        """
        Write event dataset back out to HDF5 file.

        :param output_h5_filename: Output file name
        :type output_h5_filename: str or path
        :param index_format: Format to use for index. Must be 'event' (default) or 'standard' (obspy default)
        :type index_format: str
        :return: True if file was written
        :rtype: boolean
        """
        assert not os.path.exists(
            output_h5_filename), 'Output file already exists'
        if index_format not in ['event', 'standard']:
            raise ValueError('Index format %s not supported' % index_format)
        # end if
        all_stream = obspy.Stream()
        for sta, evid, stream in iter(self):
            all_stream += stream
        # end for
        if index_format == 'event':
            write_h5_event_stream(output_h5_filename, all_stream, mode='w')
        elif index_format == 'standard':
            all_stream.write(output_h5_filename, format='H5', mode='w')
        # end if
        return os.path.isfile(output_h5_filename)
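# Standalone sketch (added for illustration): the dual-index pattern used by
# NetworkEventDataset above. Both nested SortedDicts reference the *same*
# stream objects, so a mutation made through one index is visible through the
# other. Streams are stood in for by plain lists; the station and event ids are
# invented.
from sortedcontainers import SortedDict

db_sta, db_evid = SortedDict(), SortedDict()
for sta, evid, stream in [('AU01', 'ev1', ['trZ', 'trN', 'trE']),
                          ('AU02', 'ev1', ['trZ', 'trN', 'trE'])]:
    db_sta.setdefault(sta, SortedDict())[evid] = stream
    db_evid.setdefault(evid, SortedDict())[sta] = stream

db_sta['AU01']['ev1'].append('extra')
assert db_evid['ev1']['AU01'][-1] == 'extra'   # shared object, both indexes see the change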
Пример #27
0
def test_values():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert list(temp.values()) == [pos for key, pos in mapping]
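# Companion check (added for illustration): insertion order does not matter,
# SortedDict always iterates keys, and hence values, in sorted key order.
def test_values_unordered_input():
    shuffled = [('c', 2), ('a', 0), ('b', 1)]
    temp = SortedDict(shuffled)
    assert list(temp.keys()) == ['a', 'b', 'c']
    assert list(temp.values()) == [0, 1, 2]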
Пример #28
0
class AttributeSet:
    """The AttributeSet class that represents an attribute set."""
    def __init__(self, attributes: Optional[Iterable[Attribute]] = None):
        """Initialize the AttributeSet object with the attributes.

        Args:
            attributes: The attributes that compose the attribute set if set.

        Raises:
            DuplicateAttributeId: Two attributes share the same id.
        """
        # Maintain a sorted dictionary linking the attributes id to the
        # attribute objects
        self._id_to_attr = SortedDict()
        if attributes:
            for attribute in attributes:
                self.add(attribute)

    def __iter__(self) -> Iterator:
        """Give the iterator for the AttributeSet to get the attributes.

        Returns:
            An iterator that iterates over the Attribute objects that compose
            the attribute set.
        """
        return iter(self._id_to_attr.values())

    def __repr__(self) -> str:
        """Provide a string representation of the attribute set.

        Returns:
            A string representation of the attribute set.
        """
        attribute_list = ', '.join(
            str(attr) for attr in self._id_to_attr.values())
        return f'{self.__class__.__name__}([{attribute_list}])'

    @property
    def attribute_names(self) -> List[str]:
        """Give the names of the attributes of this attribute set (read only).

        The attribute names are sorted according to the attribute ids.

        Returns:
            The name of the attributes of this attribute set as a list of str.
        """
        return list(attribute.name for attribute in self._id_to_attr.values())

    @property
    def attribute_ids(self) -> List[int]:
        """Give the ids of the attributes of this attribute set (read only).

        Returns:
            The ids of the attributes of this set as a sorted list of integers.
        """
        return list(self._id_to_attr.keys())

    def add(self, attribute: Attribute):
        """Add an attribute to this attribute set if it is not already present.

        Args:
            attribute: The attribute to add.

        Raises:
            DuplicateAttributeId: An attribute with the same id as the
                                  attribute that is added already exists.
        """
        if attribute.attribute_id in self._id_to_attr:
            raise DuplicateAttributeId('An attribute with the same id as '
                                       f'{attribute} already exists.')
        self._id_to_attr[attribute.attribute_id] = attribute

    def remove(self, attribute: Attribute):
        """Remove an attribute from this attribute set.

        Args:
            attribute: The attribute to remove.

        Raises:
            KeyError: The attribute is not present in this attribute set.
        """
        if attribute.attribute_id not in self._id_to_attr:
            raise KeyError(f'{attribute} is not among the attributes.')
        del self._id_to_attr[attribute.attribute_id]

    def __hash__(self) -> int:
        """Give the hash of an attribute set: the hash of its attributes.

        Returns:
            The hash of an attribute set, computed as the hash of the frozen set
            of its attribute ids.
        """
        return hash(frozenset(self.attribute_ids))

    def __eq__(self, other_attr_set: 'AttributeSet') -> bool:
        """Compare two attribute sets, equal if the attributes correspond.

        Args:
            other_attr_set: The other attribute set to which the attribute set
                            is compared with.

        Returns:
            The two attribute sets are equal: they share the same attributes.
        """
        return (isinstance(other_attr_set, self.__class__)
                and hash(self) == hash(other_attr_set))

    def __contains__(self, attribute: Attribute) -> bool:
        """Check if the attribute is in the attribute set.

        Args:
            attribute: The attribute that is checked whether it is in this set.

        Returns:
            The attribute is in the attribute set.
        """
        return attribute.attribute_id in self._id_to_attr

    def __len__(self) -> int:
        """Give the size of this attribute set as the number of attributes.

        Returns:
            The number of attributes in this attribute set.
        """
        return len(self._id_to_attr)

    def issuperset(self, other_attribute_set: 'AttributeSet') -> bool:
        """Check if the attribute set is a superset of the one in parameters.

        Args:
            other_attribute_set: The attribute set for which we check whether
                                 the attribute set is a superset of.

        Returns:
            The attribute set is a superset of the other attribute set.
        """
        self_attribute_ids_set = frozenset(self.attribute_ids)
        other_attribute_ids_set = frozenset(other_attribute_set.attribute_ids)
        return self_attribute_ids_set.issuperset(other_attribute_ids_set)

    def issubset(self, other_attribute_set: 'AttributeSet') -> bool:
        """Check if the attribute set is a subset of the one in parameters.

        Args:
            other_attribute_set: The attribute set for which we check whether
                                 the attribute set is a subset of.

        Returns:
            The attribute set is a subset of the other attribute set.
        """
        self_attribute_ids_set = frozenset(self.attribute_ids)
        other_attribute_ids_set = frozenset(other_attribute_set.attribute_ids)
        return self_attribute_ids_set.issubset(other_attribute_ids_set)

    def get_attribute_by_id(self, attribute_id: int) -> Attribute:
        """Give an attribute by its id.

        Args:
            attribute_id: The id of the attribute to retrieve.

        Raises:
            KeyError: The attribute is not present in this attribute set.
        """
        if attribute_id not in self._id_to_attr:
            raise KeyError(f'No attribute with the id {attribute_id}.')
        return self._id_to_attr[attribute_id]

    def get_attribute_by_name(self, name: str) -> Attribute:
        """Give an attribute by its name.

        Args:
            name: The name of the attribute to retrieve.

        Raises:
            KeyError: The attribute is not present in this attribute set.
        """
        for attribute in self._id_to_attr.values():
            if attribute.name == name:
                return attribute
        raise KeyError(f'No attribute is named {name}.')
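# Usage sketch (added for illustration): exercising the AttributeSet class
# above. `Attr` is a hypothetical stand-in for the real Attribute type; for
# this sketch it only needs `attribute_id` and `name` fields.
from collections import namedtuple

Attr = namedtuple('Attr', ['attribute_id', 'name'])

attr_set = AttributeSet([Attr(3, 'user_agent'), Attr(1, 'timezone')])
assert attr_set.attribute_ids == [1, 3]                       # kept sorted by id
assert attr_set.attribute_names == ['timezone', 'user_agent']
assert attr_set.get_attribute_by_name('timezone').attribute_id == 1
assert Attr(1, 'timezone') in attr_set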
Пример #29
0
class Replica(HasActionQueue, MessageProcessor):
    def __init__(self, node: 'plenum.server.node.Node', instId: int,
                 isMaster: bool = False):
        """
        Create a new replica.

        :param node: Node on which this replica is located
        :param instId: the id of the protocol instance the replica belongs to
        :param isMaster: is this a replica of the master protocol instance
        """
        super().__init__()
        self.stats = Stats(TPCStat)

        self.config = getConfig()

        routerArgs = [(ReqDigest, self._preProcessReqDigest)]

        for r in [PrePrepare, Prepare, Commit]:
            routerArgs.append((r, self.processThreePhaseMsg))

        routerArgs.append((Checkpoint, self.processCheckpoint))
        routerArgs.append((ThreePCState, self.process3PhaseState))

        self.inBoxRouter = Router(*routerArgs)

        self.threePhaseRouter = Router(
                (PrePrepare, self.processPrePrepare),
                (Prepare, self.processPrepare),
                (Commit, self.processCommit)
        )

        self.node = node
        self.instId = instId

        self.name = self.generateName(node.name, self.instId)

        self.outBox = deque()
        """
        This queue is used by the replica to send messages to its node. Replica
        puts messages that are consumed by its node
        """

        self.inBox = deque()
        """
        This queue is used by the replica to receive messages from its node.
        Node puts messages that are consumed by the replica
        """

        self.inBoxStash = deque()
        """
        If messages need to go back on the queue, they go here temporarily and
        are put back on the queue on a state change
        """

        self.isMaster = isMaster

        # Indicates name of the primary replica of this protocol instance.
        # None in case the replica does not know who the primary of the
        # instance is
        self._primaryName = None    # type: Optional[str]

        # Requests waiting to be processed once the replica is able to decide
        # whether it is primary or not
        self.postElectionMsgs = deque()

        # PRE-PREPAREs that are waiting to be processed but do not have the
        # corresponding request digest. Happens when replica has not been
        # forwarded the request by the node but is getting 3 phase messages.
        # The value is a list since a malicious entity might send PRE-PREPAREs
        # with different digests and, since we don't have the request finalised,
        # we store all PRE-PREPAREs
        self.prePreparesPendingReqDigest = {}   # type: Dict[Tuple[str, int], List]

        # PREPAREs that are stored by non primary replica for which it has not
        #  got any PRE-PREPARE. Dictionary that stores a tuple of view no and
        #  prepare sequence number as key and a deque of PREPAREs as value.
        # This deque is attempted to be flushed on receiving every
        # PRE-PREPARE request.
        self.preparesWaitingForPrePrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE
        # received
        self.commitsWaitingForPrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # Dictionary of sent PRE-PREPARE that are stored by primary replica
        # which it has broadcasted to all other non primary replicas
        # Key of dictionary is a 2 element tuple with elements viewNo,
        # pre-prepare seqNo and value is a tuple of Request Digest and time
        self.sentPrePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2
        # element tuple with elements viewNo, pre-prepare seqNo and value is
        # a tuple of Request Digest and time
        self.prePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received Prepare requests. Key of dictionary is a 2
        # element tuple with elements viewNo, seqNo and value is a 2 element
        # tuple containing request digest and set of sender node names(sender
        # replica names in case of multiple protocol instances)
        # (viewNo, seqNo) -> ((identifier, reqId), {senders})
        self.prepares = Prepares()
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]]

        self.commits = Commits()    # type: Dict[Tuple[int, int],
        # Tuple[Tuple[str, int], Set[str]]]

        # Set of tuples to keep track of ordered requests. Each tuple is
        # (viewNo, ppSeqNo)
        self.ordered = OrderedSet()        # type: OrderedSet[Tuple[int, int]]

        # Dictionary to keep track of which replica was primary during each
        # view. Key is the view no and value is the name of the primary
        # replica during that view
        self.primaryNames = {}  # type: Dict[int, str]

        # Holds msgs that are for later views
        self.threePhaseMsgsForLaterView = deque()
        # type: deque[(ThreePhaseMsg, str)]

        # Holds tuple of view no and prepare seq no of 3-phase messages it
        # received while it was not participating
        self.stashingWhileCatchingUp = set()       # type: Set[Tuple]

        # Commits which are not being ordered since commits with lower view
        # numbers and sequence numbers have not been ordered yet. Key is the
        # viewNo and value a map of pre-prepare sequence number to commit
        self.stashedCommitsForOrdering = {}         # type: Dict[int,
        # Dict[int, Commit]]

        self.checkpoints = SortedDict(lambda k: k[0])

        self.stashingWhileOutsideWaterMarks = deque()

        # Low water mark
        self._h = 0              # type: int

        # High water mark
        self.H = self._h + self.config.LOG_SIZE   # type: int

        self.lastPrePrepareSeqNo = self.h  # type: int

    @property
    def h(self) -> int:
        return self._h

    @h.setter
    def h(self, n):
        self._h = n
        self.H = self._h + self.config.LOG_SIZE

    @property
    def requests(self):
        return self.node.requests

    def shouldParticipate(self, viewNo: int, ppSeqNo: int):
        # The replica should participate only if its node is participating in
        # the consensus process and the replica did not stash any of this
        # request's 3-phase messages
        return self.node.isParticipating and (viewNo, ppSeqNo) \
                                             not in self.stashingWhileCatchingUp

    @staticmethod
    def generateName(nodeName: str, instId: int):
        """
        Create and return the name for a replica using its nodeName and
        instanceId.
         Ex: Alpha:1
        """
        return "{}:{}".format(nodeName, instId)

    @staticmethod
    def getNodeName(replicaName: str):
        return replicaName.split(":")[0]

    @property
    def isPrimary(self):
        """
        Is this node primary?

        :return: True if this node is primary, False otherwise
        """
        return self._primaryName == self.name if self._primaryName is not None \
            else None

    @property
    def primaryName(self):
        """
        Name of the primary replica of this replica's instance

        :return: Returns name if primary is known, None otherwise
        """
        return self._primaryName

    @primaryName.setter
    def primaryName(self, value: Optional[str]) -> None:
        """
        Set the value of isPrimary.

        :param value: the value to set isPrimary to
        """
        if not value == self._primaryName:
            self._primaryName = value
            self.primaryNames[self.viewNo] = value
            logger.debug("{} setting primaryName for view no {} to: {}".
                         format(self, self.viewNo, value))
            logger.debug("{}'s primaryNames for views are: {}".
                         format(self, self.primaryNames))
            self._stateChanged()

    def _stateChanged(self):
        """
        A series of actions to be performed when the state of this replica
        changes.

        - UnstashInBox (see _unstashInBox)
        """
        self._unstashInBox()
        if self.isPrimary is not None:
            # TODO handle suspicion exceptions here
            self.process3PhaseReqsQueue()
            # TODO handle suspicion exceptions here
            try:
                self.processPostElectionMsgs()
            except SuspiciousNode as ex:
                self.outBox.append(ex)
                self.discard(ex.msg, ex.reason, logger.warning)

    def _stashInBox(self, msg):
        """
        Stash the specified message into the inBoxStash of this replica.

        :param msg: the message to stash
        """
        self.inBoxStash.append(msg)

    def _unstashInBox(self):
        """
        Append the inBoxStash to the right of the inBox.
        """
        self.inBox.extend(self.inBoxStash)
        self.inBoxStash.clear()

    def __repr__(self):
        return self.name

    @property
    def f(self) -> int:
        """
        Return the number of Byzantine Failures that can be tolerated by this
        system. Equal to (N - 1)/3, where N is the number of nodes in the
        system.
        """
        return self.node.f

    @property
    def viewNo(self):
        """
        Return the current view number of this replica.
        """
        return self.node.viewNo

    def isPrimaryInView(self, viewNo: int) -> Optional[bool]:
        """
        Return whether a primary has been selected for this view number.
        """
        return self.primaryNames[viewNo] == self.name

    def isMsgForLaterView(self, msg):
        """
        Return whether this request's view number is greater than the current
        view number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo > self.viewNo

    def isMsgForCurrentView(self, msg):
        """
        Return whether this request's view number is equal to the current view
        number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo == self.viewNo

    def isMsgForPrevView(self, msg):
        """
        Return whether this request's view number is less than the current view
        number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo < self.viewNo

    def isPrimaryForMsg(self, msg) -> Optional[bool]:
        """
        Return whether this replica is primary if the request's view number is
        equal this replica's view number and primary has been selected for
        the current view.
        Return None otherwise.

        :param msg: message
        """
        if self.isMsgForLaterView(msg):
            self.discard(msg,
                         "Cannot get primary status for a request for a later "
                         "view {}. Request is {}".format(self.viewNo, msg),
                         logger.error)
        else:
            return self.isPrimary if self.isMsgForCurrentView(msg) \
                else self.isPrimaryInView(msg.viewNo)

    def isMsgFromPrimary(self, msg, sender: str) -> bool:
        """
        Return whether this message was from primary replica
        :param msg:
        :param sender:
        :return:
        """
        if self.isMsgForLaterView(msg):
            logger.error("{} cannot get primary for a request for a later "
                         "view. Request is {}".format(self, msg))
        else:
            return self.primaryName == sender if self.isMsgForCurrentView(
                msg) else self.primaryNames[msg.viewNo] == sender

    def _preProcessReqDigest(self, rd: ReqDigest) -> None:
        """
        Process request digest if this replica is not a primary, otherwise stash
        the message into the inBox.

        :param rd: the client Request Digest
        """
        if self.isPrimary is not None:
            self.processReqDigest(rd)
        else:
            logger.debug("{} stashing request digest {} since it does not know "
                         "its primary status".
                         format(self, (rd.identifier, rd.reqId)))
            self._stashInBox(rd)

    def serviceQueues(self, limit=None):
        """
        Process `limit` number of messages in the inBox.

        :param limit: the maximum number of messages to process
        :return: the number of messages successfully processed
        """
        # TODO should handle SuspiciousNode here
        r = self.inBoxRouter.handleAllSync(self.inBox, limit)
        r += self._serviceActions()
        return r
        # Messages that cannot be processed right now need to be added back to
        # the queue. They might be able to be processed later

    def processPostElectionMsgs(self):
        """
        Process messages waiting for the election of a primary replica to
        complete.
        """
        while self.postElectionMsgs:
            msg = self.postElectionMsgs.popleft()
            logger.debug("{} processing pended msg {}".format(self, msg))
            self.dispatchThreePhaseMsg(*msg)

    def process3PhaseReqsQueue(self):
        """
        Process the 3 phase requests from the queue whose view number is equal
        to the current view number of this replica.
        """
        unprocessed = deque()
        while self.threePhaseMsgsForLaterView:
            request, sender = self.threePhaseMsgsForLaterView.popleft()
            logger.debug("{} processing pended 3 phase request: {}"
                         .format(self, request))
            # If the request is for a later view don't try to process it but add
            # it back to the queue.
            if self.isMsgForLaterView(request):
                unprocessed.append((request, sender))
            else:
                self.processThreePhaseMsg(request, sender)
        self.threePhaseMsgsForLaterView = unprocessed

    @property
    def quorum(self) -> int:
        r"""
        Return the quorum of this RBFT system. Equal to :math:`2f + 1`.
        Return None if `f` is not yet determined.
        """
        return self.node.quorum

    def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any:
        """
        Create a three phase request to be handled by the threePhaseRouter.

        :param msg: the ThreePhaseMsg to dispatch
        :param sender: the name of the node that sent this request
        """
        senderRep = self.generateName(sender, self.instId)
        if self.isPpSeqNoAcceptable(msg.ppSeqNo):
            try:
                self.threePhaseRouter.handleSync((msg, senderRep))
            except SuspiciousNode as ex:
                self.node.reportSuspiciousNodeEx(ex)
        else:
            logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is "
                         "not between {} and {}".
                         format(self, msg, msg.ppSeqNo, self.h, self.H))
            self.stashingWhileOutsideWaterMarks.append((msg, sender))

    def processReqDigest(self, rd: ReqDigest):
        """
        Process a request digest. Works only if this replica has decided its
        primary status.

        :param rd: the client request digest to process
        """
        self.stats.inc(TPCStat.ReqDigestRcvd)
        if self.isPrimary is False:
            self.dequeuePrePrepare(rd.identifier, rd.reqId)
        else:
            self.doPrePrepare(rd)

    def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str):
        """
        Process a 3-phase (pre-prepare, prepare and commit) request.
        Dispatch the request only if primary has already been decided, otherwise
        stash it.

        :param msg: the Three Phase message, one of PRE-PREPARE, PREPARE,
            COMMIT
        :param sender: name of the node that sent this message
        """
        # Can only proceed further if it knows whether it's primary or not
        if self.isMsgForLaterView(msg):
            self.threePhaseMsgsForLaterView.append((msg, sender))
            logger.debug("{} pended received 3 phase request for a later view: "
                         "{}".format(self, msg))
        else:
            if self.isPrimary is None:
                self.postElectionMsgs.append((msg, sender))
                logger.debug("Replica {} pended request {} from {}".
                             format(self, msg, sender))
            else:
                self.dispatchThreePhaseMsg(msg, sender)

    def processPrePrepare(self, pp: PrePrepare, sender: str):
        """
        Validate and process the PRE-PREPARE specified.
        If validation is successful, create a PREPARE and broadcast it.

        :param pp: a prePrepareRequest
        :param sender: name of the node that sent this message
        """
        key = (pp.viewNo, pp.ppSeqNo)
        logger.debug("{} Receiving PRE-PREPARE{} at {} from {}".
                     format(self, key, time.perf_counter(), sender))
        if self.canProcessPrePrepare(pp, sender):
            if not self.node.isParticipating:
                self.stashingWhileCatchingUp.add(key)
            self.addToPrePrepares(pp)
            logger.info("{} processed incoming PRE-PREPARE{}".
                        format(self, key))

    def tryPrepare(self, pp: PrePrepare):
        """
        Try to send the Prepare message if the PrePrepare message is ready to
        be passed into the Prepare phase.
        """
        if self.canSendPrepare(pp):
            self.doPrepare(pp)
        else:
            logger.debug("{} cannot send PREPARE".format(self))

    def processPrepare(self, prepare: Prepare, sender: str) -> None:
        """
        Validate and process the PREPARE specified.
        If validation is successful, create a COMMIT and broadcast it.

        :param prepare: a PREPARE msg
        :param sender: name of the node that sent the PREPARE
        """
        # TODO move this try/except up higher
        logger.debug("{} received PREPARE{} from {}".
                     format(self, (prepare.viewNo, prepare.ppSeqNo), sender))
        try:
            if self.isValidPrepare(prepare, sender):
                self.addToPrepares(prepare, sender)
                self.stats.inc(TPCStat.PrepareRcvd)
                logger.debug("{} processed incoming PREPARE {}".
                             format(self, (prepare.viewNo, prepare.ppSeqNo)))
            else:
                # TODO let's have isValidPrepare throw an exception that gets
                # handled and possibly logged higher
                logger.warning("{} cannot process incoming PREPARE".
                               format(self))
        except SuspiciousNode as ex:
            self.node.reportSuspiciousNodeEx(ex)

    def processCommit(self, commit: Commit, sender: str) -> None:
        """
        Validate and process the COMMIT specified.
        If validation is successful, return the message to the node.

        :param commit: an incoming COMMIT message
        :param sender: name of the node that sent the COMMIT
        """
        logger.debug("{} received COMMIT {} from {}".
                     format(self, commit, sender))
        if self.isValidCommit(commit, sender):
            self.stats.inc(TPCStat.CommitRcvd)
            self.addToCommits(commit, sender)
            logger.debug("{} processed incoming COMMIT{}".
                         format(self, (commit.viewNo, commit.ppSeqNo)))

    def tryCommit(self, prepare: Prepare):
        """
        Try to commit if the Prepare message is ready to be passed into the
        commit phase.
        """
        if self.canCommit(prepare):
            self.doCommit(prepare)
        else:
            logger.debug("{} not yet able to send COMMIT".format(self))

    def tryOrder(self, commit: Commit):
        """
        Try to order if the Commit message is ready to be ordered.
        """
        canOrder, reason = self.canOrder(commit)
        if canOrder:
            logger.debug("{} returning request to node".format(self))
            self.tryOrdering(commit)
        else:
            logger.trace("{} cannot return request to node: {}".
                         format(self, reason))

    def doPrePrepare(self, reqDigest: ReqDigest) -> None:
        """
        Broadcast a PRE-PREPARE to all the replicas.

        :param reqDigest: a tuple with elements identifier, reqId, and digest
        """
        if not self.node.isParticipating:
            logger.error("Non participating node is attempting PRE-PREPARE. "
                         "This should not happen.")
            return

        if self.lastPrePrepareSeqNo == self.H:
            logger.debug("{} stashing PRE-PREPARE {} since outside greater "
                         "than high water mark {}".
                         format(self, (self.viewNo, self.lastPrePrepareSeqNo+1),
                                self.H))
            self.stashingWhileOutsideWaterMarks.append(reqDigest)
            return
        self.lastPrePrepareSeqNo += 1
        tm = time.time()*1000
        logger.debug("{} Sending PRE-PREPARE {} at {}".
                     format(self, (self.viewNo, self.lastPrePrepareSeqNo),
                            time.perf_counter()))
        prePrepareReq = PrePrepare(self.instId,
                                   self.viewNo,
                                   self.lastPrePrepareSeqNo,
                                   *reqDigest,
                                   tm)
        self.sentPrePrepares[self.viewNo, self.lastPrePrepareSeqNo] = (reqDigest.key,
                                                                       tm)
        self.send(prePrepareReq, TPCStat.PrePrepareSent)

    def doPrepare(self, pp: PrePrepare):
        logger.debug("{} Sending PREPARE {} at {}".
                     format(self, (pp.viewNo, pp.ppSeqNo), time.perf_counter()))
        prepare = Prepare(self.instId,
                          pp.viewNo,
                          pp.ppSeqNo,
                          pp.digest,
                          pp.ppTime)
        self.send(prepare, TPCStat.PrepareSent)
        self.addToPrepares(prepare, self.name)

    def doCommit(self, p: Prepare):
        """
        Create a commit message from the given Prepare message and trigger the
        commit phase
        :param p: the prepare message
        """
        logger.debug("{} Sending COMMIT{} at {}".
                     format(self, (p.viewNo, p.ppSeqNo), time.perf_counter()))
        commit = Commit(self.instId,
                        p.viewNo,
                        p.ppSeqNo,
                        p.digest,
                        p.ppTime)
        self.send(commit, TPCStat.CommitSent)
        self.addToCommits(commit, self.name)

    def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool:
        """
        Decide whether this replica is eligible to process a PRE-PREPARE,
        based on the following criteria:

        - this replica is a non-primary replica
        - the request isn't in its list of received PRE-PREPAREs
        - the request has been finalised and the digest value matches

        :param pp: a PRE-PREPARE msg to process
        :param sender: the name of the node that sent the PRE-PREPARE msg
        :return: True if processing is allowed, False otherwise
        """
        # TODO: Check whether it is rejecting PRE-PREPARE from previous view
        # PRE-PREPARE should not be sent from non primary
        if not self.isMsgFromPrimary(pp, sender):
            raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp)

        # A PRE-PREPARE is being sent to primary
        if self.isPrimaryForMsg(pp) is True:
            raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp)

        # A PRE-PREPARE is sent that has already been received
        if (pp.viewNo, pp.ppSeqNo) in self.prePrepares:
            raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp)

        key = (pp.identifier, pp.reqId)
        if not self.requests.isFinalised(key):
            self.enqueuePrePrepare(pp, sender)
            return False

        # A PRE-PREPARE is sent that does not match request digest
        if self.requests.digest(key) != pp.digest:
            raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp)

        return True

    def addToPrePrepares(self, pp: PrePrepare) -> None:
        """
        Add the specified PRE-PREPARE to this replica's list of received
        PRE-PREPAREs.

        :param pp: the PRE-PREPARE to add to the list
        """
        key = (pp.viewNo, pp.ppSeqNo)
        self.prePrepares[key] = \
            ((pp.identifier, pp.reqId), pp.ppTime)
        self.dequeuePrepares(*key)
        self.dequeueCommits(*key)
        self.stats.inc(TPCStat.PrePrepareRcvd)
        self.tryPrepare(pp)

    def hasPrepared(self, request) -> bool:
        return self.prepares.hasPrepareFrom(request, self.name)

    def canSendPrepare(self, request) -> bool:
        """
        Return whether the request identified by (identifier, reqId) can
        proceed to the Prepare step.

        :param request: any object with viewNo, ppSeqNo, identifier and
            reqId attributes
        """
        return self.shouldParticipate(request.viewNo, request.ppSeqNo) \
            and not self.hasPrepared(request) \
            and self.requests.isFinalised((request.identifier,
                                           request.reqId))

    def isValidPrepare(self, prepare: Prepare, sender: str) -> bool:
        """
        Return whether the PREPARE specified is valid.

        :param prepare: the PREPARE to validate
        :param sender: the name of the node that sent the PREPARE
        :return: True if PREPARE is valid, False otherwise
        """
        key = (prepare.viewNo, prepare.ppSeqNo)
        primaryStatus = self.isPrimaryForMsg(prepare)

        ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares

        # A non-primary replica may legitimately receive a PREPARE before the
        # corresponding PRE-PREPARE (e.g. on a slow network); that case is
        # handled below by enqueueing the PREPARE

        # PREPARE should not be sent from primary
        if self.isMsgFromPrimary(prepare, sender):
            raise SuspiciousNode(sender, Suspicions.PR_FRM_PRIMARY, prepare)

        # If non primary replica
        if primaryStatus is False:
            if self.prepares.hasPrepareFrom(prepare, sender):
                raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare)
            # If PRE-PREPARE not received for the PREPARE, might be slow network
            if key not in ppReqs:
                self.enqueuePrepare(prepare, sender)
                return False
            elif prepare.digest != self.requests.digest(ppReqs[key][0]):
                raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare)
            elif prepare.ppTime != ppReqs[key][1]:
                raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG,
                                     prepare)
            else:
                return True
        # If primary replica
        else:
            if self.prepares.hasPrepareFrom(prepare, sender):
                raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT, prepare)
            # If PRE-PREPARE was not sent for this PREPARE, certainly
            # malicious behavior
            elif key not in ppReqs:
                raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT, prepare)
            elif prepare.digest != self.requests.digest(ppReqs[key][0]):
                raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG, prepare)
            elif prepare.ppTime != ppReqs[key][1]:
                raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG,
                                     prepare)
            else:
                return True

    def addToPrepares(self, prepare: Prepare, sender: str):
        self.prepares.addVote(prepare, sender)
        self.tryCommit(prepare)

    def hasCommitted(self, request) -> bool:
        return self.commits.hasCommitFrom(ThreePhaseKey(
            request.viewNo, request.ppSeqNo), self.name)

    def canCommit(self, prepare: Prepare) -> bool:
        """
        Return whether the specified PREPARE can proceed to the Commit
        step.

        Decision criteria:

        - If this replica has got just 2f PREPAREs, then send a COMMIT.
        - If it has fewer than 2f PREPAREs, then there is probably no consensus
            on the request yet; don't commit.
        - If it has more than 2f, then a COMMIT has already been sent; don't
            commit again.

        :param prepare: the PREPARE
        """
        return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \
            self.prepares.hasQuorum(prepare, self.f) and \
            not self.hasCommitted(prepare)

    def isValidCommit(self, commit: Commit, sender: str) -> bool:
        """
        Return whether the COMMIT specified is valid.

        :param commit: the COMMIT to validate
        :param sender: the name of the node that sent the COMMIT
        :return: True if the COMMIT is valid, False otherwise
        """
        primaryStatus = self.isPrimaryForMsg(commit)
        ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares
        key = (commit.viewNo, commit.ppSeqNo)
        if key not in ppReqs:
            self.enqueueCommit(commit, sender)
            return False

        if (key not in self.prepares and
                key not in self.preparesWaitingForPrePrepare):
            logger.debug("{} rejecting COMMIT{} due to lack of prepares".
                         format(self, key))
            # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit)
            return False
        elif self.commits.hasCommitFrom(commit, sender):
            raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit)
        elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)):
            raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit)
        elif key in ppReqs and commit.ppTime != ppReqs[key][1]:
            raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG,
                                 commit)
        else:
            return True

    def addToCommits(self, commit: Commit, sender: str):
        """
        Add the specified COMMIT to this replica's list of received
        commit requests.

        :param commit: the COMMIT to add to the list
        :param sender: the name of the node that sent the COMMIT
        """
        self.commits.addVote(commit, sender)
        self.tryOrder(commit)

    def hasOrdered(self, viewNo, ppSeqNo) -> bool:
        return (viewNo, ppSeqNo) in self.ordered

    def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]:
        """
        Return whether the specified COMMIT can be returned to the node.

        Decision criteria:

        - If it has got just 2f+1 COMMITs, then return the request to the node.
        - If it has fewer than 2f+1 COMMITs, then there is probably no
            consensus on the request yet; don't return it to the node.
        - If it has more than 2f+1, then the request has already been returned
            to the node; don't return it again.

        :param commit: the COMMIT
        """
        if not self.commits.hasQuorum(commit, self.f):
            return False, "no quorum: {} commits where f is {}".\
                          format(commit, self.f)

        if self.hasOrdered(commit.viewNo, commit.ppSeqNo):
            return False, "already ordered"

        if not self.isNextInOrdering(commit):
            viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo
            if viewNo not in self.stashedCommitsForOrdering:
                self.stashedCommitsForOrdering[viewNo] = {}
            self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit
            # self._schedule(self.orderStashedCommits, 2)
            self.startRepeating(self.orderStashedCommits, 2)
            return False, "stashing {} since out of order".\
                format(commit)

        return True, None

    def isNextInOrdering(self, commit: Commit):
        viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo
        if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo-1):
            return True
        for (v, p) in self.commits:
            if v < viewNo:
                # Have commits from previous view that are unordered.
                # TODO: Question: would commits be always ordered, what if
                # some are never ordered and its fine, go to PBFT.
                return False
            if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered:
                # If unordered commits are found with lower ppSeqNo then this
                # cannot be ordered.
                return False

        # TODO: Revisit PBFT paper, how to make sure that last request of the
        # last view has been ordered? Need change in `VIEW CHANGE` mechanism.
        # Somehow view change needs to communicate what the last request was.
        # Also what if some COMMITs were completely missed in the same view
        return True

    def orderStashedCommits(self):
        # TODO: What if the first few commits were out of order and stashed?
        # `self.ordered` would be empty
        if self.ordered:
            lastOrdered = self.ordered[-1]
            vToRemove = set()
            for v in self.stashedCommitsForOrdering:
                if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]:
                    raise RuntimeError("{} found commits from previous view {}"
                                       " that were not ordered but last ordered"
                                       " is {}".format(self, v, lastOrdered))
                pToRemove = set()
                for p, commit in self.stashedCommitsForOrdering[v].items():
                    if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \
                            (v > lastOrdered[0] and
                                self.isLowestCommitInView(commit)):
                        logger.debug("{} ordering stashed commit {}".
                                     format(self, commit))
                        if self.tryOrdering(commit):
                            lastOrdered = (v, p)
                            pToRemove.add(p)

                for p in pToRemove:
                    del self.stashedCommitsForOrdering[v][p]
                if not self.stashedCommitsForOrdering[v]:
                    vToRemove.add(v)

            for v in vToRemove:
                del self.stashedCommitsForOrdering[v]

            # if self.stashedCommitsForOrdering:
            #     self._schedule(self.orderStashedCommits, 2)
            if not self.stashedCommitsForOrdering:
                self.stopRepeating(self.orderStashedCommits)

    def isLowestCommitInView(self, commit):
        # TODO: Assumption: This assumes that at least one commit that was sent
        #  for any request by any node has been received in the view of this
        # commit
        ppSeqNos = []
        for v, p in self.commits:
            if v == commit.viewNo:
                ppSeqNos.append(p)
        return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True

    def tryOrdering(self, commit: Commit) -> bool:
        """
        Attempt to send an ORDERED request for the specified COMMIT to the
        node.

        :param commit: the COMMIT message
        """
        key = (commit.viewNo, commit.ppSeqNo)
        logger.debug("{} trying to order COMMIT{}".format(self, key))
        reqKey = self.getReqKeyFrom3PhaseKey(key)   # type: Tuple
        digest = self.getDigestFor3PhaseKey(key)
        if not digest:
            logger.error("{} did not find digest for {}, request key {}".
                         format(self, key, reqKey))
            return False
        self.doOrder(*key, *reqKey, digest, commit.ppTime)
        return True

    def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime):
        key = (viewNo, ppSeqNo)
        self.addToOrdered(*key)
        ordered = Ordered(self.instId,
                          viewNo,
                          identifier,
                          reqId,
                          ppTime)
        # TODO: Should not order or add to checkpoint while syncing
        # 3 phase state.
        self.send(ordered, TPCStat.OrderSent)
        if key in self.stashingWhileCatchingUp:
            self.stashingWhileCatchingUp.remove(key)
        logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo)))
        self.addToCheckpoint(ppSeqNo, digest)

    def processCheckpoint(self, msg: Checkpoint, sender: str):
        if self.checkpoints:
            seqNo = msg.seqNo
            _, firstChk = self.firstCheckPoint
            if firstChk.isStable:
                if firstChk.seqNo == seqNo:
                    self.discard(msg, reason="Checkpoint already stable",
                                 logMethod=logger.debug)
                    return
                if firstChk.seqNo > seqNo:
                    self.discard(msg, reason="Higher stable checkpoint present",
                                 logMethod=logger.debug)
                    return
            for state in self.checkpoints.values():
                if state.seqNo == seqNo:
                    if state.digest == msg.digest:
                        state.receivedDigests[sender] = msg.digest
                        break
                    else:
                        logger.error("{} received an incorrect digest {} for "
                                     "checkpoint {} from {}".format(self,
                                                                    msg.digest,
                                                                    seqNo,
                                                                    sender))
                        return
            if len(state.receivedDigests) == 2*self.f:
                self.markCheckPointStable(msg.seqNo)
        else:
            self.discard(msg, reason="No checkpoints present to tally",
                         logMethod=logger.warn)

    def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState:
        s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1
        logger.debug("{} adding new checkpoint state for {}".
                     format(self, (s, e)))
        state = CheckpointState(ppSeqNo, [digest, ], None, {}, False)
        self.checkpoints[s, e] = state
        return state

    def addToCheckpoint(self, ppSeqNo, digest):
        for (s, e) in self.checkpoints.keys():
            if s <= ppSeqNo <= e:
                state = self.checkpoints[s, e]  # type: CheckpointState
                state.digests.append(digest)
                state = updateNamedTuple(state, seqNo=ppSeqNo)
                self.checkpoints[s, e] = state
                break
        else:
            state = self._newCheckpointState(ppSeqNo, digest)
            s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1

        if len(state.digests) == self.config.CHK_FREQ:
            state = updateNamedTuple(state, digest=serialize(state.digests),
                                     digests=[])
            self.checkpoints[s, e] = state
            self.send(Checkpoint(self.instId, self.viewNo, ppSeqNo,
                                 state.digest))

    def markCheckPointStable(self, seqNo):
        previousCheckpoints = []
        for (s, e), state in self.checkpoints.items():
            if e == seqNo:
                state = updateNamedTuple(state, isStable=True)
                self.checkpoints[s, e] = state
                break
            else:
                previousCheckpoints.append((s, e))
        else:
            logger.error("{} could not find {} in checkpoints".
                         format(self, seqNo))
            return
        self.h = seqNo
        for k in previousCheckpoints:
            logger.debug("{} removing previous checkpoint {}".format(self, k))
            self.checkpoints.pop(k)
        self.gc(seqNo)
        logger.debug("{} marked stable checkpoint {}".format(self, (s, e)))
        self.processStashedMsgsForNewWaterMarks()

    def gc(self, tillSeqNo):
        logger.debug("{} cleaning up till {}".format(self, tillSeqNo))
        tpcKeys = set()
        reqKeys = set()
        for (v, p), (reqKey, _) in self.sentPrePrepares.items():
            if p <= tillSeqNo:
                tpcKeys.add((v, p))
                reqKeys.add(reqKey)
        for (v, p), (reqKey, _) in self.prePrepares.items():
            if p <= tillSeqNo:
                tpcKeys.add((v, p))
                reqKeys.add(reqKey)

        logger.debug("{} found {} 3 phase keys to clean".
                     format(self, len(tpcKeys)))
        logger.debug("{} found {} request keys to clean".
                     format(self, len(reqKeys)))

        for k in tpcKeys:
            self.sentPrePrepares.pop(k, None)
            self.prePrepares.pop(k, None)
            self.prepares.pop(k, None)
            self.commits.pop(k, None)
            if k in self.ordered:
                self.ordered.remove(k)

        for k in reqKeys:
            self.requests.pop(k, None)

    def processStashedMsgsForNewWaterMarks(self):
        while self.stashingWhileOutsideWaterMarks:
            item = self.stashingWhileOutsideWaterMarks.pop()
            logger.debug("{} processing stashed item {} after new stable "
                         "checkpoint".format(self, item))

            if isinstance(item, ReqDigest):
                self.doPrePrepare(item)
            elif isinstance(item, tuple) and len(item) == 2:
                self.dispatchThreePhaseMsg(*item)
            else:
                logger.error("{} cannot process {} "
                             "from stashingWhileOutsideWaterMarks".
                             format(self, item))

    @property
    def firstCheckPoint(self) -> Optional[Tuple[Tuple[int, int], CheckpointState]]:
        if not self.checkpoints:
            return None
        else:
            return self.checkpoints.peekitem(0)

    @property
    def lastCheckPoint(self) -> Optional[Tuple[Tuple[int, int], CheckpointState]]:
        if not self.checkpoints:
            return None
        else:
            return self.checkpoints.peekitem(-1)

    def isPpSeqNoAcceptable(self, ppSeqNo: int):
        return self.h < ppSeqNo <= self.H

    def addToOrdered(self, viewNo: int, ppSeqNo: int):
        self.ordered.add((viewNo, ppSeqNo))

    def enqueuePrePrepare(self, request: PrePrepare, sender: str):
        logger.debug("Queueing pre-prepares due to unavailability of finalised "
                     "Request. Request {} from {}".format(request, sender))
        key = (request.identifier, request.reqId)
        if key not in self.prePreparesPendingReqDigest:
            self.prePreparesPendingReqDigest[key] = []
        self.prePreparesPendingReqDigest[key].append((request, sender))

    def dequeuePrePrepare(self, identifier: str, reqId: int):
        key = (identifier, reqId)
        if key in self.prePreparesPendingReqDigest:
            pps = self.prePreparesPendingReqDigest[key]
            for (pp, sender) in pps:
                logger.debug("{} popping stashed PRE-PREPARE{}".
                             format(self, key))
                if pp.digest == self.requests.digest(key):
                    self.prePreparesPendingReqDigest.pop(key)
                    self.processPrePrepare(pp, sender)
                    logger.debug(
                        "{} processed {} PRE-PREPAREs waiting for finalised "
                        "request for identifier {} and reqId {}".
                        format(self, pp, identifier, reqId))
                    break

    def enqueuePrepare(self, request: Prepare, sender: str):
        logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. "
                     "Request {} from {}".format(request, sender))
        key = (request.viewNo, request.ppSeqNo)
        if key not in self.preparesWaitingForPrePrepare:
            self.preparesWaitingForPrePrepare[key] = deque()
        self.preparesWaitingForPrePrepare[key].append((request, sender))

    def dequeuePrepares(self, viewNo: int, ppSeqNo: int):
        key = (viewNo, ppSeqNo)
        if key in self.preparesWaitingForPrePrepare:
            i = 0
            # Count of pending PREPAREs processed below
            while self.preparesWaitingForPrePrepare[key]:
                prepare, sender = self.preparesWaitingForPrePrepare[
                    key].popleft()
                logger.debug("{} popping stashed PREPARE{}".format(self, key))
                self.processPrepare(prepare, sender)
                i += 1
            self.preparesWaitingForPrePrepare.pop(key)
            logger.debug("{} processed {} PREPAREs waiting for PRE-PREPARE for"
                         " view no {} and seq no {}".
                         format(self, i, viewNo, ppSeqNo))

    def enqueueCommit(self, request: Commit, sender: str):
        logger.debug("Queueing commit due to unavailability of PREPARE. "
                     "Request {} from {}".format(request, sender))
        key = (request.viewNo, request.ppSeqNo)
        if key not in self.commitsWaitingForPrepare:
            self.commitsWaitingForPrepare[key] = deque()
        self.commitsWaitingForPrepare[key].append((request, sender))

    def dequeueCommits(self, viewNo: int, ppSeqNo: int):
        key = (viewNo, ppSeqNo)
        if key in self.commitsWaitingForPrepare:
            i = 0
            # Count of pending COMMITs processed below
            while self.commitsWaitingForPrepare[key]:
                commit, sender = self.commitsWaitingForPrepare[
                    key].popleft()
                logger.debug("{} popping stashed COMMIT{}".format(self, key))
                self.processCommit(commit, sender)
                i += 1
            self.commitsWaitingForPrepare.pop(key)
            logger.debug("{} processed {} COMMITs waiting for PREPARE for"
                         " view no {} and seq no {}".
                         format(self, i, viewNo, ppSeqNo))

    def getDigestFor3PhaseKey(self, key: ThreePhaseKey) -> Optional[str]:
        reqKey = self.getReqKeyFrom3PhaseKey(key)
        digest = self.requests.digest(reqKey)
        if not digest:
            logger.debug("{} could not find digest in sent or received "
                         "PRE-PREPAREs or PREPAREs for 3 phase key {} and req "
                         "key {}".format(self, key, reqKey))
            return None
        else:
            return digest

    def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey):
        reqKey = None
        if key in self.sentPrePrepares:
            reqKey = self.sentPrePrepares[key][0]
        elif key in self.prePrepares:
            reqKey = self.prePrepares[key][0]
        elif key in self.prepares:
            reqKey = self.prepares[key][0]
        else:
            logger.debug("Could not find request key for 3 phase key {}".
                         format(key))
        return reqKey

    @property
    def threePhaseState(self):
        # TODO: This method is incomplete
        # Gets the current stable and unstable checkpoints and creates a digest
        # of the unstable checkpoints
        state = []
        if self.checkpoints:
            # TODO: populate `state` from the stable and unstable checkpoints
            pass
        return ThreePCState(self.instId, state)

    def process3PhaseState(self, msg: ThreePCState, sender: str):
        # TODO: This is not complete
        pass

    def send(self, msg, stat=None) -> None:
        """
        Send a message to the node on which this replica resides.

        :param msg: the message to send
        """
        logger.display("{} sending {}".format(self, msg.__class__.__name__),
                       extra={"cli": True})
        logger.trace("{} sending {}".format(self, msg))
        if stat:
            self.stats.inc(stat)
        self.outBox.append(msg)
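The checkpoint bookkeeping above leans on sortedcontainers: `self.checkpoints` is a SortedDict of (start, end) windows ordered by window start, which is what makes `peekitem(0)` and `peekitem(-1)` return the oldest and newest checkpoints. Below is a minimal standalone sketch of that pattern; the window arithmetic and names are illustrative, not the plenum API.

from sortedcontainers import SortedDict

CHK_FREQ = 100  # illustrative checkpoint frequency
checkpoints = SortedDict(lambda k: k[0])  # (start, end) windows, ordered by start

def window_for(pp_seq_no):
    # Map a sequence number onto its checkpoint window, e.g. 150 -> (101, 200)
    start = ((pp_seq_no - 1) // CHK_FREQ) * CHK_FREQ + 1
    return (start, start + CHK_FREQ - 1)

for seq_no in (1, 150, 320):
    checkpoints.setdefault(window_for(seq_no), []).append(seq_no)

print(checkpoints.peekitem(0))   # ((1, 100), [1])      -- oldest window
print(checkpoints.peekitem(-1))  # ((301, 400), [320])  -- newest window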
Example #30
0
class FederationRemoteSendQueue(object):
    """A drop in replacement for FederationSender"""
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        # We may have multiple federation sender instances, so we need to track
        # their positions separately.
        self._sender_instances = hs.config.worker.federation_shard_config.instances
        self._sender_positions = {}

        # Pending presence map user_id -> UserPresenceState
        self.presence_map = {}  # type: Dict[str, UserPresenceState]

        # Stream position -> list[user_id]
        self.presence_changed = SortedDict(
        )  # type: SortedDict[int, List[str]]

        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
        self.presence_destinations = (
            SortedDict())  # type: SortedDict[int, Tuple[str, List[str]]]

        # (destination, key) -> EDU
        self.keyed_edu = {}  # type: Dict[Tuple[str, tuple], Edu]

        # stream position -> (destination, key)
        self.keyed_edu_changed = (SortedDict()
                                  )  # type: SortedDict[int, Tuple[str, tuple]]

        self.edus = SortedDict()  # type: SortedDict[int, Edu]

        # stream ID for the next entry into presence_changed/keyed_edu_changed/edus.
        self.pos = 1

        # map from stream ID to the time that stream entry was generated, so that we
        # can clear out entries after a while
        self.pos_time = SortedDict()  # type: SortedDict[int, int]

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (queue_name, ),
                "",
                [],
                lambda: len(queue),
            )

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "pos_time",
                "presence_destinations",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = {
                user_id
                for uids in self.presence_changed.values() for user_id in uids
            }

            keys = self.presence_destinations.keys()
            i = self.presence_destinations.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_destinations[key]

            user_ids.update(
                user_id for user_id, _ in self.presence_destinations.values())

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            keys_to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in keys_to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

    def notify_new_events(self, current_id):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def build_and_send_edu(self, destination, edu_type, content, key=None):
        """As per FederationSender"""
        if destination == self.server_name:
            logger.info("Not sending EDU to ourselves")
            return

        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_read_receipt(self, receipt):
        """As per FederationSender

        Args:
            receipt (synapse.types.ReadReceipt):
        """
        # nothing to do here: the replication listener will handle it.
        return defer.succeed(None)

    def send_presence(self, states):
        """As per FederationSender

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so let's always
        # filter here just in case.
        local_states = list(
            filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update(
            {state.user_id: state
             for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_presence_to_destinations(self, states, destinations):
        """As per FederationSender

        Args:
            states (list[UserPresenceState])
            destinations (list[str])
        """
        for state in states:
            pos = self._next_pos()
            self.presence_map.update(
                {state.user_id: state
                 for state in states})
            self.presence_destinations[pos] = (state.user_id, destinations)

        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, instance_name, token):
        if self._sender_instances:
            # If we have configured multiple federation sender instances we need
            # to track their positions separately, and only clear the queue up
            # to the token all instances have acked.
            self._sender_positions[instance_name] = token
            token = min(self._sender_positions.values())

        self._clear_queue_before_pos(token)

    async def get_replication_rows(
            self, instance_name: str, from_token: int, to_token: int,
            target_row_count: int
    ) -> Tuple[List[Tuple[int, Tuple]], int, bool]:
        """Get rows to be sent over federation between the two tokens

        Args:
            instance_name: the name of the current process
            from_token: the previous stream token: the starting point for fetching the
                updates
            to_token: the new stream token: the point to get updates up to
            target_row_count: a target for the number of rows to be returned.

        Returns: a triplet `(updates, new_last_token, limited)`, where:
           * `updates` is a list of `(token, row)` entries.
           * `new_last_token` is the new position in stream.
           * `limited` is whether there are more updates to fetch.
        """
        # TODO: Handle target_row_count.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []  # type: List[Tuple[int, BaseFederationRow]]

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(state=self.presence_map[user_id])))

        # Fetch presence to send to destinations
        i = self.presence_destinations.bisect_right(from_token)
        j = self.presence_destinations.bisect_right(to_token) + 1

        for pos, (user_id, dests) in self.presence_destinations.items()[i:j]:
            rows.append((
                pos,
                PresenceDestinationsRow(state=self.presence_map[user_id],
                                        destinations=list(dests)),
            ))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in keyed_edus.items():
            rows.append((
                pos,
                KeyedEduRow(key=edu_key,
                            edu=self.keyed_edu[(destination, edu_key)]),
            ))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Sort rows based on pos
        rows.sort()

        return (
            [(pos, (row.TypeId, row.to_data())) for pos, row in rows],
            to_token,
            False,
        )
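The trimming in _clear_queue_before_pos and the range fetches in get_replication_rows both rely on SortedDict's bisect methods and sliceable views. Here is a small self-contained sketch of that pattern; the variable names and data are illustrative, not the Synapse API.

from sortedcontainers import SortedDict

edus = SortedDict()  # stream position -> payload
for pos, payload in enumerate(["edu-a", "edu-b", "edu-c", "edu-d"], start=1):
    edus[pos] = payload

# Expire everything up to and including position 2 (the class above trims
# with bisect_left; bisect_right keeps this sketch short).
cut = edus.bisect_right(2)
for key in edus.keys()[:cut]:
    del edus[key]

# Fetch the half-open token range (from_token, to_token] with bisect_right
# and a slice of the items view, much as get_replication_rows does.
from_token, to_token = 2, 4
i = edus.bisect_right(from_token)
j = edus.bisect_right(to_token)
print(edus.items()[i:j])  # [(3, 'edu-c'), (4, 'edu-d')]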
Example #31
0
class Topics:
    """
A class that manages a collection of `Topic`s.

    """
    def __init__(self):
        self.logger = getLogger('topics')
        self.logger.info('started session')
        self.clear()

    def clear(self):
        self.logger.info('Cleared all topics and received data')
        self.topic_list = SortedDict()
        self.transfers = dict()

    def create(self, topic, source='remote'):
        # Create the topic if it doesn't exist already
        if not topic in self.topic_list:
            self.topic_list[topic] = Topic(topic, source=source)
            self.logger.info('new:topic ' + topic)

    def process(self, topic, payload, options=None):
        # Create the topic if it doesn't exist already
        self.create(topic)

        # Add the new sample
        self.topic_list[topic].new_sample(payload, options)

        # logging
        if options:
            self.logger.debug('new sample | {0} [{1}] {2}'.format(
                topic, options['index'], payload))
        else:
            self.logger.debug('new sample | {0} {1}'.format(topic, payload))

        # If there is an active transfer, transfer received data to the queue
        if topic in self.transfers:
            # If transfer requires indexed data, check there is an index
            if (self.transfers[topic]['type'] == 'indexed'
                    and options is not None):
                x = options['index']
                self.transfers[topic]['queue'].put([x, payload])
            # For linear data, provide sample id for x and payload for y
            elif self.transfers[topic]['type'] == 'linear':
                x = self.transfers[topic]['lastindex']
                self.transfers[topic]['queue'].put([x, payload])
                self.transfers[topic]['lastindex'] += 1

    def ls(self, source='remote'):
        if source is None:
            return sorted(self.topic_list.keys())
        else:
            return sorted([
                t.name for t in self.topic_list.values() if t.source == source
            ])

    def samples(self, topic, amount=1):
        if not topic in self.topic_list:
            return None

        if amount == 0 or amount is None:
            return self.topic_list[topic].raw

        return self.topic_list[topic].raw[-amount:]

    def count(self, topic):
        if not topic in self.topic_list:
            return 0

        return len(self.topic_list[topic].raw)

    def exists(self, topic):
        return topic in self.topic_list

    def transfer(self, topic, queue, transfer_type="linear"):
        # If the topic data is not already transfered to some queue
        if not topic in self.transfers:
            self.transfers[topic] = dict()
            self.transfers[topic]['queue'] = queue
            self.transfers[topic]['lastindex'] = 0
            self.transfers[topic]['type'] = transfer_type

            self.logger.info('start transfer | {0}'.format(topic))

            # If there is already existing data under the topic
            if topic in self.topic_list:
                if transfer_type == 'indexed':
                    for key, value in self.topic_list[topic].indexes.items():
                        queue.put([key, value])
                elif transfer_type == 'linear':
                    for item in self.topic_list[topic].raw:
                        queue.put([self.transfers[topic]['lastindex'], item])
                        self.transfers[topic]['lastindex'] += 1

    def untransfer(self, topic):
        # If the topic data is already transfered to some queue
        if topic in self.transfers:
            # Remove it from the transfer list
            del self.transfers[topic]
            self.logger.info('stop transfer | {0}'.format(topic))

    def intransfer(self, topic):
        return topic in self.transfers

    def has_indexed_data(self, topic):
        return self.topic_list[topic].has_indexed_data()
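A brief sketch of why topic_list is a SortedDict: iterating it already yields topic names in key order, so listings come back sorted without extra bookkeeping. The data below is illustrative and stands in for the real Topic objects.

from sortedcontainers import SortedDict

topic_list = SortedDict()
for name in ("sensor/temp", "log/errors", "sensor/humidity"):
    topic_list[name] = {"source": "remote", "samples": []}

print(list(topic_list))                 # names already in sorted order
print([n for n, t in topic_list.items()
       if t["source"] == "remote"])     # filtered, still sorted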
Example #32
0
class OrderedSet(abc.MutableSet, abc.Sequence):
    """Like OrderedDict, OrderedSet maintains the insertion order of elements.

    For example::

        >>> ordered_set = OrderedSet('abcde')
        >>> list(ordered_set) == list('abcde')
        True
        >>> ordered_set = OrderedSet('edcba')
        >>> list(ordered_set) == list('edcba')
        True

    OrderedSet also implements the collections.Sequence interface.

    """

    # pylint: disable=too-many-ancestors
    def __init__(self, iterable=()):
        # pylint: disable=super-init-not-called
        self._keys = {}
        self._nums = SortedDict()
        self._keys_view = self._nums.keys()
        self._count = count()
        self |= iterable

    def __contains__(self, key):
        "``key in ordered_set``"
        return key in self._keys

    count = __contains__

    def __iter__(self):
        "``iter(ordered_set)``"
        return iter(self._nums.values())

    def __reversed__(self):
        "``reversed(ordered_set)``"
        _nums = self._nums
        for key in reversed(_nums):
            yield _nums[key]

    def __getitem__(self, index):
        "``ordered_set[index]`` -> element; lookup element at index."
        num = self._keys_view[index]
        return self._nums[num]

    def __len__(self):
        "``len(ordered_set)``"
        return len(self._keys)

    def index(self, value):
        "Return index of value."
        # pylint: disable=arguments-differ
        try:
            return self._keys[value]
        except KeyError:
            raise ValueError('%r is not in %s' % (value, type(self).__name__))

    def add(self, value):
        "Add element, value, to set."
        if value not in self._keys:
            num = next(self._count)
            self._keys[value] = num
            self._nums[num] = value

    def discard(self, value):
        "Remove element, value, from set if it is a member."
        num = self._keys.pop(value, None)
        if num is not None:
            del self._nums[num]

    def __repr__(self):
        "Text representation of set."
        return '%s(%r)' % (type(self).__name__, list(self))

    __str__ = __repr__
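A standalone sketch of the trick OrderedSet is built on: a plain dict maps each element to a monotonically increasing insertion number, and a SortedDict maps that number back to the element, so iteration follows insertion order and the sorted keys view gives positional indexing. The names below are illustrative.

from itertools import count
from sortedcontainers import SortedDict

counter = count()
keys = {}            # element -> insertion number
nums = SortedDict()  # insertion number -> element

for value in "edcba":
    if value not in keys:
        n = next(counter)
        keys[value] = n
        nums[n] = value

print(list(nums.values()))   # ['e', 'd', 'c', 'b', 'a'] (insertion order)
print(nums[nums.keys()[2]])  # 'c' -- element at position 2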
Example #33
0
class CacheStore(object):
    class CacheItem(object):
        __slots__ = ('valid', 'data')

        def __init__(self):
            self.valid = Event()
            self.data = None

    def __init__(self, key=None):
        self.lock = RLock()
        self.store = SortedDict(key)

    def __getitem__(self, item):
        return self.get(item)

    def put(self, key, data):
        with self.lock:
            try:
                item = self.store[key]
                item.data = data
                item.valid.set()
                return False
            except KeyError:
                item = self.CacheItem()
                item.data = data
                item.valid.set()
                self.store[key] = item
                return True

    def update(self, **kwargs):
        with self.lock:
            items = {}
            created = []
            updated = []
            for k, v in kwargs.items():
                items[k] = self.CacheItem()
                items[k].data = v
                items[k].valid.set()
                if k in self.store:
                    updated.append(k)
                else:
                    created.append(k)

            self.store.update(**items)
            return created, updated

    def update_one(self, key, **kwargs):
        with self.lock:
            item = self.get(key)
            if not item:
                return False

            for k, v in kwargs.items():
                setattr(item, k, v)

            self.put(key, item)
            return True

    def update_many(self, key, predicate, **kwargs):
        with self.lock:
            updated = []
            for k, v in self.itervalid():
                if predicate(v):
                    if self.update_one(k, **kwargs):
                        updated.append(k)

            return updated

    def get(self, key, default=None, timeout=None):
        item = self.store.get(key)
        if item:
            item.valid.wait(timeout)
            return item.data

        return default

    def remove(self, key):
        with self.lock:
            try:
                del self.store[key]
                return True
            except KeyError:
                return False

    def remove_many(self, keys):
        with self.lock:
            removed = []
            for key in keys:
                try:
                    del self.store[key]
                    removed.append(key)
                except KeyError:
                    pass

            return removed

    def clear(self):
        with self.lock:
            items = list(self.store.keys())
            self.store.clear()
            return items

    def exists(self, key):
        return key in self.store

    def rename(self, oldkey, newkey):
        with self.lock:
            obj = self.get(oldkey)
            obj['id'] = newkey
            self.put(newkey, obj)
            self.remove(oldkey)

    def is_valid(self, key):
        item = self.store.get(key)
        if item:
            return item.valid.is_set()

        return False

    def invalidate(self, key):
        with self.lock:
            item = self.store.get(key)
            if item:
                item.valid.clear()

    def itervalid(self):
        for key, value in list(self.store.items()):
            if value.valid.is_set():
                yield (key, value.data)

    def validvalues(self):
        for value in list(self.store.values()):
            if value.valid.is_set():
                yield value.data

    def remove_predicate(self, predicate):
        result = []
        for k, v in self.itervalid():
            if predicate(v):
                self.remove(k)
                result.append(k)

        return result

    def query(self, *filter, **params):
        return query(list(self.validvalues()), *filter, **params)
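A simplified standalone sketch of the caching pattern above: each slot pairs its data with a threading.Event so readers can wait until a writer marks it valid, while an RLock guards structural changes to the SortedDict. The function names are illustrative, not the CacheStore API.

from threading import Event, RLock
from sortedcontainers import SortedDict

lock = RLock()
store = SortedDict()  # key -> (validity Event, single-item data holder)

def put(key, data):
    with lock:
        event, holder = store.setdefault(key, (Event(), []))
        holder[:] = [data]
        event.set()

def get(key, timeout=None, default=None):
    entry = store.get(key)
    if entry is None:
        return default
    event, holder = entry
    event.wait(timeout)
    return holder[0] if holder else default

put("users", ["alice", "bob"])
print(get("users"))               # ['alice', 'bob']
print(get("missing", timeout=0))  # None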
Example #34
0
class SequenceLearner(BaseLearner):
    r"""A learner that will learn a sequence. It simply returns
    the points in the provided sequence when asked.

    This is useful when your problem cannot be formulated in terms of
    another adaptive learner, but you still want to use Adaptive's
    routines to run, save, and plot.

    Parameters
    ----------
    function : callable
        The function to learn. Must take a single element `sequence`.
    sequence : sequence
        The sequence to learn.

    Attributes
    ----------
    data : dict
        The data as a mapping from "index of element in sequence" => value.

    Notes
    -----
    From primitive tests, the `~adaptive.SequenceLearner` appears to have a
    similar performance to `ipyparallel`\s ``load_balanced_view().map``. With
    the added benefit of having results in the local kernel already.
    """
    def __init__(self, function, sequence):
        self._original_function = function
        self.function = _IgnoreFirstArgument(function)
        self._to_do_indices = SortedSet({i for i, _ in enumerate(sequence)})
        self._ntotal = len(sequence)
        self.sequence = copy(sequence)
        self.data = SortedDict()
        self.pending_points = set()

    def ask(self, n, tell_pending=True):
        indices = []
        points = []
        loss_improvements = []
        for index in self._to_do_indices:
            if len(points) >= n:
                break
            point = self.sequence[index]
            indices.append(index)
            points.append((index, point))
            loss_improvements.append(1 / self._ntotal)

        if tell_pending:
            for i, p in zip(indices, points):
                self.tell_pending((i, p))

        return points, loss_improvements

    def _get_data(self):
        return self.data

    def _set_data(self, data):
        if data:
            indices, values = zip(*data.items())
            # the points aren't used by tell, so we can safely pass None
            points = [(i, None) for i in indices]
            self.tell_many(points, values)

    def loss(self, real=True):
        if not (self._to_do_indices or self.pending_points):
            return 0
        else:
            npoints = self.npoints + (0 if real else len(self.pending_points))
            return (self._ntotal - npoints) / self._ntotal

    def remove_unfinished(self):
        for i in self.pending_points:
            self._to_do_indices.add(i)
        self.pending_points = set()

    def tell(self, point, value):
        index, point = point
        self.data[index] = value
        self.pending_points.discard(index)
        self._to_do_indices.discard(index)

    def tell_pending(self, point):
        index, point = point
        self.pending_points.add(index)
        self._to_do_indices.discard(index)

    def done(self):
        return not self._to_do_indices and not self.pending_points

    def result(self):
        """Get the function values in the same order as ``sequence``."""
        if not self.done():
            raise Exception("Learner is not yet complete.")
        return list(self.data.values())

    @property
    def npoints(self):
        return len(self.data)
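A minimal sketch of the bookkeeping SequenceLearner keeps: pending indices live in a SortedSet and results in a SortedDict keyed by index, so handing out work and collecting ordered results stay cheap. The function names are illustrative.

from sortedcontainers import SortedDict, SortedSet

sequence = [10, 20, 30, 40]
to_do = SortedSet(range(len(sequence)))  # indices still to be evaluated
data = SortedDict()                      # index -> result

def ask(n):
    # Hand out the first n pending (index, point) pairs.
    return [(i, sequence[i]) for i in list(to_do)[:n]]

def tell(index, value):
    data[index] = value
    to_do.discard(index)

for i, point in ask(2):
    tell(i, point * point)

print(dict(data))   # {0: 100, 1: 400}
print(list(to_do))  # [2, 3] still pending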
Example #35
0
class Node(BaseNode):

    def __init__(self, *args, **kwargs):
        self.rest = None

        super(Node, self).__init__(*args, **kwargs)

    def _select(self, key):
        """
        Selects the bucket the key should belong to.
        """

        if key < min(self.bucket):
            new_node = self.rest
            return new_node

        elif key >= max(self.bucket):
            new_node = self.bucket.values()[-1]
            return new_node

        for i in range(0, len(self.bucket.keys())-1):
            if key >= self.bucket.keys()[i] and key < self.bucket.keys()[i + 1]:
                new_node = self.bucket.values()[i]
                return new_node

        pass

    def _insert(self, key, value):
        """
        Recursively inserts the key and value by selecting the bucket the key
        should belong to, and inserting the key and value into that bucket. If the
        node has been split, it inserts the key of the newly created node into
        the bucket of this node.
        """

        result = self._select(key)._insert(key,value)
        self.changed = True

        if result is None:
            return

        key, other = result
        return super()._insert(key, other)

    def _split(self):
        """
        Creates a new node of the same type and splits the contents of the
        bucket into two parts of an equal size. The lower keys are being stored
        in the bucket of the current node. The higher keys are being stored in
        the bucket of the new node. Afterwards, the new node is being returned.
        """
        other = self.__class__(tree=self.tree)
        size = len(self.bucket)

        values = self.bucket.items()

        self.bucket = SortedDict(values[:len(values) // 2])
        other.values = SortedDict(values[len(values) // 2:])

        key, value = other.values.popitem(last=False)
        other.rest = value

        return (key, other)

    def __getitem__(self, key):
        selected_node = self._select(key)
        return selected_node.__getitem__(key)

    def __iter__(self):
        if self.rest != None:
            for key in self.rest:
                yield key

        for child in self.bucket.values():
            for key in child:
                yield key

    def __len__(self):
        return sum(len(child) for child in self.bucket.values()) + \
            (len(self.rest) if self.rest is not None else 0)
def test_values_view_index():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping[:13])
    values = temp.values()
    with pytest.raises(ValueError):
        values.index(100)
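For context on what the test above exercises, a short sketch of the SortedDict values view: it behaves like a sequence, so index() returns the position of a present value (in key order) and raises ValueError otherwise.

from sortedcontainers import SortedDict

temp = SortedDict({'a': 0, 'b': 1, 'c': 2})
values = temp.values()
print(values.index(1))  # 1 -- position of the value, in key order
try:
    values.index(100)
except ValueError:
    print("100 is not among the values")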
Example #37
0
class Replica(HasActionQueue, MessageProcessor):
    def __init__(self,
                 node: 'plenum.server.node.Node',
                 instId: int,
                 isMaster: bool = False):
        """
        Create a new replica.

        :param node: Node on which this replica is located
        :param instId: the id of the protocol instance the replica belongs to
        :param isMaster: is this a replica of the master protocol instance
        """
        HasActionQueue.__init__(self)
        self.stats = Stats(TPCStat)

        self.config = getConfig()

        routerArgs = [(ReqDigest, self._preProcessReqDigest)]

        for r in [PrePrepare, Prepare, Commit]:
            routerArgs.append((r, self.processThreePhaseMsg))

        routerArgs.append((Checkpoint, self.processCheckpoint))
        routerArgs.append((ThreePCState, self.process3PhaseState))

        self.inBoxRouter = Router(*routerArgs)

        self.threePhaseRouter = Router((PrePrepare, self.processPrePrepare),
                                       (Prepare, self.processPrepare),
                                       (Commit, self.processCommit))

        self.node = node
        self.instId = instId

        self.name = self.generateName(node.name, self.instId)

        self.outBox = deque()
        """
        This queue is used by the replica to send messages to its node. Replica
        puts messages that are consumed by its node
        """

        self.inBox = deque()
        """
        This queue is used by the replica to receive messages from its node.
        Node puts messages that are consumed by the replica
        """

        self.inBoxStash = deque()
        """
        If messages need to go back on the queue, they go here temporarily and
        are put back on the queue on a state change
        """

        self.isMaster = isMaster

        # Indicates name of the primary replica of this protocol instance.
        # None in case the replica does not know who the primary of the
        # instance is
        self._primaryName = None  # type: Optional[str]

        # Requests waiting to be processed once the replica is able to decide
        # whether it is primary or not
        self.postElectionMsgs = deque()

        # PRE-PREPAREs that are waiting to be processed but do not have the
        # corresponding request digest. Happens when replica has not been
        # forwarded the request by the node but is getting 3 phase messages.
        # The value is a list since a malicious entity might send a PRE-PREPARE
        # with a different digest and, since we don't have the request
        # finalised, we store all PRE-PREPAREs
        self.prePreparesPendingReqDigest = {
        }  # type: Dict[Tuple[str, int], List]

        # PREPAREs that are stored by non primary replica for which it has not
        #  got any PRE-PREPARE. Dictionary that stores a tuple of view no and
        #  prepare sequence number as key and a deque of PREPAREs as value.
        # This deque is attempted to be flushed on receiving every
        # PRE-PREPARE request.
        self.preparesWaitingForPrePrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # COMMITs that are stored for which there are no PRE-PREPARE or PREPARE
        # received
        self.commitsWaitingForPrepare = {}
        # type: Dict[Tuple[int, int], deque]

        # Dictionary of sent PRE-PREPARE that are stored by primary replica
        # which it has broadcasted to all other non primary replicas
        # Key of dictionary is a 2 element tuple with elements viewNo,
        # pre-prepare seqNo and value is a tuple of Request Digest and time
        self.sentPrePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received PRE-PREPAREs. Key of dictionary is a 2
        # element tuple with elements viewNo, pre-prepare seqNo and value is
        # a tuple of Request Digest and time
        self.prePrepares = {}
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], float]]

        # Dictionary of received Prepare requests. Key of dictionary is a 2
        # element tuple with elements viewNo, seqNo and value is a 2 element
        # tuple containing request digest and set of sender node names(sender
        # replica names in case of multiple protocol instances)
        # (viewNo, seqNo) -> ((identifier, reqId), {senders})
        self.prepares = Prepares()
        # type: Dict[Tuple[int, int], Tuple[Tuple[str, int], Set[str]]]

        self.commits = Commits()  # type: Dict[Tuple[int, int],
        # Tuple[Tuple[str, int], Set[str]]]

        # Set of tuples to keep track of ordered requests. Each tuple is
        # (viewNo, ppSeqNo)
        self.ordered = OrderedSet()  # type: OrderedSet[Tuple[int, int]]

        # Dictionary to keep track of the which replica was primary during each
        # view. Key is the view no and value is the name of the primary
        # replica during that view
        self.primaryNames = {}  # type: Dict[int, str]

        # Holds msgs that are for later views
        self.threePhaseMsgsForLaterView = deque()
        # type: deque[(ThreePhaseMsg, str)]

        # Holds tuple of view no and prepare seq no of 3-phase messages it
        # received while it was not participating
        self.stashingWhileCatchingUp = set()  # type: Set[Tuple]

        # Commits which are not being ordered since commits with lower view
        # numbers and sequence numbers have not been ordered yet. Key is the
        # viewNo and value a map of pre-prepare sequence number to commit
        self.stashedCommitsForOrdering = {}  # type: Dict[int,
        # Dict[int, Commit]]

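        # Checkpoint states keyed by a (start, end) pp-seq-no range; the key
        # function keeps the ranges ordered by their start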
        self.checkpoints = SortedDict(lambda k: k[0])

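        # 3-phase messages and request digests stashed because their
        # pp-seq-no fell outside the current water marks (h, H]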
        self.stashingWhileOutsideWaterMarks = deque()

        # Low water mark
        self._h = 0  # type: int

        # High water mark
        self.H = self._h + self.config.LOG_SIZE  # type: int

        self.lastPrePrepareSeqNo = self.h  # type: int

    @property
    def h(self) -> int:
        return self._h

    @h.setter
    def h(self, n):
        self._h = n
        self.H = self._h + self.config.LOG_SIZE

    @property
    def requests(self):
        return self.node.requests

    def shouldParticipate(self, viewNo: int, ppSeqNo: int):
        # The replica should participate only if its node is participating in
        # the consensus process and it did not stash any of this request's
        # 3-phase messages while catching up
        return self.node.isParticipating and (viewNo, ppSeqNo) \
                                             not in self.stashingWhileCatchingUp

    @staticmethod
    def generateName(nodeName: str, instId: int):
        """
        Create and return the name for a replica using its nodeName and
        instanceId.
        Ex: Alpha:1
        """
        return "{}:{}".format(nodeName, instId)

    @staticmethod
    def getNodeName(replicaName: str):
        return replicaName.split(":")[0]

    @property
    def isPrimary(self):
        """
        Is this replica the primary of its protocol instance?

        :return: True if this replica is primary, False if it is not, None if
            its primary status is not yet known
        """
        return self._primaryName == self.name if self._primaryName is not None \
            else None

    @property
    def primaryName(self):
        """
        Name of the primary replica of this replica's instance

        :return: Returns name if primary is known, None otherwise
        """
        return self._primaryName

    @primaryName.setter
    def primaryName(self, value: Optional[str]) -> None:
        """
        Set the name of the primary replica for the current view.

        :param value: the name of the primary replica (or None if unknown)
        """
        if not value == self._primaryName:
            self._primaryName = value
            self.primaryNames[self.viewNo] = value
            logger.debug("{} setting primaryName for view no {} to: {}".format(
                self, self.viewNo, value))
            logger.debug("{}'s primaryNames for views are: {}".format(
                self, self.primaryNames))
            self._stateChanged()

    def _stateChanged(self):
        """
        A series of actions to be performed when the state of this replica
        changes.

        - UnstashInBox (see _unstashInBox)
        """
        self._unstashInBox()
        if self.isPrimary is not None:
            # TODO handle suspicion exceptions here
            self.process3PhaseReqsQueue()
            # TODO handle suspicion exceptions here
            try:
                self.processPostElectionMsgs()
            except SuspiciousNode as ex:
                self.outBox.append(ex)
                self.discard(ex.msg, ex.reason, logger.warning)

    def _stashInBox(self, msg):
        """
        Stash the specified message into the inBoxStash of this replica.

        :param msg: the message to stash
        """
        self.inBoxStash.append(msg)

    def _unstashInBox(self):
        """
        Append the inBoxStash to the right of the inBox.
        """
        self.inBox.extend(self.inBoxStash)
        self.inBoxStash.clear()

    def __repr__(self):
        return self.name

    @property
    def f(self) -> int:
        """
        Return the number of Byzantine Failures that can be tolerated by this
        system. Equal to (N - 1)/3, where N is the number of nodes in the
        system.
        """
        return self.node.f

    @property
    def viewNo(self):
        """
        Return the current view number of this replica.
        """
        return self.node.viewNo

    def isPrimaryInView(self, viewNo: int) -> Optional[bool]:
        """
        Return whether this replica was the primary in the given view.
        """
        return self.primaryNames[viewNo] == self.name

    def isMsgForLaterView(self, msg):
        """
        Return whether this request's view number is greater than the current
        view number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo > self.viewNo

    def isMsgForCurrentView(self, msg):
        """
        Return whether this request's view number is equal to the current view
        number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo == self.viewNo

    def isMsgForPrevView(self, msg):
        """
        Return whether this request's view number is less than the current view
        number of this replica.
        """
        viewNo = getattr(msg, "viewNo", None)
        return viewNo < self.viewNo

    def isPrimaryForMsg(self, msg) -> Optional[bool]:
        """
        Return whether this replica is primary if the request's view number is
        equal this replica's view number and primary has been selected for
        the current view.
        Return None otherwise.

        :param msg: message
        """
        if self.isMsgForLaterView(msg):
            self.discard(
                msg, "Cannot get primary status for a request for a later "
                "view {}. Request is {}".format(self.viewNo, msg),
                logger.error)
        else:
            return self.isPrimary if self.isMsgForCurrentView(msg) \
                else self.isPrimaryInView(msg.viewNo)

    def isMsgFromPrimary(self, msg, sender: str) -> bool:
        """
        Return whether this message was sent by the primary replica of the
        relevant view.

        :param msg: the message to check
        :param sender: name of the node that sent this message
        :return: True if the message came from the primary, False otherwise
        """
        if self.isMsgForLaterView(msg):
            logger.error("{} cannot get primary for a request for a later "
                         "view. Request is {}".format(self, msg))
        else:
            return self.primaryName == sender if self.isMsgForCurrentView(
                msg) else self.primaryNames[msg.viewNo] == sender

    def _preProcessReqDigest(self, rd: ReqDigest) -> None:
        """
        Process the request digest if this replica knows whether it is primary;
        otherwise stash the message until the primary status is known.

        :param rd: the client Request Digest
        """
        if self.isPrimary is not None:
            self.processReqDigest(rd)
        else:
            logger.debug(
                "{} stashing request digest {} since it does not know "
                "its primary status".format(self, (rd.identifier, rd.reqId)))
            self._stashInBox(rd)

    def serviceQueues(self, limit=None):
        """
        Process `limit` number of messages in the inBox.

        :param limit: the maximum number of messages to process
        :return: the number of messages successfully processed
        """
        # TODO should handle SuspiciousNode here
        r = self.inBoxRouter.handleAllSync(self.inBox, limit)
        r += self._serviceActions()
        return r
        # Messages that cannot be processed right now need to be added back to
        # the queue; they might be processable later

    def processPostElectionMsgs(self):
        """
        Process messages waiting for the election of a primary replica to
        complete.
        """
        while self.postElectionMsgs:
            msg = self.postElectionMsgs.popleft()
            logger.debug("{} processing pended msg {}".format(self, msg))
            self.dispatchThreePhaseMsg(*msg)

    def process3PhaseReqsQueue(self):
        """
        Process the stashed 3-phase messages that are no longer for a later
        view; messages still for a later view are put back on the queue.
        """
        unprocessed = deque()
        while self.threePhaseMsgsForLaterView:
            request, sender = self.threePhaseMsgsForLaterView.popleft()
            logger.debug("{} processing pended 3 phase request: {}".format(
                self, request))
            # If the request is for a later view dont try to process it but add
            # it back to the queue.
            if self.isMsgForLaterView(request):
                unprocessed.append((request, sender))
            else:
                self.processThreePhaseMsg(request, sender)
        self.threePhaseMsgsForLaterView = unprocessed

    @property
    def quorum(self) -> int:
        r"""
        Return the quorum of this RBFT system. Equal to :math:`2f + 1`.
        Return None if `f` is not yet determined.
        """
        return self.node.quorum

    def dispatchThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str) -> Any:
        """
        Create a three phase request to be handled by the threePhaseRouter.

        :param msg: the ThreePhaseMsg to dispatch
        :param sender: the name of the node that sent this request
        """
        senderRep = self.generateName(sender, self.instId)
        if self.isPpSeqNoAcceptable(msg.ppSeqNo):
            try:
                self.threePhaseRouter.handleSync((msg, senderRep))
            except SuspiciousNode as ex:
                self.node.reportSuspiciousNodeEx(ex)
        else:
            logger.debug("{} stashing 3 phase message {} since ppSeqNo {} is "
                         "not between {} and {}".format(
                             self, msg, msg.ppSeqNo, self.h, self.H))
            self.stashingWhileOutsideWaterMarks.append((msg, sender))

    def processReqDigest(self, rd: ReqDigest):
        """
        Process a request digest. Works only if this replica has decided its
        primary status.

        :param rd: the client request digest to process
        """
        self.stats.inc(TPCStat.ReqDigestRcvd)
        if self.isPrimary is False:
            self.dequeuePrePrepare(rd.identifier, rd.reqId)
        else:
            self.doPrePrepare(rd)

    def processThreePhaseMsg(self, msg: ThreePhaseMsg, sender: str):
        """
        Process a 3-phase (pre-prepare, prepare and commit) request.
        Dispatch the request only if primary has already been decided, otherwise
        stash it.

        :param msg: the Three Phase message, one of PRE-PREPARE, PREPARE,
            COMMIT
        :param sender: name of the node that sent this message
        """
        # Can only proceed further if it knows whether it's primary or not
        if self.isMsgForLaterView(msg):
            self.threePhaseMsgsForLaterView.append((msg, sender))
            logger.debug(
                "{} pended received 3 phase request for a later view: "
                "{}".format(self, msg))
        else:
            if self.isPrimary is None:
                self.postElectionMsgs.append((msg, sender))
                logger.debug("Replica {} pended request {} from {}".format(
                    self, msg, sender))
            else:
                self.dispatchThreePhaseMsg(msg, sender)

    def processPrePrepare(self, pp: PrePrepare, sender: str):
        """
        Validate and process the PRE-PREPARE specified.
        If validation is successful, create a PREPARE and broadcast it.

        :param pp: a prePrepareRequest
        :param sender: name of the node that sent this message
        """
        key = (pp.viewNo, pp.ppSeqNo)
        logger.debug("{} Receiving PRE-PREPARE{} at {} from {}".format(
            self, key, time.perf_counter(), sender))
        if self.canProcessPrePrepare(pp, sender):
            if not self.node.isParticipating:
                self.stashingWhileCatchingUp.add(key)
            self.addToPrePrepares(pp)
            logger.info("{} processed incoming PRE-PREPARE{}".format(
                self, key))

    def tryPrepare(self, pp: PrePrepare):
        """
        Try to send the Prepare message if the PrePrepare message is ready to
        be passed into the Prepare phase.
        """
        if self.canSendPrepare(pp):
            self.doPrepare(pp)
        else:
            logger.debug("{} cannot send PREPARE".format(self))

    def processPrepare(self, prepare: Prepare, sender: str) -> None:
        """
        Validate and process the PREPARE specified.
        If validation is successful, create a COMMIT and broadcast it.

        :param prepare: a PREPARE msg
        :param sender: name of the node that sent the PREPARE
        """
        # TODO move this try/except up higher
        logger.debug("{} received PREPARE{} from {}".format(
            self, (prepare.viewNo, prepare.ppSeqNo), sender))
        try:
            if self.isValidPrepare(prepare, sender):
                self.addToPrepares(prepare, sender)
                self.stats.inc(TPCStat.PrepareRcvd)
                logger.debug("{} processed incoming PREPARE {}".format(
                    self, (prepare.viewNo, prepare.ppSeqNo)))
            else:
                # TODO let's have isValidPrepare throw an exception that gets
                # handled and possibly logged higher
                logger.warning(
                    "{} cannot process incoming PREPARE".format(self))
        except SuspiciousNode as ex:
            self.node.reportSuspiciousNodeEx(ex)

    def processCommit(self, commit: Commit, sender: str) -> None:
        """
        Validate and process the COMMIT specified.
        If validation is successful, return the message to the node.

        :param commit: an incoming COMMIT message
        :param sender: name of the node that sent the COMMIT
        """
        logger.debug("{} received COMMIT {} from {}".format(
            self, commit, sender))
        if self.isValidCommit(commit, sender):
            self.stats.inc(TPCStat.CommitRcvd)
            self.addToCommits(commit, sender)
            logger.debug("{} processed incoming COMMIT{}".format(
                self, (commit.viewNo, commit.ppSeqNo)))

    def tryCommit(self, prepare: Prepare):
        """
        Try to commit if the Prepare message is ready to be passed into the
        commit phase.
        """
        if self.canCommit(prepare):
            self.doCommit(prepare)
        else:
            logger.debug("{} not yet able to send COMMIT".format(self))

    def tryOrder(self, commit: Commit):
        """
        Try to order if the Commit message is ready to be ordered.
        """
        canOrder, reason = self.canOrder(commit)
        if canOrder:
            logger.debug("{} returning request to node".format(self))
            self.tryOrdering(commit)
        else:
            logger.trace("{} cannot return request to node: {}".format(
                self, reason))

    def doPrePrepare(self, reqDigest: ReqDigest) -> None:
        """
        Broadcast a PRE-PREPARE to all the replicas.

        :param reqDigest: a tuple with elements identifier, reqId, and digest
        """
        if not self.node.isParticipating:
            logger.error("Non participating node is attempting PRE-PREPARE. "
                         "This should not happen.")
            return

        if self.lastPrePrepareSeqNo == self.H:
            logger.debug("{} stashing PRE-PREPARE {} since outside greater "
                         "than high water mark {}".format(
                             self, (self.viewNo, self.lastPrePrepareSeqNo + 1),
                             self.H))
            self.stashingWhileOutsideWaterMarks.append(reqDigest)
            return
        self.lastPrePrepareSeqNo += 1
        tm = time.time() * 1000
        logger.debug("{} Sending PRE-PREPARE {} at {}".format(
            self, (self.viewNo, self.lastPrePrepareSeqNo),
            time.perf_counter()))
        prePrepareReq = PrePrepare(self.instId, self.viewNo,
                                   self.lastPrePrepareSeqNo, *reqDigest, tm)
        self.sentPrePrepares[self.viewNo,
                             self.lastPrePrepareSeqNo] = (reqDigest.key, tm)
        self.send(prePrepareReq, TPCStat.PrePrepareSent)

    def doPrepare(self, pp: PrePrepare):
        logger.debug("{} Sending PREPARE {} at {}".format(
            self, (pp.viewNo, pp.ppSeqNo), time.perf_counter()))
        prepare = Prepare(self.instId, pp.viewNo, pp.ppSeqNo, pp.digest,
                          pp.ppTime)
        self.send(prepare, TPCStat.PrepareSent)
        self.addToPrepares(prepare, self.name)

    def doCommit(self, p: Prepare):
        """
        Create a commit message from the given Prepare message and trigger the
        commit phase
        :param p: the prepare message
        """
        logger.debug("{} Sending COMMIT{} at {}".format(
            self, (p.viewNo, p.ppSeqNo), time.perf_counter()))
        commit = Commit(self.instId, p.viewNo, p.ppSeqNo, p.digest, p.ppTime)
        self.send(commit, TPCStat.CommitSent)
        self.addToCommits(commit, self.name)

    def canProcessPrePrepare(self, pp: PrePrepare, sender: str) -> bool:
        """
        Decide whether this replica is eligible to process a PRE-PREPARE,
        based on the following criteria:

        - this replica is a non-primary replica
        - the PRE-PREPARE is not already in its list of received PRE-PREPAREs
        - the corresponding request has been finalised and its digest matches
          the digest in the PRE-PREPARE

        :param pp: a PRE-PREPARE msg to process
        :param sender: the name of the node that sent the PRE-PREPARE msg
        :return: True if processing is allowed, False otherwise
        """
        # TODO: Check whether it is rejecting PRE-PREPARE from previous view
        # PRE-PREPARE should not be sent from non primary
        if not self.isMsgFromPrimary(pp, sender):
            raise SuspiciousNode(sender, Suspicions.PPR_FRM_NON_PRIMARY, pp)

        # A PRE-PREPARE is being sent to primary
        if self.isPrimaryForMsg(pp) is True:
            raise SuspiciousNode(sender, Suspicions.PPR_TO_PRIMARY, pp)

        # A PRE-PREPARE is sent that has already been received
        if (pp.viewNo, pp.ppSeqNo) in self.prePrepares:
            raise SuspiciousNode(sender, Suspicions.DUPLICATE_PPR_SENT, pp)

        key = (pp.identifier, pp.reqId)
        if not self.requests.isFinalised(key):
            self.enqueuePrePrepare(pp, sender)
            return False

        # A PRE-PREPARE is sent that does not match request digest
        if self.requests.digest(key) != pp.digest:
            raise SuspiciousNode(sender, Suspicions.PPR_DIGEST_WRONG, pp)

        return True

    def addToPrePrepares(self, pp: PrePrepare) -> None:
        """
        Add the specified PRE-PREPARE to this replica's list of received
        PRE-PREPAREs.

        :param pp: the PRE-PREPARE to add to the list
        """
        key = (pp.viewNo, pp.ppSeqNo)
        self.prePrepares[key] = \
            ((pp.identifier, pp.reqId), pp.ppTime)
        self.dequeuePrepares(*key)
        self.dequeueCommits(*key)
        self.stats.inc(TPCStat.PrePrepareRcvd)
        self.tryPrepare(pp)

    def hasPrepared(self, request) -> bool:
        return self.prepares.hasPrepareFrom(request, self.name)

    def canSendPrepare(self, request) -> bool:
        """
        Return whether the request identified by the given message can
        proceed to the Prepare step.

        :param request: any object with viewNo, ppSeqNo, identifier and reqId
            attributes (e.g. a PRE-PREPARE)
        """
        return self.shouldParticipate(request.viewNo, request.ppSeqNo) \
            and not self.hasPrepared(request) \
            and self.requests.isFinalised((request.identifier,
                                           request.reqId))

    def isValidPrepare(self, prepare: Prepare, sender: str) -> bool:
        """
        Return whether the PREPARE specified is valid.

        :param prepare: the PREPARE to validate
        :param sender: the name of the node that sent the PREPARE
        :return: True if PREPARE is valid, False otherwise
        """
        key = (prepare.viewNo, prepare.ppSeqNo)
        primaryStatus = self.isPrimaryForMsg(prepare)

        ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares

        # If a non primary replica and receiving a PREPARE request before a
        # PRE-PREPARE request, then proceed

        # PREPARE should not be sent from primary
        if self.isMsgFromPrimary(prepare, sender):
            raise SuspiciousNode(sender, Suspicions.PR_FRM_PRIMARY, prepare)

        # If non primary replica
        if primaryStatus is False:
            if self.prepares.hasPrepareFrom(prepare, sender):
                raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT,
                                     prepare)
            # If PRE-PREPARE not received for the PREPARE, might be slow network
            if key not in ppReqs:
                self.enqueuePrepare(prepare, sender)
                return False
            elif prepare.digest != self.requests.digest(ppReqs[key][0]):
                raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG,
                                     prepare)
            elif prepare.ppTime != ppReqs[key][1]:
                raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare)
            else:
                return True
        # If primary replica
        else:
            if self.prepares.hasPrepareFrom(prepare, sender):
                raise SuspiciousNode(sender, Suspicions.DUPLICATE_PR_SENT,
                                     prepare)
            # If PRE-PREPARE was not sent for this PREPARE, certainly
            # malicious behavior
            elif key not in ppReqs:
                raise SuspiciousNode(sender, Suspicions.UNKNOWN_PR_SENT,
                                     prepare)
            elif prepare.digest != self.requests.digest(ppReqs[key][0]):
                raise SuspiciousNode(sender, Suspicions.PR_DIGEST_WRONG,
                                     prepare)
            elif prepare.ppTime != ppReqs[key][1]:
                raise SuspiciousNode(sender, Suspicions.PR_TIME_WRONG, prepare)
            else:
                return True

    def addToPrepares(self, prepare: Prepare, sender: str):
        self.prepares.addVote(prepare, sender)
        self.tryCommit(prepare)

    def hasCommitted(self, request) -> bool:
        return self.commits.hasCommitFrom(
            ThreePhaseKey(request.viewNo, request.ppSeqNo), self.name)

    def canCommit(self, prepare: Prepare) -> bool:
        """
        Return whether the specified PREPARE can proceed to the Commit
        step.

        Decision criteria:

        - If this replica has received exactly 2f PREPAREs, send a COMMIT
        - If it has received fewer than 2f PREPAREs, there is probably no
            consensus on the request yet; don't commit
        - If it has received more than 2f, a COMMIT has already been sent;
            don't commit again

        :param prepare: the PREPARE
        """
        return self.shouldParticipate(prepare.viewNo, prepare.ppSeqNo) and \
            self.prepares.hasQuorum(prepare, self.f) and \
            not self.hasCommitted(prepare)

    def isValidCommit(self, commit: Commit, sender: str) -> bool:
        """
        Return whether the COMMIT specified is valid.

        :param commit: the COMMIT to validate
        :return: True if `request` is valid, False otherwise
        """
        primaryStatus = self.isPrimaryForMsg(commit)
        ppReqs = self.sentPrePrepares if primaryStatus else self.prePrepares
        key = (commit.viewNo, commit.ppSeqNo)
        if key not in ppReqs:
            self.enqueueCommit(commit, sender)
            return False

        if (key not in self.prepares
                and key not in self.preparesWaitingForPrePrepare):
            logger.debug(
                "{} rejecting COMMIT{} due to lack of prepares".format(
                    self, key))
            # raise SuspiciousNode(sender, Suspicions.UNKNOWN_CM_SENT, commit)
            return False
        elif self.commits.hasCommitFrom(commit, sender):
            raise SuspiciousNode(sender, Suspicions.DUPLICATE_CM_SENT, commit)
        elif commit.digest != self.getDigestFor3PhaseKey(ThreePhaseKey(*key)):
            raise SuspiciousNode(sender, Suspicions.CM_DIGEST_WRONG, commit)
        elif key in ppReqs and commit.ppTime != ppReqs[key][1]:
            raise SuspiciousNode(sender, Suspicions.CM_TIME_WRONG, commit)
        else:
            return True

    def addToCommits(self, commit: Commit, sender: str):
        """
        Add the specified COMMIT to this replica's list of received
        commit requests.

        :param commit: the COMMIT to add to the list
        :param sender: the name of the node that sent the COMMIT
        """
        self.commits.addVote(commit, sender)
        self.tryOrder(commit)

    def hasOrdered(self, viewNo, ppSeqNo) -> bool:
        return (viewNo, ppSeqNo) in self.ordered

    def canOrder(self, commit: Commit) -> Tuple[bool, Optional[str]]:
        """
        Return whether the specified commitRequest can be returned to the node.

        Decision criteria:

        - If exactly 2f+1 COMMITs have been received, return the request to
            the node
        - If fewer than 2f+1, there is probably no consensus on the request
            yet; don't return it to the node
        - If more than 2f+1, the request has already been returned; don't
            return it again

        :param commit: the COMMIT
        """
        if not self.commits.hasQuorum(commit, self.f):
            return False, "no quorum: {} commits where f is {}".\
                          format(commit, self.f)

        if self.hasOrdered(commit.viewNo, commit.ppSeqNo):
            return False, "already ordered"

        if not self.isNextInOrdering(commit):
            viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo
            if viewNo not in self.stashedCommitsForOrdering:
                self.stashedCommitsForOrdering[viewNo] = {}
            self.stashedCommitsForOrdering[viewNo][ppSeqNo] = commit
            self.startRepeating(self.orderStashedCommits, 2)
            return False, "stashing {} since out of order".\
                format(commit)

        return True, None

    def isNextInOrdering(self, commit: Commit):
        viewNo, ppSeqNo = commit.viewNo, commit.ppSeqNo
        if self.ordered and self.ordered[-1] == (viewNo, ppSeqNo - 1):
            return True
        for (v, p) in self.commits:
            if v < viewNo:
                # Have commits from previous view that are unordered.
                # TODO: Question: would commits be always ordered, what if
                # some are never ordered and its fine, go to PBFT.
                return False
            if v == viewNo and p < ppSeqNo and (v, p) not in self.ordered:
                # If unordered commits are found with lower ppSeqNo then this
                # cannot be ordered.
                return False

        # TODO: Revisit PBFT paper, how to make sure that last request of the
        # last view has been ordered? Need change in `VIEW CHANGE` mechanism.
        # Somehow view change needs to communicate what the last request was.
        # Also what if some COMMITs were completely missed in the same view
        return True

    def orderStashedCommits(self):
        # TODO: What if the first few commits were out of order and stashed?
        # `self.ordered` would be empty
        if self.ordered:
            lastOrdered = self.ordered[-1]
            vToRemove = set()
            for v in self.stashedCommitsForOrdering:
                if v < lastOrdered[0] and self.stashedCommitsForOrdering[v]:
                    raise RuntimeError(
                        "{} found commits from previous view {}"
                        " that were not ordered but last ordered"
                        " is {}".format(self, v, lastOrdered))
                pToRemove = set()
                for p, commit in self.stashedCommitsForOrdering[v].items():
                    if (v == lastOrdered[0] and lastOrdered == (v, p - 1)) or \
                            (v > lastOrdered[0] and
                                self.isLowestCommitInView(commit)):
                        logger.debug("{} ordering stashed commit {}".format(
                            self, commit))
                        if self.tryOrdering(commit):
                            lastOrdered = (v, p)
                            pToRemove.add(p)

                for p in pToRemove:
                    del self.stashedCommitsForOrdering[v][p]
                if not self.stashedCommitsForOrdering[v]:
                    vToRemove.add(v)

            for v in vToRemove:
                del self.stashedCommitsForOrdering[v]

            # if self.stashedCommitsForOrdering:
            #     self._schedule(self.orderStashedCommits, 2)
            if not self.stashedCommitsForOrdering:
                self.stopRepeating(self.orderStashedCommits)

    def isLowestCommitInView(self, commit):
        # TODO: Assumption: this assumes that at least one COMMIT sent for
        # any request by any node has been received in the view of this
        # commit
        ppSeqNos = []
        for v, p in self.commits:
            if v == commit.viewNo:
                ppSeqNos.append(p)
        return min(ppSeqNos) == commit.ppSeqNo if ppSeqNos else True

    def tryOrdering(self, commit: Commit) -> bool:
        """
        Attempt to send an ORDERED message for the specified COMMIT to the
        node.

        :param commit: the COMMIT message
        :return: True if the request was ordered, False otherwise
        """
        key = (commit.viewNo, commit.ppSeqNo)
        logger.debug("{} trying to order COMMIT{}".format(self, key))
        reqKey = self.getReqKeyFrom3PhaseKey(key)  # type: Tuple
        digest = self.getDigestFor3PhaseKey(key)
        if not digest:
            logger.error(
                "{} did not find digest for {}, request key {}".format(
                    self, key, reqKey))
            return False
        self.doOrder(*key, *reqKey, digest, commit.ppTime)
        return True

    def doOrder(self, viewNo, ppSeqNo, identifier, reqId, digest, ppTime):
        key = (viewNo, ppSeqNo)
        self.addToOrdered(*key)
        ordered = Ordered(self.instId, viewNo, identifier, reqId, ppTime)
        # TODO: Should not order or add to checkpoint while syncing
        # 3 phase state.
        self.send(ordered, TPCStat.OrderSent)
        if key in self.stashingWhileCatchingUp:
            self.stashingWhileCatchingUp.remove(key)
        logger.debug("{} ordered request {}".format(self, (viewNo, ppSeqNo)))
        self.addToCheckpoint(ppSeqNo, digest)

    def processCheckpoint(self, msg: Checkpoint, sender: str):
        if self.checkpoints:
            seqNo = msg.seqNo
            _, firstChk = self.firstCheckPoint
            if firstChk.isStable:
                if firstChk.seqNo == seqNo:
                    self.discard(msg,
                                 reason="Checkpoint already stable",
                                 logMethod=logger.debug)
                    return
                if firstChk.seqNo > seqNo:
                    self.discard(msg,
                                 reason="Higher stable checkpoint present",
                                 logMethod=logger.debug)
                    return
            for state in self.checkpoints.values():
                if state.seqNo == seqNo:
                    if state.digest == msg.digest:
                        state.receivedDigests[sender] = msg.digest
                        break
                    else:
                        logger.error("{} received an incorrect digest {} for "
                                     "checkpoint {} from {}".format(
                                         self, msg.digest, seqNo, sender))
                        return
            if len(state.receivedDigests) == 2 * self.f:
                self.markCheckPointStable(msg.seqNo)
        else:
            self.discard(msg,
                         reason="No checkpoints present to tally",
                         logMethod=logger.warning)

    def _newCheckpointState(self, ppSeqNo, digest) -> CheckpointState:
        s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1
        logger.debug("{} adding new checkpoint state for {}".format(
            self, (s, e)))
        state = CheckpointState(ppSeqNo, [digest], None, {}, False)
        self.checkpoints[s, e] = state
        return state

    def addToCheckpoint(self, ppSeqNo, digest):
        for (s, e) in self.checkpoints.keys():
            if s <= ppSeqNo <= e:
                state = self.checkpoints[s, e]  # type: CheckpointState
                state.digests.append(digest)
                state = updateNamedTuple(state, seqNo=ppSeqNo)
                self.checkpoints[s, e] = state
                break
        else:
            state = self._newCheckpointState(ppSeqNo, digest)
            s, e = ppSeqNo, ppSeqNo + self.config.CHK_FREQ - 1

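        # Once CHK_FREQ digests have been collected in this range, collapse
        # them into a single checkpoint digest and broadcast a CHECKPOINT
        # message to the other replicas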
        if len(state.digests) == self.config.CHK_FREQ:
            state = updateNamedTuple(state,
                                     digest=serialize(state.digests),
                                     digests=[])
            self.checkpoints[s, e] = state
            self.send(
                Checkpoint(self.instId, self.viewNo, ppSeqNo, state.digest))

    def markCheckPointStable(self, seqNo):
        previousCheckpoints = []
        for (s, e), state in self.checkpoints.items():
            if e == seqNo:
                state = updateNamedTuple(state, isStable=True)
                self.checkpoints[s, e] = state
                break
            else:
                previousCheckpoints.append((s, e))
        else:
            logger.error("{} could not find {} in checkpoints".format(
                self, seqNo))
            return
        self.h = seqNo
        for k in previousCheckpoints:
            logger.debug("{} removing previous checkpoint {}".format(self, k))
            self.checkpoints.pop(k)
        self.gc(seqNo)
        logger.debug("{} marked stable checkpoint {}".format(self, (s, e)))
        self.processStashedMsgsForNewWaterMarks()

    def gc(self, tillSeqNo):
        logger.debug("{} cleaning up till {}".format(self, tillSeqNo))
        tpcKeys = set()
        reqKeys = set()
        for (v, p), (reqKey, _) in self.sentPrePrepares.items():
            if p <= tillSeqNo:
                tpcKeys.add((v, p))
                reqKeys.add(reqKey)
        for (v, p), (reqKey, _) in self.prePrepares.items():
            if p <= tillSeqNo:
                tpcKeys.add((v, p))
                reqKeys.add(reqKey)

        logger.debug("{} found {} 3 phase keys to clean".format(
            self, len(tpcKeys)))
        logger.debug("{} found {} request keys to clean".format(
            self, len(reqKeys)))

        for k in tpcKeys:
            self.sentPrePrepares.pop(k, None)
            self.prePrepares.pop(k, None)
            self.prepares.pop(k, None)
            self.commits.pop(k, None)
            if k in self.ordered:
                self.ordered.remove(k)

        for k in reqKeys:
            self.requests.pop(k, None)

    def processStashedMsgsForNewWaterMarks(self):
        while self.stashingWhileOutsideWaterMarks:
            item = self.stashingWhileOutsideWaterMarks.pop()
            logger.debug("{} processing stashed item {} after new stable "
                         "checkpoint".format(self, item))

            if isinstance(item, ReqDigest):
                self.doPrePrepare(item)
            elif isinstance(item, tuple) and len(item) == 2:
                self.dispatchThreePhaseMsg(*item)
            else:
                logger.error("{} cannot process {} "
                             "from stashingWhileOutsideWaterMarks".format(
                                 self, item))

    @property
    def firstCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]:
        if not self.checkpoints:
            return None
        else:
            return self.checkpoints.peekitem(0)

    @property
    def lastCheckPoint(self) -> Tuple[Tuple[int, int], CheckpointState]:
        if not self.checkpoints:
            return None
        else:
            return self.checkpoints.peekitem(-1)

    def isPpSeqNoAcceptable(self, ppSeqNo: int):
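        # A pp-seq-no is acceptable only while it lies inside the current
        # water-mark window (h, H]; callers stash anything outside it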
        return self.h < ppSeqNo <= self.H

    def addToOrdered(self, viewNo: int, ppSeqNo: int):
        self.ordered.add((viewNo, ppSeqNo))

    def enqueuePrePrepare(self, request: PrePrepare, sender: str):
        logger.debug(
            "Queueing pre-prepares due to unavailability of finalised "
            "Request. Request {} from {}".format(request, sender))
        key = (request.identifier, request.reqId)
        if key not in self.prePreparesPendingReqDigest:
            self.prePreparesPendingReqDigest[key] = []
        self.prePreparesPendingReqDigest[key].append((request, sender))

    def dequeuePrePrepare(self, identifier: str, reqId: int):
        key = (identifier, reqId)
        if key in self.prePreparesPendingReqDigest:
            pps = self.prePreparesPendingReqDigest[key]
            for (pp, sender) in pps:
                logger.debug("{} popping stashed PRE-PREPARE{}".format(
                    self, key))
                if pp.digest == self.requests.digest(key):
                    self.prePreparesPendingReqDigest.pop(key)
                    self.processPrePrepare(pp, sender)
                    logger.debug(
                        "{} processed stashed PRE-PREPARE {} that was waiting "
                        "for the finalised request with identifier {} and "
                        "reqId {}".format(self, pp, identifier, reqId))
                    break

    def enqueuePrepare(self, request: Prepare, sender: str):
        logger.debug("Queueing prepares due to unavailability of PRE-PREPARE. "
                     "Request {} from {}".format(request, sender))
        key = (request.viewNo, request.ppSeqNo)
        if key not in self.preparesWaitingForPrePrepare:
            self.preparesWaitingForPrePrepare[key] = deque()
        self.preparesWaitingForPrePrepare[key].append((request, sender))

    def dequeuePrepares(self, viewNo: int, ppSeqNo: int):
        key = (viewNo, ppSeqNo)
        if key in self.preparesWaitingForPrePrepare:
            i = 0
            # Count of pending PREPAREs processed below
            while self.preparesWaitingForPrePrepare[key]:
                prepare, sender = self.preparesWaitingForPrePrepare[
                    key].popleft()
                logger.debug("{} popping stashed PREPARE{}".format(self, key))
                self.processPrepare(prepare, sender)
                i += 1
            self.preparesWaitingForPrePrepare.pop(key)
            logger.debug("{} processed {} PREPAREs waiting for PRE-PREPARE for"
                         " view no {} and seq no {}".format(
                             self, i, viewNo, ppSeqNo))

    def enqueueCommit(self, request: Commit, sender: str):
        logger.debug("Queueing commit due to unavailability of PREPARE. "
                     "Request {} from {}".format(request, sender))
        key = (request.viewNo, request.ppSeqNo)
        if key not in self.commitsWaitingForPrepare:
            self.commitsWaitingForPrepare[key] = deque()
        self.commitsWaitingForPrepare[key].append((request, sender))

    def dequeueCommits(self, viewNo: int, ppSeqNo: int):
        key = (viewNo, ppSeqNo)
        if key in self.commitsWaitingForPrepare:
            i = 0
            # Count of pending COMMITs processed below
            while self.commitsWaitingForPrepare[key]:
                commit, sender = self.commitsWaitingForPrepare[key].popleft()
                logger.debug("{} popping stashed COMMIT{}".format(self, key))
                self.processCommit(commit, sender)
                i += 1
            self.commitsWaitingForPrepare.pop(key)
            logger.debug("{} processed {} COMMITs waiting for PREPARE for"
                         " view no {} and seq no {}".format(
                             self, i, viewNo, ppSeqNo))

    def getDigestFor3PhaseKey(self, key: ThreePhaseKey) -> Optional[str]:
        reqKey = self.getReqKeyFrom3PhaseKey(key)
        digest = self.requests.digest(reqKey)
        if not digest:
            logger.debug("{} could not find digest in sent or received "
                         "PRE-PREPAREs or PREPAREs for 3 phase key {} and req "
                         "key {}".format(self, key, reqKey))
            return None
        else:
            return digest

    def getReqKeyFrom3PhaseKey(self, key: ThreePhaseKey):
        reqKey = None
        if key in self.sentPrePrepares:
            reqKey = self.sentPrePrepares[key][0]
        elif key in self.prePrepares:
            reqKey = self.prePrepares[key][0]
        elif key in self.prepares:
            reqKey = self.prepares[key][0]
        else:
            logger.debug(
                "Could not find request key for 3 phase key {}".format(key))
        return reqKey

    @property
    def threePhaseState(self):
        # TODO: This method is incomplete
        # Gets the current stable and unstable checkpoints and creates digest
        # of unstable checkpoints
        state = []
        if self.checkpoints:
            # TODO: derive the state from stable and unstable checkpoints
            pass
        return ThreePCState(self.instId, state)

    def process3PhaseState(self, msg: ThreePCState, sender: str):
        # TODO: This is not complete
        pass

    def send(self, msg, stat=None) -> None:
        """
        Send a message to the node on which this replica resides.

        :param msg: the message to send
        """
        logger.display("{} sending {}".format(self, msg.__class__.__name__),
                       extra={"cli": True})
        logger.trace("{} sending {}".format(self, msg))
        if stat:
            self.stats.inc(stat)
        self.outBox.append(msg)
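
The Replica example above leans on two SortedDict properties: a key function (here lambda k: k[0]) keeps the (start, end) checkpoint ranges ordered by their range start, and peekitem(0)/peekitem(-1) give the first and last checkpoint without scanning. Below is a minimal standalone sketch of that pattern, not part of the original example; CHK_FREQ and LOG_SIZE are illustrative values rather than the project's real config.

from sortedcontainers import SortedDict

CHK_FREQ = 100   # illustrative checkpoint frequency
LOG_SIZE = 300   # illustrative log size (H - h)

checkpoints = SortedDict(lambda k: k[0])
for start in (201, 1, 101):                       # inserted out of order
    checkpoints[(start, start + CHK_FREQ - 1)] = "state {}..{}".format(
        start, start + CHK_FREQ - 1)

print(list(checkpoints.keys()))   # [(1, 100), (101, 200), (201, 300)]
print(checkpoints.peekitem(0))    # oldest checkpoint (cf. firstCheckPoint)
print(checkpoints.peekitem(-1))   # newest checkpoint (cf. lastCheckPoint)

# Water-mark gating as in isPpSeqNoAcceptable: only pp-seq-nos in (h, H]
# are processed, everything else gets stashed.
h = 100
H = h + LOG_SIZE
print([n for n in (50, 101, 401) if h < n <= H])  # [101]
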
Example #38
0
class Superplot():
    """
Self-contained plotting class that runs in its own process.
Plotting functionality (reset the graph, .. ?) can be controlled
by issuing message-based commands using a multiprocessing Pipe

    """
    def __init__(self, name, plottype=PlotType.indexed):
        self.name = name
        self.plottype = plottype
        self._clear()

    def _clear(self):
        # Process-local buffers used to host the displayed data
        if self.plottype == PlotType.linear:
            self.set = True
            self.x = []
            self.y = []
        else:
            self.xy = SortedDict()
            # TODO : use this optimization, but for now raises issue
            # Can't pickle dict_key views ??
            #self.x = self.xy.keys()
            #self.y = self.xy.values()
            self.set = False

    def start(self):
        # The queue that will be used to transfer data from the main process
        # to the plot
        self.q = Queue()
        main_pipe, self.in_process_pipe = Pipe()
        self.p = Process(target=self.run)
        self.p.start()
        # Return a handle to the data queue and the control pipe
        return self.q, main_pipe

    def join(self):
        self.p.join()

    def _update(self):
        # Empty data queue and process received data
        while not self.q.empty():
            item = self.q.get()
            if self.plottype == PlotType.linear:
                self.x.append(item[0])
                self.y.append(item[1])
            else:
                # Seems pretty slow,
                # TODO : Profile
                # TODO : Eventually, need to find high performance alternative. Maybe numpy based
                self.xy[item[0]] = item[1]

        # Initialize views on the data dictionary only once for increased performance
        if not self.set:
            self.set = True
            self.x = self.xy.keys()
            self.y = self.xy.values()

        # Refresh plot data
        self.curve.setData(self.x, self.y)

        try:
            if self.in_process_pipe.poll():
                msg = self.in_process_pipe.recv()
                self._process_msg(msg)
        except Exception:
            # If the polling failed, then the application most likely shut down
            # So close the window and terminate as well
            self.app.quit()

    def _process_msg(self, msg):
        if msg == "exit":
            # TODO : Remove this line ? Redundant with send after app.exec_() ?
            self.in_process_pipe.send("closing")
            self.app.quit()
        elif msg == "clear":
            self._clear()

    def run(self):
        self.app = QtGui.QApplication([])
        win = pg.GraphicsWindow(title="Basic plotting examples")
        win.resize(1000,600)
        win.setWindowTitle('pyqtgraph example: Plotting')
        plot = win.addPlot(title=self.name)
        self.curve = plot.plot(pen='y')

        timer = QtCore.QTimer()
        timer.timeout.connect(self._update)
        timer.start(50)

        self.app.exec_()
        try:
            self.in_process_pipe.send("closing")
        except Exception:
            pass
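
Superplot's indexed mode relies on SortedDict's live key and value views: points inserted out of order are immediately visible, already sorted, through the same view objects, which is what the commented-out optimization in _clear and the one-time assignment in _update are about. A small sketch of that behaviour, separate from the class above; the final commented line only indicates what Superplot would hand to pyqtgraph.

from sortedcontainers import SortedDict

xy = SortedDict()
for x, y in [(3, 9), (1, 1), (2, 4)]:   # samples arriving out of order
    xy[x] = y

xs = xy.keys()       # live views: no re-sorting needed on each update
ys = xy.values()
print(list(xs), list(ys))   # [1, 2, 3] [1, 4, 9]

xy[0] = 0                   # new point shows up in both views, in order
print(list(xs), list(ys))   # [0, 1, 2, 3] [0, 1, 4, 9]

# In Superplot._update this pair would be passed to the curve:
# self.curve.setData(list(xs), list(ys))
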
Example #39
0
def read_swans(
    fileglob, ndays=None, int_freq=True, int_dir=False, dirorder=True, ntimes=None
):
    """Read multiple SWAN ASCII files into single Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying files to read.
        - ndays (float): number of days to keep from each file, choose None to
          keep entire period.
        - int_freq (ndarray, bool): frequency array for interpolating onto:
            - ndarray: 1d array specifying frequencies to interpolate onto.
            - True: a logarithmic array is constructed such that
              fmin=0.04118 Hz, fmax=0.71856 Hz, df=0.1*f.
            - False: No interpolation performed in frequency space.
        - int_dir (ndarray, bool): direction array for interpolating onto:
            - ndarray: 1d array specifying directions to interpolate onto.
            - True: circular array is constructed such that dd=10 degrees.
            - False: No interpolation performed in direction space.
        - dirorder (bool): if True ensures directions are sorted.
        - ntimes (int): use it to read only a specific number of times, useful
          for checking headers only.

    Returns:
        - dset (SpecDataset): spectra dataset object read from file with
          different sites and cycles concatenated along the 'site' and 'time'
          dimensions.

    Note:
        - If multiple cycles are provided, 'time' coordinate is replaced by
          'cycletime' multi-index coordinate.
        - If more than one cycle is prescribed from fileglob, each cycle must
          have same number of sites.
        - Either all or none of the spectra in fileglob must have tabfile
          associated to provide wind/depth data.
        - Concatenation is done with numpy arrays for efficiency.

    """
    swans = (
        sorted(fileglob) if isinstance(fileglob, list) else sorted(glob.glob(fileglob))
    )
    assert swans, "No SWAN file identified with fileglob %s" % (fileglob)

    # Default spectral basis for interpolating
    if int_freq is True:
        int_freq = [0.04118 * 1.1 ** n for n in range(31)]
    elif int_freq is False:
        int_freq = None
    if int_dir is True:
        int_dir = np.arange(0, 360, 10)
    elif int_dir is False:
        int_dir = None

    cycles = list()
    dsets = SortedDict()
    tabs = SortedDict()
    all_times = list()
    all_sites = SortedDict()
    all_lons = SortedDict()
    all_lats = SortedDict()
    deps = SortedDict()
    wspds = SortedDict()
    wdirs = SortedDict()

    for filename in swans:
        swanfile = SwanSpecFile(filename, dirorder=dirorder)
        times = swanfile.times
        lons = list(swanfile.x)
        lats = list(swanfile.y)
        sites = (
            [os.path.splitext(os.path.basename(filename))[0]]
            if len(lons) == 1
            else np.arange(len(lons)) + 1
        )
        freqs = swanfile.freqs
        dirs = swanfile.dirs

        if ntimes is None:
            spec_list = [s for s in swanfile.readall()]
        else:
            spec_list = [swanfile.read() for itime in range(ntimes)]

        # Read tab files for winds / depth
        if swanfile.is_tab:
            try:
                tab = read_tab(swanfile.tabfile).rename(columns={"dep": attrs.DEPNAME})
                if len(swanfile.times) == tab.index.size:
                    if "X-wsp" in tab and "Y-wsp" in tab:
                        tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(
                            tab["X-wsp"], tab["Y-wsp"], coming_from=True
                        )
                else:
                    warnings.warn(
                        "Times in {} and {} not consistent, not appending "
                        "winds and depth".format(swanfile.filename, swanfile.tabfile)
                    )
                    tab = pd.DataFrame()
                tab = tab[
                    list(
                        set(tab.columns).intersection(
                            (attrs.DEPNAME, attrs.WSPDNAME, attrs.WDIRNAME)
                        )
                    )
                ]
            except Exception as exc:
                warnings.warn(
                    "Cannot parse depth and winds from {}:\n{}".format(
                        swanfile.tabfile, exc
                    )
                )
        else:
            tab = pd.DataFrame()

        # Shrinking times
        if ndays is not None:
            tend = times[0] + datetime.timedelta(days=ndays)
            if tend > times[-1]:
                raise IOError(
                    "Times in %s does not extend for %0.2f days" % (filename, ndays)
                )
            iend = times.index(min(times, key=lambda d: abs(d - tend)))
            times = times[0 : iend + 1]
            spec_list = spec_list[0 : iend + 1]
            tab = tab.loc[times[0] : tend] if tab is not None else tab

        spec_list = flatten_list(spec_list, [])

        # Interpolate spectra
        if int_freq is not None or int_dir is not None:
            spec_list = [
                interp_spec(spec, freqs, dirs, int_freq, int_dir) for spec in spec_list
            ]
            freqs = int_freq if int_freq is not None else freqs
            dirs = int_dir if int_dir is not None else dirs

        # Appending
        try:
            arr = np.array(spec_list).reshape(
                len(times), len(sites), len(freqs), len(dirs)
            )
            cycle = times[0]
            if cycle not in dsets:
                dsets[cycle] = [arr]
                tabs[cycle] = [tab]
                all_sites[cycle] = sites
                all_lons[cycle] = lons
                all_lats[cycle] = lats
                all_times.append(times)
                nsites = 1
            else:
                dsets[cycle].append(arr)
                tabs[cycle].append(tab)
                all_sites[cycle].extend(sites)
                all_lons[cycle].extend(lons)
                all_lats[cycle].extend(lats)
                nsites += 1
        except Exception:
            if len(spec_list) != arr.shape[0]:
                raise IOError(
                    "Time length in %s (%i) does not match previous files "
                    "(%i), cannot concatenate"
                    % (filename, len(spec_list), arr.shape[0])
                )
            else:
                raise
        swanfile.close()

    cycles = dsets.keys()

    # Ensuring sites are consistent across cycles
    sites = all_sites[cycle]
    lons = all_lons[cycle]
    lats = all_lats[cycle]
    for site, lon, lat in zip(all_sites.values(), all_lons.values(), all_lats.values()):
        if (
            (list(site) != list(sites))
            or (list(lon) != list(lons))
            or (list(lat) != list(lats))
        ):
            raise IOError("Inconsistent sites across cycles in glob pattern provided")

    # Ensuring consistent tabs
    cols = set(
        [
            frozenset(tabs[cycle][n].columns)
            for cycle in cycles
            for n in range(len(tabs[cycle]))
        ]
    )
    if len(cols) > 1:
        raise IOError(
            "Inconsistent tab files, ensure either all or none of the spectra have "
            "associated tabfiles and columns are consistent"
        )

    # Concat sites
    for cycle in cycles:
        dsets[cycle] = np.concatenate(dsets[cycle], axis=1)
        deps[cycle] = (
            np.vstack([tab[attrs.DEPNAME].values for tab in tabs[cycle]]).T
            if attrs.DEPNAME in tabs[cycle][0]
            else None
        )
        wspds[cycle] = (
            np.vstack([tab[attrs.WSPDNAME].values for tab in tabs[cycle]]).T
            if attrs.WSPDNAME in tabs[cycle][0]
            else None
        )
        wdirs[cycle] = (
            np.vstack([tab[attrs.WDIRNAME].values for tab in tabs[cycle]]).T
            if attrs.WDIRNAME in tabs[cycle][0]
            else None
        )

    time_sizes = [dsets[cycle].shape[0] for cycle in cycles]

    # Concat cycles
    if len(dsets) > 1:
        dsets = np.concatenate(dsets.values(), axis=0)
        deps = (
            np.concatenate(deps.values(), axis=0)
            if attrs.DEPNAME in tabs[cycle][0]
            else None
        )
        wspds = (
            np.concatenate(wspds.values(), axis=0)
            if attrs.WSPDNAME in tabs[cycle][0]
            else None
        )
        wdirs = (
            np.concatenate(wdirs.values(), axis=0)
            if attrs.WDIRNAME in tabs[cycle][0]
            else None
        )
    else:
        dsets = dsets[cycle]
        deps = deps[cycle] if attrs.DEPNAME in tabs[cycle][0] else None
        wspds = wspds[cycle] if attrs.WSPDNAME in tabs[cycle][0] else None
        wdirs = wdirs[cycle] if attrs.WDIRNAME in tabs[cycle][0] else None

    # Creating dataset
    times = flatten_list(all_times, [])
    dsets = xr.DataArray(
        data=dsets,
        coords=OrderedDict(
            (
                (attrs.TIMENAME, times),
                (attrs.SITENAME, sites),
                (attrs.FREQNAME, freqs),
                (attrs.DIRNAME, dirs),
            )
        ),
        dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME),
        name=attrs.SPECNAME,
    ).to_dataset()

    dsets[attrs.LATNAME] = xr.DataArray(
        data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )
    dsets[attrs.LONNAME] = xr.DataArray(
        data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME]
    )

    if wspds is not None:
        dsets[attrs.WSPDNAME] = xr.DataArray(
            data=wspds,
            dims=[attrs.TIMENAME, attrs.SITENAME],
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
        dsets[attrs.WDIRNAME] = xr.DataArray(
            data=wdirs,
            dims=[attrs.TIMENAME, attrs.SITENAME],
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )
    if deps is not None:
        dsets[attrs.DEPNAME] = xr.DataArray(
            data=deps,
            dims=[attrs.TIMENAME, attrs.SITENAME],
            coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))),
        )

    # Setting multi-index
    if len(cycles) > 1:
        dsets = dsets.rename({attrs.TIMENAME: "cycletime"})
        cycletime = zip(
            [
                item
                for sublist in [[c] * t for c, t in zip(cycles, time_sizes)]
                for item in sublist
            ],
            dsets.cycletime.values,
        )
        dsets["cycletime"] = pd.MultiIndex.from_tuples(
            cycletime, names=[attrs.CYCLENAME, attrs.TIMENAME]
        )
        dsets["cycletime"].attrs = attrs.ATTRS[attrs.TIMENAME]

    set_spec_attributes(dsets)
    if "dir" in dsets and len(dsets.dir) > 1:
        dsets[attrs.SPECNAME].attrs.update(
            {"_units": "m^{2}.s.degree^{-1}", "_variable_name": "VaDens"}
        )
    else:
        dsets[attrs.SPECNAME].attrs.update(
            {"units": "m^{2}.s", "_units": "m^{2}.s", "_variable_name": "VaDens"}
        )

    return dsets
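
read_swans keys several SortedDicts by the cycle start time, so iteration, and therefore concatenation along the time axis, is chronological regardless of how the file names sort. A reduced sketch of that accumulation step follows; it is not part of the function above, and it uses illustrative string timestamps and tiny constant arrays in place of real SWAN spectra.

import numpy as np
from sortedcontainers import SortedDict

# (ntimes, nsites, nfreqs, ndirs) blocks keyed by cycle start time; each
# cycle collects one block per file, later concatenated over sites.
dsets = SortedDict()
dsets["20230102T00"] = [np.ones((2, 1, 3, 4)), np.ones((2, 1, 3, 4))]
dsets["20230101T00"] = [np.zeros((2, 1, 3, 4)), np.zeros((2, 1, 3, 4))]

per_cycle = [np.concatenate(blocks, axis=1) for blocks in dsets.values()]
stacked = np.concatenate(per_cycle, axis=0)   # cycles in chronological order

print(list(dsets.keys()))   # ['20230101T00', '20230102T00']
print(stacked.shape)        # (4, 2, 3, 4)
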
Example #40
0
class DownloadTask(QObject):
    download_ready = Signal(QObject)
    download_not_ready = Signal(QObject)
    download_complete = Signal(QObject)
    download_failed = Signal(QObject)
    download_error = Signal(str)
    download_ok = Signal()

    download_finishing = Signal()
    copy_added = Signal(str)
    chunk_downloaded = Signal(
        str,  # obj_id
        str,  # str(offset) to fix offset >= 2**31
        int)  # length
    chunk_aborted = Signal()
    request_data = Signal(
        str,  # node_id
        str,  # obj_id
        str,  # str(offset) to fix offset >= 2**31
        int)  # length
    abort_data = Signal(
        str,  # node_id
        str,  # obj_id
        str)  # str(offset) to fix offset >= 2**31
    possibly_sync_folder_is_removed = Signal()
    no_disk_space = Signal(
        QObject,  # task
        str,  # display_name
        bool)  # is error
    wrong_hash = Signal(QObject)  # task)
    signal_info_rx = Signal(tuple)

    default_part_size = DOWNLOAD_PART_SIZE
    receive_timeout = 20  # seconds
    retry_limit = 2
    timeouts_limit = 2
    max_node_chunk_requests = 128
    end_race_timeout = 5.  # seconds

    def __init__(self,
                 tracker,
                 connectivity_service,
                 priority,
                 obj_id,
                 obj_size,
                 file_path,
                 display_name,
                 file_hash=None,
                 parent=None,
                 files_info=None):
        QObject.__init__(self, parent=parent)
        self._tracker = tracker
        self._connectivity_service = connectivity_service

        self.priority = priority
        self.size = obj_size
        self.id = obj_id
        self.file_path = file_path
        self.file_hash = file_hash
        self.download_path = file_path + '.download'
        self._info_path = file_path + '.info'
        self.display_name = display_name
        self.received = 0
        self.files_info = files_info

        self.hash_is_wrong = False
        self._ready = False
        self._started = False
        self._paused = False
        self._finished = False
        self._no_disk_space_error = False

        self._wanted_chunks = SortedDict()
        self._downloaded_chunks = SortedDict()
        self._nodes_available_chunks = dict()
        self._nodes_requested_chunks = dict()
        self._nodes_last_receive_time = dict()
        self._nodes_downloaded_chunks_count = dict()
        self._nodes_timeouts_count = dict()
        self._total_chunks_count = 0

        self._file = None
        self._info_file = None

        self._started_time = time()

        self._took_from_turn = 0
        self._received_via_turn = 0
        self._received_via_p2p = 0

        self._retry = 0

        self._limiter = None

        self._init_wanted_chunks()

        self._on_downloaded_cb = None
        self._on_failed_cb = None
        self.download_complete.connect(self._on_downloaded)
        self.download_failed.connect(self._on_failed)

        self._timeout_timer = QTimer(self)
        self._timeout_timer.setInterval(15 * 1000)
        self._timeout_timer.setSingleShot(False)
        self._timeout_timer.timeout.connect(self._on_check_timeouts)

        self._leaky_timer = QTimer(self)
        self._leaky_timer.setInterval(1000)
        self._leaky_timer.setSingleShot(True)
        self._leaky_timer.timeout.connect(self._download_chunks)

        self._network_limited_error_set = False

    def __lt__(self, other):
        if not isinstance(other, DownloadTask):
            return object.__lt__(self, other)

        if self == other:
            return False

        if self.priority == other.priority:
            if self.size - self.received == other.size - other.received:
                return self.id < other.id

            return self.size - self.received < other.size - other.received

        return self.priority > other.priority

    def __le__(self, other):
        if not isinstance(other, DownloadTask):
            return object.__le__(self, other)

        if self == other:
            return True

        if self.priority == other.priority:
            if self.size - self.received == other.size - other.received:
                return self.id < other.id

            return self.size - self.received < other.size - other.received

        return self.priority >= other.priority

    def __gt__(self, other):
        if not isinstance(other, DownloadTask):
            return object.__gt__(self, other)

        if self == other:
            return False

        if self.priority == other.priority:
            if self.size - self.received == other.size - other.received:
                return self.id > other.id

            return self.size - self.received > other.size - other.received

        return self.priority <= other.priority

    def __ge__(self, other):
        if not isinstance(other, DownloadTask):
            return object.__ge__(self, other)

        if self == other:
            return True

        if self.priority == other.priority:
            if self.size - self.received == other.size - other.received:
                return self.id > other.id

            return self.size - self.received > other.size - other.received

        return self.priority <= other.priority

    def __eq__(self, other):
        if not isinstance(other, DownloadTask):
            return object.__eq__(self, other)

        return self.id == other.id

    def on_availability_info_received(self, node_id, obj_id, info):
        if obj_id != self.id or self._finished or not info:
            return

        logger.info(
            "availability info received, "
            "node_id: %s, obj_id: %s, info: %s", node_id, obj_id, info)

        new_chunks_stored = self._store_availability_info(node_id, info)
        if not self._ready and new_chunks_stored:
            if self._check_can_receive(node_id):
                self._ready = True
                self.download_ready.emit(self)
            else:
                self.download_error.emit('Turn limit reached')

        if self._started and not self._paused \
                and not self._nodes_requested_chunks.get(node_id, None):
            logger.debug("Downloading next chunk")
            self._download_next_chunks(node_id)
            self._clean_nodes_last_receive_time()
            self._check_download_not_ready(self._nodes_requested_chunks)

    def on_availability_info_failure(self, node_id, obj_id, error):
        if obj_id != self.id or self._finished:
            return

        logger.info(
            "availability info failure, "
            "node_id: %s, obj_id: %s, error: %s", node_id, obj_id, error)
        try:
            if error["err_code"] == "FILE_CHANGED":
                self.download_failed.emit(self)
        except Exception as e:
            logger.warning("Can't parse error message. Reson: %s", e)

    def start(self, limiter):
        if exists(self.file_path):
            logger.info("download task file already downloaded %s",
                        self.file_path)
            self.received = self.size
            self.download_finishing.emit()
            self.download_complete.emit(self)
            return

        self._limiter = limiter

        if self._started:
            # if we swapped task earlier
            self.resume()
            return

        self._no_disk_space_error = False
        if not self.check_disk_space():
            return

        logger.info("starting download task, obj_id: %s", self.id)
        self._started = True
        self._paused = False
        self.hash_is_wrong = False
        self._started_time = time()
        self._send_start_statistic()
        if not self._open_file():
            return

        self._read_info_file()

        for downloaded_chunk in self._downloaded_chunks.items():
            self._remove_from_chunks(downloaded_chunk[0], downloaded_chunk[1],
                                     self._wanted_chunks)

        self.received = sum(self._downloaded_chunks.values())
        if self._complete_download():
            return

        self._download_chunks()
        if not self._timeout_timer.isActive():
            self._timeout_timer.start()

    def check_disk_space(self):
        if self.size * 2 + get_signature_file_size(self.size) > \
                get_free_space_by_filepath(self.file_path):
            self._emit_no_disk_space()
            return False

        return True

    def pause(self, disconnect_cb=True):
        self._paused = True
        if disconnect_cb:
            self.disconnect_callbacks()
        self.stop_download_chunks()

    def resume(self, start_download=True):
        self._started_time = time()
        self._paused = False
        self.hash_is_wrong = False
        if start_download:
            self._started = True
            self._download_chunks()
            if not self._timeout_timer.isActive():
                self._timeout_timer.start()

    def cancel(self):
        self._close_file()
        self._close_info_file()
        self.stop_download_chunks()

        self._finished = True

    def clean(self):
        logger.debug("Cleaning download files %s", self.download_path)
        try:
            remove_file(self.download_path)
        except Exception:
            pass
        try:
            remove_file(self._info_path)
        except Exception:
            pass

    def connect_callbacks(self, on_downloaded, on_failed):
        self._on_downloaded_cb = on_downloaded
        self._on_failed_cb = on_failed

    def disconnect_callbacks(self):
        self._on_downloaded_cb = None
        self._on_failed_cb = None

    @property
    def ready(self):
        return self._ready

    @property
    def paused(self):
        return self._paused

    @property
    def no_disk_space_error(self):
        return self._no_disk_space_error

    def _init_wanted_chunks(self):
        self._total_chunks_count = math.ceil(
            float(self.size) / float(DOWNLOAD_CHUNK_SIZE))

        self._wanted_chunks[0] = self.size

    def _on_downloaded(self, task):
        if callable(self._on_downloaded_cb):
            self._on_downloaded_cb(task)
            self._on_downloaded_cb = None

    def _on_failed(self, task):
        if callable(self._on_failed_cb):
            self._on_failed_cb(task)
            self._on_failed_cb = None

    def on_data_received(self, node_id, obj_id, offset, length, data):
        if obj_id != self.id or self._finished:
            return

        logger.debug(
            "on_data_received for objId: %s, offset: %s, from node_id: %s",
            self.id, offset, node_id)

        now = time()
        last_received_time = self._nodes_last_receive_time.get(node_id, 0.)
        if node_id in self._nodes_last_receive_time:
            self._nodes_last_receive_time[node_id] = now

        self._nodes_timeouts_count.pop(node_id, 0)

        downloaded_count = \
            self._nodes_downloaded_chunks_count.get(node_id, 0) + 1
        self._nodes_downloaded_chunks_count[node_id] = downloaded_count

        # to collect traffic info
        node_type = self._connectivity_service.get_self_node_type()
        is_share = node_type == "webshare"
        # tuple -> (obj_id, rx_wd, rx_wr, is_share)
        if self._connectivity_service.is_relayed(node_id):
            # relayed traffic
            info_rx = (obj_id, 0, length, is_share)
        else:
            # p2p traffic
            info_rx = (obj_id, length, 0, is_share)
        self.signal_info_rx.emit(info_rx)

        if not self._is_chunk_already_downloaded(offset):
            if not self._on_new_chunk_downloaded(node_id, offset, length,
                                                 data):
                return

        else:
            logger.debug("chunk %s already downloaded", offset)

        requested_chunks = self._nodes_requested_chunks.get(
            node_id, SortedDict())
        if not requested_chunks:
            return

        self._remove_from_chunks(offset, length, requested_chunks)

        if not requested_chunks:
            self._nodes_requested_chunks.pop(node_id, None)

        requested_count = sum(requested_chunks.values()) // DOWNLOAD_CHUNK_SIZE
        if downloaded_count * 4 >= requested_count \
                and requested_count < self.max_node_chunk_requests:
            self._download_next_chunks(node_id, now - last_received_time)
            self._clean_nodes_last_receive_time()
            self._check_download_not_ready(self._nodes_requested_chunks)

    def _is_chunk_already_downloaded(self, offset):
        if self._downloaded_chunks:
            chunk_index = self._downloaded_chunks.bisect_right(offset)
            if chunk_index > 0:
                chunk_index -= 1

                chunk = self._downloaded_chunks.peekitem(chunk_index)
                if offset < chunk[0] + chunk[1]:
                    return True

        return False

    def _on_new_chunk_downloaded(self, node_id, offset, length, data):
        if not self._write_to_file(offset, data):
            return False

        self.received += length
        if self._connectivity_service.is_relayed(node_id):
            self._received_via_turn += length
        else:
            self._received_via_p2p += length

        new_offset = offset
        new_length = length

        left_index = self._downloaded_chunks.bisect_right(new_offset)
        if left_index > 0:
            left_chunk = self._downloaded_chunks.peekitem(left_index - 1)
            if left_chunk[0] + left_chunk[1] == new_offset:
                new_offset = left_chunk[0]
                new_length += left_chunk[1]
                self._downloaded_chunks.popitem(left_index - 1)

        right_index = self._downloaded_chunks.bisect_right(new_offset +
                                                           new_length)
        if right_index > 0:
            right_chunk = self._downloaded_chunks.peekitem(right_index - 1)
            if right_chunk[0] == new_offset + new_length:
                new_length += right_chunk[1]
                self._downloaded_chunks.popitem(right_index - 1)

        self._downloaded_chunks[new_offset] = new_length

        assert self._remove_from_chunks(offset, length, self._wanted_chunks)

        logger.debug("new chunk downloaded from node: %s, wanted size: %s",
                     node_id, sum(self._wanted_chunks.values()))

        # integer division keeps the part offset an int under Python 3
        part_offset = (offset // DOWNLOAD_PART_SIZE) * DOWNLOAD_PART_SIZE
        part_size = min([DOWNLOAD_PART_SIZE, self.size - part_offset])
        if new_offset <= part_offset \
                and new_offset + new_length >= part_offset + part_size:
            if self._file:
                self._file.flush()
            self._write_info_file()

            self.chunk_downloaded.emit(self.id, str(part_offset), part_size)

        if self._complete_download():
            return False

        return True

    def _remove_from_chunks(self, offset, length, chunks):
        if not chunks:
            return False

        chunk_left_index = chunks.bisect_right(offset)
        if chunk_left_index > 0:
            left_chunk = chunks.peekitem(chunk_left_index - 1)
            if offset >= left_chunk[0] + left_chunk[1] \
                    and len(chunks) > chunk_left_index:
                left_chunk = chunks.peekitem(chunk_left_index)
            else:
                chunk_left_index -= 1
        else:
            left_chunk = chunks.peekitem(chunk_left_index)

        if offset >= left_chunk[0] + left_chunk[1] or \
                offset + length <= left_chunk[0]:
            return False

        chunk_right_index = chunks.bisect_right(offset + length)
        right_chunk = chunks.peekitem(chunk_right_index - 1)

        if chunk_right_index == chunk_left_index:
            to_del = [right_chunk[0]]
        else:
            to_del = list(chunks.islice(chunk_left_index, chunk_right_index))

        for chunk in to_del:
            chunks.pop(chunk)

        if left_chunk[0] < offset:
            if left_chunk[0] + left_chunk[1] >= offset:
                chunks[left_chunk[0]] = offset - left_chunk[0]

        if right_chunk[0] + right_chunk[1] > offset + length:
            chunks[offset + length] = \
                right_chunk[0] + right_chunk[1] - offset - length
        return True

    def on_data_failed(self, node_id, obj_id, offset, error):
        if obj_id != self.id or self._finished:
            return

        logger.info(
            "data request failure, "
            "node_id: %s, obj_id: %s, offset: %s, error: %s", node_id, obj_id,
            offset, error)

        self.on_node_disconnected(node_id)

    def get_downloaded_chunks(self):
        if not self._downloaded_chunks:
            return None

        return self._downloaded_chunks

    def on_node_disconnected(self,
                             node_id,
                             connection_alive=False,
                             timeout_limit_exceed=True):
        requested_chunks = self._nodes_requested_chunks.pop(node_id, None)
        logger.info("node disconnected %s, chunks removed from requested: %s",
                    node_id, requested_chunks)
        if timeout_limit_exceed:
            self._nodes_available_chunks.pop(node_id, None)
            self._nodes_timeouts_count.pop(node_id, None)
            if connection_alive:
                self._connectivity_service.reconnect(node_id)
        self._nodes_last_receive_time.pop(node_id, None)
        self._nodes_downloaded_chunks_count.pop(node_id, None)

        if connection_alive:
            self.abort_data.emit(node_id, self.id, None)

        if self._nodes_available_chunks:
            self._download_chunks(check_node_busy=True)
        else:
            chunks_to_test = self._nodes_requested_chunks \
                if self._started and not self._paused \
                else self._nodes_available_chunks
            self._check_download_not_ready(chunks_to_test)

    def complete(self):
        if self._started and not self._finished:
            self._complete_download(force_complete=True)
        elif not self._finished:
            self._finished = True
            self.clean()
            self.download_complete.emit(self)

    def _download_chunks(self, check_node_busy=False):
        if not self._started or self._paused or self._finished:
            return

        logger.debug("download_chunks for %s", self.id)

        node_ids = list(self._nodes_available_chunks.keys())
        random.shuffle(node_ids)
        for node_id in node_ids:
            node_free = not check_node_busy or \
                        not self._nodes_requested_chunks.get(node_id, None)
            if node_free:
                self._download_next_chunks(node_id)
        self._clean_nodes_last_receive_time()
        self._check_download_not_ready(self._nodes_requested_chunks)

    def _check_can_receive(self, node_id):
        return True

    def _write_to_file(self, offset, data):
        self._file.seek(offset)
        try:
            self._file.write(data)
        except EnvironmentError as e:
            logger.error("Download task %s can't write to file. Reason: %s",
                         self.id, e)
            self._send_error_statistic()
            if e.errno == errno.ENOSPC:
                self._emit_no_disk_space(error=True)
            else:
                self.download_failed.emit(self)
                self.possibly_sync_folder_is_removed.emit()
            return False

        return True

    def _open_file(self, clean=False):
        if not self._file or self._file.closed:
            try:
                if clean:
                    self._file = open(self.download_path, 'wb')
                else:
                    self._file = open(self.download_path, 'r+b')
            except IOError:
                try:
                    self._file = open(self.download_path, 'wb')
                except IOError as e:
                    logger.error(
                        "Can't open file for download for task %s. "
                        "Reason: %s", self.id, e)
                    self.download_failed.emit(self)
                    return False

        return True

    def _close_file(self):
        if not self._file:
            return True

        try:
            self._file.close()
        except EnvironmentError as e:
            logger.error("Download task %s can't close file. Reason: %s",
                         self.id, e)
            self._send_error_statistic()
            if e.errno == errno.ENOSPC:
                self._emit_no_disk_space(error=True)
            else:
                self.download_failed.emit(self)
                self.possibly_sync_folder_is_removed.emit()
            self._file = None
            return False

        self._file = None
        return True

    def _write_info_file(self):
        try:
            self._info_file.seek(0)
            self._info_file.truncate()
            pickle.dump(self._downloaded_chunks, self._info_file,
                        pickle.HIGHEST_PROTOCOL)
            self._info_file.flush()
        except EnvironmentError as e:
            logger.debug("Can't write to info file for task id %s. Reason: %s",
                         self.id, e)

    def _read_info_file(self):
        try:
            if not self._info_file or self._info_file.closed:
                self._info_file = open(self._info_path, 'a+b')
                self._info_file.seek(0)
            try:
                self._downloaded_chunks = pickle.load(self._info_file)
            except Exception:
                # missing or corrupt info file; start with no downloaded chunks
                pass
        except EnvironmentError as e:
            logger.debug("Can't open info file for task id %s. Reason: %s",
                         self.id, e)

    def _close_info_file(self, to_remove=False):
        if not self._info_file:
            return

        try:
            self._info_file.close()
            if to_remove:
                remove_file(self._info_path)
        except Exception as e:
            logger.debug(
                "Can't close or remove info file "
                "for task id %s. Reason: %s", self.id, e)
        self._info_file = None

    def _complete_download(self, force_complete=False):
        if (not self._wanted_chunks or force_complete) and \
                not self._finished:
            logger.debug("download %s completed", self.id)
            self._nodes_requested_chunks.clear()
            for node_id in self._nodes_last_receive_time.keys():
                self.abort_data.emit(node_id, self.id, None)

            if not force_complete:
                self.download_finishing.emit()

            if not force_complete and self.file_hash:
                hash_check_result = self._check_file_hash()
                if hash_check_result is not None:
                    return hash_check_result

            self._started = False
            self._finished = True
            self.stop_download_chunks()
            self._close_info_file(to_remove=True)
            if not self._close_file():
                return False

            try:
                if force_complete:
                    remove_file(self.download_path)
                    self.download_complete.emit(self)
                else:
                    shutil.move(self.download_path, self.file_path)
                    self._send_end_statistic()
                    self.download_complete.emit(self)
                    if self.file_hash:
                        self.copy_added.emit(self.file_hash)
            except EnvironmentError as e:
                logger.error(
                    "Download task %s can't (re)move file. "
                    "Reason: %s", self.id, e)
                self._send_error_statistic()
                self.download_failed.emit(self)
                self.possibly_sync_folder_is_removed.emit()
                return False

            result = True
        else:
            result = not self._wanted_chunks
        return result

    def _check_file_hash(self):
        self._file.flush()
        try:
            hash = Rsync.hash_from_block_checksum(
                Rsync.block_checksum(self.download_path))
        except IOError as e:
            logger.error("download %s error: %s", self.id, e)
            hash = None
        if hash != self.file_hash:
            logger.error(
                "download hash check failed objId: %s, "
                "expected hash: %s, actual hash: %s", self.id, self.file_hash,
                hash)
            if not self._close_file() or not self._open_file(clean=True):
                return False

            self._downloaded_chunks.clear()
            self._nodes_downloaded_chunks_count.clear()
            self._nodes_last_receive_time.clear()
            self._nodes_timeouts_count.clear()
            self._write_info_file()
            self._init_wanted_chunks()

            self.received = 0
            if self._retry < self.retry_limit:
                self._retry += 1
                self.resume()
            else:
                self._retry = 0
                self._nodes_available_chunks.clear()
                self.hash_is_wrong = True
                self.wrong_hash.emit(self)
            return True

        return None

    def _download_next_chunks(self, node_id, time_from_last_received_chunk=0.):
        if (self._paused or not self._started or not self._ready
                or self._finished or not self._wanted_chunks
                or self._leaky_timer.isActive()):
            return

        total_requested = sum(
            map(lambda x: sum(x.values()),
                self._nodes_requested_chunks.values()))

        if total_requested + self.received >= self.size:
            if self._nodes_requested_chunks.get(node_id, None) and \
                    time_from_last_received_chunk <= self.end_race_timeout:
                return

            available_chunks = \
                self._get_end_race_chunks_to_download_from_node(node_id)
        else:
            available_chunks = \
                self._get_available_chunks_to_download_from_node(node_id)

        if not available_chunks:
            logger.debug("no chunks available for download %s", self.id)
            logger.debug("downloading from: %s nodes, length: %s, wanted: %s",
                         len(self._nodes_requested_chunks), total_requested,
                         self.size - self.received)
            return

        available_offset = random.sample(available_chunks.keys(), 1)[0]
        available_length = available_chunks[available_offset]
        logger.debug("selected random offset: %s", available_offset)

        parts_count = math.ceil(
            float(available_length) / float(DOWNLOAD_PART_SIZE)) - 1
        logger.debug("parts count: %s", parts_count)

        part_to_download_number = random.randint(0, parts_count)
        offset = available_offset + \
                 part_to_download_number * DOWNLOAD_PART_SIZE
        length = min(DOWNLOAD_PART_SIZE,
                     available_offset + available_length - offset)
        logger.debug("selected random part: %s, offset: %s, length: %s",
                     part_to_download_number, offset, length)

        self._request_data(node_id, offset, length)

    def _get_end_race_chunks_to_download_from_node(self, node_id):
        available_chunks = self._nodes_available_chunks.get(node_id, None)
        if not available_chunks:
            return []

        available_chunks = available_chunks.copy()
        logger.debug("end race downloaded_chunks: %s", self._downloaded_chunks)
        logger.debug("end race requested_chunks: %s",
                     self._nodes_requested_chunks)
        logger.debug("end race available_chunks before excludes: %s",
                     available_chunks)
        if self._downloaded_chunks:
            for downloaded_chunk in self._downloaded_chunks.items():
                self._remove_from_chunks(downloaded_chunk[0],
                                         downloaded_chunk[1], available_chunks)
        if not available_chunks:
            return []

        available_from_other_nodes = available_chunks.copy()
        for requested_offset, requested_length in \
                self._nodes_requested_chunks.get(node_id, dict()).items():
            self._remove_from_chunks(requested_offset, requested_length,
                                     available_from_other_nodes)

        result = available_from_other_nodes if available_from_other_nodes \
            else available_chunks

        if result:
            logger.debug("end race available_chunks after excludes: %s",
                         available_chunks)
        return result

    def _get_available_chunks_to_download_from_node(self, node_id):
        available_chunks = self._nodes_available_chunks.get(node_id, None)
        if not available_chunks:
            return []

        available_chunks = available_chunks.copy()
        logger.debug("downloaded_chunks: %s", self._downloaded_chunks)
        logger.debug("requested_chunks: %s", self._nodes_requested_chunks)
        logger.debug("available_chunks before excludes: %s", available_chunks)
        for _, requested_chunks in self._nodes_requested_chunks.items():
            for requested_offset, requested_length in requested_chunks.items():
                self._remove_from_chunks(requested_offset, requested_length,
                                         available_chunks)
        if not available_chunks:
            return []

        for downloaded_chunk in self._downloaded_chunks.items():
            self._remove_from_chunks(downloaded_chunk[0], downloaded_chunk[1],
                                     available_chunks)
        logger.debug("available_chunks after excludes: %s", available_chunks)
        return available_chunks

    def _request_data(self, node_id, offset, length):
        logger.debug("Requesting date from node %s, request_chunk (%s, %s)",
                     node_id, offset, length)
        if self._limiter:
            try:
                self._limiter.leak(length)
            except LeakyBucketException:
                if node_id not in self._nodes_requested_chunks:
                    self._nodes_last_receive_time.pop(node_id, None)
                    if not self._network_limited_error_set:
                        self.download_error.emit('Network limited.')
                        self._network_limited_error_set = True
                if not self._leaky_timer.isActive():
                    self._leaky_timer.start()
                return

        if self._network_limited_error_set:
            self._network_limited_error_set = False
            self.download_ok.emit()

        requested_chunks = self._nodes_requested_chunks.get(node_id, None)
        if not requested_chunks:
            requested_chunks = SortedDict()
            self._nodes_requested_chunks[node_id] = requested_chunks
        requested_chunks[offset] = length
        logger.debug("Requested chunks %s", requested_chunks)
        self._nodes_last_receive_time[node_id] = time()
        self.request_data.emit(node_id, self.id, str(offset), length)

    def _clean_nodes_last_receive_time(self):
        for node_id in list(self._nodes_last_receive_time.keys()):
            if node_id not in self._nodes_requested_chunks:
                self._nodes_last_receive_time.pop(node_id, None)

    def _on_check_timeouts(self):
        if self._paused or not self._started \
                or self._finished or self._leaky_timer.isActive():
            return

        timed_out_nodes = set()
        cur_time = time()
        logger.debug("Chunk requests check %s",
                     len(self._nodes_requested_chunks))
        if self._check_download_not_ready(self._nodes_requested_chunks):
            return

        for node_id in self._nodes_last_receive_time:
            last_receive_time = self._nodes_last_receive_time.get(node_id)
            if cur_time - last_receive_time > self.receive_timeout:
                timed_out_nodes.add(node_id)

        logger.debug("Timed out nodes %s, nodes last receive time %s",
                     timed_out_nodes, self._nodes_last_receive_time)
        for node_id in timed_out_nodes:
            timeout_count = self._nodes_timeouts_count.pop(node_id, 0)
            timeout_count += 1
            if timeout_count >= self.timeouts_limit:
                retry = False
            else:
                retry = True
                self._nodes_timeouts_count[node_id] = timeout_count
            logger.debug("Node if %s, timeout_count %s, retry %s", node_id,
                         timeout_count, retry)
            self.on_node_disconnected(node_id,
                                      connection_alive=True,
                                      timeout_limit_exceed=not retry)

    def _get_chunks_from_info(self, chunks, info):
        new_added = False
        for part_info in info:
            logger.debug("get_chunks_from_info part_info %s", part_info)
            if part_info.length == 0:
                continue

            if not chunks:
                chunks[part_info.offset] = part_info.length
                new_added = True
                continue

            result_offset = part_info.offset
            result_length = part_info.length
            left_index = chunks.bisect_right(part_info.offset)
            if left_index > 0:
                left_chunk = chunks.peekitem(left_index - 1)
                if (left_chunk[0] <= part_info.offset
                        and left_chunk[0] + left_chunk[1] >=
                        part_info.offset + part_info.length):
                    continue

                if part_info.offset <= left_chunk[0] + left_chunk[1]:
                    result_offset = left_chunk[0]
                    result_length = part_info.offset + \
                                    part_info.length - result_offset
                    left_index -= 1

            right_index = chunks.bisect_right(part_info.offset +
                                              part_info.length)
            if right_index > 0:
                right_chunk = chunks.peekitem(right_index - 1)
                if part_info.offset + part_info.length <= \
                        right_chunk[0] + right_chunk[1]:
                    result_length = right_chunk[0] + \
                                    right_chunk[1] - result_offset

            to_delete = list(chunks.islice(left_index, right_index))

            for to_del in to_delete:
                chunks.pop(to_del)

            new_added = True
            chunks[result_offset] = result_length

        return new_added

    def _store_availability_info(self, node_id, info):
        known_chunks = self._nodes_available_chunks.get(node_id, None)
        if not known_chunks:
            known_chunks = SortedDict()
            self._nodes_available_chunks[node_id] = known_chunks
        return self._get_chunks_from_info(known_chunks, info)

    def _check_download_not_ready(self, checkable):
        if not self._wanted_chunks and self._started:
            self._complete_download(force_complete=False)
            return False

        if self._leaky_timer.isActive():
            if not self._nodes_available_chunks:
                self._make_not_ready()
                return True

        elif not checkable:
            self._make_not_ready()
            return True

        return False

    def _make_not_ready(self):
        if not self._ready:
            return

        logger.info("download %s not ready now", self.id)
        self._ready = False
        self._started = False
        if self._timeout_timer.isActive():
            self._timeout_timer.stop()
        if self._leaky_timer.isActive():
            self._leaky_timer.stop()
        self.download_not_ready.emit(self)

    def _clear_globals(self):
        self._wanted_chunks.clear()
        self._downloaded_chunks.clear()
        self._nodes_available_chunks.clear()
        self._nodes_requested_chunks.clear()
        self._nodes_last_receive_time.clear()
        self._nodes_downloaded_chunks_count.clear()
        self._nodes_timeouts_count.clear()
        self._total_chunks_count = 0

    def stop_download_chunks(self):
        if self._leaky_timer.isActive():
            self._leaky_timer.stop()
        if self._timeout_timer.isActive():
            self._timeout_timer.stop()

        for node_id in self._nodes_requested_chunks:
            self.abort_data.emit(node_id, self.id, None)

        self._nodes_requested_chunks.clear()
        self._nodes_last_receive_time.clear()

    def _emit_no_disk_space(self, error=False):
        self._no_disk_space_error = True
        self._nodes_available_chunks.clear()
        self._clear_globals()
        self._make_not_ready()
        file_name = self.display_name.split()[-1] \
            if self.display_name else ""
        self.no_disk_space.emit(self, file_name, error)

    def _send_start_statistic(self):
        if self._tracker:
            self._tracker.download_start(self.id, self.size)

    def _send_end_statistic(self):
        if self._tracker:
            time_diff = time() - self._started_time
            if time_diff < 1e-3:
                time_diff = 1e-3

            self._tracker.download_end(
                self.id,
                time_diff,
                websockets_bytes=0,
                webrtc_direct_bytes=self._received_via_p2p,
                webrtc_relay_bytes=self._received_via_turn,
                chunks=len(self._downloaded_chunks),
                chunks_reloaded=0,
                nodes=len(self._nodes_available_chunks))

    def _send_error_statistic(self):
        if self._tracker:
            time_diff = time() - self._started_time
            if time_diff < 1e-3:
                time_diff = 1e-3

            self._tracker.download_error(
                self.id,
                time_diff,
                websockets_bytes=0,
                webrtc_direct_bytes=self._received_via_p2p,
                webrtc_relay_bytes=self._received_via_turn,
                chunks=len(self._downloaded_chunks),
                chunks_reloaded=0,
                nodes=len(self._nodes_available_chunks))
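
The task above tracks _downloaded_chunks as a SortedDict of offset -> length ranges and coalesces neighbouring ranges as new data arrives (see _on_new_chunk_downloaded). Below is a simplified, self-contained sketch of that merging step, assuming only sortedcontainers is available; it is an illustration of the idea, not the class's exact code path.

from sortedcontainers import SortedDict

def add_chunk(chunks, offset, length):
    """Insert a downloaded byte range and coalesce it with adjacent ranges,
    a simplified version of the merging done in _on_new_chunk_downloaded."""
    # merge with a chunk that ends exactly at `offset`
    left = chunks.bisect_right(offset)
    if left > 0:
        l_off, l_len = chunks.peekitem(left - 1)
        if l_off + l_len == offset:
            offset, length = l_off, length + l_len
            chunks.popitem(left - 1)
    # merge with a chunk that starts exactly at the new end
    right = chunks.bisect_right(offset + length)
    if right > 0:
        r_off, r_len = chunks.peekitem(right - 1)
        if r_off == offset + length:
            length += r_len
            chunks.popitem(right - 1)
    chunks[offset] = length

chunks = SortedDict()
add_chunk(chunks, 0, 10)
add_chunk(chunks, 20, 5)
add_chunk(chunks, 10, 10)   # bridges the gap and coalesces everything
print(dict(chunks))         # {0: 25}

Keeping the ranges keyed by offset in a SortedDict makes the neighbour lookups O(log n) via bisect_right/peekitem, so the task never has to scan every chunk on each received block.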
Example #41
0
def main():
    # test_file = "/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/p5_20_02_17/cam 1"
    # print(f"is dir {os.path.isdir(test_file)}")
    # return

    open_avi_for_test = False
    if open_avi_for_test:
        test_avi()
        return

    subject_id = "p8_20_02_27"  # P12_20_01_20 p8_20_01_16
    cam_folder_id_1 = "cam2"  # "cam2"
    cam_folder_id_2 = "a001"  # a000  a001
    if cam_folder_id_2 is None:
        cam_folder_id = "20190430_a002"  # ex cam1_a002, movie1, etc...
    else:
        cam_folder_id = f"{cam_folder_id_1}_{cam_folder_id_2}"
    tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/'
    tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/to_convert/'
    # tiffs_path_dir = '/media/julien/My Book/robin_tmp/cameras/basler_recordings/'
    # tiffs_path_dir = '/media/julien/dream team/camera/'
    tiffs_path_dir = '/media/julien/Not_today/hne_not_today/data/behavior_movies/to_convert/'
    # On NAS
    # tiffs_path_dir = '/run/user/1000/gvfs/smb-share:server=cossartlab.local,share=picardoteam/Behavior Camera/'
    if cam_folder_id_2 is not None:
        tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id,
                                      cam_folder_id_1, cam_folder_id_2)
        # tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id, cam_folder_id_2, cam_folder_id_1)
    else:
        tiffs_path_dir = os.path.join(tiffs_path_dir, subject_id,
                                      cam_folder_id)
    # print(f"is dir {os.path.isdir(tiffs_path_dir)}")
    if cam_folder_id_1 is None:
        cam_id = "22983298"
    elif cam_folder_id_1 == "cam1":
        cam_id = "22983298"
    else:
        cam_id = "23109588"  #  cam1: 22983298  cam2: 23109588

    # results_path = '/media/julien/My Book/robin_tmp/cameras/'
    # results_path = os.path.join(results_path, subject_id)
    results_path = "/media/julien/Not_today/hne_not_today/data/behavior_movies/converted_so_far/"

    files_in_dir = [
        item for item in os.listdir(tiffs_path_dir)
        if os.path.isfile(os.path.join(tiffs_path_dir, item)) and (
            item.endswith("tiff") or item.endswith("tif")) and (
                not item.startswith("."))
    ]

    # files_in_dir = sorted_tiff_ls(tiffs_path_dir)
    # print(f"len(files_in_dir) {len(files_in_dir)}")
    # for file_name in files_in_dir[-1000:]:
    #     print(f"{file_name}")

    files_in_dir_dict = SortedDict()
    for file_name in files_in_dir:
        index_ = file_name[::-1].find("_")
        frame_number = int(file_name[-index_:-5])
        files_in_dir_dict[frame_number] = file_name
        # print(f"{file_name[-index_:-5]}")
        # break

    # looking for a gap between frames
    last_tiff_frame = 0
    error_detected = False
    for tiff_frame, tiff_file in files_in_dir_dict.items():
        if tiff_frame - 1 != last_tiff_frame:
            print(
                f"Gap between frame n° {last_tiff_frame} and {tiff_frame}. File {tiff_file}"
            )
            error_detected = True
        last_tiff_frame = tiff_frame

    if error_detected:
        raise Exception("ERROR: gap between 2 frames")

    # keep the name of the tiffs files
    yaml_file_name = os.path.join(
        results_path,
        f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}.yaml")
    with open(yaml_file_name, 'w') as outfile:
        yaml.dump(list(files_in_dir_dict.values()),
                  outfile,
                  default_flow_style=False)

    # raise Exception("TEST YAML")
    # # leave only regular files, insert creation date
    # entries = ((stat[ST_CTIME], path)
    #            for stat, path in entries if S_ISREG(stat[ST_MODE]))
    # # NOTE: on Windows `ST_CTIME` is a creation date
    # #  but on Unix it could be something else
    # # NOTE: use `ST_MTIME` to sort by a modification date
    #
    # for cdate, path in sorted(entries):
    #     print(time.ctime(cdate), os.path.basename(path))

    # sort by alphabetical order

    size_avi = None
    vid_avi = None
    fps_avi = 20
    avi_file_name = os.path.join(
        results_path,
        f"behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi"
    )
    print(
        f"creating behavior_{subject_id}_cam_{cam_id}_{cam_folder_id}_fps_{fps_avi}.avi from {len(files_in_dir_dict)} tiff files"
    )
    is_color = True
    # put fourcc to 0 for no compression
    # fourcc = 0
    fourcc = VideoWriter_fourcc(*"XVID")
    # fourcc = VideoWriter_fourcc(*"MPEG")

    # https://stackoverflow.com/questions/44947505/how-to-make-a-movie-out-of-images-in-python
    start_time = time()
    for tiff_frame, tiff_file in files_in_dir_dict.items():
        if (tiff_frame > 0) and (tiff_frame % 5000 == 0):
            print(f"{tiff_frame} frames done")
        # img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file))
        # img = np.array(img)
        if vid_avi is None:
            if size_avi is None:
                img = PIL.Image.open(os.path.join(tiffs_path_dir, tiff_file))
                img = np.array(img)
                print(f"img.shape {img.shape}")
                size_avi = img.shape[1], img.shape[0]
            # vid_avi = VideoWriter(avi_file_name, fourcc, float(fps_avi), size_avi, is_color)
            vid_avi = VideoWriter(avi_file_name, fourcc, fps_avi, size_avi,
                                  is_color)
        # vid_avi.write(img)
        vid_avi.write(imread(os.path.join(tiffs_path_dir, tiff_file)))
    cv2.destroyAllWindows()
    vid_avi.release()

    time_to_convert = time() - start_time

    print(f"time_to_convert: {time_to_convert} sec")
Example #42
0
class OrderedDict(dict):
    """Dictionary that remembers insertion order and is numerically indexable.

    Keys are numerically indexable using dict views. For example::

        >>> ordered_dict = OrderedDict.fromkeys('abcde')
        >>> keys = ordered_dict.keys()
        >>> keys[0]
        'a'
        >>> keys[-2:]
        ['d', 'e']

    The dict views support the sequence abstract base class.

    """

    # pylint: disable=super-init-not-called
    def __init__(self, *args, **kwargs):
        self._keys = {}
        self._nums = SortedDict()
        self._keys_view = self._nums.keys()
        self._count = count()
        self.update(*args, **kwargs)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        "``ordered_dict[key] = value``"
        if key not in self:
            num = next(self._count)
            self._keys[key] = num
            self._nums[num] = key
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        "``del ordered_dict[key]``"
        dict_delitem(self, key)
        num = self._keys.pop(key)
        del self._nums[num]

    def __iter__(self):
        "``iter(ordered_dict)``"
        return iter(self._nums.values())

    def __reversed__(self):
        "``reversed(ordered_dict)``"
        nums = self._nums
        for key in reversed(nums):
            yield nums[key]

    def clear(self, dict_clear=dict.clear):
        "Remove all items from mapping."
        dict_clear(self)
        self._keys.clear()
        self._nums.clear()

    def popitem(self, last=True):
        """Remove and return (key, value) item pair.

        Pairs are returned in LIFO order if last is True or FIFO order if
        False.

        """
        index = -1 if last else 0
        num = self._keys_view[index]
        key = self._nums[num]
        value = self.pop(key)
        return key, value

    update = __update = co.MutableMapping.update

    def keys(self):
        "Return set-like and sequence-like view of mapping keys."
        return KeysView(self)

    def items(self):
        "Return set-like and sequence-like view of mapping items."
        return ItemsView(self)

    def values(self):
        "Return set-like and sequence-like view of mapping values."
        return ValuesView(self)

    def pop(self, key, default=NONE):
        """Remove given key and return corresponding value.

        If key is not found, default is returned if given, otherwise raise
        KeyError.

        """
        if key in self:
            value = self[key]
            del self[key]
            return value
        elif default is NONE:
            raise KeyError(key)
        else:
            return default

    def setdefault(self, key, default=None):
        """Return ``mapping.get(key, default)``, also set ``mapping[key] = default`` if
        key not in mapping.

        """
        if key in self:
            return self[key]
        self[key] = default
        return default

    @recursive_repr()
    def __repr__(self):
        "Text representation of mapping."
        return '%s(%r)' % (self.__class__.__name__, list(self.items()))

    __str__ = __repr__

    def __reduce__(self):
        "Support for pickling serialization."
        return (self.__class__, (list(self.items()), ))

    def copy(self):
        "Return shallow copy of mapping."
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        """Return new mapping with keys from iterable.

        If not specified, value defaults to None.

        """
        return cls((key, value) for key in iterable)

    def __eq__(self, other):
        "Test self and other mapping for equality."
        if isinstance(other, OrderedDict):
            return dict.__eq__(self, other) and all(map(eq, self, other))
        return dict.__eq__(self, other)

    __ne__ = co.MutableMapping.__ne__

    def _check(self):
        "Check consistency of internal member variables."
        # pylint: disable=protected-access
        keys = self._keys
        nums = self._nums

        for key, value in keys.items():
            assert nums[value] == key

        nums._check()
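
A short usage sketch for the OrderedDict above; it assumes the KeysView/ItemsView/ValuesView helpers and the NONE sentinel from the same module are importable alongside the class.

od = OrderedDict([("a", 1), ("b", 2), ("c", 3)])
del od["b"]
od["d"] = 4
print(list(od))                  # ['a', 'c', 'd'] -- insertion order survives deletion
print(od.keys()[-1])             # 'd' -- the keys view is numerically indexable
print(od.popitem(last=False))    # ('a', 1) -- FIFO pop from the front

Internally the class pairs a plain dict of key -> insertion number with a SortedDict of insertion number -> key, so ordering is recovered by iterating the SortedDict rather than by relinking nodes as collections.OrderedDict does.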
Example #43
0
class KeyedRegion:
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes that no
    variable in this region overlaps with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """

    __slots__ = ('_storage', '_object_mapping', '_phi_node_contains')

    def __init__(self, tree=None, phi_node_contains=None):
        self._storage = SortedDict() if tree is None else tree
        self._object_mapping = weakref.WeakValueDictionary()
        self._phi_node_contains = phi_node_contains

    def _get_container(self, offset):
        try:
            base_offset = next(
                self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion(phi_node_contains=self._phi_node_contains)

        kr = KeyedRegion(phi_node_contains=self._phi_node_contains)
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        kr._object_mapping = self._object_mapping.copy()
        return kr

    def merge(self, other, replacements=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False)

        return self

    def replace(self, replacements):
        """
        Replace variables with other variables.

        :param dict replacements:   A dict of variable replacements.
        :return:                    self
        """

        for old_var, new_var in replacements.items():
            old_var_id = id(old_var)
            if old_var_id in self._object_mapping:
                # FIXME: we need to check if old_var still exists in the storage
                old_so = self._object_mapping[old_var_id]  # type: StoredObject
                self._store(old_so.start, new_var, old_so.size, overwrite=True)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = {}

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [obj.obj for obj in ro.stored_objects]
            offset_to_vars[key] = variables

        s = []
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by this
        object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self._object_mapping[stored_object.obj_id] = stored_object
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite:  Whether existing objects should be overwritten or not. True to make a strong update,
                                False to make a weak update.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end - 1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_with_check(b, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end,
                                        {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_with_check(a, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_with_check(item, stored_object)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(
                    self._storage.irange(maximum=end - 1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(
                    self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_with_check(self, item, stored_object):
        if len({stored_object.obj} | item.internal_objects) > 1:
            if self._phi_node_contains is not None:
                # check if `item` is a phi node that contains stored_object.obj
                for so in item.internal_objects:
                    if self._phi_node_contains(so, stored_object.obj):
                        # yes! so we want to skip this object
                        return
                # check if `stored_object.obj` is a phi node that contains item.internal_objects
                if all(
                        self._phi_node_contains(stored_object.obj, o)
                        for o in item.internal_objects):
                    # yes!
                    item.set_object(stored_object)
                    return

            l.warning("Overlapping objects %s.",
                      str({stored_object.obj} | item.internal_objects))
            # import ipdb; ipdb.set_trace()
        item.add_object(stored_object)
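
The storage logic above leans on two `SortedDict` range queries: `irange(start, end - 1)` to find region keys inside the span being written, and `irange(maximum=start, reverse=True)` to find the closest key at or below `start` (the possible overlapping predecessor). A minimal, self-contained sketch of that lookup; the toy `storage` layout and the `overlapping_keys` helper are illustrative, not part of the class above.

from sortedcontainers import SortedDict

# toy layout: key -> (start, size), with key == start
storage = SortedDict({0: (0, 4), 8: (8, 4), 16: (16, 8)})

def overlapping_keys(start, end):
    """Keys of all stored regions overlapping [start, end)."""
    keys = list(storage.irange(start, end - 1))
    try:
        # the region starting just before `start` may still cover it
        floor_key = next(storage.irange(maximum=start, reverse=True))
    except StopIteration:
        return keys
    r_start, r_size = storage[floor_key]
    if floor_key not in keys and r_start + r_size > start:
        keys.insert(0, floor_key)
    return keys

print(overlapping_keys(2, 10))   # [0, 8]
print(overlapping_keys(12, 14))  # [] -- falls in the gap between 8+4 and 16
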
Example #44
0
def test_values():
    mapping = [(val, pos) for pos, val in enumerate(string.ascii_lowercase)]
    temp = SortedDict(mapping)
    assert list(temp.values()) == [pos for key, pos in mapping]
Example #45
0
class FreshPondSim:
    def __init__(self,
                 distance,
                 start_time,
                 end_time,
                 entrances,
                 entrance_weights,
                 rand_velocities_and_distances_func,
                 entrance_rate,
                 entrance_rate_integral=None,
                 entrance_rate_integral_inverse=None,
                 interpolate_rate=True,
                 interpolate_rate_integral=True,
                 interpolate_res=None,
                 snap_exit=True):
        assert_positive_real(distance, 'distance')
        assert_real(start_time, 'start_time')
        assert_real(end_time, 'end_time')
        if not (start_time < end_time):
            raise ValueError(f"start_time should be less than end_time")
        assert len(entrances) == len(entrance_weights)
        self.start_time = start_time
        self.end_time = end_time
        self.dist_around = distance
        self.entrances = entrances
        self.entrance_weights = entrance_weights
        self.rand_velocities_and_distances = rand_velocities_and_distances_func
        self._snap_exit = snap_exit

        if interpolate_rate or interpolate_rate_integral:
            if interpolate_res is None:
                raise ValueError("Specify interpolate_res for interpolation")

        if interpolate_rate:
            self.entrance_rate = DynamicBoundedInterpolator(
                entrance_rate, start_time, end_time, interpolate_res)
        else:
            self.entrance_rate = entrance_rate

        if interpolate_rate_integral:  # Want to interpolate the integral function
            if entrance_rate_integral is None: # No integral function given
                # Do numerical integration and interpolate to speed it up
                def integral_func(t):
                    y, abserr = integrate.quad(entrance_rate, start_time, t)
                    return y

                self.entrance_rate_integral = DynamicBoundedInterpolator(
                    integral_func, start_time, end_time, interpolate_res)
            else: # Integral function was provided
                # Use the provided rate integral function but interpolate it
                self.entrance_rate_integral = DynamicBoundedInterpolator(
                    entrance_rate_integral, start_time, end_time, interpolate_res)
        else: # Don't want to interpolate the integral function
            # If entrance_rate_integral is not None (i.e. is provided) then
            # that function will be used as the rate integral.
            # If entrance_rate_integral is None, numerical integration will
            # be used.
            self.entrance_rate_integral = entrance_rate_integral

        self.entrance_rate_integral_inverse = entrance_rate_integral_inverse

        self.pedestrians = SortedKeyList(key=attrgetter('start_time'))
        
        self._counts = SortedDict()
        self._counts[self.start_time] = 0

        self._counts_are_correct = True

        self.refresh_pedestrians()

    def _distance(self, a, b):
        """signed distance of a relative to b"""
        return circular_diff(a % self.dist_around, b % self.dist_around,
                             self.dist_around)

    def _distance_from(self, b):
        """returns a function that returns the signed sitance from b"""
        return lambda a: self._distance(a, b)

    def _abs_distance_from(self, b):
        """returns a function that returns the distance from b"""
        return lambda a: abs(self._distance(a, b))

    def _closest_exit(self, dist):
        """Returns the closest number to dist that is equivalent mod dist_around
        to an element of entrances"""
        closest_exit = min(self.entrances, key=self._abs_distance_from(dist))
        diff = self._distance(closest_exit, dist)
        corrected_dist = dist + diff
        return corrected_dist

    def refresh_pedestrians(self):
        """Refreshes the pedestrians in the simulation to random ones"""
        self.clear_pedestrians()

        start_times = list(
            random_times(self.start_time, self.end_time,
                         self.entrance_rate,
                         self.entrance_rate_integral,
                         self.entrance_rate_integral_inverse))
        n_pedestrians = len(start_times)
        entrances = random.choices(population=self.entrances,
                                   weights=self.entrance_weights,
                                   k=n_pedestrians)
        velocities, distances = self.rand_velocities_and_distances(
            n_pedestrians).T

        def pedestrians_generator():
            for start_time, entrance, velocity, dist in zip(
                    start_times, entrances, velocities, distances):
                assert dist > 0
                if self._snap_exit:
                    original_exit = entrance + dist * sign(velocity)
                    corrected_exit = self._closest_exit(original_exit)
                    corrected_dist = abs(corrected_exit - entrance)
                    if math.isclose(corrected_dist, 0, abs_tol=1e-10):
                        corrected_dist = self.dist_around
                else:
                    corrected_dist = dist
                yield FreshPondPedestrian(self.dist_around, entrance,
                                          corrected_dist, start_time, velocity)

        self.add_pedestrians(pedestrians_generator())

    def clear_pedestrians(self):
        """Removes all pedestrains in the simulation"""
        self.pedestrians.clear()
        self._reset_counts()
        self._counts_are_correct = True

    def add_pedestrians(self, pedestrians):
        """Adds all the given pedestrians to the simulation"""
        def checked_pedestrians():
            for p in pedestrians:
                self._assert_pedestrian_in_range(p)
                yield p

        initial_num_pedestrians = self.num_pedestrians()
        self.pedestrians.update(checked_pedestrians())
        final_num_pedestrians = self.num_pedestrians()

        if final_num_pedestrians > initial_num_pedestrians:
            self._counts_are_correct = False
        else:
            assert final_num_pedestrians == initial_num_pedestrians

    def _assert_pedestrian_in_range(self, p):
        """Makes sure the pedestrian's start time is in the simulation's
        time interval"""
        if not (self.start_time <= p.start_time < self.end_time):
            raise ValueError(
                "Pedestrian start time is not in range [start_time, end_time)")

    def add_pedestrian(self, p):
        """Adds a new pedestrian to the simulation"""
        self._assert_pedestrian_in_range(p)
        self.pedestrians.add(p)

        # Update counts only when counts are correct
        if self._counts_are_correct:
            # add a new breakpoint at the pedestrian's start time if it is not already there
            self._counts[p.start_time] = self.n_people(p.start_time)

            # add a new breakpoint at the pedestrian's end time if it is not already there
            self._counts[p.end_time] = self.n_people(p.end_time)

            # increment all the counts in the pedestrian's interval of time
            # inclusive on the left, exclusive on the right
            # If it were inclusive on the right, then the count would be one more
            # than it should be in the period after end_time and before the next
            # breakpoint after end_time
            for t in self._counts.irange(p.start_time,
                                        p.end_time,
                                        inclusive=(True, False)):
                self._counts[t] += 1

    def _reset_counts(self):
        """Clears _counts and sets count at start_time to 0"""
        self._counts.clear()
        self._counts[self.start_time] = 0

    def _recompute_counts(self):
        """Store how many people there are whenever someone enters or exits so
        the number of people at a given time can be found quickly later"""
        # print("Recomputing counts")
        self._reset_counts()

        if self.num_pedestrians() == 0:
            return

        # pedestrians are already sorted by start time
        start_times = [p.start_time for p in self.pedestrians]
        end_times = sorted([p.end_time for p in self.pedestrians])

        n = len(start_times)
        curr_count = 0  # current number of people
        start_times_index = 0
        end_times_index = 0
        starts_done = False  # whether all the start times have been added
        ends_done = False  # whether all the end times have been added
        while not (starts_done and ends_done):
            # determine whether a start time or an end time should be added next
            # store this in the variable take_start which is true if a start
            # time should be added next
            if starts_done:
                # already added all the start times; add an end time
                take_start = False
            elif ends_done:
                # already added all the end times; add a start time
                take_start = True
            else:
                # didn't add all the end times nor all the start times
                # add the time that is earliest
                next_start_time = start_times[start_times_index]
                next_end_time = end_times[end_times_index]
                take_start = next_start_time < next_end_time

            if take_start:
                # add next start
                curr_count += 1
                start_time = start_times[start_times_index]
                self._counts[start_time] = curr_count
                start_times_index += 1
                if start_times_index == n:
                    starts_done = True
            else:
                # add next end
                curr_count -= 1
                end_time = end_times[end_times_index]
                self._counts[end_time] = curr_count
                end_times_index += 1
                if end_times_index == n:
                    ends_done = True

    def n_unique_people_saw(self, p):
        """Returns the number of unique people that a pedestrian sees"""
        n = 0
        for q in self.pedestrians:
            if p.intersects(q):
                n += 1
        return n

    def n_people_saw(self, p):
        """Returns the number of times a pedestrian sees someone"""
        n = 0
        for q in self.pedestrians:
            if p.end_time > q.start_time and p.start_time < q.end_time:
                n += p.n_intersections(q)
        return n

    def intersection_directions(self, p):
        """Returns the number of people seen going in the same direction and the
        number of people seen going in the opposite direction by p as a tuple"""
        n_same, n_diff = 0, 0
        for q in self.pedestrians:
            if p.end_time > q.start_time and p.start_time < q.end_time:
                d = q.intersection_direction(p)
                if d == 1:
                    n_same += 1
                elif d == -1:
                    n_diff += 1
        return n_same, n_diff

    def intersection_directions_total(self, p):
        n_same, n_diff = 0, 0
        for q in self.pedestrians:
            if p.end_time > q.start_time and p.start_time < q.end_time:
                i = p.total_intersection_direction(q)
                if i < 0:
                    n_diff += -i
                elif i > 0:
                    n_same += i
        return n_same, n_diff

    def n_people(self, t):
        """Returns the number of people at a given time"""

        if not self._counts_are_correct:
            self._recompute_counts()
            self._counts_are_correct = True

        if t in self._counts:
            return self._counts[t]
        elif t < self.start_time:
            return 0
        else:
            index = self._counts.bisect_left(t)
            return self._counts.values()[index - 1]

    def num_pedestrians(self):
        """Returns the total number of pedestrians in the simulation"""
        return len(self.pedestrians)

    def get_pedestrians_in_interval(self, start, stop):
        """Returns a list of all the pedestrians who entered in the interval
        [start, stop]"""
        return list(self.pedestrians.irange_key(start, stop))

    def num_entrances_in_interval(self, start, stop):
        """Returns the number of pedestrians who entered in the given interval
        of time [start, stop]"""
        return len(self.get_pedestrians_in_interval(start, stop))
    
    def get_enter_and_exit_times_in_interval(self, start, stop):
        """Returns the entrance and exit times in a given time interval
        as a tuple of lists (entrance_times, exit_times)."""
        start_times = []
        end_times = []
        for p in self.pedestrians:
            if start <= p.start_time <= stop:
                start_times.append(p.start_time)
            if start <= p.end_time <= stop:
                end_times.append(p.end_time)
        return start_times, end_times
    
    def get_pedestrians_at_time(self, t):
        """Returns a list of all the pedestrians who were there at time t"""
        # get all pedestrians who entered at or before time t
        entered_before_t = self.pedestrians.irange_key(
            min_key=None, max_key=t, inclusive=(True, True))
        # Of those, return the ones who exited after time t
        return [p for p in entered_before_t if p.end_time > t]
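
The `_counts` structure above is a step function stored in a `SortedDict`: each key is a time at which the head count changes, and `n_people` looks up the value in force at time `t` via `bisect_left`. A small sketch of just that query, with made-up sample data:

from sortedcontainers import SortedDict

counts = SortedDict({0: 0, 5: 2, 9: 1, 12: 0})  # time -> number present from that time on

def people_at(t, start_time=0):
    if t in counts:
        return counts[t]
    if t < start_time:
        return 0
    index = counts.bisect_left(t)      # first breakpoint >= t
    return counts.values()[index - 1]  # value set by the breakpoint just before t

print(people_at(7))   # 2 (between the breakpoints at 5 and 9)
print(people_at(9))   # 1 (exact breakpoint)
print(people_at(-3))  # 0 (before the simulation starts)
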
Example #46
0
class TreePage(BasePage):
    """
    Page object, implemented with a sorted dict. Who knows what's underneath!
    """
    def __init__(self, *args, **kwargs):
        storage = kwargs.pop("storage", None)
        super(TreePage, self).__init__(*args, **kwargs)
        self._storage = SortedDict() if storage is None else storage

    def keys(self):
        if len(self._storage) == 0:
            return set()
        else:
            return set.union(*(set(range(*self._resolve_range(mo)))
                               for mo in self._storage.values()))

    def replace_mo(self, state, old_mo, new_mo):
        start, end = self._resolve_range(old_mo)
        for key in self._storage.irange(start, end - 1):
            val = self._storage[key]
            if val is old_mo:
                #assert new_mo.includes(a)
                self._storage[key] = new_mo

    def store_overwrite(self, state, new_mo, start, end):
        # iterate over each item we might overwrite
        # track our mutations separately since we're in the process of iterating
        deletes = []
        updates = {start: new_mo}

        for key in self._storage.irange(maximum=end - 1, reverse=True):
            old_mo = self._storage[key]

            # make sure we aren't overwriting all of an item that overlaps the end boundary
            if end < self._page_addr + self._page_size and end not in updates and old_mo.includes(
                    end):
                updates[end] = old_mo

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if key < start:
                break

            # delete any key that falls within the range
            deletes.append(key)

        #assert all(m.includes(i) for i,m in updates.items())

        # perform mutations
        for key in deletes:
            del self._storage[key]

        self._storage.update(updates)

    def store_underwrite(self, state, new_mo, start, end):
        # track the point that we need to write up to
        last_missing = end - 1
        # track also updates since we can't update while iterating
        updates = {}

        for key in self._storage.irange(maximum=end - 1, reverse=True):
            mo = self._storage[key]

            # if this mo starts at or before last_missing but ends before it,
            # the gap after the mo (up to last_missing) is filled with new_mo
            if mo.base <= last_missing and not mo.includes(last_missing):
                updates[max(mo.last_addr + 1, start)] = new_mo
            last_missing = mo.base - 1

            # we can't set a minimum on the range because we need to do the above for
            # the first object before start too
            if last_missing < start:
                break

        # if there are no memory objects <= start, we won't have filled start yet
        if last_missing >= start:
            updates[start] = new_mo

        #assert all(m.includes(i) for i,m in updates.items())

        self._storage.update(updates)

    def load_mo(self, state, page_idx):
        """
        Loads a memory object from memory.

        :param page_idx: the index into the page
        :returns: a tuple of the object
        """

        try:
            key = next(self._storage.irange(maximum=page_idx, reverse=True))
        except StopIteration:
            return None
        else:
            return self._storage[key]

    def load_slice(self, state, start, end):
        """
        Return the memory objects overlapping with the provided slice.

        :param start: the start address
        :param end: the end address (non-inclusive)
        :returns: tuples of (starting_addr, memory_object)
        """
        keys = list(self._storage.irange(start, end - 1))
        if not keys or keys[0] != start:
            try:
                key = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                pass
            else:
                if self._storage[key].includes(start):
                    keys.insert(0, key)
        return [(max(start, key), self._storage[key]) for key in keys]

    def _copy_args(self):
        return {'storage': self._storage.copy()}
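
`store_overwrite` above also shows a recurring constraint: the `SortedDict` must not be mutated while `irange` is iterating over it, so deletions and insertions are collected first and applied afterwards. A stripped-down sketch of that bookkeeping (boundary splitting of partially overwritten objects is omitted; the stored values here are placeholder strings):

from sortedcontainers import SortedDict

storage = SortedDict({0: "a", 4: "b", 8: "c", 12: "d"})

def overwrite_range(start, end, new_value):
    deletes = []
    updates = {start: new_value}
    # walk backwards so the first key before `start` terminates the scan
    for key in storage.irange(maximum=end - 1, reverse=True):
        if key < start:
            break                # everything earlier is left alone
        deletes.append(key)      # keys inside [start, end) get replaced
    for key in deletes:          # mutate only after iteration is done
        del storage[key]
    storage.update(updates)

overwrite_range(4, 12, "X")
print(list(storage.items()))  # [(0, 'a'), (4, 'X'), (12, 'd')]
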
Example #47
0
class Ontology():
    def __init__(self, domains, predicates):

        self.domains = {}
        self.predicates = SortedDict()
        self.herbrand_base_size = 0
        self.predicate_range = {}
        self.finalized = False
        self.constraints = []

        for d in domains:
            self.__add_domain(d)

        for p in predicates:
            self.__add_predicate(p)

        self.__create_indexing_scheme()

    def __check_multidomain(self):
        if len(self.domains) > 1:
            raise Exception("This operation does not allow multi domains")

    def __add_domain(self, d):
        if not isinstance(d, Iterable):
            D = [d]
        else:
            D = d
        for d in D:
            if d.name in self.domains:
                raise Exception("Domain %s already exists" % d.name)
            self.domains[d.name] = d

    def __add_predicate(self, p):
        if not isinstance(p, Iterable):
            P = [p]
        else:
            P = p
        for p in P:
            if p.name in self.predicates:
                raise Exception("Predicate %s already exists" % p.name)
            self.predicates[p.name] = p
            self.predicate_range[p.name] = (self.herbrand_base_size,
                                            self.herbrand_base_size +
                                            p.groundings_number)
            self.herbrand_base_size += p.groundings_number

    def __create_indexing_scheme(self):
        # Managing a linearized version of this logic
        self._up_to_idx = 0  # running upper bound of the linear indices assigned so far
        # maps each predicate name to its multidimensional tensor of linear indices
        self._dict_indices = {}

        self.finalized = False
        self._linear = None
        self._linear_evidence = None

        self._linear_size = 0
        for p in self.predicates.values():
            shape = [d.num_constants for d in p.domains]
            length = np.prod(shape)
            fr = self._up_to_idx
            to = fr + length
            self._up_to_idx = to
            self._dict_indices[p.name] = np.reshape(np.arange(fr, to), shape)
            self._linear_size += length
        self.finalized = True

    def get_constraint(self, formula):
        return Formula(self, formula)

    def FOL2LinearState(self, file):
        self.__check_multidomain()
        #just converting APIs from old NMLN
        pp = SortedDict({p.name: p.arity for p in self.predicates.values()})
        constants, predicates, evidences = utils.read_file_fixed_world(
            file,
            list(self.domains.values())[0].constants, pp)
        linear = []
        for p, v in predicates.items():
            linear.extend(np.reshape(v, [-1]))
        linear = np.reshape(linear, [1, -1])
        return linear

    def linear2Dict(self, linear_state):
        d = SortedDict()
        for p in self.predicates.values():
            d[p.name] = np.take(linear_state, self._dict_indices[p.name])
        return d

    def prettyPrintFromLinear(self, linear_state):
        for p in self.predicates.values():
            print(p)
            print(np.take(linear_state, self._dict_indices[p.name]))
            print()

    def linear_size(self):
        return self._linear_size

    def sample_fragments_idx(self, k, num=100, get_ids=False):
        self.__check_multidomain()
        ii = []
        all_ids = []
        for _ in range(num):
            i = []
            num_constants = list(self.domains.values())[0].num_constants
            idx = np.random.choice(num_constants, size=k, replace=False)
            idx = np.random.permutation(idx)
            all_ids.append(idx)
            for p in self.predicates.values():
                a = p.arity
                f_idx = self._dict_indices[p.name]
                for j in range(a):
                    f_idx = np.take(f_idx, idx, axis=j)
                f_idx = np.reshape(f_idx, [-1])
                i.extend(f_idx)
            ii.append(i)
        res = np.stack(ii, axis=0)
        if not get_ids:
            return res
        else:
            return res, np.stack(all_ids, axis=0)

    def all_fragments_idx(self,
                          k,
                          get_ids=False,
                          get_atom_to_fragments_mask=False):
        self.__check_multidomain()
        ii = []
        all_ids = []
        num_constants = list(self.domains.values())[0].num_constants
        for idx in permutations(range(num_constants), k):
            all_ids.append(idx)
            i = []
            for p in self.predicates.values():
                a = p.arity
                f_idx = self._dict_indices[p.name]
                for j in range(a):
                    f_idx = np.take(f_idx, idx, axis=j)
                f_idx = np.reshape(f_idx, [-1])
                i.extend(f_idx)
            ii.append(i)
        res = np.stack(ii, axis=0)

        atom_to_fragments_mask = np.zeros([self.linear_size(), len(res)])
        for i in range(len(res)):
            for j in range(len(res[0])):
                atom_id = res[i, j]
                atom_to_fragments_mask[atom_id, i] = 1

        to_return = res
        if get_ids:
            to_return = [res, np.stack(all_ids, axis=0)]
        if get_atom_to_fragments_mask:
            to_return = to_return + [atom_to_fragments_mask]
        return to_return

    def size_of_fragment_state(self, k):
        self.__check_multidomain()
        size = 0
        for p in self.predicates.values():
            size += k**p.arity
        return size
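
One reason `self.predicates` is a `SortedDict` rather than a plain dict: the linear indexing scheme built in `__create_indexing_scheme` depends only on the predicate names, not on registration order, so two ontologies declared with the same predicates in different orders linearize identically. A toy illustration of that property (plain strings stand in for the `Predicate` objects):

from sortedcontainers import SortedDict

a = SortedDict()
a["smokes"] = "unary"
a["friends"] = "binary"

b = SortedDict()
b["friends"] = "binary"
b["smokes"] = "unary"

# iteration order is by key, regardless of insertion order,
# so an index built from either dict comes out the same
print(list(a.keys()) == list(b.keys()))  # True
print(list(a.keys()))                    # ['friends', 'smokes']
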
Example #48
0
    def calculate_scores(self):
        """
        Function to calculate a score for each transcript, given the metrics derived
        with the calculate_metrics method and the scoring scheme provided in the JSON configuration.
        If any requirements have been specified, all transcripts which do not pass them
        will be assigned a score of 0 and subsequently ignored.
        Scores are rounded to the nearest integer.
        """

        if self.scores_calculated is True:
            return

        self.get_metrics()
        if not hasattr(self, "logger"):
            self.logger = None
            self.logger.setLevel("DEBUG")
        self.logger.debug("Calculating scores for {0}".format(self.id))

        self.scores = dict()
        for tid in self.transcripts:
            self.scores[tid] = dict()
            # Add the score for the transcript source
            self.scores[tid]["source_score"] = self.transcripts[tid].source_score

        if self.regressor is None:
            for param in self.json_conf["scoring"]:
                self._calculate_score(param)

            for tid in self.scores:
                self.transcripts[tid].scores = self.scores[tid].copy()

            for tid in self.transcripts:

                if tid in self.__orf_doubles:
                    del self.scores[tid]
                    continue
                self.transcripts[tid].score = sum(self.scores[tid].values())
                self.scores[tid]["score"] = self.transcripts[tid].score

        else:
            valid_metrics = self.regressor.metrics
            metric_rows = SortedDict()
            for tid, transcript in sorted(self.transcripts.items(), key=operator.itemgetter(0)):
                for param in valid_metrics:
                    self.scores[tid][param] = "NA"
                row = []
                for attr in valid_metrics:
                    val = getattr(transcript, attr)
                    if isinstance(val, bool):
                        if val:
                            val = 1
                        else:
                            val = 0
                    row.append(val)
                metric_rows[tid] = row
            # scores = SortedDict.fromkeys(metric_rows.keys())
            for pos, score in enumerate(self.regressor.predict(list(metric_rows.values()))):
                tid = list(metric_rows.keys())[pos]
                if tid in self.__orf_doubles:
                    del self.scores[tid]
                    continue
                self.scores[tid]["score"] = score
                self.transcripts[tid].score = score

        self.metric_lines_store = []
        for row in self.prepare_metrics():
            if row["tid"] in self.__orf_doubles:
                continue
            else:
                self.metric_lines_store.append(row)

        for doubled in self.__orf_doubles:
            for partial in self.__orf_doubles[doubled]:
                if partial in self.transcripts:
                    del self.transcripts[partial]

        self.scores_calculated = True
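
In the regressor branch above, keeping `metric_rows` in a `SortedDict` is what guarantees that `list(metric_rows.keys())[pos]` names the transcript whose row sits at position `pos` of `list(metric_rows.values())` passed to `regressor.predict`. A toy sketch of that alignment, with a made-up prediction list standing in for the regressor output:

from sortedcontainers import SortedDict

metric_rows = SortedDict()
metric_rows["tid_b"] = [1.0, 0.0]
metric_rows["tid_a"] = [0.5, 1.0]

rows = list(metric_rows.values())   # rows ordered by transcript id
fake_predictions = [0.2, 0.9]       # one score per row, in the same order

for pos, score in enumerate(fake_predictions):
    tid = list(metric_rows.keys())[pos]   # keys() and values() share one ordering
    print(tid, rows[pos], score)
# tid_a [0.5, 1.0] 0.2
# tid_b [1.0, 0.0] 0.9
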
Example #49
0
class Geofence(App):
    """A Geofence defines the space a vehicle is allowed to operate within.

    A geofence is constructed by layering additive and subtractive geometry
    to construct a 3-dimensional space of operations that a drone is allowed to
    fly in.

    Within a layer, a point is determined to be inside as if all the volumes in
    that layer were taken as a union.
    """
    # TODO Use a small memory database (like TinyDB) to handle layer mapping.
    #   Added benefit of allowing both name and order mapping to layer at once.

    req_telem = {
        'latitude': '/Airliner/CNTL/VehicleGlobalPosition/Lat',
        'longitude': '/Airliner/CNTL/VehicleGlobalPosition/Lon',
        'altitude': '/Airliner/CNTL/VehicleGlobalPosition/Alt'
    }

    def __init__(self):
        super(Geofence, self).__init__()
        self._check_thread = None
        self.enabled = False
        self.fence_violation = False
        self.layers = SortedDict()
        """:type: dict[Any, _Layer]"""

    def __contains__(self, other):
        """True if the given other is contained within the Geofence."""
        contained = False
        for layer in self.layers.values():
            if other in layer:
                contained = layer.kind is LayerKind.ADDITIVE
        return contained

    def __str__(self):
        return 'Geofence{\n' + '\n'.join(
            ' {}{}: {}'.format(
                '+' if layer.kind is LayerKind.ADDITIVE else '-', order, layer)
            for order, layer in self.layers.items()) + '\n}'

    def attach(self, vehicle):
        super(Geofence, self).attach(vehicle)
        self._check_thread = PeriodicExecutor(
            self._check_fence,
            every=FENCE_SLEEP,
            logger=self.vehicle.logger,
            name='FenceCheck',
            exception=lambda e: self.vehicle.exception('Geofence Exception'))
        self._check_thread.start()

    def detach(self):
        self._check_thread.stop()
        super(Geofence, self).detach()

    @classmethod
    def required_telemetry_paths(cls):
        return cls.req_telem.values()

    def add_layer(self, layer_position, layer_name, layer_kind):
        if layer_position in self.layers:
            raise KeyError('This layer already exists.')
        if not isinstance(layer_kind, LayerKind):
            raise TypeError('layer_kind must be of type LayerKind.')
        layer = Layer(name=layer_name, kind=layer_kind)
        self.layers[layer_position] = layer
        return layer

    def _check_fence(self):
        old = self.fence_violation
        self.fence_violation = self.fence_violation or \
            (self.enabled and self.position not in self)
        if not old and self.fence_violation:
            self.vehicle.error('Encountered Fence Violation at %s',
                               self.position)
            self.vehicle.broadcast(Intent(action=ACTION_RTL))
            print('Encountered fence violation. Press Ctrl-C exit.')

    def layer_by_name(self, name):
        for layer in self.layers.values():
            if layer.name == name:
                return layer

    @property
    def position(self):
        return Position(
            App._telem(self.req_telem['latitude'])(self),
            App._telem(self.req_telem['longitude'])(self),
            App._telem(self.req_telem['altitude'])(self))

    def remove_layer(self, position):
        del self.layers[position]
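
`__contains__` above depends on `SortedDict` iterating the layers in ascending key order: the last layer that contains the point decides, so a higher-numbered layer overrides everything beneath it. A minimal one-dimensional sketch with stand-in layers (each layer is just an `(is_additive, membership_test)` pair rather than the `Layer` class used above):

from sortedcontainers import SortedDict

layers = SortedDict()
layers[0] = (True,  lambda p: 0 <= p <= 100)   # additive base area
layers[1] = (False, lambda p: 40 <= p <= 60)   # subtractive keep-out zone

def allowed(p):
    contained = False
    for is_additive, contains in layers.values():   # ascending layer order
        if contains(p):
            contained = is_additive                  # later layers override earlier ones
    return contained

print(allowed(10))  # True  -- only the additive base layer matches
print(allowed(50))  # False -- the keep-out layer overrides the base layer
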
Example #50
0
class FederationRemoteSendQueue(object):
    """A drop in replacement for TransactionQueue"""
    def __init__(self, hs):
        self.server_name = hs.hostname
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

        self.presence_map = {}  # Pending presence map user_id -> UserPresenceState
        self.presence_changed = SortedDict()  # Stream position -> user_id

        self.keyed_edu = {}  # (destination, key) -> EDU
        self.keyed_edu_changed = SortedDict()  # stream position -> (destination, key)

        self.edus = SortedDict()  # stream position -> Edu

        self.failures = SortedDict()  # stream position -> (destination, Failure)

        self.device_messages = SortedDict()  # stream position -> destination

        self.pos = 1
        self.pos_time = SortedDict()

        # EVERYTHING IS SAD. In particular, python only makes new scopes when
        # we make a new function, so we need to make a new function so the inner
        # lambda binds to the queue rather than to the name of the queue which
        # changes. ARGH.
        def register(name, queue):
            LaterGauge(
                "synapse_federation_send_queue_%s_size" % (name,), "",
                [], lambda: len(queue))

        for queue_name in [
                "presence_map",
                "presence_changed",
                "keyed_edu",
                "keyed_edu_changed",
                "edus",
                "failures",
                "device_messages",
                "pos_time",
        ]:
            register(queue_name, getattr(self, queue_name))

        self.clock.looping_call(self._clear_queue, 30 * 1000)

    def _next_pos(self):
        pos = self.pos
        self.pos += 1
        self.pos_time[self.clock.time_msec()] = pos
        return pos

    def _clear_queue(self):
        """Clear the queues for anything older than N minutes"""

        FIVE_MINUTES_AGO = 5 * 60 * 1000
        now = self.clock.time_msec()

        keys = self.pos_time.keys()
        time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO)
        if not keys[:time]:
            return

        position_to_delete = max(keys[:time])
        for key in keys[:time]:
            del self.pos_time[key]

        self._clear_queue_before_pos(position_to_delete)

    def _clear_queue_before_pos(self, position_to_delete):
        """Clear all the queues from before a given position"""
        with Measure(self.clock, "send_queue._clear"):
            # Delete things out of presence maps
            keys = self.presence_changed.keys()
            i = self.presence_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.presence_changed[key]

            user_ids = set(user_id
                           for uids in itervalues(self.presence_changed)
                           for user_id in uids)

            to_del = [
                user_id for user_id in self.presence_map
                if user_id not in user_ids
            ]
            for user_id in to_del:
                del self.presence_map[user_id]

            # Delete things out of keyed edus
            keys = self.keyed_edu_changed.keys()
            i = self.keyed_edu_changed.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.keyed_edu_changed[key]

            live_keys = set()
            for edu_key in self.keyed_edu_changed.values():
                live_keys.add(edu_key)

            to_del = [
                edu_key for edu_key in self.keyed_edu
                if edu_key not in live_keys
            ]
            for edu_key in to_del:
                del self.keyed_edu[edu_key]

            # Delete things out of edu map
            keys = self.edus.keys()
            i = self.edus.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.edus[key]

            # Delete things out of failure map
            keys = self.failures.keys()
            i = self.failures.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.failures[key]

            # Delete things out of device map
            keys = self.device_messages.keys()
            i = self.device_messages.bisect_left(position_to_delete)
            for key in keys[:i]:
                del self.device_messages[key]

    def notify_new_events(self, current_id):
        """As per TransactionQueue"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
        pass

    def send_edu(self, destination, edu_type, content, key=None):
        """As per TransactionQueue"""
        pos = self._next_pos()

        edu = Edu(
            origin=self.server_name,
            destination=destination,
            edu_type=edu_type,
            content=content,
        )

        if key:
            assert isinstance(key, tuple)
            self.keyed_edu[(destination, key)] = edu
            self.keyed_edu_changed[pos] = (destination, key)
        else:
            self.edus[pos] = edu

        self.notifier.on_new_replication_data()

    def send_presence(self, states):
        """As per TransactionQueue

        Args:
            states (list(UserPresenceState))
        """
        pos = self._next_pos()

        # We only want to send presence for our own users, so let's always just
        # filter here just in case.
        local_states = list(
            filter(lambda s: self.is_mine_id(s.user_id), states))

        self.presence_map.update(
            {state.user_id: state
             for state in local_states})
        self.presence_changed[pos] = [state.user_id for state in local_states]

        self.notifier.on_new_replication_data()

    def send_failure(self, failure, destination):
        """As per TransactionQueue"""
        pos = self._next_pos()

        self.failures[pos] = (destination, str(failure))
        self.notifier.on_new_replication_data()

    def send_device_messages(self, destination):
        """As per TransactionQueue"""
        pos = self._next_pos()
        self.device_messages[pos] = destination
        self.notifier.on_new_replication_data()

    def get_current_token(self):
        return self.pos - 1

    def federation_ack(self, token):
        self._clear_queue_before_pos(token)

    def get_replication_rows(self,
                             from_token,
                             to_token,
                             limit,
                             federation_ack=None):
        """Get rows to be sent over federation between the two tokens

        Args:
            from_token (int)
            to_token(int)
            limit (int)
            federation_ack (int): Optional. The position where the worker is
                explicitly acknowledged it has handled. Allows us to drop
                data from before that point
        """
        # TODO: Handle limit.

        # To handle restarts where we wrap around
        if from_token > self.pos:
            from_token = -1

        # list of tuple(int, BaseFederationRow), where the first is the position
        # of the federation stream.
        rows = []

        # There should be only one reader, so let's delete everything it has
        # acknowledged it has seen.
        if federation_ack:
            self._clear_queue_before_pos(federation_ack)

        # Fetch changed presence
        i = self.presence_changed.bisect_right(from_token)
        j = self.presence_changed.bisect_right(to_token) + 1
        dest_user_ids = [
            (pos, user_id)
            for pos, user_id_list in self.presence_changed.items()[i:j]
            for user_id in user_id_list
        ]

        for (key, user_id) in dest_user_ids:
            rows.append((key, PresenceRow(state=self.presence_map[user_id], )))

        # Fetch changes keyed edus
        i = self.keyed_edu_changed.bisect_right(from_token)
        j = self.keyed_edu_changed.bisect_right(to_token) + 1
        # We purposefully clobber based on the key here, python dict comprehensions
        # always use the last value, so this will correctly point to the last
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

        for ((destination, edu_key), pos) in iteritems(keyed_edus):
            rows.append((pos,
                         KeyedEduRow(
                             key=edu_key,
                             edu=self.keyed_edu[(destination, edu_key)],
                         )))

        # Fetch changed edus
        i = self.edus.bisect_right(from_token)
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

        for (pos, edu) in edus:
            rows.append((pos, EduRow(edu)))

        # Fetch changed failures
        i = self.failures.bisect_right(from_token)
        j = self.failures.bisect_right(to_token) + 1
        failures = self.failures.items()[i:j]

        for (pos, (destination, failure)) in failures:
            rows.append(
                (pos, FailureRow(
                    destination=destination,
                    failure=failure,
                )))

        # Fetch changed device messages
        i = self.device_messages.bisect_right(from_token)
        j = self.device_messages.bisect_right(to_token) + 1
        device_messages = {v: k for k, v in self.device_messages.items()[i:j]}

        for (destination, pos) in iteritems(device_messages):
            rows.append((pos, DeviceRow(destination=destination, )))

        # Sort rows based on pos
        rows.sort()

        return [(pos, row.TypeId, row.to_data()) for pos, row in rows]
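
Every queue in `_clear_queue_before_pos` above is pruned with the same `SortedDict` idiom: take the key view, `bisect_left` the cutoff position, and delete the prefix of keys before it. A condensed sketch of that idiom on a single queue:

from sortedcontainers import SortedDict

edus = SortedDict({1: "edu-a", 3: "edu-b", 7: "edu-c"})  # stream position -> payload

def clear_before(queue, position):
    keys = queue.keys()
    i = queue.bisect_left(position)
    for key in keys[:i]:       # slicing the key view yields a plain list snapshot
        del queue[key]

clear_before(edus, 5)
print(list(edus.items()))  # [(7, 'edu-c')]
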
Example #51
0
class KeyedRegion(object):
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in
    this region overlaps with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """
    def __init__(self, tree=None):
        self._storage = SortedDict() if tree is None else tree

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion()

        kr = KeyedRegion()
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        return kr

    def merge(self, other, make_phi_func=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for loc_and_var in item.stored_objects:
                self.__store(loc_and_var, overwrite=False, make_phi_func=make_phi_func)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by this
        object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False, make_phi_func=None):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end-1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(b, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(a, stored_object, make_phi_func=make_phi_func)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_or_make_phi(item, stored_object, make_phi_func=make_phi_func)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end-1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_or_make_phi(self, item, stored_object, make_phi_func=None):  #pylint:disable=no-self-use
        if not make_phi_func or len({stored_object.obj} | item.internal_objects) == 1:
            item.add_object(stored_object)
        else:
            # make a phi node
            item.set_object(StoredObject(stored_object.start,
                                         make_phi_func(stored_object.obj, *item.internal_objects),
                                         stored_object.size,
                                         )
                            )
Example #52
0
class PiecewiseConstantFunction(Generic[T]):
    def __init__(self, initial_value: float = 0) -> None:
        """ Initialize the constant function to a particular value

        :param initial_value: the starting value for the function
        """
        self.breakpoints = SortedDict()
        self._initial_value: float = initial_value

    def add_breakpoint(self,
                       xval: XValue[T],
                       yval: float,
                       squash: bool = True) -> None:
        """ Add a breakpoint to the function and update the value

        Let f(x) be the original function, and next_bp be the first breakpoint > xval; after calling
        this method, the function will be modified to f'(x) = yval for x in [xval, next_bp)

        :param xval: the x-position of the breakpoint to add/modify
        :param yval: the value to set the function to at xval
        :param squash: if True and f(xval) = yval before calling this method, the function will remain unchanged
        """
        if squash and self.call(xval) == yval:
            return
        self.breakpoints[xval] = yval

    def add_delta(self, xval: XValue[T], delta: float) -> None:
        """ Modify the function value for x >= xval

        Let f(x) be the original function; After calling this method,
        the function will be modified to f'(x) = f(x) + delta for all x >= xval

        :param xval: the x-position of the breakpoint to add/modify
        :param delta: the amount to shift the function value by at xval
        """
        if delta == 0:
            return

        if xval not in self.breakpoints:
            self.breakpoints[xval] = self.call(xval)

        for x in self.breakpoints.irange(xval):
            self.breakpoints[x] += delta

        self.values.cache_clear()
        self.integrals.cache_clear()

    def call(self, xval: XValue[T]) -> float:
        """ Compute the output of the function at a point

        :param xval: the x-position to compute
        :returns: f(xval)
        """
        if len(self.breakpoints) == 0 or xval < self.breakpoints.keys()[0]:
            return self._initial_value
        else:
            lower_index = self.breakpoints.bisect(xval) - 1
            return self.breakpoints.values()[lower_index]

    def _breakpoint_info(
        self, index: Optional[int]
    ) -> Tuple[Optional[int], Optional[XValue[T]], float]:
        """ Helper function for computing breakpoint information

        :param index: index of the breakpoint to compute
        :returns: (index, breakpoint, value)
          * index is the breakpoint index (if it exists), or None if we're off the end
          * breakpoint is the x-value of the breakpoint, or None if we're off the end
          * value is f(breakpoint), or f(last_breakpoint) if we're off the end
        """
        try:
            breakpoint, value = self.breakpoints.peekitem(index)
        except IndexError:
            index = None
            breakpoint, value = None, self.breakpoints.values()[-1]
        return (index, breakpoint, value)

    @lru_cache(maxsize=_LRU_CACHE_SIZE)  # cache results of calls to this function
    def values(self, start: XValue[T], stop: XValue[T],
               step: XValueDiff[T]) -> 'SortedDict[XValue[T], float]':
        """ Compute a sequence of values of the function

        This is more efficient than [self.call(xval) for xval in range(start, stop, step)] because each self.call(..)
        takes O(log n) time due to the binary tree structure of self._breakpoints.  This method can compute the range
        of values in linear time in the range, which is significantly faster for large value ranges.

        :param start: lower bound of value sequence
        :param stop: upper bound of value sequence
        :param step: width between points in the sequence
        :returns: a SortedDict of the values of the function between start and stop, with the x-distance between
            each data-point equal to `step`; like normal "range" functions the right endpoint is not included
        """

        step = step or (stop - start)
        if len(self.breakpoints) == 0:
            num_values = int(math.ceil((stop - start) / step))
            return SortedDict([(start + step * i, self._initial_value)
                               for i in range(num_values)])

        curr_xval = start
        curr_value = self.call(start)
        next_index, next_breakpoint, next_value = self._breakpoint_info(
            self.breakpoints.bisect(start))

        sequence = SortedDict()
        while curr_xval < stop:
            sequence[curr_xval] = curr_value

            next_xval = min(stop, curr_xval + step)
            while next_breakpoint and next_xval >= next_breakpoint:
                assert next_index is not None  # if next_breakpoint is set, next_index should also be set
                curr_value = next_value
                next_index, next_breakpoint, next_value = self._breakpoint_info(
                    next_index + 1)
            curr_xval = next_xval

        return sequence

    @lru_cache(maxsize=_LRU_CACHE_SIZE)  # cache results of calls to this function
    def integrals(
        self,
        start: XValue[T],
        stop: XValue[T],
        step: XValueDiff[T],
        transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x),
    ) -> 'SortedDict[XValue[T], float]':
        """ Compute a sequence of integrals of the function

        :param start: lower bound of integral sequence
        :param stop: upper bound of integral sequence
        :param step: width of each "chunk" of the integral sequence
        :param transform: function to apply to x-widths before computing the integral
        :returns: a SortedDict of the numeric integral values of the function between start and stop;
            each integral has a range of size `step`, and the key-value is the left endpoint of the chunk
        """
        step = step or (stop - start)
        if len(self.breakpoints) == 0:
            # If there are no breakpoints, just split up the range into even widths and compute
            # (width * self._initial_value) for each chunk.
            step_width = transform(step)
            range_width = transform(stop - start)
            num_full_chunks = int(range_width // step_width)
            sequence = SortedDict([(start + step * i,
                                    step_width * self._initial_value)
                                   for i in range(num_full_chunks)])

            # If the width does not evenly divide the range, compute the last chunk separately
            if range_width % step_width != 0:
                sequence[start + step * num_full_chunks] = \
                    (range_width % step_width) * self._initial_value
            return sequence

        # Set up starting loop parameters
        curr_xval = start
        curr_value = self.call(start)
        next_index, next_breakpoint, next_value = self._breakpoint_info(
            self.breakpoints.bisect(start))

        # Loop through the entire range and compute the integral of each chunk
        sequence = SortedDict()
        while curr_xval < stop:
            orig_xval = curr_xval
            next_xval = min(stop, curr_xval + step)

            # For each breakpoint in [curr_xval, next_xval), compute the area of that sub-chunk
            next_integral: float = 0
            while next_breakpoint and next_xval >= next_breakpoint:
                assert next_index is not None  # if next_breakpoint is set, next_index should also be set
                next_integral += transform(next_breakpoint -
                                           curr_xval) * curr_value
                curr_xval = next_breakpoint
                curr_value = next_value
                next_index, next_breakpoint, next_value = self._breakpoint_info(
                    next_index + 1)

            # Handle any remaining width between the last breakpoint and the end of the chunk
            next_integral += transform(next_xval - curr_xval) * curr_value
            sequence[orig_xval] = next_integral

            curr_xval = next_xval

        return sequence

    def integral(
        self,
        start: XValue[T],
        stop: XValue[T],
        transform: Callable[[XValueDiff[T]], float] = lambda x: cast(float, x),
    ) -> float:
        """ Helper function to compute the integral of the whole specified range

        :param start: lower bound of the integral
        :param stop: upper bound of the integral
        :param transform: function to apply to x-widths before computing the integral
        :returns: the integral of the function between start and stop
        """
        return self.integrals(start, stop, (stop - start),
                              transform).values()[0]

    def __str__(self) -> str:
        ret = f'{self._initial_value}, x < {self.breakpoints.keys()[0]}\n'
        for xval, yval in self.breakpoints.items():
            ret += f'{yval}, x >= {xval}\n'
        return ret

    def __add__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value + other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 + y1)
        return new_func

    def __sub__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value - other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 - y1)
        return new_func

    def __mul__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
            self._initial_value * other._initial_value)
        for xval, y0, y1 in _merged_breakpoints(self, other):
            new_func.add_breakpoint(xval, y0 * y1)
        return new_func

    def __truediv__(
        self, other: 'PiecewiseConstantFunction[T]'
    ) -> 'PiecewiseConstantFunction[T]':
        try:
            new_func: 'PiecewiseConstantFunction[T]' = PiecewiseConstantFunction(
                self._initial_value / other._initial_value)
        except ZeroDivisionError:
            new_func = PiecewiseConstantFunction()

        for xval, y0, y1 in _merged_breakpoints(self, other):
            try:
                new_func.add_breakpoint(xval, y0 / y1)
            except ZeroDivisionError:
                new_func.add_breakpoint(xval, 0)
        return new_func
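
A minimal usage sketch of the piecewise-constant API above (hypothetical numbers; it assumes the class is in scope and that plain ints are acceptable x-values):

f = PiecewiseConstantFunction(1.0)   # f(x) = 1.0 everywhere to start
f.add_breakpoint(5, 3.0)             # f(x) = 3.0 for x >= 5, up to the next breakpoint
f.add_delta(10, -0.5)                # shift f(x) by -0.5 for all x >= 10

print(f.call(4))                     # 1.0 (before the first breakpoint)
print(f.call(12))                    # 2.5
print(dict(f.values(0, 15, 5)))      # {0: 1.0, 5: 3.0, 10: 2.5}
print(f.integral(0, 15))             # 5*1.0 + 5*3.0 + 5*2.5 = 32.5
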
Example #53
class Node(BaseNode, Mapping):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.rest = None

    def _select(self, key):
        for k, v in reversed(list(self.values.items())):
            if k <= key:
                return v

        return self.rest

    def _insert(self, key, value):
        result = self._select(key)._insert(key, value)
        self.changed = True

        if result is None:
            return

        key, other = result
        return super()._insert(key, other)

    def _split(self):
        other = LazyNode(node=Node(tree=self.tree, changed=True),
            tree=self.tree)

        values = self.values.items()
        self.values = SortedDict(values[:len(values) // 2])
        other.values = SortedDict(values[len(values) // 2:])

        key, value = other.values.popitem(0)  # pop the smallest item; newer sortedcontainers take an index, not last=
        other.rest = value

        return (key, other)

    def _commit(self):
        self.rest._commit()

        for child in self.values.values():
            child._commit()

        data = packb({
            'rest': self.rest.offset,
            'values': {k: v.offset for k, v in self.values.items()},
        })

        self.tree.chunk.write(ChunkId.Node, data)
        return self.tree.chunk.tell()

    def __getitem__(self, key):
        return self._select(key)[key]

    def __len__(self):
        return sum(len(child) for child in self.values.values()) + \
            len(self.rest)

    def __iter__(self):
        for key in self.rest:
            yield key

        for child in self.values.values():
            for key in child:
                yield key
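
The `_select` method above finds the child stored under the greatest separator key <= the search key (falling back to `rest`) with a reverse linear scan. A standalone sketch of the same floor lookup using SortedDict's bisect_right/peekitem (names here are illustrative, not part of the class above):

from sortedcontainers import SortedDict

def select_child(children, rest, key):
    """Child under the greatest separator key <= key, or `rest` if key precedes them all."""
    index = children.bisect_right(key)   # number of separator keys <= key
    if index == 0:
        return rest
    return children.peekitem(index - 1)[1]

children = SortedDict({10: 'child-10', 20: 'child-20', 30: 'child-30'})
assert select_child(children, 'rest-child', 5) == 'rest-child'
assert select_child(children, 'rest-child', 25) == 'child-20'
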
Example #54
    def validate(self,
                 protocol_name,
                 subset='development',
                 aggregate=False,
                 every=1,
                 start=0):

        # prepare paths
        validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_,
                                                protocol=protocol_name)
        validate_txt = self.VALIDATE_TXT.format(
            validate_dir=validate_dir,
            subset=subset,
            aggregate='aggregate.' if aggregate else '')
        validate_png = self.VALIDATE_PNG.format(
            validate_dir=validate_dir,
            subset=subset,
            aggregate='aggregate.' if aggregate else '')
        validate_eps = self.VALIDATE_EPS.format(
            validate_dir=validate_dir,
            subset=subset,
            aggregate='aggregate.' if aggregate else '')

        # create validation directory
        mkdir_p(validate_dir)

        # Build validation set
        if aggregate:
            X, n, y = self._validation_set_z(protocol_name, subset=subset)
        else:
            X, y = self._validation_set_y(protocol_name, subset=subset)

        # list of equal error rates, and epoch to process
        eers, epoch = SortedDict(), start

        desc_format = ('Best EER = {best_eer:.2f}% @ epoch #{best_epoch:d} ::'
                       ' EER = {eer:.2f}% @ epoch #{epoch:d} :')

        progress_bar = tqdm(unit='epoch')

        with open(validate_txt, mode='w') as fp:

            # watch and evaluate forever
            while True:

                # last completed epochs
                completed_epochs = self.get_epochs(self.train_dir_) - 1

                if completed_epochs < epoch:
                    time.sleep(60)
                    continue

                # if last completed epoch has already been processed
                # go back to first epoch that hasn't been processed yet
                process_epoch = epoch if completed_epochs in eers \
                                      else completed_epochs

                # do not validate this epoch if it has been done before...
                if process_epoch == epoch and epoch in eers:
                    epoch += every
                    progress_bar.update(every)
                    continue

                weights_h5 = LoggingCallback.WEIGHTS_H5.format(
                    log_dir=self.train_dir_, epoch=process_epoch)

                # this is needed for corner case when training is started from
                # an epoch > 0
                if not isfile(weights_h5):
                    time.sleep(60)
                    continue

                # sleep 5 seconds to let the checkpoint callback finish
                time.sleep(5)

                embedding = keras.models.load_model(
                    weights_h5, custom_objects=CUSTOM_OBJECTS, compile=False)

                if aggregate:

                    def embed(X):
                        func = K.function([
                            embedding.get_layer(name='input').input,
                            K.learning_phase()
                        ], [embedding.get_layer(name='internal').output])
                        return func([X, 0])[0]
                else:
                    embed = embedding.predict

                # embed all validation sequences
                fX = embed(X)

                if aggregate:
                    indices = np.hstack([[0], np.cumsum(n)])
                    fX = np.stack([
                        np.sum(np.sum(fX[i:j], axis=0), axis=0)
                        for i, j in pairwise(indices)
                    ])
                    fX = l2_normalize(fX)

                # compute pairwise distances
                y_pred = pdist(fX, metric=self.approach_.metric)
                # compute pairwise groundtruth
                y_true = pdist(y, metric='chebyshev') < 1
                # estimate equal error rate
                _, _, _, eer = det_curve(y_true, y_pred, distances=True)
                eers[process_epoch] = eer

                # save equal error rate to file
                fp.write(
                    self.VALIDATE_TXT_TEMPLATE.format(epoch=process_epoch,
                                                      eer=eer))
                fp.flush()

                # keep track of best epoch so far
                best_epoch = eers.iloc[np.argmin(eers.values())]
                best_eer = eers[best_epoch]

                progress_bar.set_description(
                    desc_format.format(epoch=process_epoch,
                                       eer=100 * eer,
                                       best_epoch=best_epoch,
                                       best_eer=100 * best_eer))

                # plot
                fig = plt.figure()
                plt.plot(eers.keys(), eers.values(), 'b')
                plt.plot([best_epoch], [best_eer], 'bo')
                plt.plot([eers.iloc[0], eers.iloc[-1]], [best_eer, best_eer],
                         'k--')
                plt.grid(True)
                plt.xlabel('epoch')
                plt.ylabel('EER on {subset}'.format(subset=subset))
                TITLE = '{best_eer:.5g} @ epoch #{best_epoch:d}'
                title = TITLE.format(best_eer=best_eer,
                                     best_epoch=best_epoch,
                                     subset=subset)
                plt.title(title)
                plt.tight_layout()
                plt.savefig(validate_png, dpi=75)
                plt.savefig(validate_eps)
                plt.close(fig)

                # go to next epoch
                if epoch == process_epoch:
                    epoch += every
                    progress_bar.update(every)
                else:
                    progress_bar.update(0)

        progress_bar.close()
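
A small sketch (made-up numbers) of the best-epoch bookkeeping used above, written against the indexable key view rather than the deprecated `SortedDict.iloc` accessor:

from sortedcontainers import SortedDict

eers = SortedDict({0: 0.213, 5: 0.184, 10: 0.172, 15: 0.179})  # epoch -> EER

best_epoch = min(eers, key=eers.get)                         # epoch with the lowest EER
best_eer = eers[best_epoch]
first_epoch, last_epoch = eers.keys()[0], eers.keys()[-1]    # endpoints for the "best" line

print(best_epoch, best_eer, first_epoch, last_epoch)         # 10 0.172 0 15
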
Example #55
class WordData(QObject):
    # Define the signal we emit when we have loaded new data
    WordsUpdated = pyqtSignal()

    def __init__(self, my_book):
        super().__init__(None)
        # Save reference to the book
        self.my_book = my_book
        # Save reference to the metamanager
        self.metamgr = my_book.get_meta_manager()
        # Save reference to the edited document
        self.document = my_book.get_edit_model()
        # Save reference to a speller, which will be the default
        # at this point.
        self.speller = my_book.get_speller()
        # The vocabulary list as a sorted dict.
        self.vocab = SortedDict()
        # Key and Values views on the vocab list for indexing by table row.
        self.vocab_kview = self.vocab.keys()
        self.vocab_vview = self.vocab.values()
        # The count of available words based on the latest sort
        self.active_word_count = 0
        # The good- and bad-words sets and the scannos set.
        self.good_words = set()
        self.bad_words = set()
        self.scannos = set()
        # A dict of words that use an alt-dict tag. The key is a word and the
        # value is the alt-dict tag string.
        self.alt_tags = SortedDict()
        # Cached sort vectors, see get_sort_vector()
        self.sort_up_vectors = [None, None, None]
        self.sort_down_vectors = [None, None, None]
        self.sort_key_funcs = [None, None, None]
        # Register metadata readers and writers.
        self.metamgr.register(C.MD_GW, self.good_read, self.good_save)
        self.metamgr.register(C.MD_BW, self.bad_read, self.bad_save)
        self.metamgr.register(C.MD_SC, self.scanno_read, self.scanno_save)
        self.metamgr.register(C.MD_VL, self.word_read, self.word_save)
    # End of __init__


    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Methods used when saving metadata. The items in the good_words,
    # bad_words, and scanno sets are simply returned as a list of strings.
    #
    def good_save(self, section) :
        return [ token for token in self.good_words ]

    def bad_save(self, section) :
        return [ token for token in self.bad_words ]

    def scanno_save(self, section) :
        return [ token for token in self.scannos ]
    #
    # To save the vocabulary, write a list for each word:
    #   [ "token", "tag", count, [prop-code...] ]
    # where "token" is the word as a string, "tag" is its alt-dict tag
    # or a null string, count is an integer and [prop-code...] is the
    # integer values from the word's property set as a list. Note that
    # alt_tag needs to be a string because json doesn't handle None.
    #
    def word_save(self, section) :
        vlist = []
        for word in self.vocab:
            [count, prop_set] = self.vocab[word]
            #tag = "" if AD not in prop_set else self.alt_tags[word]
            tag = ""
            if AD in prop_set :
                if word in self.alt_tags :
                    tag = self.alt_tags[word]
                else : # should never occur, could be assertion error
                    worddata_logger.error( 'erroneous alt tag on ' + word )
            plist = list(prop_set)
            vlist.append( [ word, count, tag, plist ] )
        return vlist

    #
    # Methods used to load metadata. Called by the metadata manager with
    # a single Python object, presumably the object that was prepared by
    # the matching _save method above. Because the user might edit the metadata
    # file, do a little quality control.
    #

    def good_read(self, section, value, version):
        if isinstance(value, list) :
            for token in value :
                if isinstance(token, str) :
                    if token in self.bad_words :
                        worddata_logger.warn(
                            '"{}" is in both good and bad words - use in good ignored'.format(token)
                            )
                    else :
                        self.good_words.add(token)
                        if token in self.vocab : # vocab already loaded, it seems
                            props = self.vocab[token][1]
                            props.add(GW)
                            props &= prop_nox
                else :
                    worddata_logger.error(
                        '{} in GOODWORDS list ignored'.format(token)
                        )
            if len(self.good_words) :
                # We loaded some, the display might need to change
                self.WordsUpdated.emit()
        else :
            worddata_logger.error(
                'GOODWORDS metadata is not a list of strings, ignoring it'
                )

    def bad_read(self, section, value, version):
        if isinstance(value, list) :
            for token in value :
                if isinstance(token, str) :
                    if token in self.good_words :
                        worddata_logger.warn(
                            '"{}" is in both good and bad words - use in bad ignored'.format(token)
                            )
                    else :
                        self.bad_words.add(token)
                        if token in self.vocab : # vocab already loaded, it seems
                            props = self.vocab[token][1]
                            props.add(BW)
                            props.add(XX)
                else :
                    worddata_logger.error(
                        '{} in BADWORDS list ignored'.format(token)
                        )
            if len(self.bad_words) :
                # We loaded some, the display might need to change
                self.WordsUpdated.emit()
        else :
            worddata_logger.error(
                'BADWORDS metadata is not a list of strings, ignoring it'
                )

    def scanno_read(self, section, value, version):
        if isinstance(value, list) :
            for token in value :
                if isinstance(token, str) :
                    self.scannos.add(token)
                else :
                    worddata_logger.error(
                        '{} in SCANNOLIST ignored'.format(token)
                        )
        else :
            worddata_logger.error(
                'SCANNOLIST metadata is not a list of strings, ignoring it'
                )

    # Load the vocabulary section of a metadata file, allowing for
    # user-edited malformed items. Be very generous about user errors in a
    # modified meta file. The expected value for each word is as written by
    # word_save() above, ["token", count, tag, [props]] but allow a single
    # item ["token"] or just "token" so the user can put in a single word
    # with no count or properties. Convert null-string alt-tag to None.
    #
    # Before adding a word make sure to unicode-flatten it.
    #
    def word_read(self, section, value, version) :
        global PROP_ALL, prop_nox
        # get a new speller in case the Book read a different dict already
        self.speller = self.my_book.get_speller()
        # if value isn't a list, bail out now
        if not isinstance(value,list):
            worddata_logger.error(
                'WORDCENSUS metadata is not a list, ignoring it'
                )
            return
        # inspect each item of the list.
        for wlist in value:
            try :
                if isinstance(wlist,str) :
                    # expand "token" to ["token"]
                    wlist = [wlist]
                if not isinstance(wlist, list) : raise ValueError
                if len(wlist) != 4 :
                    if len(wlist) > 4 :raise ValueError
                    if len(wlist) == 1 : wlist.append(0) # add default count of 0
                    if len(wlist) == 2 : wlist.append('') # add default alt-tag
                    if len(wlist) == 3 : wlist.append([]) # add default props
                word = wlist[0]
                if not isinstance(word,str) : raise ValueError
                word = unicodedata.normalize('NFKC',word)
                count = int(wlist[1]) # exception if not numeric
                alt_tag = wlist[2]
                if not isinstance(alt_tag,str) : raise ValueError
                if alt_tag == '' : alt_tag = None
                prop_set = set(wlist[3]) # exception if not iterable
                if len( prop_set - PROP_ALL ) : raise ValueError #bogus props
            except :
                worddata_logger.error(
                    'WORDCENSUS item {} is invalid, ignoring it'.format(wlist)
                    )
                continue
            # checking done, store the word.
            if (0 == len(prop_set)) or (0 == count) :
                # word with no properties or count is a user addition, enter
                # it as if we found it in the file, including deducing the
                # properties, spell-check, hyphenation split.
                self._add_token(word, alt_tag)
                continue # that's that, on to next line
            # Assume we have a word saved by word_save(), but possibly the
            # good_words and bad_words have been edited and read-in first.
            # Note we are not checking for duplicates
            if word in self.bad_words :
                prop_set.add(BW)
                prop_set.add(XX)
            if word in self.good_words :
                prop_set.add(GW)
                prop_set &= prop_nox
            if alt_tag :
                prop_set.add(AD)
                self.alt_tags[word] = alt_tag
            self.vocab[word] = [count, prop_set]
        # end of "for wlist in value"
        # note the current word count
        self.active_word_count = len(self.vocab)
        # Tell wordview that the display might need to change
        self.WordsUpdated.emit()
    # end of word_read()

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Methods used when opening a new file, one with no metadata.
    #
    # The Book will call these methods passing a text stream when it finds a
    # good-words file or bad-words file. Each of these is expected to have
    # one token per line. We don't presume to know in what order the files
    # are presented, but we DO assume that the vocabulary census has not yet
    # been taken. That requires the user clicking Refresh and that cannot
    # have happened while first opening the file.

    def good_file(self, stream) :
        while not stream.atEnd() :
            token = stream.readLine().strip()
            if token in self.bad_words :
                worddata_logger.warn(
                    '"{}" is in both good and bad words - use in good ignored'.format(token)
                    )
            else :
                self.good_words.add(token)

    def bad_file(self, stream) :
        while not stream.atEnd() :
            token = stream.readLine().strip()
            if token in self.good_words :
                worddata_logger.warn(
                    '"{}" is in both good and bad words - use in bad ignored'.format(token)
                    )
            else :
                self.bad_words.add(token)
    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    #
    # The user can choose a new scannos file any time while editing. So there
    # might be existing data, so we clear the set before reading.
    #
    def scanno_file(self, stream) :
        self.scannos = set() # clear any prior values
        while not stream.atEnd() :
            token = stream.readLine().strip()
            self.scannos.add(token)

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # The following is called by the Book when the user chooses a different
    # spelling dictionary. Store a new spellcheck object. Recheck the
    # spelling of all words except those with properties HY, GW, or BW.
    #
    # NOTE IF THIS IS A PERFORMANCE BURDEN, KILL IT AND REQUIRE REFRESH
    #
    def recheck_spelling(self, speller):
        global PROP_BGH, prop_nox
        self.speller = speller
        for i in range(len(self.vocab)) :
            (c, p) = self.vocab_vview[i]
            if not( PROP_BGH & p ) : # then p lacks BW, GW and HY
                p = p & prop_nox # and now it also lacks XX
                w = self.vocab_kview[i]
                t = self.alt_tags.get(w,None)
                if not self.speller.check(w,t):
                    p.add(XX)
                self.vocab_vview[i][1] = p

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # Method to perform a census. This is called from wordview when the
    # user clicks the Refresh button asking for a new scan over all words in
    # the book. Formerly this took a progress bar, but the actual operation
    # is so fast no progress need be shown.
    #
    def refresh(self):
        global RE_LANG_ATTR, RE_TOKEN

        count = 0
        end_count = self.document.blockCount()

        # get a reference to the dictionary to use
        self.speller = self.my_book.get_speller()
        # clear the alt-dict list.
        self.alt_tags = SortedDict()
        # clear the sort vectors
        self.sort_up_vectors = [None, None, None]
        self.sort_down_vectors = [None, None, None]
        self.sort_key_funcs = [None, None, None]
        # Zero out all counts and property sets that we have so far. We will
        # develop new properties when each word is first seen. Properties
        # such as HY will not have changed, but both AD and XX might have
        # changed while the word text remains the same.
        for j in range(len(self.vocab)) :
            self.vocab_vview[j][0] = 0
            self.vocab_vview[j][1] = set()

        # iterate over all lines extracting tokens and processing them.
        alt_dict = None
        alt_tag = None
        for line in self.document.all_lines():
            count += 1
            j = 0
            m = RE_TOKEN.search(line,0)
            while m : # while match is not None
                if m.group(6) : # start-tag; has it lang= ?
                    d = RE_LANG_ATTR.search(m.group(8))
                    if d :
                        alt_dict = d.group(1)
                        alt_tag = m.group(7)
                elif m.group(9) :
                    if m.group(10) == alt_tag :
                        # end tag of a lang= start tag
                        alt_dict = None
                        alt_tag = None
                else :
                    self._add_token(m.group(0),alt_dict)
                j = m.end()
                m = RE_TOKEN.search(line,j)
        # Look for zero counts and delete those items. It is forbidden to
        # alter the dict contents while iterating over values or keys views,
        # so make a list of the word tokens to be deleted, then use del.
        togo = []
        for j in range(len(self.vocab)) :
            if self.vocab_vview[j][0] == 0 :
                togo.append(self.vocab_kview[j])
        for key in togo:
            del self.vocab[key]
        # Update possibly modified word count
        self.active_word_count = len(self.vocab)

    # Internal method for adding a possibly-hyphenated token to the vocabulary,
    # incrementing its count. This is used during the census/refresh scan, and
    # can be called from word_read to process a user-added word.
    # Arguments:
    #    tok_str: a normalized word-like token; may be hyphenated a/o apostrophized
    #    dic_tag: an alternate dictionary tag or None
    #
    # If the token has no hyphens, this is just a cover on _count. When the
    # token is hyphenated, we enter each part of it alone, then add the
    # phrase with the union of the prop_sets of its parts, plus HY. Thus
    # "mother-in-law's" will be added as "mother", "in" and "law's", and as
    # itself with HY, LC, AP. "1989-1995" puts 1989 and 1995 in the list and
    # will have HY and ND. Yes, this means that a hyphenation could have all
    # of UC, MC and LC.
    #
    # If a part of a phrase fails spellcheck, it will have XX but we do not
    # propagate that to the phrase itself.
    #
    # If a part of the phrase has AD (because it was previously entered as
    # part of a lang= string) that also is not propagated to the phrase
    # itself. Since hyphenated phrases are never spell-checked, they should
    # never have AD.
    #
    # Note: en-dash \u2013 is not supported here, only the ascii hyphen.
    # Support for it could be added if required.
    #
    # Defensive programming: '-'.split('-') --> ['','']; '-9'.split('-') --> ['','9']
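    #
    # Illustrative walk-through (hypothetical, assuming all three parts pass
    # spellcheck): _add_token("mother-in-law's", None) first counts the whole
    # phrase, then, because it is new and carries HY, counts each part and
    # unions the parts' property sets into the phrase entry:
    #   vocab["mother"]          -> [1, {LC}]
    #   vocab["in"]              -> [1, {LC}]
    #   vocab["law's"]           -> [1, {LC, AP}]
    #   vocab["mother-in-law's"] -> [1, {HY, LC, AP}]  (XX and AD are never copied up)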

    def _add_token(self, tok_str, dic_tag ) :
        global prop_nox
        # Count the entire token regardless of hyphens
        self._count(tok_str, dic_tag) # this definitely puts it in the dict
        [count, prop_set] = self.vocab[tok_str]
        if (count == 1) and (HY in prop_set) :
            # We just added a hyphenated token: add its parts also.
            parts = tok_str.split('-')
            prop_set = {HY}
            for member in parts :
                if len(member) : # if not null split from leading -
                    self._count(member, dic_tag)
                    [x, part_props] = self.vocab[member]
                    prop_set |= part_props
            self.vocab[tok_str] = [count, prop_set  - {XX, AD} ]

    # Internal method to count a token, adding it to the list if necessary.
    # An /alt-tag must already be removed. The word must be already
    # normalized. Because of the way we tokenize, we know the token contains
    # only letter forms, numeric forms, and possibly hyphens and/or
    # apostrophes.
    #
    # If it is in the list, increment its count. Otherwise, compute its
    # properties, including spellcheck for non-hyphenated tokens, and
    # add it to the vocabulary with a count of 1. Returns nothing.

    def _count(self, word, dic_tag ) :
        [count, prop_set] = self.vocab.get( word, [0,set()] )
        if count : # it was in the list: a new word would have count=0
            self.vocab[word][0] += 1 # increment its count
            return # and done.
        # Word was not in the list (but is now): count is 0, prop_set is empty.
        # The following is only done once per unique word.
        self.my_book.metadata_modified(True, C.MD_MOD_FLAG)
        work = word[:] # copy the word, we may modify it next.
        if work.startswith("Point"):
            pass # debug
        # If word has apostrophes, note that and delete for following tests.
        if -1 < work.find("'") : # look for ascii apostrophe
            prop_set.add(AP)
            work = work.replace("'","")
        if -1 < work.find('\u02bc') : # look for MODIFIER LETTER APOSTROPHE
            prop_set.add(AP)
            work = work.replace('\u02bc','')
        # If word has hyphens, note that and remove them.
        if -1 < work.find('-') :
            prop_set.add(HY)
            work = work.replace('-','')
        # With the hyphens and apostrophes out, check letter case
        if ANY_DIGIT.search( work ) :
            # word has at least one numeric
            prop_set.add(ND)
        if not work.isnumeric() :
            # word is not all-numeric, determine case of letters
            if work.lower() == work :
                prop_set.add(LC) # most common case
            elif work.upper() != work :
                prop_set.add(MC) # next most common case
            else : # work.upper() == work
                prop_set.add(UC)
        if HY not in prop_set : # word is not hyphenated, so check its spelling.
            if word not in self.good_words :
                if word not in self.bad_words :
                    # Word in neither good- nor bad-words
                    if dic_tag : # uses an alt dictionary
                        self.alt_tags[word] = dic_tag
                        prop_set.add(AD)
                    if not self.speller.check(word, dic_tag) :
                        prop_set.add(XX)
                else : # in bad-words
                    prop_set.add(XX)
            # else in good-words
        # else hyphenated, spellcheck only its parts
        self.vocab[word] = [1, prop_set]

    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    #
    # The following methods are called from the Words panel.
    #
    #  Get the count of words in the vocabulary, as selected by the
    #  latest sort vector.
    #
    def word_count(self):
        return self.active_word_count
    #
    # Get the actual size of the vocabulary, for searching it all.
    def vocab_count(self):
        return len(self.vocab)
    #
    # Get the word at position n in the vocabulary, using the SortedDict
    # KeysView for fast indexed lookup. Guard against invalid indices.
    #
    def word_at(self, n):
        try:
            return self.vocab_kview[n]
        except Exception as whatever:
            worddata_logger.error('bad call to word_at({0})'.format(n))
            return ('?')
    #
    # Get the count and/or property-set of the word at position n in the
    # vocabulary, using the SortedDict ValuesView for fast indexed lookup.
    #
    def word_info_at(self, n):
        try:
            return self.vocab_vview[n]
        except Exception as whatever:
            worddata_logger.error('bad call to word_info_at({0})'.format(n))
            return [0, set()]
    def word_count_at(self, n):
        try:
            return self.vocab_vview[n][0]
        except Exception as whatever:
            worddata_logger.error('bad call to word_count_at({0})'.format(n))
            return 0
    def word_props_at(self, n):
        try:
            return self.vocab_vview[n][1]
        except Exception as whatever:
            worddata_logger.error('bad call to word_props_at({0})'.format(n))
            return (set())

    #
    # Return a sort vector to implement column-sorting and/or filtering. The
    # returned value is a list of index numbers to self.vocab_vview and
    # vocab_kview such that iterating over the list selects vocabulary items
    # in some order. The parameters are:
    #
    # col is the number of the table column, 0:word, 1:count, 2:properties.
    # The sort key is formed based on the column:
    #   0: key is the word-token
    #   1: key is nnnnnnword-token so that words with the same count are
    #      in sequence.
    #   2: fffffffword-token so that words with the same props are in sequence.
    #
    # order is Qt.AscendingOrder or Qt.DescendingOrder
    #
    # key_func is a callable used to extract or condition the key value when
    # a new key is added to a SortedDict, usually created by natsort.keygen()
    # and used to implement locale-aware and case-independent sorting.
    #
    # filter_func is a callable that examines a vocab entry and returns
    # True or False, meaning include or omit this entry from the vector.
    # Used to implement property filters or harmonic-sets.
    #
    # To implement Descending order we return a reversed() version of the
    # matching Ascending order vector.
    #
    # Because vectors are expensive to make, we cache them, so that to
    # return to a previous sort order takes near zero time. However we can't
    # cache every variation of a filtered vector, so when a filter_func is
    # passed we make the vector every time.
    #
    def _make_key_getter(self, col) :
        if col == 0 :
            return lambda j : self.vocab_kview[j]
        elif col == 1 :
            return lambda j : '{:05}:{}'.format( self.vocab_vview[j][0], self.vocab_kview[j] )
        else : # col == 2
            return lambda j : prop_string(self.vocab_vview[j][1]) + self.vocab_kview[j]

    def get_sort_vector( self, col, order, key_func = None, filter_func = None ) :
        if filter_func : # is not None,
            # create a sort vector from scratch, filtered
            getter_func = self._make_key_getter( col )
            sorted_dict = SortedDict( key_func )
            for j in range( len( self.vocab ) ) :
                if filter_func( self.vocab_kview[j], self.vocab_vview[j][1] ) :
                    k = getter_func( j )
                    sorted_dict[ k ] = j
            vector = sorted_dict.values()
            if order != Qt.AscendingOrder :
                vector = [j for j in reversed( vector ) ]
        else : # no filter_func, try to reuse a cached vector
            vector = self.sort_up_vectors[ col ]
            if not vector or key_func is not self.sort_key_funcs[ col ] :
                # there is no ascending vector for this column, or there
                # is one but it was made with a different key_func.
                getter_func = self._make_key_getter( col )
                sorted_dict = SortedDict( key_func )
                for j in range( len( self.vocab ) ) :
                    k = getter_func( j )
                    sorted_dict[ k ] = j
                vector = self.sort_up_vectors[ col ] = sorted_dict.values()
                self.sort_key_funcs[ col ] = key_func
            if order != Qt.AscendingOrder :
                # what is wanted is a descending order vector, do we have one?
                if self.sort_down_vectors[ col ] is None :
                    # no, so create one from the asc. vector we now have
                    self.sort_down_vectors[ col ] = [ j for j in reversed( vector ) ]
                # yes we do (now)
                vector = self.sort_down_vectors[ col ]
        # one way or another, vector is a sort vector
        # note the actual word count available through that vector
        self.active_word_count = len(vector)
        return vector

    # Return a reference to the good-words set
    def get_good_set(self):
        return self.good_words

    # Note the addition of a word to the good-words set. The word probably
    # (but does not have to) exist in the database; add GW and remove XX from
    # its properties.
    def add_to_good_set(self, word):
        self.good_words.add(word)
        if word in self.vocab_kview :
            [count, pset] = self.vocab[word]
            pset.add(GW)
            pset -= set([XX]) # conditional .remove()
            self.vocab[word] = [count,pset]

    # Note the removal of a word from the good-words set. The word exists in
    # the good-words set, because the wordview panel good-words list only
    # calls this for words it is displaying. The word may or may not exist in
    # the database. If it does, remove GW and set XX based on a spellcheck
    # test.
    def del_from_good_set(self, word):
        self.good_words.remove(word)
        if word in self.vocab_kview :
            [count, pset] = self.vocab[word]
            pset -= set([GW,XX])
            dic_tag = self.alt_tags.get(word)
            if not self.speller.check(word, dic_tag) :
                pset.add(XX)
            self.vocab[word] = [count, pset]

    # mostly used by unit test, get the index of a word by its key
    def word_index(self, w):
        try:
            return self.vocab_kview.index(w)
        except Exception as whatever:
            worddata_logger.error('bad call to word_index({0})'.format(w))
            return -1

    # The following methods are used by the edit syntax highlighter to set flags.
    #
    # 1. Check a token for spelling. We expect the vast majority of words
    # will be in the list. And for performance, we want to respond in as little
    # code as possible! So if we know the word, reply at once.
    #
    # 2. If the word in the document isn't in the vocab, perhaps it is not
    # a normalized string, so try again, normalized.
    #
    # 3. If the token is not in the list at all, return False, meaning it is
    # not misspelled. The opposite, returning True for misspelled, in a new
    # book before Refresh is done, would highlight everything.
    #
    def spelling_test(self, tok_str) :
        count, prop_set = self.vocab.get(tok_str,[0,set()])
        if count : # it was in the list
            return XX in prop_set
        tok_nlz = unicodedata.normalize('NFKC',tok_str)
        [count, prop_set] = self.vocab.get(tok_nlz,[0,set()])
        return XX in prop_set
    #
    # 2. Check a token for being in the scannos list. If no scannos
    # have been loaded, none will be hilited.
    #
    def scanno_test(self, tok_str) :
        return tok_str in self.scannos
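
A standalone sketch (toy data, illustrative names) of the sort-vector scheme documented before get_sort_vector above: build a SortedDict keyed by the per-column sort key, optionally under a key_func, and read its values() view as the vector of row indices:

from sortedcontainers import SortedDict

vocab = SortedDict({'Apple': [3, 'UC'], 'banana': [7, 'LC'], 'cherry': [2, 'LC']})
keys, vals = vocab.keys(), vocab.values()

def make_sort_vector(col, key_func=None):
    """Ascending vector of row indices for column col (0: word, 1: count)."""
    getter = (lambda j: keys[j]) if col == 0 else \
             (lambda j: '{:05}:{}'.format(vals[j][0], keys[j]))
    sd = SortedDict(key_func)
    for j in range(len(vocab)):
        sd[getter(j)] = j
    return list(sd.values())

print(make_sort_vector(1))                      # [2, 0, 1]  (counts 2, 3, 7)
print(make_sort_vector(0, key_func=str.lower))  # [0, 1, 2]  (case-folded word order)
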
Example #56
    def apply(self, protocol_name, output_dir, step=None, internal=False):

        # load best performing model
        with open(self.validate_txt_, 'r') as fp:
            eers = SortedDict(np.loadtxt(fp))
        best_epoch = int(eers.iloc[np.argmin(eers.values())])
        embedding = SequenceEmbeddingAutograd.load(self.train_dir_, best_epoch)

        # guess sequence duration from path (.../3.2+0.8/...)
        directory = basename(dirname(self.experiment_dir))
        duration, _, _, _ = self._directory_to_params(directory)
        if step is None:
            step = 0.5 * duration

        # initialize embedding extraction
        batch_size = self.approach_.batch_size
        extraction = Extraction(embedding,
                                self.feature_extraction_,
                                duration,
                                step=step,
                                batch_size=batch_size,
                                internal=internal)
        sliding_window = extraction.sliding_window
        dimension = extraction.dimension

        # create metadata file at root that contains
        # sliding window and dimension information
        path = Precomputed.get_config_path(output_dir)
        mkdir_p(dirname(path))
        f = h5py.File(path, 'a')  # explicit mode; recent h5py versions no longer default to append
        f.attrs['start'] = sliding_window.start
        f.attrs['duration'] = sliding_window.duration
        f.attrs['step'] = sliding_window.step
        f.attrs['dimension'] = dimension
        f.close()

        # file generator
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        for subset in ['development', 'test', 'train']:

            try:
                file_generator = getattr(protocol, subset)()
                first_item = next(file_generator)
            except NotImplementedError as e:
                continue

            file_generator = getattr(protocol, subset)()

            for current_file in file_generator:

                fX = extraction.apply(current_file)

                path = Precomputed.get_path(output_dir, current_file)
                mkdir_p(dirname(path))

                f = h5py.File(path, 'a')  # explicit mode, as above
                f.attrs['start'] = sliding_window.start
                f.attrs['duration'] = sliding_window.duration
                f.attrs['step'] = sliding_window.step
                f.attrs['dimension'] = dimension
                f.create_dataset('features', data=fX.data)
                f.close()
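
The model-selection step above rebuilds the epoch-to-EER map with SortedDict(np.loadtxt(fp)); a self-contained sketch of that pattern with made-up numbers:

import io

import numpy as np
from sortedcontainers import SortedDict

fake_txt = io.StringIO("0 0.213\n5 0.184\n10 0.172\n15 0.179\n")  # "<epoch> <eer>" per line

eers = SortedDict(np.loadtxt(fake_txt))    # each 2-element row becomes a (key, value) pair
best_epoch = int(min(eers, key=eers.get))  # epoch with the lowest EER
print(best_epoch, eers[best_epoch])        # 10 0.172
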
Example #57
class KeyedRegion:
    """
    KeyedRegion keeps a mapping between stack offsets and all objects covering that offset. It assumes no variable in
    this region overlap with another variable in this region.

    Registers and function frames can all be viewed as a keyed region.
    """

    __slots__ = ('_storage', '_object_mapping', '_phi_node_contains' )

    def __init__(self, tree=None, phi_node_contains=None):
        self._storage = SortedDict() if tree is None else tree
        self._object_mapping = weakref.WeakValueDictionary()
        self._phi_node_contains = phi_node_contains

    def __getstate__(self):
        return self._storage, dict(self._object_mapping), self._phi_node_contains

    def __setstate__(self, s):
        self._storage, om, self._phi_node_contains = s
        self._object_mapping = weakref.WeakValueDictionary(om)

    def _get_container(self, offset):
        try:
            base_offset = next(self._storage.irange(maximum=offset, reverse=True))
        except StopIteration:
            return offset, None
        else:
            container = self._storage[base_offset]
            if container.includes(offset):
                return base_offset, container
            return offset, None

    def __contains__(self, offset):
        """
        Test if there is at least one variable covering the given offset.

        :param offset:
        :return:
        """

        if type(offset) is not int:
            raise TypeError("KeyedRegion only accepts concrete offsets.")

        return self._get_container(offset)[1] is not None

    def __len__(self):
        return len(self._storage)

    def __iter__(self):
        return iter(self._storage.values())

    def __eq__(self, other):
        if set(self._storage.keys()) != set(other._storage.keys()):
            return False

        for k, v in self._storage.items():
            if v != other._storage[k]:
                return False

        return True

    def copy(self):
        if not self._storage:
            return KeyedRegion(phi_node_contains=self._phi_node_contains)

        kr = KeyedRegion(phi_node_contains=self._phi_node_contains)
        for key, ro in self._storage.items():
            kr._storage[key] = ro.copy()
        kr._object_mapping = self._object_mapping.copy()
        return kr

    def merge(self, other, replacements=None):
        """
        Merge another KeyedRegion into this KeyedRegion.

        :param KeyedRegion other: The other instance to merge with.
        :return: None
        """

        # TODO: is the current solution not optimal enough?
        for _, item in other._storage.items():  # type: RegionObject
            for so in item.stored_objects:  # type: StoredObject
                if replacements and so.obj in replacements:
                    so = StoredObject(so.start, replacements[so.obj], so.size)
                self._object_mapping[so.obj_id] = so
                self.__store(so, overwrite=False)

        return self

    def replace(self, replacements):
        """
        Replace variables with other variables.

        :param dict replacements:   A dict of variable replacements.
        :return:                    self
        """

        for old_var, new_var in replacements.items():
            old_var_id = id(old_var)
            if old_var_id in self._object_mapping:
                # FIXME: we need to check if old_var still exists in the storage
                old_so = self._object_mapping[old_var_id]  # type: StoredObject
                self._store(old_so.start, new_var, old_so.size, overwrite=True)

        return self

    def dbg_repr(self):
        """
        Get a debugging representation of this keyed region.
        :return: A string of debugging output.
        """
        keys = self._storage.keys()
        offset_to_vars = { }

        for key in sorted(keys):
            ro = self._storage[key]
            variables = [ obj.obj for obj in ro.stored_objects ]
            offset_to_vars[key] = variables

        s = [ ]
        for offset, variables in offset_to_vars.items():
            s.append("Offset %#x: %s" % (offset, variables))
        return "\n".join(s)

    def add_variable(self, start, variable):
        """
        Add a variable to this region at the given offset.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.add_object(start, variable, size)

    def add_object(self, start, obj, object_size):
        """
        Add/Store an object to this region at the given offset.

        :param start:
        :param obj:
        :param int object_size: Size of the object
        :return:
        """

        self._store(start, obj, object_size, overwrite=False)

    def set_variable(self, start, variable):
        """
        Add a variable to this region at the given offset, and remove all other variables that are fully covered by
        this variable.

        :param int start:
        :param SimVariable variable:
        :return: None
        """

        size = variable.size if variable.size is not None else 1

        self.set_object(start, variable, size)

    def set_object(self, start, obj, object_size):
        """
        Add an object to this region at the given offset, and remove all other objects that are fully covered by this
        object.

        :param start:
        :param obj:
        :param object_size:
        :return:
        """

        self._store(start, obj, object_size, overwrite=True)

    def get_base_addr(self, addr):
        """
        Get the base offset (the key we are using to index objects covering the given offset) of a specific offset.

        :param int addr:
        :return:
        :rtype:  int or None
        """

        base_addr, container = self._get_container(addr)
        if container is None:
            return None
        else:
            return base_addr

    def get_variables_by_offset(self, start):
        """
        Find variables covering the given region offset.

        :param int start:
        :return: A list of stack variables.
        :rtype:  set
        """

        _, container = self._get_container(start)
        if container is None:
            return []
        else:
            return container.internal_objects

    def get_objects_by_offset(self, start):
        """
        Find objects covering the given region offset.

        :param start:
        :return:
        """

        _, container = self._get_container(start)
        if container is None:
            return set()
        else:
            return container.internal_objects

    #
    # Private methods
    #

    def _store(self, start, obj, size, overwrite=False):
        """
        Store a variable into the storage.

        :param int start: The beginning address of the variable.
        :param obj: The object to store.
        :param int size: Size of the object to store.
        :param bool overwrite: Whether existing objects should be overwritten or not.
        :return: None
        """

        stored_object = StoredObject(start, obj, size)
        self._object_mapping[stored_object.obj_id] = stored_object
        self.__store(stored_object, overwrite=overwrite)

    def __store(self, stored_object, overwrite=False):
        """
        Store a variable into the storage.

        :param StoredObject stored_object: The descriptor describing start address and the variable.
        :param bool overwrite:  Whether existing objects should be overwritten or not. True to make a strong update,
                                False to make a weak update.
        :return: None
        """

        start = stored_object.start
        object_size = stored_object.size
        end = start + object_size

        # region items in the middle
        overlapping_items = list(self._storage.irange(start, end-1))

        # is there a region item that begins before the start and overlaps with this variable?
        floor_key, floor_item = self._get_container(start)
        if floor_item is not None and floor_key not in overlapping_items:
            # insert it into the beginning
            overlapping_items.insert(0, floor_key)

        # scan through the entire list of region items, split existing regions and insert new regions as needed
        to_update = {start: RegionObject(start, object_size, {stored_object})}
        last_end = start

        for floor_key in overlapping_items:
            item = self._storage[floor_key]
            if item.start < start:
                # we need to break this item into two
                a, b = item.split(start)
                if overwrite:
                    b.set_object(stored_object)
                else:
                    self._add_object_with_check(b, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            elif item.start > last_end:
                # there is a gap between the last item and the current item
                # fill in the gap
                new_item = RegionObject(last_end, item.start - last_end, {stored_object})
                to_update[new_item.start] = new_item
                last_end = new_item.end
            elif item.end > end:
                # we need to split this item into two
                a, b = item.split(end)
                if overwrite:
                    a.set_object(stored_object)
                else:
                    self._add_object_with_check(a, stored_object)
                to_update[a.start] = a
                to_update[b.start] = b
                last_end = b.end
            else:
                if overwrite:
                    item.set_object(stored_object)
                else:
                    self._add_object_with_check(item, stored_object)
                to_update[item.start] = item

        self._storage.update(to_update)

    def _is_overlapping(self, start, variable):

        if variable.size is not None:
            # make sure this variable does not overlap with any other variable
            end = start + variable.size
            try:
                prev_offset = next(self._storage.irange(maximum=end-1, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                if start <= prev_offset < end:
                    return True
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if start < prev_offset + prev_item_size < end:
                    return True
        else:
            try:
                prev_offset = next(self._storage.irange(maximum=start, reverse=True))
            except StopIteration:
                prev_offset = None

            if prev_offset is not None:
                prev_item = self._storage[prev_offset][0]
                prev_item_size = prev_item.size if prev_item.size is not None else 1
                if prev_offset <= start < prev_offset + prev_item_size:
                    return True

        return False

    def _add_object_with_check(self, item, stored_object):
        if len({stored_object.obj} | item.internal_objects) > 1:
            if self._phi_node_contains is not None:
                # check if `item` is a phi node that contains stored_object.obj
                for so in item.internal_objects:
                    if self._phi_node_contains(so, stored_object.obj):
                        # yes! so we want to skip this object
                        return
                # check if `stored_object.obj` is a phi node that contains item.internal_objects
                if all(self._phi_node_contains(stored_object.obj, o) for o in item.internal_objects):
                    # yes!
                    item.set_object(stored_object)
                    return

            l.warning("Overlapping objects %s.", str({stored_object.obj} | item.internal_objects))
        item.add_object(stored_object)
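
The snippet above depends on RegionObject and the rest of its class, so it cannot run on its own, but the overlap test it performs is essentially a SortedDict/irange lookup of the nearest stored region that starts at or before the end of the new interval. Below is a minimal, self-contained sketch of that check, assuming stored regions never overlap and using an illustrative start-offset-to-size mapping; the helper name `overlaps` and the sample data are made up for the example, not part of the original class.

from sortedcontainers import SortedDict

def overlaps(regions, start, size):
    """Return True if [start, start + size) overlaps any stored region.

    `regions` maps each region's start offset to its size; stored regions
    are assumed not to overlap each other, as in the class above.
    """
    end = start + size
    # The only left-hand candidate is the region with the greatest start
    # offset that is still <= end - 1.
    try:
        prev_offset = next(regions.irange(maximum=end - 1, reverse=True))
    except StopIteration:
        return False
    if start <= prev_offset < end:
        # That region begins inside the new interval.
        return True
    # That region begins before `start`; it overlaps only if it ends after `start`.
    return prev_offset + regions[prev_offset] > start

regions = SortedDict({0: 4, 8: 4})   # regions [0, 4) and [8, 12)
print(overlaps(regions, 4, 4))       # False: fits exactly into the gap
print(overlaps(regions, 6, 4))       # True:  runs into [8, 12)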
Пример #58
0
    def generate_graphs(self, show=False):
        filename = "task_arrival_{0}.png".format(self.workload_name)
        if os.path.isfile(os.path.join(self.folder, filename)):
            return filename

        fig = plt.figure(figsize=(9, 7))
        granularity_order = ["Second", "Minute", "Hour", "Day"]

        granularity_lambdas = {
            "Second": 1000,
            "Minute": 60 * 1000,
            "Hour": 60 * 60 * 1000,
            "Day": 60 * 60 * 24 * 1000,
        }

        plot_count = 0

        for granularity in granularity_order:
            task_arrivals = SortedDict()
            df = self.df.withColumn(
                'ts_submit',
                F.col('ts_submit') / granularity_lambdas[granularity])
            df = df.withColumn('ts_submit',
                               F.col('ts_submit').cast(T.LongType()))
            submit_times = df.groupBy("ts_submit").count().toPandas()

            for task in submit_times.itertuples():
                submit_time = int(task.ts_submit)

                if submit_time not in task_arrivals:
                    task_arrivals[submit_time] = 0

                task_arrivals[submit_time] += task.count

            ax = plt.subplot2grid(
                (2, 2), (int(math.floor(plot_count / 2)), (plot_count % 2)))
            if max(task_arrivals.keys()) >= 1:
                ax.plot(task_arrivals.keys(),
                        task_arrivals.values(),
                        color="black",
                        linewidth=1.0)
                ax.grid(True)
            else:
                ax.text(0.5,
                        0.5,
                        'Not available;\nTrace too small.',
                        horizontalalignment='center',
                        verticalalignment='center',
                        transform=ax.transAxes,
                        fontsize=16)
                ax.grid(False)

            # Rotates and right aligns the x labels, and moves the bottom of the
            # axes up to make room for them
            # fig.autofmt_xdate()

            ax.set_xlim(0)
            ax.set_ylim(0)

            ax.locator_params(nbins=3, axis='y')

            ax.margins(0.05)
            ax.tick_params(axis='both', which='major', labelsize=16)
            ax.tick_params(axis='both', which='minor', labelsize=14)

            ax.get_xaxis().get_offset_text().set_visible(False)
            formatter = ScalarFormatter(useMathText=True)
            formatter.set_powerlimits((-4, 5))
            ax.get_xaxis().set_major_formatter(formatter)
            fig.tight_layout()  # must run before the x-axis offset text below is populated
            offset_text = ax.get_xaxis().get_major_formatter().get_offset()

            ax.set_xlabel('Time{0} [{1}]'.format(
                f' {offset_text}' if len(offset_text) else "",
                granularity.lower()),
                          fontsize=18)
            ax.set_ylabel('Number of Tasks', fontsize=18)

            plot_count += 1

        fig.tight_layout()

        fig.savefig(os.path.join(self.folder, filename), dpi=600, format='png')
        if show:
            fig.show()

        return filename
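
The function above needs a Spark DataFrame (plus matplotlib and the surrounding class), so it is not trivially runnable in isolation, but its core bucketing step is just integer division of millisecond submit times by a granularity factor followed by a per-bucket count. Here is a minimal pure-Python sketch of that step using SortedDict; the helper name `task_arrivals` and the sample timestamps are illustrative, not taken from a real trace.

from sortedcontainers import SortedDict

# Made-up submit timestamps in milliseconds.
ts_submit_ms = [0, 250, 900, 1_200, 61_000, 61_500, 3_600_000]

granularity_lambdas = {
    "Second": 1000,
    "Minute": 60 * 1000,
    "Hour": 60 * 60 * 1000,
    "Day": 60 * 60 * 24 * 1000,
}

def task_arrivals(timestamps_ms, granularity):
    """Count arrivals per time bucket, keyed and iterated in sorted order."""
    divisor = granularity_lambdas[granularity]
    arrivals = SortedDict()
    for ts in timestamps_ms:
        bucket = ts // divisor  # same truncation as the LongType cast above
        arrivals[bucket] = arrivals.get(bucket, 0) + 1
    return arrivals

print(dict(task_arrivals(ts_submit_ms, "Second")))  # {0: 3, 1: 1, 61: 2, 3600: 1}
print(dict(task_arrivals(ts_submit_ms, "Minute")))  # {0: 4, 1: 2, 60: 1}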