Example #1
def MedCalculator(fileNum, text):
    """
    thread worker function
    Calculates the running median for the lines present in the text list supplied.
    Currently the sequential implementation is identical to the parallel implementation.

    :rtype :        null
    :param fileNum: an index pointing to the file to be processing in the input files
    :param text:    a text buffer to be loaded with the input text
    """

    # Start Profiling
    # basic profiling for the speed of the algorithm
    # start = time.clock()

    # the list that is going to hold the running medians
    medianNumbers = []

    # a sorted list to hold the word counts for input lines
    # the sorted list boosts performance substantially when computing the running median because it avoids
    # re-sorting the wordcount list every time an entry is added.
    linesWordCount = SortedList()
    lineNO = 0

    for line in text:

        # fast conversion of uppercase to lowercase and removal of the following `'-_
        cleaned_line = line.translate(table)

        # matching words/tokens
        words = re.findall(tokenPattern, cleaned_line)

        # counting the wordcount of a line and adding it to the respective list
        lineWordCount = len(words)
        linesWordCount.add(lineWordCount)

        # running median calculations
        # because I used a sorted list for the wordcounts of lines, now it is straightforward to compute
        # the running median
        index = int(lineNO / 2)
        if lineNO % 2 == 0:
            medianNumbers.append(float(linesWordCount[index]))
        else:
            medianNumbers.append(
                float((linesWordCount[index] + linesWordCount[index + 1]) / 2))
        lineNO += 1

        # optional profiling
        # print("Line NO: " + str(lineNO) + " wordcount: " + str(lineWordCount))
        # print("Size: " + str(len(linesWordCount)) + " linesWordCount elm: " + str(linesWordCount) )
        # print("Median NOs: " + str(medianNumbers))
        # end =  time.clock()

    # optional profiling
    # print("(Calculator)Time elapsed: ", (end-start), "Using Multiprocessing, Generated ", len(medianNumbers) , " medians from " , lineNO, " Lines")#, len(text) , " files")

    return medianNumbers
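A minimal standalone sketch of the same running-median idea, assuming only that sortedcontainers is installed; the token cleaning used above (table, tokenPattern) is replaced by a plain str.split() for illustration.

from sortedcontainers import SortedList

def running_medians(lines):
    counts = SortedList()
    medians = []
    for n, line in enumerate(lines):
        counts.add(len(line.split()))
        mid = n // 2
        if n % 2 == 0:
            # odd number of counts so far: the median is the middle element
            medians.append(float(counts[mid]))
        else:
            # even number of counts so far: average the two middle elements
            medians.append((counts[mid] + counts[mid + 1]) / 2.0)
    return medians

# running_medians(["a b", "a b c d", "a"]) -> [2.0, 3.0, 2.0]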
Example #2
def sort_file_lists(input_file_list, file_name_generator):
    command_list = SortedList()
    for input_file in input_file_list:
        reader = _get_reader(input_file)
        for command in reader:
            command_list.add(command)
            if len(command_list) > number_of_allowed_command:
                dump_commands_to_file(command_list, file_name_generator)
                command_list = SortedList()
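The chunked external-sort pattern above can be sketched in a self-contained form; the file reader and dump helper below are stand-ins, since _get_reader, dump_commands_to_file and number_of_allowed_command are not shown in this example.

from sortedcontainers import SortedList

def sort_lines_in_chunks(paths, out_name_gen, max_chunk=100000):
    chunk = SortedList()
    for path in paths:
        with open(path) as fh:
            for line in fh:
                chunk.add(line)
                if len(chunk) > max_chunk:
                    # the chunk iterates in sorted order, so it can be written as-is
                    with open(next(out_name_gen), 'w') as out:
                        out.writelines(chunk)
                    chunk = SortedList()
    return chunk  # leftover lines, still sorted, for the caller to flush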
Example #3
    def _match_agg_by_val(self):
        '''
        Matches input/output aggregates by values and returns a bunch of data structs
        '''
        self._all_match_in_agg = SortedList()
        self._match_in_agg_to_val = defaultdict(int)
        self._val_to_match_out_agg = defaultdict(set)

        # Gets unique values of input / output aggregates
        all_unique_in_agg_val, _ = np.unique(self._all_in_agg_val,
                                             return_inverse=True)
        all_unique_out_agg_val, _ = np.unique(self._all_out_agg_val,
                                              return_inverse=True)

        # Computes total fees paid/received by taker/maker
        if self._has_intrafees:
            fees_taker = self._fees + self._fees_taker
            fees_maker = -self._fees_maker  # doesn't take into account tx fees paid by makers

        # Finds input and output aggregates with matching values
        for in_agg_val in np.nditer(all_unique_in_agg_val):
            val = int(in_agg_val)

            for out_agg_val in np.nditer(all_unique_out_agg_val):

                diff = in_agg_val - out_agg_val

                if (not self._has_intrafees) and (diff < 0):
                    break
                else:
                    # Computes conditions required for a matching
                    cond_no_intrafees = (
                        not self._has_intrafees) and diff <= self._fees
                    cond_intrafees = self._has_intrafees and\
                                     ( (diff <= 0 and diff >= fees_maker) or (diff >= 0 and diff <= fees_taker) )

                    if cond_no_intrafees or cond_intrafees:
                        # Registers the matching input aggregate
                        match_in_agg = np.where(
                            self._all_in_agg_val == in_agg_val)[0]

                        for in_idx in match_in_agg:
                            if in_idx not in self._all_match_in_agg:
                                self._all_match_in_agg.add(in_idx)
                                self._match_in_agg_to_val[in_idx] = val

                        # Registers the matching output aggregate
                        match_out_agg = np.where(
                            self._all_out_agg_val == out_agg_val)[0]
                        self._val_to_match_out_agg[val].update(
                            match_out_agg.tolist())
Example #4
    def _merge_when_available(self, error_queue, kill_queue, in_queue,
                              result_send, y, q):
        try:
            result = SortedList()
            _in_get = in_queue.get
            _kill_get = kill_queue.get
            _get_ids = y.get_ids
            _add = result.add

            while True:

                try:
                    _kill_get(False)
                    result_send.send(None)
                    return
                except Empty:
                    pass

                item = _in_get()
                if item == "kill":
                    break

                slice_obj, nn_ids = item
                q_ids = _get_ids(slice_obj)
                _n = q_ids.shape[0]

                j = 0
                for i in range(_n):
                    _add((q_ids[i], tuple(nn_ids[j:j + q])))
                    j += q

            result_send.send(result)
        except Exception as e:
            result_send.send(None)
            error_queue.put(ExceptionWrapper(os.getpid(), e))
Example #5
    def get_split_point_suggestions(self):
        suggested_split_values = SortedList()
        min_value = np.inf
        max_value = -np.inf
        for k, estimator in self._att_val_dist_per_class.items():
            if self._min_value_observed_per_class[k] < min_value:
                min_value = self._min_value_observed_per_class[k]
            if self._max_value_observed_per_class[k] > max_value:
                max_value = self._max_value_observed_per_class[k]
        if min_value < np.inf:
            bin_size = max_value - min_value
            bin_size /= (float(self.num_bin_options) + 1.0)
            for i in range(self.num_bin_options):
                split_value = min_value + (bin_size * (i + 1))
                if min_value < split_value < max_value:
                    suggested_split_values.add(split_value)
        return suggested_split_values
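Outside any class, the binning logic above amounts to the following sketch: the observed range is divided into num_bin_options + 1 equal bins and the interior boundaries are suggested as candidate split points (names here are illustrative, not part of the original estimator).

from sortedcontainers import SortedList

def suggest_splits(min_value, max_value, num_bin_options):
    splits = SortedList()
    bin_size = (max_value - min_value) / (num_bin_options + 1.0)
    for i in range(num_bin_options):
        split_value = min_value + bin_size * (i + 1)
        if min_value < split_value < max_value:
            splits.add(split_value)
    return splits

# suggest_splits(0.0, 10.0, 4) -> SortedList([2.0, 4.0, 6.0, 8.0])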
Example #6
def _get_top_n_samples(model_predictions: List[ModelPrediction], n: int, best: bool):
    top_n_samples = SortedList(key=lambda sample: -sample.true_label_probability) if best else SortedList()
    for model_prediction in model_predictions:
        if best == model_prediction.is_correct():
            if len(top_n_samples) < n:
                top_n_samples.add(model_prediction)
            else:
                if best != (model_prediction < top_n_samples[-1]):
                    top_n_samples.pop()
                    top_n_samples.add(model_prediction)
    return [sample for sample in top_n_samples]  # so that it returns a normal list instead of SortedList
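A small hedged illustration of the SortedList(key=...) trick used above: with key=lambda p: -p the list is kept in decreasing order, so index -1 always holds the weakest kept candidate and pop() evicts it.

from sortedcontainers import SortedList

top3 = SortedList(key=lambda p: -p)
for p in [0.2, 0.9, 0.5, 0.7]:
    top3.add(p)
    if len(top3) > 3:
        top3.pop()  # drops the smallest probability seen so far

print(list(top3))  # [0.9, 0.7, 0.5]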
Example #8
def reformat(data_dir='../../../data', cores=1):
    print('Reformats data from:', data_dir)
    files = os.listdir(data_dir + '/json/')  # noqa
    snap_files = SortedList(
        [filename for filename in files if 'snaps' in filename],
        key=lambda fn: pd.to_datetime(fn[:-11], format='%d_%m_%Y_%H_%M_%S'))

    try:
        os.makedirs(data_dir + '/snap_json/')
    except FileExistsError:
        pass

    Parallel(n_jobs=cores)(
        delayed(save_snaps_from_file_path)(data_dir, snapfile)
        for snapfile in tqdm(snap_files))

    files = os.listdir(data_dir + '/json/')  # noqa
    mess_files = SortedList(
        [filename for filename in files if 'mess' in filename],
        key=lambda fn: pd.to_datetime(fn[:-10], format='%d_%m_%Y_%H_%M_%S'))

    keys = {
        'order_type', 'reason', 'sequence', 'side', 'size', 'type', 'price',
        'funds', 'order_id', 'time'
    }
    price_tick = 0.01
    price_dec = int(np.log10(1 / price_tick))

    try:
        os.makedirs(data_dir + '/feather/')
    except FileExistsError:
        pass

    Parallel(n_jobs=cores)(
        delayed(reformat_messages)(data_dir, k, keys, price_dec, messfile)
        for k, messfile in tqdm(enumerate(mess_files)))
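A hedged illustration of the filename ordering used above: the SortedList key parses the timestamp prefix of each file name, so iteration yields files in chronological order regardless of os.listdir() order (the file names below are made up).

import pandas as pd
from sortedcontainers import SortedList

names = ['02_01_2020_00_00_10_snaps.json', '01_01_2020_23_59_50_snaps.json']
ordered = SortedList(
    names, key=lambda fn: pd.to_datetime(fn[:-11], format='%d_%m_%Y_%H_%M_%S'))
print(list(ordered))  # the 01_01_2020 snapshot comes first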
Example #9
    def test_nlines(self):
        reader, start_time = self.init_reader()
        tpoints = SortedList()
        time_points, jobs = reader.next(start_time)
        tpoints.update(time_points)
        total_jobs = len(jobs)
        while True:
            if not tpoints:
                break
            _time = tpoints.pop(0)
            time_points, jobs = reader.next(_time)
            total_jobs += sum([len(js) for js in jobs.values()])
            tpoints.update(time_points)

        self.assertEqual(total_jobs, 25)
Example #10
    def _query_in_serial(self, y, q):

        _L = self.L
        _b = self.b
        _metric = self.metric
        _margs = self.margs
        _where = np.where
        _empty = np.empty
        _unique = np.unique
        _get_data = self.data.get_data
        _get_data_by_id = self.data.get_data_by_id
        _get_lists = storage.get_lists
        _hash = self._hash
        _encode = self._encode

        result = SortedList()
        _add = result.add

        q_data = y.get_data()
        num_points = q_data.shape[0]

        tmp = _hash(q_data)
        hashlist = _encode(tmp, _b).reshape(-1, _L)
        indptr, cand = _get_lists(hashlist)

        c_ids, c_indices = _unique(cand, return_inverse=True)
        c_data = _get_data_by_id(c_ids)

        dist = ssdist_wrapped(q_data, c_data, None, c_indices, indptr, _metric,
                              _margs)
        nn_indices = rank(q, num_points, dist.data, c_indices, indptr)

        q_ids = y.get_ids()
        nn_ids = _where(nn_indices > 0, c_ids[nn_indices], -1)

        _m = q_ids.shape[0]

        j = 0
        for i in range(_m):
            _add((q_ids[i], tuple(nn_ids[j:j + q])))
            j += q

        return result
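The local aliases at the top of the function (_add = result.add and friends) are a micro-optimization; a hedged sketch of the idea, independent of this class:

import timeit
from sortedcontainers import SortedList

def with_attribute_lookup(n=100000):
    s = SortedList()
    for i in range(n):
        s.add(i)          # attribute lookup on every iteration

def with_local_alias(n=100000):
    s = SortedList()
    _add = s.add          # bind the bound method once
    for i in range(n):
        _add(i)

# timeit.timeit(with_local_alias, number=10) is usually slightly lower than
# timeit.timeit(with_attribute_lookup, number=10); the gain is modest.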
Example #11
    def _end_running_jobs(self, running_jobs, allocator, requests):
        if not running_jobs:
            return running_jobs

        if random() >= 0.5:
            return running_jobs

        sample_size = randint(1, len(running_jobs))
        idxs = SortedList(sample(range(len(running_jobs)), sample_size))

        cur_resources = allocator.get_resources()

        for idx in reversed(idxs):

            job = running_jobs.pop(idx)
            job_request = requests.pop(job[1])

            for node in job[2]:
                for k, v in job_request.requested_resources.items():
                    cur_resources[node][k] += v

        return running_jobs
Example #12
def get_arrdata_from_dataset_values(arr_distinct_values,attributes,votes_attributes,users_attributes,position_attribute):
    
    arr_data=[]
    arr_types=[]
    arr_depthmax=[]
    arr_refinement_indexes=[]
    arr_labels=[]
    subgroup_pipeline=[]
    filter_operations=[]
    
    num=r'([0-9]|\.)*'
    reg = re.compile(num)       
    
    for i,attr in enumerate(attributes):
        arr_types.append(attr['type'])
        arr_depthmax.append(attr['bound_width'])
        subgroup_pipeline.append({'dimensionName':attr['name']})
        
        if attr['type']=='numeric':
            arr_data.append(SortedList(arr_distinct_values[i]))
            arr_refinement_indexes.append(0)
            arr_labels.append({})
            
            subgroup_pipeline[-1]['inInterval']=[]
            filter_operations.append('inInterval')
            
        elif attr['type']=='nominal':
            arr_data.append(SortedList(arr_distinct_values[i]))
            arr_refinement_indexes.append(len(arr_data[i]))
            arr_labels.append({})
            
            subgroup_pipeline[-1]['inSet']=[]
            filter_operations.append('inSet')
        
        elif attr['type']=='simple':
            arr_data.append(arr_distinct_values[i])
            arr_refinement_indexes.append(len(arr_data[i]))
            arr_labels.append({})
            
            subgroup_pipeline[-1]['inSet']=[]
            filter_operations.append('inSet')
            
        elif attr['type']=='themes':
            
            data_to_tree=[]
            for val in arr_distinct_values[i]:    
                data_to_tree.append({'ID':reg.search(val).group(),'LABEL':val[reg.search(val).end()+1:]})
            
            tree,themesMAP=createTreeOutOfThemes(data_to_tree)
            arr_data.append([tree])
            arr_refinement_indexes.append(0)
            arr_labels.append(themesMAP)
            
            subgroup_pipeline[-1]['contain_themes']=[] 
            filter_operations.append('contain_themes')
        elif attr['type']=='themes2':
            
            data_to_tree=[]
            for val in arr_distinct_values[i]:    
                data_to_tree.append({'ID':reg.search(val).group(),'LABEL':val[reg.search(val).end()+1:]})
            
            tree,themesMAP=createTreeOutOfThemes(data_to_tree)
            tree_themes=tree_theme2(sorted([x['ID'] for x in data_to_tree]))
            #tree_themes['pattern']=['']
            arr_data.append(tree_themes)
            #arr_refinement_indexes.append([''])
            
            arr_refinement_indexes.append(([''],None))
            arr_labels.append(themesMAP)
            
            subgroup_pipeline[-1]['contain_themes']=[] 
            filter_operations.append('contain_themes')
    
    subgroup_pipeline_for_votes=[stage for stage in subgroup_pipeline if stage['dimensionName'] in votes_attributes]
    subgroup_pipeline_for_meps=[stage for stage in subgroup_pipeline if stage['dimensionName'] in users_attributes]
    return arr_data,arr_types,arr_depthmax,arr_refinement_indexes,arr_labels,subgroup_pipeline,filter_operations,subgroup_pipeline_for_votes,subgroup_pipeline_for_meps
Example #13
def orderstream(order_paths='../../../data/feather/', snapshot_paths='../../../data/snap_json/', max_sequence_skip=1,
                random_start=False, **kwargs):
    """
    Generates a stream of orders, either a snapshot of the order book is returned when a disruption in the order stream
    happens or the next order is yielded.

    Parameters
    ----------
    order_paths: str
        Path to the orders
    snapshot_paths: str
        Path to the snapshots

    Yields
    -------
        order: list, snapshot: dict
            The first yield will have a snapshot. Then orders will be yielded with the snapshot as None.


    """

    order_files = os.listdir(order_paths)
    snap_files = os.listdir(snapshot_paths)

    order_files = SortedList(order_files, key=lambda x: int(x.split('_')[0]))

    snap_files = sorted(snap_files)
    snap_files_ = []
    min_order_files_seq = int(order_files[0].split('_')[1])
    for snap_file in snap_files:
        snap_seq_ = int(''.join(filter(str.isdigit, snap_file)))
        if snap_seq_ > min_order_files_seq:
            snap_files_.append(snap_file)

    snap_files = snap_files_
    snap_sequences = np.array([int(re.search(r'\d+', snap_sequence).group()) for snap_sequence in snap_files])

    max_seq_skip = max_sequence_skip

    while True:
        if random_start:
            snap_file = random.choice(snap_files)
            snap_seq = ''.join(filter(str.isdigit, snap_file))
            order_files_ = []
            for order_file in order_files:
                max_order_file_seq = int(order_file.split('_')[2].split('.')[0])
                if max_order_file_seq >= int(snap_seq):
                    order_files_.append(order_file)
        else:
            snap_file = snap_files[0]
            order_files_ = deepcopy(order_files)

        with open(snapshot_paths + snap_file) as f:
            snap = ujson.load(f)
        snap_sequence = snap['sequence']
        prev_order_seq = snap_sequence

        yield None, snap

        break_ = False
        for order_file in order_files_:
            orders = load_orders(order_paths + order_file)
            for order in orders:
                if order.sequence < snap_sequence:
                    pass
                else:
                    if order.sequence - prev_order_seq > max_seq_skip:
                        print('Too large a gap:', order.sequence - prev_order_seq)

                        if random_start:
                            break_ = True
                            break
                        else:
                            snap_seq_k = (snap_sequences >= order.sequence).argmax()
                            snap_file = snap_files[snap_seq_k]
                            with open(snapshot_paths + snap_file) as f:
                                snap = ujson.load(f)
                            snap_sequence = snap['sequence']
                            yield None, snap
                    else:
                        if order.type in MESSAGE_TYPES:
                            yield order, None
                prev_order_seq = order.sequence
            gc.collect()
            if break_:
                break
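A hedged usage sketch for the generator above; the paths are the defaults and rebuild_order_book / apply_order are hypothetical handlers, not part of the original module.

stream = orderstream(order_paths='../../../data/feather/',
                     snapshot_paths='../../../data/snap_json/',
                     max_sequence_skip=1, random_start=False)
for order, snap in stream:
    if snap is not None:
        rebuild_order_book(snap)   # hypothetical: reset state from a snapshot
    else:
        apply_order(order)         # hypothetical: apply one order message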
Example #14
class TxosLinker(object):
    '''
    A class that computes the entropy of Bitcoin transactions
    and the linkability of the inputs/outputs of a transaction
    '''    
    
    '''
    CONSTANTS
    '''   
    # Default maximum duration in seconds
    MAX_DURATION = 180
    
    # Processing options
    LINKABILITY = 'LINKABILITY'
    PRECHECK = 'PRECHECK'
    MERGE_FEES = 'MERGE_FEES'
    
    # Markers
    FEES = 'FEES'
    PACK = 'PACK'
    
    # Max number of inputs (or outputs) which can be processed by this algorithm
    MAX_NB_TXOS = 12
    
    
        
    '''
    ATTRIBUTES
    
    # List of input txos expressed as tuples (id, amount)
    inputs = []
    
    # List of output txos expressed as tuples (id, amount)
    outputs = []
    
    # Fees associated to the transaction
    fees = 0
    
    # Matrix of txos linkability
    #    Columns = input txos
    #    Rows = output txos
    #    Cells = number of combinations for which an input and an output are linked
    links = np.array()
    
    # Number of valid transactions combinations
    nb_tx_cmbn = 0
    
    # Maximum duration of the script (in seconds)
    _max_duration = MAX_DURATION
    '''
    
    
    '''
    INITIALIZATION
    '''
    def __init__(self, inputs=[], outputs=[], fees=0, max_duration=MAX_DURATION, max_txos=MAX_NB_TXOS):
        '''
        Constructor
        Parameters:
            inputs       = list of inputs txos [(v1_id, v1_amount), ...]
            outputs      = list of outputs txos [(v1_id, v1_amount), ...]
            fees         = amount of fees associated to the transaction
            max_duration = max duration allocated to processing of a single tx (in seconds)
            max_txos     = max number of txos. Txs with more than max_txos inputs or outputs are not processed.
        '''
        self._orig_ins = inputs
        self._orig_outs = outputs
        self._orig_fees = fees
        self._max_duration = max_duration
        self.max_txos = max_txos        
        self._packs = []
                        
    
    '''
    PUBLIC METHODS
    '''  
    def process(self, linked_txos=[], options=[LINKABILITY, PRECHECK], intrafees=(0,0)):
        '''
        Computes the linkability between a set of input txos and a set of output txos
        Returns:
            linkability matrix
            number of possible combinations for the transaction
            list of inputs (sorted by decreasing value)
            list of outputs (sorted by decreasing value)
        Parameters:
            linked_txos     = list of sets storing linked input txos. Each txo is identified by its id
            options         = list of actions to be applied
                LINKABILITY : computes the linkability matrix
                PRECHECK    : prechecks existence of deterministic links between inputs and outputs
                MERGE_FEES  : considers that all fees have been paid by a single sender and manages fees as an additional output
            intrafees       = tuple (fees_maker, fees_taker) of max "fees" paid among participants
                              used for joinmarket transactions
                              fees_maker are potential max "fees" received by a participant from another participant
                              fees_taker are potential max "fees" paid by a participant to all other participants
        '''
        self._options = options
        self.inputs = self._orig_ins.copy()
        self.outputs = self._orig_outs.copy()
        self._fees_maker = intrafees[0]
        self._fees_taker = intrafees[1]
        self._has_intrafees = bool(self._fees_maker or self._fees_taker)
        
        # Packs txos known as being controlled by a same entity
        # It decreases the entropy and speeds up computations
        if linked_txos:
            self._pack_linked_txos(linked_txos)
        
        # Manages fees
        if (self.MERGE_FEES in options) and (self._orig_fees > 0):
            # Manages fees as an additional output (case of sharedsend by blockchain.info).
            # This reduces the volume of computations to be done.
            self._fees = 0
            txo_fees = (self.FEES, self._orig_fees)
            self.outputs.append(txo_fees)
        else:
            self._fees = self._orig_fees
        
        # Checks deterministic links
        nb_cmbn = 0        
        if self.PRECHECK in options and self._check_limit_ok(self.PRECHECK) and (not self._has_intrafees):
            # Prepares the data
            self._prepare_data()
            self._match_agg_by_val()
            # Checks deterministic links
            dtrm_lnks, dtrm_lnks_id = self._check_dtrm_links()
            # If deterministic links have been found, fills the linkability matrix 
            # (returned as result if linkability is not processed) 
            if dtrm_lnks is not None:
                shape = ( len(self.outputs), len(self.inputs) )
                mat_lnk = np.zeros(shape, dtype=np.int64)
                for (r,c) in dtrm_lnks:
                    mat_lnk[r,c] = 1
        else:
            mat_lnk = None
            dtrm_lnks_id = None        
                    
        # Checks if all inputs and outputs have already been merged
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)
        if (nb_ins == 0) or (nb_outs == 0):
            nb_cmbn = 1
            shape = (nb_outs, nb_ins)
            mat_lnk = np.ones(shape, dtype=np.int64)
        elif self.LINKABILITY in options and self._check_limit_ok(self.LINKABILITY):
            # Packs deterministic links if needed 
            if dtrm_lnks_id is not None:
                dtrm_lnks_id = [set(lnk) for lnk in dtrm_lnks_id]
                self._pack_linked_txos(dtrm_lnks_id)
            # Prepares data
            self._prepare_data()
            self._match_agg_by_val()
            # Computes a matrix storing a tree composed of valid pairs of input aggregates
            self._compute_in_agg_cmbn()
            # Builds the linkability matrix
            nb_cmbn, mat_lnk = self._compute_link_matrix()
        
        # Unpacks the matrix
        mat_lnk = self._unpack_link_matrix(mat_lnk, nb_cmbn)
        
        # Returns results
        return mat_lnk, nb_cmbn, self.inputs, self.outputs
                                                                             
        
    '''
    PREPARATION
    '''
    def _prepare_data(self):
        '''
        Computes several data structures which will be used later
        Parameters:
            inputs  = list of input txos
            outputs = list of output txos
        '''
        # Prepares data related to the input txos
        self.inputs,\
        self._all_in_agg,\
        self._all_in_agg_val = self._prepare_txos(self.inputs)
           
        # Prepares data related to the output txos
        self.outputs,\
        self._all_out_agg,\
        self._all_out_agg_val = self._prepare_txos(self.outputs)
        
        
    def _prepare_txos(self, txos):
        '''
        Computes several data structures related to a list of txos
        Returns:
            list of txos sorted by decreasing values
            array of aggregates (combinations of txos) in binary format
            array of values associated to the aggregates
        Parameters:
            txos = list of txos (list of tuples (id, value))
        '''
        # Removes txos with null value
        txos = filter(lambda x: x[1] > 0, txos)
        
        # Orders txos by value
        txos = sorted(txos, key=lambda tup: tup[1], reverse=True)
        
        # Creates a 1D array of values
        vals = [ e[1] for _, e in enumerate(txos) ]
        all_val = np.array(vals, dtype='int64')
        
        # Computes all possible combinations of txos encoded in binary format
        expnt = len(txos)
        shape = (expnt, 2**expnt)
        all_agg = np.zeros(shape, dtype=bool)
        base = np.array([0, 1], dtype=bool)

        for j in range(0, expnt):
            two_exp_j = 2**j
            tmp = np.repeat(base, two_exp_j)
            all_agg[j, :] = np.tile(tmp, 2**(expnt-1) // two_exp_j)
        #all_agg = np.arange(2**expnt) >> np.arange(expnt)[::, np.newaxis] & 1
        
        # Computes values of aggregates
        all_agg_val = np.dot(all_val, all_agg)
        
        # Returns computed data structures
        return txos, all_agg, all_agg_val
   
    
    '''
    PROCESSING OF AGGREGATES
    '''  
    def _match_agg_by_val(self):
        '''
        Matches input/output aggregates by values and returns a bunch of data structs
        '''
        self._all_match_in_agg = SortedList()
        self._match_in_agg_to_val = defaultdict(int)
        self._val_to_match_out_agg = defaultdict(set)
        
        # Gets unique values of input / output aggregates
        all_unique_in_agg_val, _ = np.unique(self._all_in_agg_val, return_inverse=True)
        all_unique_out_agg_val, _ = np.unique(self._all_out_agg_val, return_inverse=True)
        
        # Computes total fees paid/received by taker/maker
        if self._has_intrafees:
            fees_taker = self._fees + self._fees_taker
            fees_maker = - self._fees_maker         # doesn't take into account tx fees paid by makers
        
        # Finds input and output aggregates with matching values
        for in_agg_val in np.nditer(all_unique_in_agg_val):
            val = int(in_agg_val)
            
            for out_agg_val in np.nditer(all_unique_out_agg_val):
                
                diff = in_agg_val - out_agg_val
                
                if (not self._has_intrafees) and (diff < 0):
                    break
                else:
                    # Computes conditions required for a matching
                    cond_no_intrafees = (not self._has_intrafees) and diff <= self._fees
                    cond_intrafees = self._has_intrafees and\
                                     ( (diff <= 0 and diff >= fees_maker) or (diff >= 0 and diff <= fees_taker) )
                                     
                    if cond_no_intrafees or cond_intrafees:
                        # Registers the matching input aggregate
                        match_in_agg = np.where(self._all_in_agg_val == in_agg_val)[0]
                        
                        for in_idx in match_in_agg:
                            if in_idx not in self._all_match_in_agg:
                                self._all_match_in_agg.add(in_idx)
                                self._match_in_agg_to_val[in_idx] = val
                        
                        # Registers the matching output aggregate
                        match_out_agg = np.where(self._all_out_agg_val == out_agg_val)[0]
                        self._val_to_match_out_agg[val].update(match_out_agg.tolist())
         
    
    def _compute_in_agg_cmbn(self):
        '''
        Computes a matrix of valid combinations (pairs) of input aggregates
        Returns a dictionary (parent_agg => (child_agg1, child_agg2))
        We have a valid combination (agg1, agg2) if:
           R1/ child_agg1 & child_agg2 = 0 (no bitwise overlap)
           R2/ child_agg1 > child_agg2 (matrix is symmetric)           
        '''
        aggs = self._all_match_in_agg[1:-1]
        tgt = self._all_match_in_agg[-1]
        mat = defaultdict(list)
        saggs = set(aggs)
        
        for i in range(0, tgt+1):
            if i in saggs:
                j_max = min(i, tgt - i + 1)
                for j in range(0, j_max):
                    if (i & j == 0) and (j in saggs):
                        mat[i+j].append( (i,j) )
        
        self._mat_in_agg_cmbn = mat
    
    
    '''
    COMPUTATION OF LINKS BETWEEN TXOS
    '''
    def _check_dtrm_links(self):    
        '''
        Checks the existence of deterministic links between inputs and outputs
        Returns a list of tuples (idx_output, idx_input) and a list of tuples (id_output, id_input)
        '''
        nb_ins = len(self.inputs)
        nb_outs = len(self.outputs)
        
        shape = (nb_outs, nb_ins)
        mat_cmbn = np.zeros(shape, dtype=np.int64)
        
        shape = (1, nb_ins)
        in_cmbn = np.zeros(shape, dtype=np.int64)
        
        # Computes a matrix storing numbers of raw combinations matching input/output pairs
        # Also computes sum of combinations along inputs axis to get the number of combinations
        for (in_idx, val) in self._match_in_agg_to_val.items():
            for out_idx in self._val_to_match_out_agg[val]:
                mat_cmbn += self._get_link_cmbn(in_idx, out_idx)
                in_cmbn += self._all_in_agg[:,in_idx][np.newaxis,:]                       
        
        # Builds a list of sets storing inputs having a deterministic link with an output
        nb_cmbn = in_cmbn[0,0]
        dtrm_rows, dtrm_cols = np.where(mat_cmbn == nb_cmbn)
        dtrm_coords = list(zip(dtrm_rows, dtrm_cols))
        dtrm_aggs = [(self.outputs[o][0], self.inputs[i][0]) for (o,i) in dtrm_coords]
        return dtrm_coords, dtrm_aggs
        
                
    def _compute_link_matrix(self):
        '''
        Computes the linkability matrix
        Returns the number of possible combinations and the links matrix        
        Implements a depth-first traversal of the inputs combinations tree (right to left)
        For each input combination we compute the matching output combinations.
        This is a basic brute-force solution. Will have to find a better method later.
        '''
        nb_tx_cmbn = 0
        itgt = 2 ** len(self.inputs) - 1
        otgt = 2 ** len(self.outputs) - 1
        d_links = defaultdict(int)
        
        # Initializes a stack of tasks & sets the initial task
        #  0: index used to resume the processing of the task (required for depth-first algorithm)
        #  1: il = left input aggregate
        #  2: ir = right input aggregate
        #  3: d_out = outputs combination matching with current input combination
        #             dictionary of dictionary :  { or =>  { ol => (nb_parents_cmbn, nb_children_cmbn) } }
        stack = deque()
        ini_d_out = defaultdict(dict)
        ini_d_out[otgt] = { 0: (1, 0) }
        stack.append( (0, 0, itgt, ini_d_out) )
        
        # Sets start date/hour
        start_time = datetime.now()
        
        # Iterates over all valid inputs combinations (top->down)
        while len(stack) > 0:
            # Checks duration
            curr_time = datetime.now()
            delta_time = curr_time - start_time
            if delta_time.total_seconds() >= self._max_duration:
                return 0, None
            
            # Gets data from task
            t = stack[-1]
            idx_il = t[0]
            il = t[1]
            ir = t[2]
            d_out = t[3]
            n_idx_il = idx_il
            
            # Gets all valid decompositions of right input aggregate
            ircs = self._mat_in_agg_cmbn[ir]
            len_ircs = len(ircs)            
            
            for i in range(idx_il, len_ircs):
                
                n_idx_il = i                
                n_d_out = defaultdict(dict)
                
                # Gets left input sub-aggregate (column from ircs)
                n_il = ircs[i][1]
                
                # Checks if we must process this pair (columns from ircs are sorted in decreasing order)
                if n_il > il:
                    # Gets the right input sub-aggregate (row from ircs)
                    n_ir = ircs[i][0]
                    
                    # Iterates over outputs combinations previously found
                    for o_r in d_out:
                        sol = otgt - o_r
                        # Computes the number of parent combinations
                        nb_prt = sum([s[0] for s in d_out[o_r].values()])
                    
                        # Iterates over output sub-aggregates matching with left input sub-aggregate
                        val_il = self._match_in_agg_to_val[n_il]
                        for n_ol in self._val_to_match_out_agg[val_il]:
                            
                            # Checks compatibility of output sub-aggregate with left part of output combination
                            if (sol & n_ol == 0):
                                # Computes:
                                #   the sum corresponding to the left part of the output combination
                                #   the complementary right output sub-aggregate
                                n_sol = sol + n_ol
                                n_or = otgt - n_sol                                
                                # Checks if the right output sub-aggregate is valid
                                val_ir = self._match_in_agg_to_val[n_ir]
                                match_out_agg = self._val_to_match_out_agg[val_ir]
                                # Adds this output combination into n_d_out if all conditions met
                                if (n_sol & n_or == 0) and (n_or in match_out_agg):
                                    n_d_out[n_or][n_ol] = (nb_prt, 0)
                                    
                    # Updates idx_il for the current task
                    stack[-1] = (i + 1, il, ir, d_out)
                    # Pushes a new task which will decompose the right input aggregate
                    stack.append( (0, n_il, n_ir, n_d_out) )
                    # Executes the new task (depth-first)
                    break
                
                else:
                    # No more results for il, triggers a break and a pop
                    n_idx_il = len_ircs
                    break
                
            # Checks if task has completed     
            if n_idx_il > len_ircs - 1:
                # Pops the current task
                t = stack.pop()
                il = t[1]
                ir = t[2]
                d_out = t[3]
                
                # Checks if it's the root task
                if len(stack) == 0:
                    # Retrieves the number of combinations from root task
                    nb_tx_cmbn = d_out[otgt][0][1]
                
                else:
                    # Gets parent task
                    p_t = stack[-1]
                    p_d_out = p_t[3]
                
                    # Iterates over all entries from d_out
                    for (o_r, l_ol) in d_out.items():
                        r_key = (ir, o_r)
                        # Iterates over all left aggregates
                        for (ol, (nb_prnt, nb_chld)) in l_ol.items():
                            l_key = (il, ol)
                            # Updates the dictionary of links for the pair of aggregates
                            nb_occur = nb_chld + 1
                            d_links[r_key] += nb_prnt
                            d_links[l_key] += nb_prnt * nb_occur
                            # Updates parent d_out by back-propagating number of child combinations
                            p_or = ol + o_r
                            p_l_ol = p_d_out[p_or]
                            for (p_ol, (p_nb_prt, p_nb_chld)) in p_l_ol.items():
                                p_d_out[p_or][p_ol] = (p_nb_prt, p_nb_chld + nb_occur)
        
        # Fills the matrix
        links = self._get_link_cmbn(itgt, otgt)
        nb_tx_cmbn += 1
        for (lnk, mult) in d_links.items():
            links = links + self._get_link_cmbn(lnk[0], lnk[1]) * mult
        
        return nb_tx_cmbn, links
    
    
    def _get_link_cmbn(self, in_agg, out_agg):
        '''
        Computes a linkability matrix encoding the matching of given input/output aggregates
        Returns a numpy array
        Parameters:
            in_agg     = input aggregate
            out_agg    = output aggregate
        '''
        vouts = self._all_out_agg[:,out_agg][:,np.newaxis]
        vins = self._all_in_agg[:,in_agg][np.newaxis,:]
        return np.dot(vouts, vins)        
    
    
    '''
    PACKING/UNPACKING OF LINKED TXOS
    '''
    def _pack_linked_txos(self, linked_txos):
        '''
        Packs input txos which are known as being controlled by a same entity
        Parameters:
            linked_txos = list of sets storing linked input txos. Each txo is identified by its "id"
        '''
        idx = len(self._packs)
         
        # Merges packs sharing common elements
        packs = merge_sets(linked_txos)
         
        for pack in packs:
            ins = []
            val_ins = 0
             
            for i in self.inputs:
                if i[0] in pack:
                    ins.append(i)
                    val_ins += i[1]
                     
            idx += 1
            if len(ins) > 0:
                lbl = '%s_I%i' % (self.PACK, idx)
                inp = (lbl, val_ins)
                self.inputs.append(inp)
                in_pack = (lbl, val_ins, 'INPUTS', ins, [])
                self._packs.append(in_pack)
                for v in ins:
                    self.inputs.remove(v)
            
    
    def _unpack_link_matrix(self, mat_lnk, nb_cmbn):
        '''
        Unpacks linked txos in the linkability matrix
        Returns the unpacked matrix
        Parameters:
            mat_lnk = linkability matrix to be unpacked
            nb_cmbn = number of combinations associated to the linkability matrix
        '''
        mat_res = mat_lnk
        nb_cmbn = max(1, nb_cmbn)
                        
        for (pack, val, lctn, ins, outs) in reversed(self._packs):
            
            if lctn == 'INPUTS':
                key = (pack, val)
                idx = self.inputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(ins)
                    nb_outs = len(self.outputs)
                    # Inserts columns into the matrix for packed inputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape , dtype=np.int64)
                    vals += mat_res[:,idx][:, np.newaxis]
                    mat_res = np.hstack( (mat_res[:,0:idx], vals, mat_res[:,idx+1:]) )
                # Inserts unpacked inputs into the list of inputs
                self.inputs[idx:idx+1] = ins
                  
            elif lctn == 'OUTPUTS':
                key = (pack, val)
                idx = self.outputs.index(key)
                if mat_lnk is not None:
                    nb_ins = len(self.inputs)
                    nb_outs = len(outs)
                    # Inserts rows into the matrix for packed outputs
                    shape = (nb_outs, nb_ins)
                    vals = np.zeros(shape, dtype=np.int64)
                    vals += mat_res[idx,:][np.newaxis,:]
                    mat_res = np.vstack( (mat_res[0:idx,:], vals, mat_res[idx+1:,:]) )
                # Inserts unpacked outputs into the list of outputs
                self.outputs[idx:idx+1] = outs
                
        return mat_res

    
    '''
    LIMITS
    '''
    def _check_limit_ok(self, mode):
        len_in = len(self.inputs)
        len_out = len(self.outputs)
        max_card = max(len_in, len_out)
        return max_card <= self.max_txos
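A hedged usage sketch, based only on the constructor and process() signatures shown above; the txo ids and amounts are made-up values.

inputs = [('in0', 10000), ('in1', 9000)]
outputs = [('out0', 12000), ('out1', 6800)]
linker = TxosLinker(inputs=inputs, outputs=outputs, fees=200)
mat_lnk, nb_cmbn, ins, outs = linker.process(
    options=[TxosLinker.LINKABILITY, TxosLinker.PRECHECK])
# mat_lnk[r, c] counts the combinations linking output r to input c,
# out of nb_cmbn valid combinations; ins/outs come back sorted by decreasing value.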
Example #15
    def scheduling_method(self, cur_time, es, es_dict):
        """
            This function must map the queued events to available nodes at the current time.

            :param cur_time: current time
            :param es_dict: dictionary with full data of the events
            :param es: events to be scheduled

            :return: a tuple of (time to schedule, event id, list of assigned nodes)
        """               
        resource_types = self.resource_manager.resource_types
        avl_resources = self.resource_manager.current_availability
                    
        #=======================================================================
        # Considered queued jobs: jobs that can be fitted in the current system state, up to q_length of them.
        # If a job cannot be fitted or exceeds q_length, it is directly added to the dispatching decision using the no-solution dispatching tuple.
        #=======================================================================
        priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])
              
        current_qjobs = SortedList() 

        #===================================================================
        # Here, if a non-dispatching previous state is set, the current system capacity is checked:
        # if it differs (more resources available than before), the dispatcher is called.
        # Otherwise, a non-dispatching decision is returned.
        #===================================================================
        
        # Dispatching Skip
        dispatch = True
        prev_qjobs = None
        
        # Dispatching skip
        if self.non_dispatched_state:

            dispatch = False
            (prev_qjobs, prev_total_resource_usage,) = self.non_dispatched_state
            
            new_jobs = False

            for e in es:                    
                if not(e.id in prev_qjobs):
                    new_jobs = True
                    self.non_dispatched_state = None
                    break        
            if not new_jobs:               
                cur_total_resource_usage = self.resource_manager._resources.usage('dict')
                
                zero_usage = []
                same_usage = []
                for res in resource_types:
                    zero_usage.append(cur_total_resource_usage[res] == 0)
                    same_usage.append(cur_total_resource_usage[res] >= prev_total_resource_usage[res])                    
                
                if all(zero_usage):
                    # The system is empty
                    self.non_dispatched_state = None
                elif all(same_usage):
                    # The system has the same or less capacity wrt the stuck state
                    return [self.dispatching_tuple(e.id) for e in es], []
                else:
                    # The system is not empty but has more capacity wrt the stuck state
                    self.non_dispatched_state = None
                            
        cons_qjobs = {}
        max_ewt = max([self.get_ewt(job.queue) for job in es] + [self.get_ewt(es_dict[job_id]) for job_id in self.resource_manager.current_allocations])
        for node in self.resource_manager.node_names:
            avl_res = avl_resources[node]
            for idx, job_obj in enumerate(es):
                job_id = job_obj.id
                
                if not(job_id in cons_qjobs):
                    current_qjobs.add(job_id)
                    cons_qjobs[job_id] = [False, 0, {}, None]
                    priorized_jobs.add((job_id, self._job_priority_ewt(job_obj, cur_time, max_ewt)))
                        
                possibilities = self._joint_nodes(job_obj, avl_res)
                if possibilities > 0:
                    cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                    cons_qjobs[job_id][1] += possibilities 
                    if cons_qjobs[job_id][1] >= job_obj.requested_nodes: 
                        cons_qjobs[job_id][0] = True
                        if not cons_qjobs[job_id][3]: 
                            cons_qjobs[job_id][3] = job_obj
                                                       
        qjobs = 0
        wc_makespan = 0
        makespans = []
        
        remaining_priorized_jobs = []
        
        # Job of the dispatching decision 
        decision_jobs = {}    
        
        for job_id, _ in priorized_jobs:
            t = cons_qjobs[job_id]
            if not t[0] or qjobs > self.cur_q_length - 1:
                decision_jobs[job_id] = self.dispatching_tuple(job_id)
                cons_qjobs.pop(job_id)
            else:
                exp_duration = max(1, t[-1].expected_duration)
                wc_makespan += exp_duration
                makespans.append(exp_duration)
                qjobs += 1
                remaining_priorized_jobs.append(job_id)
        #=======================================================================
        # There are no jobs to dispatch in the current system state,
        # so a no-solution list is returned.
        #=======================================================================
        if not cons_qjobs:
            
            # Job Dispatching skip
            cur_total_resource_usage = self.resource_manager._resources.usage('dict')
            self.non_dispatched_state = (current_qjobs, cur_total_resource_usage,)
            
            return decision_jobs.values(), []

        #=======================================================================
        # After an unsuccessful dispatching
        #=======================================================================
        if self.use_max_timelimit:
            timelimit = self.timelimit
        else: 
            timelimit = self.initial_timelimit
                
        a_jobs_list = []
        best_z_list = []
        solved = False
        
        self.priorized_jobs = None
        
        prev_sched = []
        while timelimit <= self.timelimit:
            schedalloc_plan = {}
            args = (schedalloc_plan, cur_time, cons_qjobs, remaining_priorized_jobs, es_dict, resource_types, avl_resources)
            kwargs = {'timelimit':timelimit, 'prev_sched':prev_sched}
            function = getattr(self, 'cp_model')
            function(*args, **kwargs)

            solver_state = schedalloc_plan.pop('solver_state')
            best_z = schedalloc_plan.pop('best_z')
            best_z_list.append(best_z)
            
            if solver_state == self.SolverState.PROBLEM_INFEASIBLE:
                break
            limit_reached = schedalloc_plan.pop('limit_reached')            
            
            disp_jobs = 0
            prev_sched = [] 
            for stime, job_id, _ in schedalloc_plan.values():
                if stime == cur_time:
                    prev_sched.append(job_id)
                    disp_jobs += 1
                
            if disp_jobs == len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
                solved = True
                break
            elif disp_jobs < len(cons_qjobs) and solver_state == self.SolverState.NO_MORE_SOLUTIONS.value and not limit_reached:
                solved = True
                break
            elif len(best_z_list) >= self.max_k and all([best_z_list[-1] == b for b in best_z_list[-self.max_k:]]):
                solved = True
                break
            else:
                a_jobs_list.append(disp_jobs)
                timelimit *= 2 
        
        self.priorized_jobs = None
        
        # This is useful for print and also to create the unsuccessful data
        dispatched_jobs = 0
        queued_job_ids = []
        for a in schedalloc_plan.values():
            if a[2]:
                dispatched_jobs += 1
            if dispatched_jobs == 0:
                queued_job_ids.append(a[1])

        if self.reduce_job_length:
            #===================================================================
            # The number of jobs considered in the next scheduling decision is halved
            # if the current problem instance was not solved and the current usage is
            # less than or equal to that of the previous time point. After a successful dispatching this value is reset.
            # The minimum is 1, otherwise there would be nothing to dispatch.
            #===================================================================
            if not solved:
                self.cur_q_length = max(1, self.cur_q_length // 2)
            else:
                self.cur_q_length = self.q_length
        if dispatched_jobs == 0:
            self.non_dispatched_state = (current_qjobs, self.resource_manager._resources.usage('dict'),)
        else:
            self.non_dispatched_state = None
                 
        return list(schedalloc_plan.values()) + list(decision_jobs.values()), []
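A hedged note on the priority container used above: in recent sortedcontainers releases SortedListWithKey is an alias of SortedKeyList, and the same behaviour is available through SortedList(key=...).

from sortedcontainers import SortedList

priorized_jobs = SortedList(key=lambda job_tuple: job_tuple[1])
priorized_jobs.add(('job_b', 3.0))
priorized_jobs.add(('job_a', 1.5))
print([job_id for job_id, _ in priorized_jobs])  # ['job_a', 'job_b'] (lowest priority value first)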
Example #16
    def scheduling_method(self, cur_time, es, es_dict):
        """
            This function must map the queued events to available nodes at the current time.

            :param cur_time: current time
            :param es_dict: dictionary with full data of the events
            :param es: events to be scheduled

            :return: a tuple of (time to schedule, event id, list of assigned nodes)
        """
        dispatching_plan = []

        resource_types = self.resource_manager.resource_types
        avl_resources = self.resource_manager.current_availability
        system_capacity = self.resource_manager.system_capacity('nodes')

        # =======================================================================
        # Considered queued jobs: jobs that can be fitted in the current system state, up to q_length of them.
        # If a job cannot be fitted or exceeds q_length, it is directly added to the dispatching decision using the no-solution dispatching tuple.
        # =======================================================================
        priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])

        current_qjobs = SortedList()

        cons_qjobs = {}
        for node in self.resource_manager.node_names:
            avl_res = avl_resources[node]
            # avl_res = system_capacity[node]
            for idx, job_obj in enumerate(es):
                job_id = job_obj.id

                if not (job_id in cons_qjobs):
                    current_qjobs.add(job_id)
                    cons_qjobs[job_id] = [False, 0, {}, None]
                    priorized_jobs.add((job_id, self._job_priority_slowdown(job_obj, cur_time)))
                if self._reduced_model:
                    possibilities = self._joint_nodes(job_obj, avl_res)
                    if possibilities > 0:
                        cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                        cons_qjobs[job_id][1] += possibilities
                        if cons_qjobs[job_id][1] >= job_obj.requested_nodes:
                            cons_qjobs[job_id][0] = True
                            if not cons_qjobs[job_id][3]:
                                cons_qjobs[job_id][3] = job_obj
                else:
                    cons_qjobs[job_id][0] = True
                    cons_qjobs[job_id][1] = None
                    cons_qjobs[job_id][2] = None
                    cons_qjobs[job_id][3] = job_obj

        qjobs = 0
        wc_makespan = 0
        makespans = []

        selected_priorized_jobs = []

        # Job of the dispatching decision
        decision_jobs = {}

        if self._reduced_model:
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if not t[0] or qjobs > self._cur_q_length - 1:
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    exp_duration = max(1, t[-1].expected_duration)
                    wc_makespan += exp_duration
                    makespans.append(exp_duration)
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        else:
            cannot_start_selected = 0
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if (not t[0] and cannot_start_selected >= self._considered_cannot_start) or (
                        qjobs > self._cur_q_length - 1):
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    if not t[0]:
                        cons_qjobs[job_id][3] = es_dict[job_id]
                        cannot_start_selected += 1
                    exp_duration = max(1, t[-1].expected_duration)
                    wc_makespan += exp_duration
                    makespans.append(exp_duration)
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        # =======================================================================
        # There are no jobs to dispatch in the current system state,
        # so a no-solution list is returned.
        # =======================================================================
        if not cons_qjobs:
            # Job Dispatching skip
            return decision_jobs.values(), []

        solved = False
        self.priorized_jobs = None

        if self._safe:
            manager = mp_dill.Manager()
            schedule_plan = manager.dict()
            process_class = mp_dill.Process

            p = process_class(target=getattr(self, 'cp_model'),
                              args=(
                                  schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types,
                                  avl_resources),
                              kwargs={'max_timelimit': self._max_timelimit}
                              )
            p.start()
            p.join()

            if p.exitcode != 0:
                schedule_plan.pop('solver_state', None)
                schedule_plan.pop('limit_reached', None)
                return list(decision_jobs.values()) \
                       + [self.dispatching_tuple(job_id, start_time, nodes) for (start_time, job_id, nodes) in
                          schedule_plan.values()] \
                       + [self.dispatching_tuple(job_id, None, []) for job_id in cons_qjobs if
                          not (job_id in schedule_plan)], []
        else:
            schedule_plan = {}
            args = (
                schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types, avl_resources)
            kwargs = {'max_timelimit': self._max_timelimit}
            function = getattr(self, 'cp_model')
            function(*args, **kwargs)

        solved = schedule_plan.pop('solved')
        of_value = schedule_plan.pop('of_value')
        walltime = schedule_plan.pop('walltime')
        proc_time = schedule_plan.pop('proc_time')
        incurred_time = walltime + proc_time
        failures = schedule_plan.pop('failures')
        branches = schedule_plan.pop('branches')
        p = None

        self.priorized_jobs = None
        dispatching_plan = list(schedule_plan.values())
        self.__instance_data = (
            solved, of_value, walltime, incurred_time, failures, branches,
            dispatching_plan + list(decision_jobs.values()),)

        # This is useful for print and also to create the unsuccessful data
        dispatched_jobs = 0
        queued_job_ids = []
        for a in dispatching_plan:
            if a[2]:
                dispatched_jobs += 1
            if dispatched_jobs == 0:
                queued_job_ids.append(a[1])

        if self._reduce_job_length:
            # ===================================================================
            # The number of jobs considered in the next scheduling decision is halved
            # if the current problem instance was not solved and the current usage is
            # less than or equal to that of the previous time point. After a successful dispatching this value is reset.
            # The minimum is 1, otherwise there would be nothing to dispatch.
            # ===================================================================
            if not solved:
                self._cur_q_length = max(1, min(self._cur_q_length,
                                                len(schedule_plan)) // 2)  # max(1, self._cur_q_length // 2)
            else:
                self._cur_q_length = self._q_length

        print('{} - {}: Queued {}, Dispatched {}, Running {}. {}'.format(self._counter, cur_time,
                                                                         len(es) - dispatched_jobs, dispatched_jobs,
                                                                         len(self.resource_manager.current_allocations),
                                                                         self.resource_manager.current_usage))
        return dispatching_plan + list(decision_jobs.values()), []
Example #17
import math
from sortedcontainers.sortedlist import SortedList
import automata as atma
from automata.AnmalZoo.anml_zoo import anml_path, AnmalZoo

fcb_size = 256  # size of the local switches; we assume the GS also has the same size
fcb_to_gs = 16  # number of wires from local switches to GS

bigest_component_size = fcb_size / fcb_to_gs * fcb_size

ds = [a for a in AnmalZoo]

for uat in ds:
    r = SortedList()
    big128, big256 = 0, 0

    automatas = atma.parse_anml_file(anml_path[uat])
    automatas.remove_ors()
    automatas = automatas.get_connected_components_as_automatas()

    for atm in automatas:
        nc = atm.nodes_count
        if nc >= 128:
            big128 += 1
        if nc >= 256:
            big256 += 1

        if nc > bigest_component_size:
            print "this NFA can not be fit:", uat
            break
Example #18
def MedCombiner2(intermediates):
    """
    The Running Medians Reducer
    merges the intermediate lists that are packed inside the outer list, intermediates, into one master flat list.

    :rtype:             list - the master list of the final results
    :param intermediates:   list of lists of the running medians of each input text file
    :return:                the final results
    """
    # master list of the final results
    linesWordCount = []

    # iterating over the sub lists for each input file to concatenate them into a master list
    # for v in intermediates:
    #     linesWordCount+=v
    # print(intermediates)
    resultDict = defaultdict(list)

    # The following loop iterates over the key/value pairs of each dictionary in intermediates in turn.
    # While iterating, a new dictionary, resultDict, collects all the pairs of the intermediate
    # dictionaries, thus effectively merging all of them.
    # i = 0
    for d in intermediates:
        # print(d)
        for k, v in dict(d).items():
            resultDict[k] = v
    # for k,l in chain(*intermediates):
    #     resultDict[k] = l

    # print("resultedDict ", resultDict)
    # The following loop iterates over the merged dictionary in key order and concatenates the
    # per-file wordcount lists into one master list.
    sortedKeys = sorted(resultDict)
    for k in sortedKeys:
        linesWordCount.extend(resultDict[k])
    # print("linesWordCount ", linesWordCount)

    medianNumbers= []

    # a sorted list to hold the word counts for input lines
    # the sorted list boosts performance substantially when computing the running median because it avoids
    # re-sorting the wordcount list every time an entry is added.
    sortedLinesWordCount = SortedList()
    lineNO = 0

    # running median calculations
    # because I used a sorted list for the wordcounts of lines, now it is straightforward to compute
    # the running median
    for wordcount in linesWordCount:

        sortedLinesWordCount.add(wordcount)
        # print(sortedLinesWordCount)
        index = int(lineNO/2)
        if lineNO%2 == 0:
            medianNumbers.append(float(sortedLinesWordCount[index]))
        else:
            medianNumbers.append(float((sortedLinesWordCount[index] + sortedLinesWordCount[index+1])/2))
        lineNO += 1
        # print(medianNumbers)
    # print(medianNumbers)
    return medianNumbers
Example #19
import gc
import os

import feather
import numpy as np
import pandas as pd
import ujson
from sortedcontainers.sortedlist import SortedList
from tqdm import tqdm

data_dir = '../data'
print('Reformats data from:', data_dir)
files = os.listdir(data_dir + '/json/')  # noqa
snap_files = SortedList([filename for filename in files if 'snaps' in filename],
                        key=lambda fn: pd.to_datetime(fn[:-11], format='%d_%m_%Y_%H_%M_%S'))

try:
    os.makedirs(data_dir + '/snap_json/')
except FileExistsError:
    pass
for snapfile in tqdm(snap_files):
    with open(data_dir + '/json/' + snapfile, 'r') as f:
        snaps = f.readlines()
        for snap in snaps:

            try:
                snap = ujson.loads(snap)
                try:
                    seq = snap['sequence']
                    with open(data_dir + '/snap_json/snap_' + str(seq) + '.json', 'w') as snapf:
                        ujson.dump(snap, snapf)