Example #1
    if os.path.isdir(BUFFER_FILE):
        shutil.rmtree(BUFFER_FILE)
    elif os.path.isfile(BUFFER_FILE):
        os.remove(BUFFER_FILE)

    if int(search_type) == 1:
        # BFS / DIR
        disk_buffer = FifoDiskQueue(BUFFER_FILE)
    else:
        # DFS / FILE
        disk_buffer = LifoDiskQueue(BUFFER_FILE)

    unvisited_links_in = Queue()
    unvisited_links_out = Queue()
    unparsed_html = JoinableQueue()
    buffer_lock = threading.Lock()

    first_link = dict()
    first_link['url'] = start_url
    first_link['parent_url'] = None
    first_link['level'] = 0
    # add first link to queue
    disk_buffer.push(pickle.dumps(first_link))

    # setups and start threads
    threads = list()
    for t in range(NUM_THREADS):
        s = Scraper(unparsed_html, disk_buffer, buffer_lock)
        s.daemon = True
        s.start()
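
This example pushes pickled link dicts into a FIFO disk queue for breadth-first crawling (or a LIFO queue for depth-first) and hands the queue to Scraper threads. The Scraper class itself is not shown; the following is only a hedged sketch of how such a worker might pop links under the shared lock.

# Minimal sketch (not the original Scraper): a worker thread that pops pickled
# link dicts from the shared disk buffer under buffer_lock before handling them.
import pickle
import threading
import time


class LinkWorker(threading.Thread):
    def __init__(self, disk_buffer, buffer_lock):
        super().__init__(daemon=True)
        self.disk_buffer = disk_buffer
        self.buffer_lock = buffer_lock

    def run(self):
        while True:
            with self.buffer_lock:        # queuelib disk queues are not thread-safe
                raw = self.disk_buffer.pop()
            if raw is None:               # buffer currently empty; back off briefly
                time.sleep(0.1)
                continue
            link = pickle.loads(raw)
            self.handle(link)             # fetch and parse link['url'] here

    def handle(self, link):
        pass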
Example #2
    def __init__(self,
                 config_in=None,
                 min_occur=10,
                 min_percent=5,
                 window=2,
                 threads=8,
                 period=24,
                 min_interval=2,
                 es_host='localhost',
                 es_port=9200,
                 es_timeout=480,
                 es_index='logstash-flow-*',
                 kibana_version='4',
                 verbose=True,
                 debug=False):
        """

        :param min_occur: Minimum number of triads to be considered beaconing
        :param min_percent: Minimum percentage of all connection attempts that
         must fall within the window to be considered beaconing
        :param window: Size of the window in seconds in which connections are grouped to determine the
         percentage. A large window size can give inaccurate interval times; since multiple windows
         contain all interesting packets, the first window to match is taken as the interval
        :param threads: Number of cores to use
        :param period: Number of hours to locate beacons for
        :param min_interval: Minimum interval between events to consider for beaconing behavior
        :param es_host: IP Address of elasticsearch host (default is localhost)
        :param es_timeout: Elasticsearch query timeout in seconds (default is 480)
        :param kibana_version: 4 or 5 (query will depend on version)
        """
        #self.config_in = config_in
        if config_in is not None:
            try:
                self.config = flareConfig(config_in)
                self.es_host = self.config.get('beacon', 'es_host')
                self.es_port = int(self.config.get('beacon', 'es_port'))
                self.es_index = self.config.get('beacon', 'es_index')
                self.MIN_OCCURRENCES = int(
                    self.config.get('beacon', 'min_occur'))
                self.MIN_PERCENT = int(self.config.get('beacon',
                                                       'min_percent'))
                self.WINDOW = int(self.config.get('beacon', 'window'))
                self.NUM_PROCESSES = int(self.config.get('beacon', 'threads'))
                self.period = int(self.config.get('beacon', 'period'))
                self.min_interval = int(
                    self.config.get('beacon', 'min_interval'))
                self.es_timeout = int(self.config.get('beacon', 'es_timeout'))
                self.kibana_version = self.config.get('beacon',
                                                      'kibana_version')
                self.beacon_src_ip = self.config.get('beacon',
                                                     'field_source_ip')
                self.beacon_dest_ip = self.config.get('beacon',
                                                      'field_destination_ip')
                self.beacon_destination_port = self.config.get(
                    'beacon', 'field_destination_port')
                self.beacon_timestamp = self.config.get(
                    'beacon', 'field_timestamp')
                self.beacon_flow_bytes_toserver = self.config.get(
                    'beacon', 'field_flow_bytes_toserver')
                self.beacon_flow_id = self.config.get('beacon',
                                                      'field_flow_id')
                self.beacon_event_type = self.config.get(
                    'beacon', 'event_type')
                self.verbose = self.config.config.getboolean(
                    'beacon', 'verbose')
                self.auth_user = self.config.config.get('beacon', 'username')
                self.auth_password = self.config.config.get(
                    'beacon', 'password')
                self.suricata_defaults = self.config.config.getboolean(
                    'beacon', 'suricata_defaults')
                try:
                    self.debug = self.config.config.getboolean(
                        'beacon', 'debug')
                except Exception:
                    # the 'debug' option is optional in the config file
                    pass

            except Exception as e:
                print((
                    '{red}[FAIL]{endc} Could not properly load your config!\nReason: {e}'
                    .format(red=bcolors.FAIL, endc=bcolors.ENDC, e=e)))
                sys.exit(1)

        else:
            self.es_host = es_host
            self.es_port = es_port
            self.es_index = es_index
            self.MIN_OCCURRENCES = min_occur
            self.MIN_PERCENT = min_percent
            self.WINDOW = window
            self.NUM_PROCESSES = threads
            self.period = period
            self.min_interval = min_interval
            self.kibana_version = kibana_version
            self.es_timeout = es_timeout
            self.beacon_src_ip = 'src_ip'
            self.beacon_dest_ip = 'dest_ip'
            self.beacon_destination_port = 'dest_port'
            self.beacon_timestamp = '@timestamp'
            self.beacon_flow_bytes_toserver = 'bytes_toserver'
            self.beacon_flow_id = 'flow_id'
            self.beacon_event_type = 'flow'
            self.verbose = verbose
            self.suricata_defaults = False
            # no config file: default to unauthenticated Elasticsearch access,
            # otherwise the http_auth lookup below raises AttributeError
            self.auth_user = None
            self.auth_password = None

        self.ver = {'4': {'filtered': 'query'}, '5': {'bool': 'must'}}
        self.filt = list(self.ver[self.kibana_version].keys())[0]
        self.query = list(self.ver[self.kibana_version].values())[0]
        self.debug = debug
        self.whois = WhoisLookup()
        self.info = '{info}[INFO]{endc}'.format(info=bcolors.OKBLUE,
                                                endc=bcolors.ENDC)
        self.success = '{green}[SUCCESS]{endc}'.format(green=bcolors.OKGREEN,
                                                       endc=bcolors.ENDC)
        self.fields = [
            self.beacon_src_ip, self.beacon_dest_ip,
            self.beacon_destination_port, 'bytes_toserver', 'dest_degree',
            'occurrences', 'percent', 'interval'
        ]

        try:
            self.vprint(
                '{info}[INFO]{endc} Attempting to connect to elasticsearch...'.
                format(info=bcolors.OKBLUE, endc=bcolors.ENDC))

            self.es = Elasticsearch(self.es_host,
                                    port=self.es_port,
                                    timeout=self.es_timeout,
                                    http_auth=(self.auth_user,
                                               self.auth_password),
                                    verify_certs=False)
            self.vprint(
                '{green}[SUCCESS]{endc} Connected to elasticsearch on {host}:{port}'
                .format(green=bcolors.OKGREEN,
                        endc=bcolors.ENDC,
                        host=self.es_host,
                        port=str(self.es_port)))
        except Exception as e:
            self.vprint(e)
            raise Exception(
                "Could not connect to ElasticSearch -- Please verify your settings are correct and try again."
            )

        self.q_job = JoinableQueue()
        self.l_df = Lock()
        self.l_list = Lock()
        self.high_freq = None
        self.flow_data = self.run_query()
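
The parameters documented above describe a clustering heuristic over connection inter-arrival times. The sketch below is my own illustration of that heuristic under stated assumptions; it is not the project's detection code.

# Hedged illustration: flag a src/dst pair as beaconing when it has at least
# min_occur connections and at least min_percent of them have inter-arrival
# times clustered inside a single window-second bin.
import numpy as np


def looks_like_beacon(timestamps, min_occur=10, min_percent=5, window=2, min_interval=2):
    if len(timestamps) < min_occur:
        return False
    deltas = np.diff(np.sort(np.asarray(timestamps, dtype=float)))
    deltas = deltas[deltas >= min_interval]        # ignore bursts below min_interval
    if deltas.size == 0:
        return False
    bins = np.arange(deltas.min(), deltas.max() + 2 * window, window)
    counts, _ = np.histogram(deltas, bins=bins)
    return counts.max() / len(timestamps) * 100 >= min_percent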
Example #3
from multiprocessing import JoinableQueue, Process, current_process


def produce(q: JoinableQueue, n: int):
    for i in range(n):
        q.put(f'{current_process().name}: {i}')


def consume(q: JoinableQueue):
    while True:
        i = q.get()
        print(f'{current_process().name} - {i}')
        q.task_done()


if __name__ == '__main__':
    n_producers = 2
    n_consumers = 1
    n_tasks = 100
    q = JoinableQueue()
    producers = [Process(target=produce, args=[q, n_tasks]) for _ in range(n_producers)]
    consumers = [Process(target=consume, args=[q]) for _ in range(n_consumers)]

    for p in producers:
        p.start()
    for p in consumers:
        p.start()
    for p in producers:
        p.join()
    q.join()
    for p in consumers:
        p.terminate()
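
The consumers above run forever and are terminated once q.join() returns. A common alternative, sketched here as a variation rather than a fix, is to shut them down with sentinels so they can be joined instead of terminated.

def consume_with_sentinel(q: JoinableQueue):
    # Variant consumer: exits cleanly when it receives a None sentinel,
    # so the main process can p.join() instead of p.terminate().
    while True:
        i = q.get()
        if i is None:
            q.task_done()
            break
        print(f'{current_process().name} - {i}')
        q.task_done()

# In the main block this would replace the terminate() calls:
#     for _ in consumers:
#         q.put(None)
#     q.join()
#     for p in consumers:
#         p.join()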
Example #4
    def runit(self, args):  # pylint:disable=too-many-locals
        """
        This is the entry point for run_ingest_threads.py
        """
        self.spec_file = args["spec_file"].strip()
        self.credentials_file = args["credentials_file"].strip()
        self.path = args["path"].strip()
        self.fmask = args["file_name_mask"].strip()
        self.thread_count = args["threads"]
        self.output_dir = args["output_dir"].strip()
        if "file_pattern" in args.keys():
            self.file_pattern = args["file_pattern"].strip()
        _args_keys = args.keys()
        if "number_stations" in _args_keys:
            self.number_stations = args["number_stations"]
        else:
            self.number_stations = sys.maxsize
        #
        #  Read the load_spec file
        #
        try:
            logging.debug("load_spec filename is %s", self.spec_file)
            load_spec_file = LoadYamlSpecFile({"spec_file": self.spec_file})
            # read in the load_spec file
            self.load_spec = dict(load_spec_file.read())
            # put the real credentials into the load_spec
            self.cb_credentials = self.get_credentials(self.load_spec)
            # stash the load_job
            self.load_spec["load_job_doc"] = self.build_load_job_doc()
            # get the ingest document id.
            # NOTE: in future we may make this (ingest_document_id) a list
            # and start each VxIngestManager with its own ingest_document_id
            self.ingest_document_id = self.load_spec["ingest_document_id"]
            # establish connections to cb, collection
            self.connect_cb()
        except (RuntimeError, TypeError, NameError, KeyError):
            logging.error(
                "*** Error occurred in Main reading load_spec %s: %s ***",
                self.spec_file,
                str(sys.exc_info()),
            )
            sys.exit("*** Error reading load_spec: " + self.spec_file)

        self.ingest_document = self.collection.get(self.ingest_document_id).content
        # load the my_queue with filenames that match the mask and have not already been ingested
        # (do not have associated datafile documents)
        # Constructor for an infinite size  FIFO my_queue
        _q = JoinableQueue()
        model = self.ingest_document["model"]
        # get the urls (full_file_names) from all the datafiles for this type of ingest
        file_query = """
                SELECT url, mtime
                FROM mdata
                WHERE
                subset='metar'
                AND type='DF'
                AND fileType='grib2'
                AND originType='model'
                AND model='{model}' order by url;
                """.format(model=model)
        file_names = self.get_file_list(file_query, self.path, self.file_pattern)
        for _f in file_names:
            _q.put(_f)

        # instantiate ingest_manager pool - each ingest_manager is a process
        # thread that uses builders to process one file at a time from the queue
        # Make the Pool of ingest_managers
        ingest_manager_list = []
        for thread_count in range(int(self.thread_count)):
            # noinspection PyBroadException
            try:
                self.load_spec["fmask"] = self.fmask
                ingest_manager_thread = VxIngestManager(
                    "VxIngestManager-" + str(thread_count),
                    self.load_spec,
                    self.ingest_document,
                    _q,
                    self.output_dir,
                    number_stations=self.number_stations,
                )
                ingest_manager_list.append(ingest_manager_thread)
                ingest_manager_thread.start()
            except Exception as _e:  # pylint:disable=broad-except
                logging.error("*** Error in  VxIngestManager %s***", str(_e))
        # be sure to join all the threads to wait on them
        finished = [proc.join() for proc in ingest_manager_list]
        self.write_load_job_to_files()
        logging.info("finished starting threads")
        load_time_end = time.perf_counter()
        load_time = timedelta(seconds=load_time_end - self.load_time_start)
        logging.info(" finished %s", str(finished))
        logging.info("    >>> Total load a_time: %s", str(load_time))
        logging.info("End a_time: %s", str(datetime.now()))
        logging.info("--- *** --- End  --- *** ---")
Example #5
    if not is_mp or cli.returncode:
        sys.exit(cli.returncode)

    command = cli.commandline.command
    results = command.results
    if not results:
        err("parallel command did not produce any results\n")
        sys.exit(1)

    from multiprocessing import (
        cpu_count,
        Process,
        JoinableQueue,
    )

    args_queue = JoinableQueue(len(results))

    for args in results:
        args_queue.put(args[2:])

    # Grab the program_name and module_names from the first result args.
    (_, kwds) = extract_command_args_and_kwds(*results[0])
    kwds['args_queue'] = args_queue

    nprocs = cpu_count()
    if parallelism_hint:
        if parallelism_hint > nprocs:
            fmt = "warning: parallelism hint exceeds ncpus (%d vs %d)\n"
            msg = fmt % (parallelism_hint, nprocs)
            sys.stderr.write(msg)
        nprocs = parallelism_hint
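
The excerpt stops after sizing nprocs; the worker spawn itself is not shown. A hedged sketch of a worker that drains args_queue follows, where the handle callable is an assumption standing in for the real command invocation.

from queue import Empty


def drain_args(args_queue, handle):
    # Pull one argument tuple at a time until the queue is empty, run it,
    # and acknowledge it so args_queue.join() can return in the parent.
    while True:
        try:
            args = args_queue.get_nowait()
        except Empty:
            break
        try:
            handle(*args)
        finally:
            args_queue.task_done()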
Example #6
def main():
    parser = argparse.ArgumentParser(description="Parse different kind of ped files.")
    
    parser.add_argument('family_file', 
        type=str, nargs=1,
        help='A pedigree file. Default is cmms format.'
    )
    parser.add_argument('variant_file', 
        type=str, nargs=1, 
        help='A variant file. Default is vcf format.'
    )
    parser.add_argument('-o', '--outfile', 
        type=str, nargs=1, default=[None], 
        help='Specify the path to output, if no file specified the output will be printed to screen.'
    )
    parser.add_argument('--version', 
        action="version", version=pkg_resources.require("Mip_Family_Analysis")[0].version
    )
    parser.add_argument('-v', '--verbose', 
        action="store_true", 
        help='Increase output verbosity.'
    )
    parser.add_argument('-cmms', '--cmms', 
        action="store_true", 
        help='If run with cmms specific structure.'
    )
    parser.add_argument('-s', '--silent', 
        action="store_true", 
        help='Do not print the variants.'
    )
    parser.add_argument('-pos', '--position', 
        action="store_true", 
        help='If output should be sorted by position. Default is sorted on rank score'
    )
    parser.add_argument('-tres', '--treshold', 
        type=int, nargs=1, 
        help='Specify the lowest rank score to be outputted.'
    )
    
    args = parser.parse_args()
    
    var_file = args.variant_file[0]
    file_name, file_extension = os.path.splitext(var_file)
    
    # Print program version to std err:
    
    sys.stderr.write('Version: %s \n' % str(pkg_resources.require("Mip_Family_Analysis")[0].version))
        
    start_time_analysis = datetime.now()
    
    # Start by parsing at the pedigree file:
    my_family = get_family(args)
    preferred_models = my_family.models_of_inheritance
    
    # Take care of the headers from the variant file:
    head = get_header(var_file)
    
    check_individuals(my_family, head, args)
    
    add_cmms_metadata(head)
    
    # The variant queue is just a queue with splitted variant lines:
    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()
    # Create a temporary file for the variants:
    temp_file = NamedTemporaryFile(delete=False)
    
    if args.verbose:
        print(('Temp files: %s' % temp_file.name))
    
    num_model_checkers = (cpu_count()*2-1)

    if args.verbose:
        print(('Number of cpus: %s' % str(cpu_count())))
    
    model_checkers = [variant_consumer.VariantConsumer(variant_queue, results, my_family, args.verbose) 
                        for i in range(num_model_checkers)]
    
    for w in model_checkers:
        w.start()
    
    var_printer = variant_printer.VariantPrinter(results, temp_file, head, args.verbose)
    var_printer.start()
    
        
    var_parser = variant_parser.VariantFileParser(var_file, variant_queue, head, args.verbose)
    var_parser.parse()            
    
    for i in range(num_model_checkers):
        variant_queue.put(None)
    
    variant_queue.join()
    results.put(None)
    var_printer.join()
    
    if args.verbose:
        print('Models checked!')
        print('Start sorting the variants: \n')
        start_time_variant_sorting = datetime.now()
    
    print_headers(args, head)
    
    var_sorter = variant_sorter.FileSort(temp_file, outFile=args.outfile[0], silent=args.silent)
    var_sorter.sort()
    
    os.remove(temp_file.name)
    
    if args.verbose:
        print(('Variants sorted! Time to sort variants: %s \n' % str(datetime.now() - start_time_variant_sorting)))
        print(('Total time for analysis: %s' % str(datetime.now() - start_time_analysis)))
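
main() relies on each VariantConsumer pulling batches from the bounded queue and exiting on a None sentinel. The real class is not shown in this excerpt; below is a hedged sketch of that consumer shape.

from multiprocessing import Process


class VariantConsumerSketch(Process):
    def __init__(self, task_queue, results_queue):
        super().__init__()
        self.task_queue = task_queue
        self.results_queue = results_queue

    def run(self):
        while True:
            variant_batch = self.task_queue.get()
            if variant_batch is None:          # sentinel: no more variants
                self.task_queue.task_done()
                break
            result = self.check_models(variant_batch)
            self.results_queue.put(result)
            self.task_queue.task_done()

    def check_models(self, variant_batch):
        return variant_batch                   # placeholder for inheritance-model checks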
Example #7
#validationPredictors = [moveDown_clf_40, moveUp_clf_40, pullUp_clf_40, swingAndPointForward_clf_40, rotateLeft_clf_40, rotateRight_clf_40, stillGesture_clf_40]

#class_names of the gestures
#these must be in the same order as the predictors
#class_names = ['movPosX', 'movPosY', 'movPosZ', 'movNegX', 'movNegY', 'movNegZ', 'stillGesture']

class_names = [
    'start', 'startR', 'land', 'landR', 'wp_back', 'wp_backR', 'wp_del',
    'wp_next', 'wp_nextR', 'wp_set', 'stillGesture'
]

#################################################################################
# create communication queues
#

wrapper_to_predictor_queue = JoinableQueue()
predictor_to_application_queue = JoinableQueue()
direct_control_queue = JoinableQueue()

mode_switch_flag = False

#################################################################################
# BLE data streaming callback
#
# Handles a dataframe every 20ms
# There are different dataframes that can approach
#
# Application Mode 3D (mode == 1)
#
# -----------------------------------------------------------------------------------------------------
# |  0  |  1  | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |   18   |  19  |
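
The three JoinableQueues above decouple the BLE data wrapper, the gesture predictor, and the application. A hedged sketch of how frames might flow through them follows; classify() is a hypothetical stand-in for the gesture predictors, not code from this project.

def on_ble_frame(frame_bytes):
    # Called roughly every 20 ms: hand the raw dataframe to the predictor stage.
    wrapper_to_predictor_queue.put(frame_bytes)


def predictor_loop(classify):
    # Runs in its own process: classify frames and forward the label downstream.
    while True:
        frame = wrapper_to_predictor_queue.get()
        label = classify(frame)               # hypothetical classifier returning one of class_names
        predictor_to_application_queue.put(label)
        wrapper_to_predictor_queue.task_done()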
Example #8
def local_search(
    local_search_starting_points,
    local_search_random_points,
    param_space,
    fast_addressing_of_data_array,
    enable_feasible_predictor,
    optimization_function,
    optimization_function_parameters,
    scalarization_key,
    number_of_cpus,
    previous_points=None,
    profiling=None,
    noise=False,
):
    """
    Optimize the acquisition function using a mix of random and local search.
    This algorithm random samples N points and then does a local search on the
    best points from the random search and the best points from previous iterations (if any).
    :param local_search_starting_points: an integer for the number of starting points for the local search. If 0, all points will be used.
    :param local_search_random_points: number of random points to sample before the local search.
    :param param_space: a space object containing the search space.
    :param fast_addressing_of_data_array: A list containing the points that were already explored.
    :param enable_feasible_predictor: whether to use constrained optimization.
    :param optimization_function: the function that will be optimized by the local search.
    :param optimization_function_parameters: a dictionary containing the parameters that will be passed to the optimization function.
    :param scalarization_key: the name given to the scalarized values.
    :param previous_points: previous points that have already been evaluated.
    :return: all points evaluated and the best point found by the local search.
    """
    if number_of_cpus == 0:
        number_of_cpus = cpu_count()
    t0 = datetime.datetime.now()
    tmp_fast_addressing_of_data_array = copy.deepcopy(
        fast_addressing_of_data_array)
    input_params = param_space.get_input_parameters()
    feasible_parameter = param_space.get_feasible_parameter()[0]
    data_array = {}
    end_of_search = False
    # percentage of oversampling for the local search starting points
    oversampling_factor = 2

    default_configuration = param_space.get_default_or_random_configuration()
    str_data = param_space.get_unique_hash_string_from_values(
        default_configuration)
    if str_data not in fast_addressing_of_data_array:
        tmp_fast_addressing_of_data_array[str_data] = 1
    if param_space.get_space_size() < local_search_random_points:
        all_configurations = dict_of_lists_to_list_of_dicts(
            param_space.get_space())
        half_of_points = int(len(all_configurations) / 2)
        uniform_configurations = all_configurations[0:half_of_points]
        prior_configurations = all_configurations[half_of_points::]
    else:
        uniform_configurations = param_space.get_random_configuration(
            size=local_search_random_points,
            use_priors=False,
            return_as_array=True)
        prior_configurations = param_space.get_random_configuration(
            size=local_search_random_points,
            use_priors=True,
            return_as_array=True)

        uniform_configurations = array_to_list_of_dicts(
            uniform_configurations, param_space.get_input_parameters())
        prior_configurations = array_to_list_of_dicts(
            prior_configurations, param_space.get_input_parameters())

    sampling_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(("Total RS time %10.4f sec\n" %
                                 ((sampling_time - t0).total_seconds())))

    # check that the number of configurations is not less than the number of CPUs
    min_number_of_configs = min(len(uniform_configurations),
                                len(prior_configurations))
    if min_number_of_configs < number_of_cpus:
        number_of_cpus = min_number_of_configs

    # to avoid having partitions with no samples, it's necessary to compute a floor for the number of partitions for small sample spaces
    # alternatively, an arbitrary number of samples could be set for the number of points where we do not have to partition (since it will be quick anyway)
    min_number_per_partition = min_number_of_configs / number_of_cpus
    partitions_per_cpu = min(10, int(min_number_per_partition))
    if number_of_cpus == 1:
        function_values_uniform, feasibility_indicators_uniform = optimization_function(
            configurations=uniform_configurations,
            **optimization_function_parameters)
        function_values_prior, feasibility_indicators_prior = optimization_function(
            configurations=prior_configurations,
            **optimization_function_parameters)
    else:
        # the number of splits of the list of input points that each process is expected to handle

        uniform_partition_fraction = len(uniform_configurations) / (
            partitions_per_cpu * number_of_cpus)
        prior_partition_fraction = len(prior_configurations) / (
            partitions_per_cpu * number_of_cpus)
        uniform_partition = [
            uniform_configurations[int(i * uniform_partition_fraction):int(
                (i + 1) * uniform_partition_fraction)]
            for i in range(partitions_per_cpu * number_of_cpus)
        ]
        prior_partition = [
            prior_configurations[int(i * prior_partition_fraction):int(
                (i + 1) * prior_partition_fraction)]
            for i in range(partitions_per_cpu * number_of_cpus)
        ]

        # Define a process queue and the processes, each containing half uniform and half prior partitions
        # as arguments to the nested function along with the queue
        input_queue = JoinableQueue()
        for i in range(number_of_cpus * partitions_per_cpu):
            combined_partition = uniform_partition[i] + prior_partition[i]
            input_queue.put({
                "partition": combined_partition,
                "split_index": len(uniform_partition[i]),
                "conf_index": i,
            })
        output_queue = Queue()

        processes = [
            Process(
                target=parallel_optimization_function,
                args=(
                    optimization_function_parameters,
                    input_queue,
                    output_queue,
                    i,
                    optimization_function,
                ),
            ) for i in range(number_of_cpus)
        ]

        function_values_uniform, feasibility_indicators_uniform = [
            {}
        ] * len(uniform_configurations), [{}] * len(uniform_configurations)
        function_values_prior, feasibility_indicators_prior = [
            {}
        ] * len(prior_configurations), [{}] * len(prior_configurations)

        # starting the processes and ensuring there's nothing more to process - joining the input queue when it's empty
        with threadpool_limits(limits=1):

            for process in processes:
                process.start()
                input_queue.put(None)
            input_queue.join()

        # the index on which to split the output
        for i in range(number_of_cpus * partitions_per_cpu):

            # would like this queue call to be non-blocking, but that does not work since the processes would need to be closed (?) for that to reliably work
            result = output_queue.get()
            scalarized_values, feasibility_indicators, split_index, conf_index = (
                result["scalarized_values"],
                result["feasibility_indicators"],
                result["split_index"],
                result["conf_index"],
            )
            # since half of the result is uniform and half is prior, it needs splitting in the middle of the resulting arrays
            function_values_uniform[
                int(conf_index * uniform_partition_fraction
                    ):int(conf_index * uniform_partition_fraction) +
                split_index] = scalarized_values[0:split_index]
            feasibility_indicators_uniform[
                int(conf_index * uniform_partition_fraction
                    ):int(conf_index * uniform_partition_fraction) +
                split_index] = feasibility_indicators[0:split_index]
            function_values_prior[int(conf_index * prior_partition_fraction
                                      ):int(conf_index *
                                            prior_partition_fraction) +
                                  (len(scalarized_values) - split_index
                                   )] = scalarized_values[split_index::]
            feasibility_indicators_prior[
                int(conf_index *
                    prior_partition_fraction):int(conf_index *
                                                  prior_partition_fraction) +
                (len(scalarized_values) -
                 split_index)] = feasibility_indicators[split_index::]

        # Safeguard so ensure the processes actually stop - ensures no process waits for more input and quits the MSLS function

        input_queue.close()
        output_queue.close()
        for i in range(len(processes)):
            processes[i].join()

    acquisition_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(
        ("Optimization function time %10.4f sec\n" %
         (acquisition_time - sampling_time).total_seconds()))

    # This will concatenate the entire neighbors array if all configurations were evaluated
    # but only the evaluated configurations if we reached the budget and did not evaluate all
    function_values_uniform_size = len(function_values_uniform)
    new_data_array_uniform = concatenate_list_of_dictionaries(
        uniform_configurations[:function_values_uniform_size])
    new_data_array_uniform[scalarization_key] = function_values_uniform

    function_values_prior_size = len(function_values_prior)
    new_data_array_prior = concatenate_list_of_dictionaries(
        prior_configurations[:function_values_prior_size])
    new_data_array_prior[scalarization_key] = function_values_prior

    if enable_feasible_predictor:
        new_data_array_uniform[
            feasible_parameter] = feasibility_indicators_uniform
        new_data_array_prior[feasible_parameter] = feasibility_indicators_prior

    new_data_array = concatenate_data_dictionaries(new_data_array_uniform,
                                                   new_data_array_prior)
    data_array = concatenate_data_dictionaries(data_array, new_data_array)

    # If some configurations were not evaluated, we reached the budget and must stop
    if (function_values_uniform_size < len(uniform_configurations)) or (
            function_values_prior_size < len(prior_configurations)):
        sys.stdout.write_to_logfile(
            "Out of budget, not all configurations were evaluated, stopping local search\n"
        )
        end_of_search = True

    best_nbr_of_points = local_search_starting_points * oversampling_factor
    if enable_feasible_predictor:
        local_search_configurations_uniform = get_min_feasible_configurations(
            new_data_array_uniform,
            best_nbr_of_points,
            scalarization_key,
            feasible_parameter,
        )
        local_search_configurations_prior = get_min_feasible_configurations(
            new_data_array_prior,
            best_nbr_of_points,
            scalarization_key,
            feasible_parameter,
        )
    else:
        local_search_configurations_uniform = get_min_configurations(
            new_data_array_uniform, best_nbr_of_points, scalarization_key)
        local_search_configurations_prior = get_min_configurations(
            new_data_array_prior, best_nbr_of_points, scalarization_key)

    local_search_configurations = concatenate_data_dictionaries(
        local_search_configurations_uniform, local_search_configurations_prior)

    if previous_points is not None:
        concatenation_keys = input_params + [scalarization_key]
        if enable_feasible_predictor:
            concatenation_keys += [feasible_parameter]
            best_previous = get_min_feasible_configurations(
                previous_points,
                local_search_starting_points,
                scalarization_key,
                feasible_parameter,
            )
        else:
            best_previous = get_min_configurations(
                previous_points, local_search_starting_points,
                scalarization_key)

        local_search_configurations = concatenate_data_dictionaries(
            local_search_configurations, best_previous, concatenation_keys)
        data_array = concatenate_data_dictionaries(data_array, previous_points,
                                                   concatenation_keys)

    local_search_points_numpy, col_of_keys = dict_of_lists_to_numpy(
        local_search_configurations, return_col_of_key=True)
    uniform_points = local_search_points_numpy[0:best_nbr_of_points]
    prior_points = local_search_points_numpy[
        best_nbr_of_points:best_nbr_of_points * 2]
    best_previous_points = local_search_points_numpy[best_nbr_of_points * 2::]

    (
        best_previous_points,
        prior_points,
        uniform_points,
    ) = param_space.remove_duplicate_configs(
        best_previous_points,
        prior_points,
        uniform_points,
        ignore_columns=col_of_keys["scalarization"],
    )
    combined_unique_points = np.concatenate(
        (
            uniform_points[0:local_search_starting_points],
            prior_points[0:local_search_starting_points],
            best_previous_points[0:local_search_starting_points],
        ),
        axis=0,
    )
    local_search_configurations = {
        key: combined_unique_points[:, column].tolist()
        for key, column in col_of_keys.items()
    }

    data_collection_time = datetime.datetime.now()
    number_of_configurations = len(local_search_configurations[list(
        local_search_configurations.keys())[0]])
    sys.stdout.write_to_logfile("Starting local search iteration: " +
                                ", #configs:" + str(number_of_configurations) +
                                "\n")
    input_queue = JoinableQueue()
    output_queue = Queue()
    # puts each configuration in a queue to be evaluated in parallel
    for idx in range(number_of_configurations):
        input_queue.put({
            "config":
            get_single_configuration(local_search_configurations, idx),
            "idx":
            idx,
        })
        sys.stdout.write_to_logfile((f"{idx}, \n"))

    for i in range(number_of_cpus):
        input_queue.put(None)

    if number_of_cpus == 1:
        parallel_multistart_local_search(
            input_queue,
            output_queue,
            input_params,
            param_space,
            optimization_function_parameters,
            optimization_function,
            enable_feasible_predictor,
            scalarization_key,
            0,
        )
        input_queue.join()

    else:
        processes = [
            Process(
                target=parallel_multistart_local_search,
                args=(
                    input_queue,
                    output_queue,
                    input_params,
                    param_space,
                    optimization_function_parameters,
                    optimization_function,
                    enable_feasible_predictor,
                    scalarization_key,
                    i,
                ),
            ) for i in range(number_of_cpus)
        ]

        with threadpool_limits(limits=1):
            for process in processes:
                process.start()
            input_queue.join()

    result_array = {}
    for i in range(number_of_configurations):
        result = output_queue.get()
        sys.stdout.write_to_logfile(result["logstring"], msg_is_verbose=True)
        result_array = concatenate_data_dictionaries(result_array,
                                                     result["data_array"])
    data_array = concatenate_data_dictionaries(result_array, data_array)

    input_queue.close()
    output_queue.close()

    if number_of_cpus != 1:
        for i in range(len(processes)):
            processes[i].join()

    local_search_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(
        ("Multi-start LS time %10.4f sec\n" %
         (local_search_time - acquisition_time).total_seconds()))
    # Compute best configuration found in the local search
    best_configuration = {}
    tmp_data_array = copy.deepcopy(data_array)
    best_configuration_idx = np.argmin(tmp_data_array[scalarization_key])
    for param in input_params:
        best_configuration[param] = tmp_data_array[param][
            best_configuration_idx]
    configuration_string = param_space.get_unique_hash_string_from_values(
        best_configuration)
    # If the best configuration has already been evaluated before, remove it and get the next best configuration
    while configuration_string in fast_addressing_of_data_array:
        for key in tmp_data_array:
            del tmp_data_array[key][best_configuration_idx]
        best_configuration_idx = np.argmin(tmp_data_array[scalarization_key])
        for param in input_params:
            best_configuration[param] = tmp_data_array[param][
                best_configuration_idx]
        configuration_string = param_space.get_unique_hash_string_from_values(
            best_configuration)

    post_MSLS_time = datetime.datetime.now()

    sys.stdout.write_to_logfile(
        ("MSLS time %10.4f sec\n" %
         (post_MSLS_time - acquisition_time).total_seconds()))
    if profiling is not None:
        profiling.add("(LS) Random sampling time",
                      (sampling_time - t0).total_seconds())
        profiling.add(
            "(LS) Acquisition evaluation time",
            (acquisition_time - sampling_time).total_seconds(),
        )
        profiling.add(
            "(LS) Data collection time",
            (data_collection_time - acquisition_time).total_seconds(),
        )
        profiling.add(
            "(LS) Multi-start LS time",
            (local_search_time - data_collection_time).total_seconds(),
        )
        profiling.add(
            "(LS) Post-MSLS data treatment time",
            (post_MSLS_time - local_search_time).total_seconds(),
        )

    return data_array, best_configuration
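
local_search drives its workers through a JoinableQueue of work items terminated by None sentinels and collects results on a plain Queue. The worker bodies (parallel_optimization_function, parallel_multistart_local_search) are not shown here; the sketch below only illustrates the queue protocol they have to follow, with evaluate() as an assumed callback.

def worker_protocol_sketch(input_queue, output_queue, evaluate):
    # Read items until the None sentinel, emit one result per item, and call
    # task_done() for every get() so input_queue.join() can return.
    while True:
        item = input_queue.get()
        if item is None:
            input_queue.task_done()
            break
        result = evaluate(item["config"])
        output_queue.put({"idx": item["idx"], "result": result})
        input_queue.task_done()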
Example #9
    return counts


def get_features(queues):
    # Python 3 removed tuple parameters in function signatures, so unpack the
    # (q_in, q_out) pair passed as a single Pool initargs element here.
    q_in, q_out = queues
    while True:
        (name, text) = q_in.get()

        lines = ''.join(text).split('\r\n')

        counts = count_bytes(lines)

        q_out.put([name, counts])
        q_in.task_done()


q = JoinableQueue(20)
q_feats = Queue()

pool = Pool(6, get_features, ((q, q_feats), ))

with libarchive.public.file_reader(TRAIN_PATH) as archive:
    for entry in archive:

        # Use only .bytes
        if (entry.pathname.find('.bytes') != -1):
            text = []
            for b in entry.get_blocks():
                text.append(b)

            q.put((entry.pathname, text), True)
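
The loop above only feeds the queue; nothing in the excerpt waits for the workers or reads their output. A hedged follow-up sketch under that assumption:

# Wait until every queued file has been processed, then drain the results.
q.join()
features = {}
while not q_feats.empty():          # empty() is only approximate on multiprocessing queues
    name, counts = q_feats.get()
    features[name] = counts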
Example #10
    def get_data_for_df(
        start_date,
        end_date,
        period,
        test,
        log_level,
        cpu_cnt_manual,
        path_data_output="",
    ):
        """
        This function retrieves required data in a multiprocessed fashion.
        """
        def date_range(
            start_date,
            end_date,
            period,
        ):
            """
            for further explanation, see
            https://stackoverflow.com/questions/1060279/
            iterating-through-a-range-of-dates-in-python
            """
            yield_date_start = start_date
            yield_date_end = start_date
            yield_date_period = 0

            date_range = int((end_date - start_date).days / period + 1)
            for n in range(date_range):
                #print ("n =", n)
                if n > 0:
                    yield_date_start = yield_date_end + timedelta(1)

                yield_date_end = start_date + timedelta((n + 1) * period)
                if yield_date_end > end_date:
                    yield_date_end = end_date

                yield_date_period = (yield_date_end -
                                     yield_date_start).days + 1

                yield yield_date_start, yield_date_end, yield_date_period

        def concat_test(
            test,
            path_data_output,
        ):
            """
            This function provides means to test the correct functioning of the
            concatenation of the subprocess results.
            """
            def get_data_for_df_test(
                start_date,
                end_date,
                period,
                test,
            ):
                """
                """
                process_id = 0
                process_cnt = 0
                velo_instances = []
                velo_instances_ret = []
                queue = JoinableQueue()
                date_format = "%m/%d/%Y"
                for i in range(3):
                    end_date_o = datetime.strptime(
                        end_date[i],
                        date_format,
                    ).date()
                    start_date_o = datetime.strptime(
                        start_date[i],
                        date_format,
                    ).date()

                    process_name = "process_{:03d}".format(process_id)
                    velo_inst = Velo(
                        process_id=process_id,
                        process_name=process_name,
                        queue=queue,
                        date_id=i,
                    )

                    process = Process(target=velo_inst.run)

                    process_id += 1
                    process_cnt += 1
                    velo_instances.append(velo_inst)
                    Multiprocess.processes.append(process)

                for i in range(process_cnt):
                    Multiprocess.processes[i].start()

                for i in range(process_cnt):
                    msg_process_id = None
                    msg_from_queue = ""
                    while True:
                        msg_from_queue = queue.get()
                        msg_process_id = msg_from_queue[0]
                        velo_instances_ret.append(msg_from_queue[1])
                        queue.task_done()
                        break

                    Multiprocess.processes[msg_process_id].join()

                return velo_instances_ret

            def ds_cmp(
                test_number,
                test_name,
                ds_a_a,
                ds_a_b,
                ds_b,
                path_data_output,
            ):
                """
                Takes two data structures ds_a_a and ds_a_b, merges them into ds_a and
                compares ds_a with a third data structure ds_b by comparing their
                md5 hashes.
                """
                def ds_to_list(ds):
                    """
                    This function transforms a given data structure to a list.
                    """
                    l_ds = None
                    if isinstance(ds, list):
                        l_ds = ds
                    elif isinstance(ds, np.ndarray):
                        Multiprocess.logger.debug(
                            "test #[{}]:  ds_a_a type = {}".format(
                                test_number, str(type(ds_a_a))))
                        l_ds = ds.tolist()
                    else:
                        l_ds = ds.tolist()

                    return l_ds

                def ds_export(
                    ds,
                    version,
                    test_number,
                    path_data_output,
                ):
                    """
                    This function can be applied in order to export a given data
                    structure to a textfile.
                    """
                    path_data_output_l = "{}_ds/ds_{}_{}.txt".format(
                        path_data_output,
                        test_number,
                        version,
                    )
                    with open(path_data_output_l, 'w') as text_file:
                        text_file.write("{}".format(ds))

                    return

                def ds_hash_cmp(
                    s_ds_a,
                    s_ds_b,
                ):
                    """
                    This function computes the md5 hashes of two data structures
                    passed in as strings and compares them.
                    """
                    hash_a = hashlib.md5()
                    hash_b = hashlib.md5()

                    hash_a.update(str.encode(s_ds_a))
                    hash_b.update(str.encode(s_ds_b))

                    s_hash_a = hash_a.hexdigest()
                    s_hash_b = hash_b.hexdigest()

                    if s_hash_a == s_hash_b:
                        return True

                    return False

                ds_a = None

                #derive ds_a from a_a and a_b
                ds_a_a_bak = ds_a_a

                if type(ds_a_a) == list:
                    ds_a = ds_a_a + ds_a_b

                elif type(ds_a_a) == DatetimeIndex:
                    ds_a = ds_a_a_bak.append(ds_a_b)

                elif type(ds_a_a) == str:
                    ds_a = "{}, {}".format(ds_a_a[0:-1], ds_a_b[1:])

                else:
                    ds_a = np.concatenate([ds_a_a, ds_a_b])

                s_ds_a_a = None
                s_ds_a_b = None
                s_ds_a = None
                s_ds_b = None

                #transform to list
                if type(ds_a_a) != str:
                    s_ds_a_a = str(ds_to_list(ds_a_a))
                    s_ds_a_b = str(ds_to_list(ds_a_b))
                    s_ds_a = str(ds_to_list(ds_a))
                    s_ds_b = str(ds_to_list(ds_b))
                else:
                    s_ds_a_a = ds_a_a
                    s_ds_a_b = ds_a_b
                    s_ds_a = ds_a
                    s_ds_b = ds_b

                #put each tx in newline
                s_ds_a_a = s_ds_a_a.replace("), Tx(", "),\nTx(")
                s_ds_a_b = s_ds_a_b.replace("), Tx(", "),\nTx(")
                s_ds_a = s_ds_a.replace("), Tx(", "),\nTx(")
                s_ds_b = s_ds_b.replace("), Tx(", "),\nTx(")

                #export to file
                ds_export(s_ds_a_a, "_a_a", test_number, path_data_output)
                ds_export(s_ds_a_b, "_a_b", test_number, path_data_output)
                ds_export(s_ds_a, "_a", test_number, path_data_output)
                ds_export(s_ds_b, "_b", test_number, path_data_output)

                #hashing
                hash_equal = None
                if ds_hash_cmp(s_ds_a, s_ds_b) == True:
                    hash_equal = "{}Y{}".format(cs.PRGnBH, cs.WHI)
                else:
                    hash_equal = "{}N{}".format(cs.REE, cs.WHI)

                ret_str_pre = "{}[{}testing cat #{}{}]".format(
                    cs.WHI,
                    cs.PRGnBH,
                    test_number,
                    cs.WHI,
                )
                ret_str = "{}  {}same: [{}] -- {}{}".format(
                    ret_str_pre,
                    cs.WHI,
                    hash_equal,
                    test_name,
                    cs.RES,
                )

                Multiprocess.logger.debug(ret_str)
                return

            Multiprocess.logger.info("Starting mode: Test[Concatenation]")
            #start_date_a_a = "01/01/2010"
            #end_date_a_a   = "02/01/2011"
            #period_a_a     = 397

            #start_date_a_b = "02/02/2011"
            #end_date_a_b   = "03/01/2012"
            #period_a_b     = 394

            #start_date_b   = "01/01/2010"
            #end_date_b     = "03/01/2012"

            start_date_a_a = "01/01/2010"
            end_date_a_a = "02/01/2010"
            period_a_a = 32

            start_date_a_b = "02/02/2010"
            end_date_a_b = "03/01/2010"
            period_a_b = 28

            start_date_b = "01/01/2010"
            end_date_b = "03/01/2010"

            ret = get_data_for_df_test(
                start_date=[start_date_a_a, start_date_a_b, start_date_b],
                end_date=[end_date_a_a, end_date_a_b, end_date_b],
                period=[period_a_a, period_a_b, period_a_a + period_a_b],
                test=test,
            )

            processes_test = []

            ret_cnt = len(ret[0])
            ret_keys = list(ret[0].keys())
            for i in range(ret_cnt):
                test_number = "{:02d}".format(i + 1)
                i_name = ret_keys[i]

                process = Process(target=ds_cmp,
                                  args=(
                                      test_number,
                                      i_name,
                                      ret[0][i_name],
                                      ret[1][i_name],
                                      ret[2][i_name],
                                      path_data_output,
                                  ))

                processes_test.append(process)

            for i in range(ret_cnt):
                processes_test[i].start()

            for i in range(ret_cnt):
                processes_test[i].join()

            return

        def multiprocess_test(
            process_id,
            process_name,
            queue,
            date_id,
        ):
            """
            This function provides some dummy commands in order to check the
            correct functioning of the multiprocessing of the chain.
            """
            s_p_d = Velo.sub_proc_dates
            date_period_start = s_p_d[date_id][0]
            date_period_end = s_p_d[date_id][1]
            date_period = s_p_d[date_id][2]
            date_period_start = date_period_start
            date_period_end = date_period_end
            date_period_end_next_day = date_period_end + timedelta(days=1)
            date_period = date_period
            start_date = date_period_start.strftime("%m/%d/%Y")
            end_date = date_period_end.strftime("%m/%d/%Y")
            end_date_next = date_period_end_next_day.strftime("%m/%d/%Y")

            ret = {}

            process_name_str = "{}[{}{}/{:03}{}]{}".format(
                cs.RES,
                cs.PRGnBA,
                process_name,
                Multiprocess.process_cnt - 1,
                cs.RES,
                cs.RES,
            )
            Multiprocess.logger.info(
                "{}{}  Loading transactions from [{}--{}, {}, {:03d}]".format(
                    process_name_str,
                    cs.WHI,
                    start_date,
                    end_date,
                    end_date_next,
                    date_period,
                ))
            time.sleep(random.randint(2, 4))

            ret["process_id"] = [process_id]

            Multiprocess.logger.debug("{}{}  Sending results".format(
                process_name_str,
                cs.WHI,
            ))

            queue.put([process_id, ret])
            queue.close()

            Multiprocess.logger.debug("{}{}  terminating".format(
                process_name_str,
                cs.WHI,
            ))
            exit(0)

        def subprocess_manage(
            process_cnt,
            processes,
            cpu_cnt,
            process_result,
            queue,
        ):
            """
            This function works as a supplement to a multiprocessing pool.
            """
            process_id = 0
            start_allowed = True
            processes_fin = process_cnt

            #Start first cpu_cnt subprocesses
            if cpu_cnt > process_cnt:
                cpu_cnt = process_cnt
            for i in range(cpu_cnt):
                processes[i].start()
                Multiprocess.process_last_started += 1

                if not processes[i].is_alive():
                    Multiprocess.logger.error(
                        "{}[{}process_{:03}/{:03}{}]  Not running".format(
                            cs.RES,
                            cs.PRGnBA,
                            i,
                            process_cnt - 1,
                            cs.RES,
                        ))
                else:
                    Multiprocess.logger.debug(
                        "{}[{}process_{:03}/{:03}{}]  Starting".format(
                            cs.RES,
                            cs.PRGnBA,
                            i,
                            process_cnt - 1,
                            cs.RES,
                        ))

            Multiprocess.logger.debug(
                "{}[{}process_{:03}{}-{}{:03}{}]  Started".format(
                    cs.RES,
                    cs.PRGnBA,
                    0,
                    cs.RES,
                    cs.PRGnBA,
                    cpu_cnt - 1,
                    cs.RES,
                ))

            process_id_str = "{}[{}process_{:03}/{:03}{}]".format(
                cs.RES,
                cs.PRGnBG,
                process_id,
                process_cnt - 1,
                cs.RES,
            )

            #start next subprocess if the last one finished and its results
            #were retrieved
            while processes_fin > 0:

                if process_id < cpu_cnt - 1:
                    process_id = cpu_cnt - 1
                    continue

                #retrieve result from queue
                while True:
                    process_xxx_str = "{}[{}process_xxx/{:03}{}]".format(
                        cs.RES,
                        cs.PRGnBG,
                        process_cnt - 1,
                        cs.RES,
                    )
                    Multiprocess.logger.info("{}{}  retrieving results".format(
                        process_xxx_str,
                        cs.PRGnBG,
                    ))

                    msg_from_queue = queue.get()
                    msg_process_id = msg_from_queue[0]
                    msg_process_id_str = "{}[{}process_{:03}/{:03}{}]".format(
                        cs.RES,
                        cs.PRGnBG,
                        msg_process_id,
                        process_cnt - 1,
                        cs.RES,
                    )

                    process_result[msg_process_id] = msg_from_queue[1]
                    Multiprocess.logger.info("{}{}  results retrieved".format(
                        msg_process_id_str,
                        cs.PRGnBG,
                    ))
                    queue.task_done()

                    #processes[msg_process_id].terminate()
                    processes[msg_process_id].join()
                    #processes[msg_process_id].terminate()
                    Multiprocess.logger.info("{}{}  terminated/joined".format(
                        msg_process_id_str,
                        cs.PRGnBF,
                    ))
                    break

                processes_fin -= 1

                if process_id < process_cnt - 1:
                    process_id += 1
                    process_tmp = processes[process_id]

                    process_tmp.start()
                    Multiprocess.process_last_started += 1

                    if not process_tmp.is_alive():
                        Multiprocess.logger.error(
                            "{}  Not running".format(process_id_str))
                    else:
                        Multiprocess.logger.debug(
                            "{}  Starting".format(process_id_str))

            Multiprocess.logger.debug("Returning from subprocess_manage()")
            return

        def ds_cat(ds_res, ds_nxt_id, ds_nxt, process_name):
            """
            This function concatenates two given data structures.
            """
            if ds_nxt_id != Multiprocess.cat_nxt:
                Multiprocess.logger.error(
                    "{}[{}{}/{:03}{}]{}  ds_nxt_id != Multiprocess.cat_nxt".
                    format(
                        cs.RES,
                        cs.PRGnBE,
                        process_name,
                        process_cnt - 1,
                        cs.RES,
                        cs.PRGnBE,
                    ))
                return

            Multiprocess.cat_nxt += 1

            #initial setup
            if ds_nxt_id == 0:
                Multiprocess.logger.info(
                    "{}[{}{}/{:03}{}]{}  data appended".format(
                        cs.RES,
                        cs.PRGnBH,
                        process_name,
                        Multiprocess.process_cnt - 1,
                        cs.RES,
                        cs.PRGnBH,
                    ))
                return ds_nxt

            ds_new = {}

            for i, v in ds_res.items():

                if type(ds_nxt[i]) == list:
                    ds_new[i] = ds_res[i] + ds_nxt[i]

                elif type(ds_nxt[i]) == DatetimeIndex:
                    ds_new[i] = ds_res[i].append(ds_nxt[i])

                else:
                    ds_new[i] = np.concatenate([ds_res[i], ds_nxt[i]])

            Multiprocess.logger.info(
                "{}[{}{}/{:03}{}]{}  data appended".format(
                    cs.RES,
                    cs.PRGnBH,
                    process_name,
                    Multiprocess.process_cnt - 1,
                    cs.RES,
                    cs.PRGnBH,
                ))

            return ds_new

        if test > 0:
            concat_test(
                test,
                path_data_output,
            )
            return False

        velo_instances = []
        process_result = []
        process_result_cat = []
        process_id = 0
        process_cnt = 0
        cpu_cnt = multiprocessing.cpu_count()
        cpu_cnt_test = 16
        start_allowed = True
        cat_finished = False
        queue = JoinableQueue()
        start_date_o = datetime.strptime(start_date, "%m/%d/%Y").date()
        end_date_o = datetime.strptime(end_date, "%m/%d/%Y").date()

        #Set cpu count manually for debugging
        if cpu_cnt_manual >= 0:
            cpu_cnt = cpu_cnt_manual
            cpu_cnt_test = cpu_cnt_manual

        if test == -1:
            Multiprocess.logger.info("Starting mode: Test[Multiprocess]")
            cpu_cnt = cpu_cnt_test
        else:
            Multiprocess.logger.info("Starting mode: Production")

        #check if period is so high that less than cpu_cnt cores would be used
        num_days = (end_date_o - start_date_o).days
        period_max = ceil(num_days / cpu_cnt)
        if period > period_max:
            period = period_max
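        # e.g. with 365 days between start_date and end_date on 16 cores,
        # period_max = ceil(365 / 16) = 23 days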

        Multiprocess.logger.debug(
            "Determine velocity based on blocksci with period = {} days".
            format(period))

        # for date_period_start, date_period_end, date_period in date_range(
        #     start_date_o,
        #     end_date_o,
        #     period
        # ):
        s_p_d = Velo.sub_proc_dates
        for date in range(len(s_p_d)):
            date_period = s_p_d[date][2]
            if date_period <= 0:
                continue

            process_name = "process_{:03d}".format(process_id)

            velo_inst = Velo(
                process_id=process_id,
                process_name=process_name,
                queue=queue,
                date_id=date,
            )

            process = None
            if test == 0:
                process = Process(target=velo_inst.run)

            elif test == -1:
                process = Process(target=multiprocess_test,
                                  args=(
                                      process_id,
                                      process_name,
                                      queue,
                                      date,
                                  ))

            process_id += 1
            process_cnt += 1
            velo_instances.append(velo_inst)
            Multiprocess.processes.append(process)
            process_result.append(None)

        Multiprocess.process_cnt = process_cnt
        Velo.process_cnt = process_cnt

        thread_subprocess_manage = threading.Thread(
            target=subprocess_manage,
            args=(
                process_cnt,
                Multiprocess.processes,
                cpu_cnt,
                process_result,
                queue,
            ),
        )
        thread_subprocess_manage.start()

        #concatenate all consecutive results

        time_to_wait_is_alive = 0.1
        time_to_wait_is_none = 0.1
        while Multiprocess.cat_nxt < process_cnt:
            cat_nxt = Multiprocess.cat_nxt
            process_name_nxt = velo_instances[cat_nxt].process_name
            process_name_nxt_str = "{}[{}{}/{:03}{}]".format(
                cs.RES,
                cs.PRGnBE,
                process_name_nxt,
                process_cnt - 1,
                cs.RES,
            )
            #process that would produce the next results to be concatenated ...
            #...was not started yet => continue
            if cat_nxt > Multiprocess.process_last_started:
                continue

            #...was started and is still running => continue
            if Multiprocess.processes[cat_nxt].is_alive():
                time_sleep = time_to_wait_is_alive + 2
                time.sleep(time_sleep)
                if time_sleep <= 20:
                    time_to_wait_is_alive *= 2
                elif time_sleep <= 60:
                    time_to_wait_is_alive += 10

                if time_to_wait_is_alive > 3.2:
                    Multiprocess.logger.info("{}{}  still running".format(
                        process_name_nxt_str,
                        cs.PRGnBE,
                    ))
                continue
            time_to_wait_is_alive = 0.1

            #...finished, but did not produce a result => that's a major error

            if process_result[cat_nxt] is None:
                time.sleep(time_to_wait_is_none)
                time_to_wait_is_none *= 2

                if time_to_wait_is_none > 6.4:
                    Multiprocess.logger.critical("{}  no results!".format(
                        process_name_nxt_str, ))
                    processes_kill_all()
                    exit(-1)
                elif time_to_wait_is_none > 3.2:
                    Multiprocess.logger.warning("{}  no results yet!".format(
                        process_name_nxt_str, ))
                continue
            time_to_wait_is_none = 0.1

            # concatenate
            if test == -1:
                time.sleep(0.2)
            process_result_cat = ds_cat(
                process_result_cat,
                cat_nxt,
                process_result[cat_nxt],
                process_name_nxt,
            )

            process_result[cat_nxt] = None
            velo_instances[cat_nxt] = None

        #give thread_subprocess_manage time to return
        time.sleep(2)

        if thread_subprocess_manage.is_alive():
            Multiprocess.logger.warning("Exiting concat loop too early!")

        thread_subprocess_manage.join()

        return process_result_cat
Пример #11
0
def _run(logMessage, errMessage):

    # This runs in the tk thread
    print('Starting up')
    dbtime = MongoTime()
    while True:
        time_config = dbtime.select_one({"flag": 1})
        type = time_config.get('type')
        padding_time = time_config.get('time')
        start_time = time.time()
        xy = XianYu(logMessage, errMessage)
        xy.run(type)
        print('Async crawl elapsed time:', time.time() - start_time)
        # TODO: use the interval from the config
        errMessage.put('Crawling took {} seconds'.format(int(time.time() - start_time)))

        if not padding_time:
            padding_time = 10
        time.sleep(padding_time)


if __name__ == '__main__':
    from multiprocessing import Process, JoinableQueue
    logMessage = JoinableQueue()
    errMessage = JoinableQueue()
    TProcess_crawler = threading.Thread(target=_run,
                                        args=(logMessage, errMessage))
    # TProcess_crawler.daemon = True
    TProcess_crawler.start()
    # TProcess_crawler.join()
    print('Continuing to run')
Пример #12
0
def main(args):

    # variables
    bam_file = False

    bc = {}
    bc_readcount = defaultdict(int)
    bc_umi = {}
    num_bc = 0
    offset = 0
    lnum = 0
    sz_umi = ""
    umi_file = ""
    # string to capture file summary table that's used with kallisto pseudo -b
    sz_table = "#id\tumiFile\tcellFile\n"
    dumi = None
    bam_flag = False
    sam_header = ""
    quant_mode = False

    ##
    ## check for output folder
    ##
    if not os.path.isdir(args.outpath):
        ms.message("Creating output folder {}".format(args.outpath))
        os.mkdir(args.outpath)

    file_queue = JoinableQueue()
    p = None
    pool = []

    if args.R is not None:
        if not os.path.isfile(args.R):
            ms.error_message(
                "Supplied annotation file does not exist ({})".format(args.R))
            return 1
        else:
            quant_mode = True

    ##
    ## figure out if we have a bam as input. if so we have to convert it to sam for indexing
    ##
    if re.search(r"\.bam$", args.fin):
        # send the sam file into the output folder
        sam_name = args.outpath + "/" + os.path.basename(
            re.sub(r"\.bam$", ".sam", args.fin))

        if not os.path.isfile(sam_name):
            # need to convert alignments to sam
            bam_flag = True
            cmd = "samtools view -h {} > {}".format(args.fin, sam_name)
            t0 = time()
            message("Temporarily converting BAM to SAM format")
            rres = runcmd(cmd)
            if rres[0] != 0:
                sys.stderr.write(
                    "Error: samtools exited with non-zero exit status!\n")
                return 1

            sys.stderr.write("{} sec\n".format(time() - t0))

    else:
        sam_name = args.fin

    ##
    ## we need to index all barcodes and track umi per barcode. if these pickle
    ## files exist we can use them
    ##

    bc_pkl = args.outpath + "/" + BC_PICKLE
    bc_umi_pkl = args.outpath + "/" + BC_UMI_PICKLE
    bc_readcount_pkl = args.outpath + "/" + BC_READCOUNT
    sam_header_pkl = args.outpath + "/sam_header.pkl"

    if os.path.isfile(bc_pkl) and os.path.isfile(
            bc_umi_pkl) and os.path.isfile(bc_readcount_pkl):
        ##
        # load indexes from pickles
        ms.message(
            "Loading existing barcode and umi indexes from output folder")
        t0 = time()
        bc = pickle.load(open(bc_pkl, "rb"))
        bc_umi = pickle.load(open(bc_umi_pkl, "rb"))
        bc_readcount = pickle.load(open(bc_readcount_pkl, "rb"))
        sam_header = pickle.load(open(sam_header_pkl, "rb"))
        num_bc = len(bc.keys())
        ms.time_diff(t0)

    else:
        # we have to index

        #
        # parse the alignments. in this loop we only extract the cell barcode and the umi
        # plus record the file position offsets for barcodes. the dict that is built
        # is indexed by the barcodes and each element contains a list of file offsets for
        # reads that came from that barcode. we also get all of the distinct umis collected
        # per barcode in this loop in order to estimate the actual cell count before
        # writing all of the read files out to disk
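        #
        # for illustration (hypothetical barcodes/UMIs), after this loop the
        # indexes look roughly like:
        #   bc           = {"ACGTACGTACGT": [1024, 5230, 9871], ...}  # byte offsets into the SAM file
        #   bc_umi       = {"ACGTACGTACGT": {"TTAGGC": 3, "GGCATA": 1}, ...}
        #   bc_readcount = {"ACGTACGTACGT": 4, ...}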
        message('Indexing cell barcodes from alignments and counting raw UMI.')
        t0 = time()
        with open(sam_name, "r") as fin:

            for szl in fin:
                if szl[0] == "@":
                    # append header line to header string
                    sam_header += szl
                    offset += len(szl)
                    continue

                # count lines and produce progress message so we know this thing is
                # running
                lnum += 1
                if lnum % 1000000 == 0:
                    progress_message("read {} lines".format(lnum))

                # fetch the cell barcode from the read name
                line_bc = parse_barcode(szl)

                if line_bc not in bc:
                    # first encounter with this barcode
                    num_bc += 1
                    # init a list for this barcode's line offsets within this sam file
                    bc[line_bc] = []
                    # init a dict for the barcode to track umis
                    bc_umi[line_bc] = defaultdict(int)

                # append line offset to this barcode's list
                bc[line_bc].append(offset)
                # get the umi and add it to this barcode's dict IF this is not a
                # secondary alignment
                aln = szl.split("\t")

                if (int(aln[1]) & 0x100) == 0:
                    # not a secondary alignment. track it.
                    umi = parse_umi(szl)
                    bc_umi[line_bc][umi] += 1

                if ((int(aln[1]) & 0x4) == 0) and ((int(aln[1]) & 0x100) == 0):
                    # this read is aligned and is a primary alignment so we can count this one
                    # into this barcode's aligned read count
                    bc_readcount[line_bc] += 1

                # update offset to the next line
                offset += len(szl)

        # final progress message and total time of parsing
        progress_message("read {} lines".format(lnum), last=True)
        sys.stderr.write("{} sec\n".format(time() - t0))

        t0 = time()

        if not args.no_pickles:
            ms.message("saving indexes to disk")
            pickle.dump(bc, open(bc_pkl, "wb"))
            pickle.dump(bc_umi, open(bc_umi_pkl, "wb"))
            pickle.dump(bc_readcount, open(bc_readcount_pkl, "wb"))
            pickle.dump(sam_header, open(sam_header_pkl, "wb"))
            ms.time_diff(t0)

    #
    # implement cell number detection per 10x.
    # here's what happens. you take the 'exp-cells' value (expected cells)
    # and multiply that by 0.01 to get an index. sort the barcodes and the
    # barcode umi counts in descending order and jump to the index you just
    # calculated and then take that index's umi count. scale that count
    # by 0.1. now you take as many cells, starting from the top of the umi
    # count sorted list, that have at least that many UMI.  that's literally
    # how they do it.
    #
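    # For illustration (hypothetical numbers): with exp_cells = 3000 the index
    # is floor(3000 * 0.01 - 1) = 29; if the barcode ranked 30th by UMI count
    # has 10,000 distinct UMIs, the threshold is 10,000 * 0.1 = 1,000, so every
    # barcode with at least 1,000 UMIs is counted as a cell.
    #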

    t0 = time()
    message("Determining cell count")

    #
    # write a file that will contain the cell id, umi count and read count
    # for each cell id. might be informative...who knows.
    with open("{}/barcode_umi_counts.txt".format(args.outpath), "w") as fout:
        bc_umi_counts = []

        fout.write("barcode\tumi_count\tdistinct_reads\talignments\n")

        for lbc in bc.keys():
            num_umi = len(bc_umi[lbc].keys())
            bc_umi_counts.append([lbc, num_umi])
            # write the cell id, distinct umi count and total read count to file
            fout.write("\t".join(
                map(str, [lbc, num_umi, bc_readcount[lbc],
                          len(bc[lbc])])))
            fout.write("\n")

    #
    # sort by umi count in descending order and threshold
    bc_umi_counts.sort(key=lambda x: x[1], reverse=True)
    exp_cells = int(math.floor(args.exp_cells * 0.01 - 1))

    num_reads = 0
    num_umi = 0

    i = 0
    while True:
        if bc_umi_counts[i][1] < bc_umi_counts[exp_cells][1] * 1.0 / 10:
            break

        # count umi and count distinct reads
        lbc = bc_umi_counts[i][0]
        num_reads += bc_readcount[lbc]
        num_umi += len(bc_umi[lbc].keys())

        i += 1

    #
    # number of actual cells is 'i' because 'i' is incremented before
    # checking if the umi count passes the threshold. i-1 is the index
    # of the last cell we would accept
    num_cells = i

    #
    # now we can generate a summary for the detected cells
    with open("{}/cell_summary.tsv".format(args.outpath), "w") as fout:

        fout.write("estimated_cells\t{}\n".format(num_cells))
        fout.write("total_reads\t{}\n".format(num_reads))
        fout.write("total_umi\t{}\n".format(num_umi))
        fout.write("reads_per_cell\t{}\n".format(num_reads * 1.0 / num_cells))
        fout.write("umi_per_cell\t{}\n".format(num_umi * 1.0 / num_cells))

        # find the median barcode and corresponding read count
        if num_cells % 2 == 0:
            # even count
            median_idx = num_cells // 2
        else:
            median_idx = num_cells // 2 + 1

        median_lbc = bc_umi_counts[median_idx][0]
        fout.write("median_reads_per_cell\t{}\n".format(
            bc_readcount[median_lbc]))

    #
    # let user know what's up
    sys.stderr.write("{} sec\n".format(time() - t0))
    sys.stderr.write("Total distinct barcodes:  {}\n".format(num_bc))
    sys.stderr.write("Cell number estimate:     {}\n".format(num_cells))

    if args.estimate_only:
        if bam_flag:
            # input was BAM so we can dump the converted file. just putting in
            # some logic to be certain that the original file is not deleted.
            if os.path.isfile(args.fin) and os.path.isfile(sam_name) and (
                    sam_name != args.fin):
                os.unlink(sam_name)

        ms.message("Done.")
        return 0

    if args.force_cells is not None:
        # change number of cells to either the total barcodes or the
        # value provided by the user, whichever is smaller
        num_cells = min([args.force_cells, num_bc])
        sys.stderr.write("Forced cell output:       {}\n".format(num_cells))

    t0 = time()

    message(
        "Parsing individual detected cell alignments out to individual files")

    if quant_mode:
        # start quantification child processes for parsed sam files
        for i in range(args.p):
            p = Process(target=quantification_worker,
                        args=(
                            file_queue,
                            args,
                        ))
            p.daemon = True
            p.start()
            pool.append(p)

    else:
        # start child process for sam to bam conversion
        for i in range(args.p):
            p = Process(target=compress_reads, args=(file_queue, ))
            p.daemon = True
            p.start()
            pool.append(p)

    fin = open(sam_name, "r")

    # write individual cell files
    i = 0
    sz_umi = ""
    while i < num_cells:
        # get barcode
        lbc = bc_umi_counts[i][0]
        # start output strings
        szout = sam_header
        #sz_umi = ""
        # setup output file name
        cell_file = "{}/{}.sam".format(args.outpath, lbc)
        #umi_file = "{}.umi".format(lbc)

        # update user on progress
        progress_message("Writing {} - {}/{} ({} reads)".format(
            cell_file, i + 1, num_cells, len(bc[lbc])))

        if args.samplerate < 1 and args.samplerate > 0:

            ##
            # to subsample we have to run through all read offsets for this cell and index the reads
            # then take a subset of them to write out to disk. I have to do this because the
            # alignment file contains secondary alignments which have to be collapsed by
            # read name prior to the subsampling.
            read_index = defaultdict(list)
            for offset in bc[lbc]:
                fin.seek(offset)
                aln = fin.readline().strip().split("\t")
                rname = aln[0]
                read_index[rname].append(offset)

            #
            # now by looping through distinct reads we can dump out only those that are at the specified rate
            for rname in read_index.keys():
                if random.random() > args.samplerate:
                    continue

                # dump this read
                for offset in read_index[rname]:
                    fin.seek(offset)
                    szout += fin.readline()

        else:

            # loop through line offsets for this barcode and append lines to the output string
            for offset in bc[lbc]:
                fin.seek(offset)
                szout += fin.readline()

        # write the file
        with open(cell_file, "w") as fout:
            fout.write(szout)

        # send the file off for bam compression
        file_queue.put(cell_file)

        i += 1

    fin.close()

    sys.stderr.write("\n")
    sys.stderr.write("{} sec\n".format(time() - t0))

    if bam_flag:
        # input was BAM so we can dump the converted file. just putting in
        # some logic to be certain that the original file is not deleted.
        if os.path.isfile(args.fin) and os.path.isfile(sam_name) and (
                sam_name != args.fin):
            os.unlink(sam_name)

    sys.stderr.write("Waiting for child processes to finish compressing files\n")

    for p in pool:
        file_queue.put(None)
    file_queue.join()

    for p in pool:
        p.join()

    message("finished!")

    return 0
Пример #13
0
        res = queue.get()
        queue.task_done()
        return res

    def flush_queue(queue):
        res = []
        while not queue.empty():
            res.append(queue.get())
        return res

    event_enable_comms = Event()
    event_client_disconnect = Event()

    event_client_disconnect.clear()

    queue_rx = JoinableQueue()
    queue_tx = JoinableQueue()
    queue_log = JoinableQueue()
    test_commprocess = CommProcess(port=8080,
                                   address='127.0.0.1',
                                   events={
                                       'enable_comms': event_enable_comms,
                                       'client_disconnect':
                                       event_client_disconnect
                                   },
                                   queues={
                                       'tx_msg': queue_tx,
                                       'rx_msg': queue_rx,
                                       'log': queue_log
                                   },
                                   debug_log=debug_log_path)
Пример #14
0
def piping_synthesis(overlay_url, base_path):
    # check_base VM
    start_time = time.time()
    meta_stream = urllib2.urlopen(overlay_url)
    meta_raw = read_all(meta_stream)
    meta_info = msgpack.unpackb(meta_raw)
    url_manager = Manager()
    overlay_urls = url_manager.list()
    url_prefix = os.path.dirname(overlay_url)
    for blob in meta_info[Const.META_OVERLAY_FILES]:
        blob_filename = os.path.basename(blob[Const.META_OVERLAY_FILE_NAME])
        url = os.path.join(url_prefix, blob_filename)
        overlay_urls.append(url)
    (base_diskmeta, base_mem, base_memmeta) = \
            Const.get_basepath(base_path, check_exist=True)

    # read overlay files
    # create named pipe to convert queue to stream
    time_transfer = Queue()
    time_decomp = Queue()
    time_delta = Queue()
    time_fuse = Queue()
    tmp_dir = tempfile.mkdtemp()
    temp_overlay_filepath = os.path.join(tmp_dir, "overlay_file")
    temp_overlay_file = open(temp_overlay_filepath, "w+b")
    overlay_pipe = os.path.join(tmp_dir, 'overlay_pipe')
    os.mkfifo(overlay_pipe)

    # overlay
    demanding_queue = Queue()
    download_queue = JoinableQueue()
    download_process = Process(target=synthesis.network_worker, 
            args=(
                overlay_urls, demanding_queue, download_queue, time_transfer, CHUNK_SIZE,
                )
            )
    decomp_process = Process(target=synthesis.decomp_worker,
            args=(
                download_queue, overlay_pipe, time_decomp, temp_overlay_file,
                )
            )
    modified_img, modified_mem, fuse, delta_proc, fuse_thread = \
            cloudlet.recover_launchVM(base_path, meta_info, overlay_pipe, 
                    log=sys.stdout, demanding_queue=demanding_queue)
    delta_proc.time_queue = time_delta
    fuse_thread.time_queue = time_fuse

    # start processes
    download_process.start()
    decomp_process.start()
    delta_proc.start()
    fuse_thread.start()

    # wait for end
    delta_proc.join()
    fuse_thread.join()

    # printout result
    end_time = time.time()
    total_time = (end_time-start_time)
    synthesis.SynthesisTCPHandler.print_statistics(start_time, end_time, \
            time_transfer, time_decomp, time_delta, time_fuse, \
            print_out=sys.stdout)

    delta_proc.finish()

    if os.path.exists(overlay_pipe):
        os.unlink(overlay_pipe)
    shutil.rmtree(tmp_dir)

    print "\n[Time] Total Time for synthesis(including download) : %f" % (total_time)
    return fuse
Пример #15
0
    def parallel(self):
        from multiprocessing import Process, Queue, JoinableQueue

        if debug:
            print(inspect.stack()[0][3])
    
        self.ntrajs = []
        for i in range(self.cpus):
            self.ntrajs.append(min(int(np.floor(float(self.ntraj)
                                             / self.cpus)),
                                   self.ntraj - sum(self.ntrajs)))
        cnt = sum(self.ntrajs)
        while cnt < self.ntraj:
            for i in range(self.cpus):
                self.ntrajs[i] += 1
                cnt += 1
                if (cnt >= self.ntraj):
                    break
        self.ntrajs = np.array(self.ntrajs)
        self.ntrajs = self.ntrajs[np.where(self.ntrajs > 0)]
        self.nprocs = len(self.ntrajs)
        sols = []
        processes = []
        resq = JoinableQueue()
        resq.join()

        if debug:
            print("Number of cpus: " + str(self.cpus))
            print("Trying to start " + str(self.nprocs) + " process(es).")
            print("Number of trajectories for each process: " + str(self.ntrajs))

        for i in range(self.nprocs):
            p = Process(target=self.evolve_serial,
                        args=((resq, self.ntrajs[i], i, self.seed * (i + 1)),))
            p.start()
            processes.append(p)
        cnt = 0

        while True:
            try:
                sols.append(resq.get())
                resq.task_done()
                cnt += 1
                if (cnt >= self.nprocs):
                    break
            except KeyboardInterrupt:
                break
            except:
                pass

        resq.join()
        for proc in processes:
            try:
                proc.join()
            except KeyboardInterrupt:
                if debug:
                    print("Cancel thread on keyboard interrupt")
                proc.terminate()
                proc.join()
        resq.close()
        return sols
Пример #16
0
The task queue length needs to be suitable: each finished task posts up to two new tasks to the queue (see the note at QUE_LEN below).

'''

import os
import multiprocessing
from multiprocessing import Pool, JoinableQueue, Array, Value
import random
import traceback

N = 4
QUE_LEN = -1
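# A maxsize <= 0 makes the JoinableQueue unbounded. Each finished quicksort task
# can put() up to two new subtasks while the same workers are also the consumers,
# so a small bounded queue could fill up and leave every worker blocked on put().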

NUM_ELEMENTS = 100
nums = Array('i', [random.randint(0, 100) for i in range(NUM_ELEMENTS)])
task_que = JoinableQueue(QUE_LEN)
finished_tasks = Value('i', 0)


class EndOfQueue():  # poison pill
    def __init__(self):
        pass


class QuickSortTask():
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def quick_sort_task(self):
        print 'PID {}: Quick sorting:'.format(
Пример #17
0
import io
import json
import logging
from multiprocessing.pool import ThreadPool
from multiprocessing import JoinableQueue, Manager
import os

import requests

from . import COUNTRIES_VALIDATION_DATA_DIR

MAIN_URL = 'http://i18napis.appspot.com/address/data'
COUNTRY_PATH = os.path.join(COUNTRIES_VALIDATION_DATA_DIR, '%s.json')

logger = logging.getLogger(__name__)
work_queue = JoinableQueue()
manager = Manager()


def fetch(url): # pragma: no cover
    logger.debug(url)
    data = requests.get(url).json()
    return data


def get_countries(): # pragma: no cover
    return fetch(MAIN_URL)['countries'].split('~')


def process(key):
    url = '%s/%s' % (MAIN_URL, key)
Пример #18
0
def main(_):
    print_flags()
    initialize_folders()

    env = gym.make(FLAGS.env_name)

    if FLAGS.is_train and FLAGS.is_monitor:
        def monitor_frequency_func(iteration):
            return (iteration + FLAGS.monitor_frequency) % FLAGS.monitor_frequency == 0

        env = wrappers.Monitor(env, FLAGS.log_dir + "/" + FLAGS.scope,
                               video_callable=monitor_frequency_func,
                               resume=FLAGS.is_load)

    job_queue = JoinableQueue()
    result_queue = Queue()
    e = 0

    if FLAGS.n_processes == 1 or not FLAGS.is_train:
        reacher = TRPOAgent(FLAGS.env_name, FLAGS.scope, FLAGS.max_kl, job_queue, result_queue)
        reacher.start()
    else:
        # PARALLEL TRAINING OFFERS ALMOST LINEAR IMPROVEMENT ON 2 processors
        proll = ParallelRollout(FLAGS.env_name, FLAGS.traj_len, FLAGS.n_processes, FLAGS.max_kl)
        parallel_reacher = ParallelTRPOAgent(FLAGS.env_name, FLAGS.scope, FLAGS.max_kl, job_queue, result_queue, proll)
        parallel_reacher.start()

    if FLAGS.is_load:
        job_queue.put(('load', (FLAGS.checkpoint_dir + '/' + FLAGS.scope,)))
        job_queue.join()

    try:
        while True:

            e += 1
            if FLAGS.is_train:
                print ("EPISODE =", e)
                start = time.time()
                job_queue.put(('learn', (FLAGS.gamma, FLAGS.n_trajs, FLAGS.traj_len)))
                job_queue.join()
                end = time.time()
                print ("ROLLOUT TAKES", end - start)

            obs = env.reset()
            for i in range(FLAGS.traj_len):
                job_queue.put(('act', (obs,)))
                job_queue.join()
                obs, _, done, _ = env.step(result_queue.get())
                if not FLAGS.is_train:
                    env.render()
                if done: break

            if e % FLAGS.checkpoint_freq == 0 and FLAGS.is_train:
                job_queue.put(('save', (FLAGS.checkpoint_dir + '/' + FLAGS.scope,)))
                job_queue.join()
                job_queue.put(('log', (FLAGS.log_dir + '/' + FLAGS.scope, 'my_log.json',)))
                job_queue.join()

    except KeyboardInterrupt:
        print('You pressed Ctrl+C!')
        if FLAGS.is_train and FLAGS.is_monitor:
            env.close()
        proll.end()
        parallel_reacher.join()
        sys.exit(0)
Пример #19
0
    def __init__(self,
                 channel,
                 pulse_width_min,
                 pulse_width_max,
                 angle_min,
                 angle_max,
                 init_angle,
                 turnoff_timeout=0):
        """Define a new software controllable servo with adjustable speed control

        Keyword arguments:
        pulse_width_min -- The minimum pulse width defining the lowest angle
        pulse_width_max -- The maximum pulse width defining the biggest angle
        init_angle -- Initial angle that the servo should take when it is powered on. Range is 0 to 180deg
        turnoff_timeout -- number of seconds after which the servo is turned off if no command is received. 0 = never turns off
        """

        self.angle_min = angle_min
        self.angle_max = angle_max
        self.angle_range = angle_max - angle_min
        self.pulse_width_min = pulse_width_min
        self.pulse_width_max = pulse_width_max
        self.pulse_width_range = pulse_width_max - pulse_width_min

        self.turnoff_timeout = turnoff_timeout

        self.current_pulse_width = self.angle_to_pulse_width(init_angle)
        self.last_pulse_width = self.current_pulse_width

        self.last_angle = init_angle

        self.pulse_length = 20.0 * 10.0**-3.0  # 20 ms

        logging.debug("Angle min: {} deg".format(self.angle_min))
        logging.debug("Angle max: {} deg".format(self.angle_max))
        logging.debug("Angle tot: {} deg".format(self.angle_range))
        logging.debug("Pulse min: {} ms".format(self.pulse_width_min * 1000.0))
        logging.debug("Pulse max: {} ms".format(self.pulse_width_max * 1000.0))
        logging.debug("Pulse tot: {} ms".format(self.pulse_width_range *
                                                1000.0))

        self.queue = JoinableQueue(1000)
        self.lastCommandTime = 0

        self.t = Thread(target=self._wait_for_event, name="Servo")
        self.t.daemon = True
        self.running = True
        self.t.start()

        # Branch based on channel type.

        if type(channel) == int:  # Revision A
            self.pwm = PWM(channel, 50, self.current_pulse_width)
        else:  # Revision B
            # Set up the Shift register for enabling this servo
            if channel == "P9_14":
                shiftreg_nr = 1
                self.pwm = PWM_pin(channel, 50, self.current_pulse_width)
            elif channel == "P9_16":
                shiftreg_nr = 2
                self.pwm = PWM_pin(channel, 50, self.current_pulse_width)
            else:
                logging.warning(
                    "Tried to assign servo to an unknown channel/pin: " +
                    str(channel))
                return
            ShiftRegister.make(5)
            self.shift_reg = ShiftRegister.registers[shiftreg_nr]
        self.set_enabled()
        self.pwm.set_value(
            self.angle_to_pulse_width(init_angle) / self.pulse_length)
Пример #20
0
def MCMC(n,
         theta_0,
         priors_dict,
         beta,
         rho,
         chains,
         burn_rate=0.1,
         down_sample=1,
         max_attempts=6,
         pflag=True,
         cpu=None,
         randomize=True):
    # Check input parameters
    mcmcChecks(n, theta_0, beta, rho, chains, burn_rate, down_sample,
               max_attempts)
    print("Performing MCMC Analysis")
    # Selecting optimal temperature
    hyper_theta, beta = hyperparameter_fitting(theta_0, priors_dict, beta, rho,
                                               max_attempts)
    if pflag == True:
        check_proposals(hyper_theta, 50)
    # Overdisperse chains
    if randomize == True:
        print("Dispersing chains")
        if chains > 1:
            chains_list = disperse_chains(hyper_theta, priors_dict, chains)
        else:
            chains_list = [hyper_theta]
    else:
        chains_list = [hyper_theta for i in range(chains)]
    # Sample using MCMC
    print("Sampling from posterior distribution")
    if chains >= cpu_count():
        NUMBER_OF_PROCESSES = cpu_count() - 1
    else:
        NUMBER_OF_PROCESSES = chains
    if cpu != None:
        NUMBER_OF_PROCESSES = cpu  # Manual override of core number selection
    print("Using {} processes".format(NUMBER_OF_PROCESSES))
    with open(results_dir + 'progress.txt',
              'w') as f:  # clear previous progress report
        f.write('')
    jobs = Queue()  # put jobs on queue
    result = JoinableQueue()
    countQ = JoinableQueue()
    for m in range(chains):
        jobs.put([chains_list[m], beta, rho, n, priors_dict])
    [
        Process(target=mh, args=(i, jobs, result, countQ)).start()
        for i in range(NUMBER_OF_PROCESSES)
    ]
    # pull in the results from each process
    pool_results = []
    chain_attempts = []
    for m in range(chains):
        r = result.get()
        pool_results.append(r)
        result.task_done()
        a = countQ.get()
        chain_attempts.append(a)
    # tell the workers there are no more jobs
    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)
    # wait for the result queue to drain, then close all queues
    result.join()
    jobs.close()
    result.close()
    countQ.close()

    # Perform data analysis
    average_acceptance = np.mean([el[1] for el in chain_attempts])
    print("Average acceptance rate was {:.1f}%".format(average_acceptance))
    samples = get_parameter_distributions(pool_results, burn_rate, down_sample)
    plot_parameter_autocorrelations(samples.drop('gamma', axis=1))
    get_summary_statistics(samples.drop('gamma', axis=1))
    with open(results_dir + 'simulation_summary.txt', 'w') as f:
        f.write('Temperature used was {}\n'.format(beta))
        f.write('Number of chains = {}\n'.format(chains))
        f.write(
            "Average acceptance rate was {:.1f}%\n".format(average_acceptance))
        f.write("Initial conditions were\n")
        for i in chains_list:
            f.write(str(i))
            f.write("\n")
Пример #21
0
"""
    1. Multiprocessing queue: from multiprocessing import Queue
    2. Multithreading queue: from queue import Queue
    3. This program is a producer/consumer model with two queues: one stores the pending tasks (a JoinableQueue, which has join and task_done), the other stores the results of completed tasks (a plain Queue)
"""
import time
from multiprocessing import Process, JoinableQueue, Queue
from random import random

tasks_queue = JoinableQueue()
results_queue = Queue()


def task_double(n):
    return n * 2


def producer(in_queue):
    while True:
        wt = random()  # assumed working time
        time.sleep(wt)
        in_queue.put((task_double, wt))

        if wt > 0.9:  # 10% chance to stop producing
            in_queue.put(None)
            print('stop producer')
            break


def consumer(in_queue, out_queue):
    while True:
Пример #22
0
    hra = HiriseAssembly()
    hra.load_assembly(args.infile)

    hra.merge_masked_regions(debug=args.debug)

    print(len(hra.layout_lines))
    if len(hra.layout_lines) == 0:
        print("#make trivial layout")
        hra.make_trivial_layout_lines(debug=args.debug)

    ces.set_exp_insert_size_dist_fit_params(hra.model_params)
    model = ces.model

    nbams = len(hra.bams)
    inq = JoinableQueue(maxsize=0)
    readers = []
    for i in range(nbams):
        reader = Process(target=read_pairs_from_bam,
                         args=(inq, i, hra.bams[i], hra, args.mapq,
                               args.slice),
                         daemon=False)
        reader.start()
        readers.append(reader)

    q = JoinableQueue(maxsize=0)
    histogram_queue = JoinableQueue(maxsize=0)
    final_histogram_queue = JoinableQueue(maxsize=0)
    outfiles = [
        open("{}.part.{}".format(args.outfile, i), "wt")
        for i in range(args.nthreads)
Пример #23
0
 def __init__(self):
     super().__init__()
     self.queue = JoinableQueue()
     self.signal = 0
Пример #24
0
def kmeans(points, k, cutoff, wvmodel):
    # points = [(point, ) for point in points]
    # Pick out k random points to use as our initial centroids
    initial = []
    i = 0
    while i != k:
        random_index = random.randrange(0, len(points))
        if random_index not in initial:
            initial.append(random_index)
            i += 1

    # Create k clusters using those centroids
    # Note: Cluster takes lists, so we wrap each point in a list here.
    clusters = [Cluster([points[p]], wvmodel) for p in initial]

    # Loop through the dataset until the clusters stabilize
    loopCounter = 0
    while True:
        # Create a list of lists to hold the points in each cluster
        lists = [[] for _ in clusters]
        # Start counting loops
        loopCounter += 1
        # For every point in the dataset ...

        # Evaluate points in parallel worker processes
        q = JoinableQueue()
        d_q = Queue()
        threads = []
        lock = Lock()
        for i in range(THREAD):
            _clusters = deepcopy(clusters)
            t = Process(target=_evaluate_point,
                        args=(d_q, q, lock, _clusters, wvmodel))
            t.start()
            threads.append(t)
        for p in points:
            q.put(p)

        q.join()

        # stop workers
        for t in threads:
            t.terminate()

        for _ in range(len(points)):
            tup = d_q.get()
            lists[int(tup[0])].append(tup[1])

        # Set our biggest_shift to zero for this iteration
        biggest_shift = 0.0

        # For each cluster ...
        for i in range(len(clusters)):
            # Calculate how far the centroid moved in this iteration
            shift = clusters[i].update(lists[i])
            # Keep track of the largest move from all cluster centroid updates
            biggest_shift = max(biggest_shift, shift)

        print('Iteration ' + str(loopCounter) + ' : ' + str(biggest_shift))

        # If the centroids have stopped moving much, say we're done!
        if biggest_shift < cutoff:
            print("Converged after %s iterations" % loopCounter)
            break

    return clusters
Пример #25
0
def consumer(q):
    while 1:
        time.sleep(1)
        # try:  # This is actually not a good fit, because there is no way to tell whether producing or eating is faster; if eating is faster, the consumer program would simply end
        #     baozi = q.get_nowait()
        #     print(baozi + " was eaten")
        # except:
        #     print("The baozi are all gone")
        baozi = q.get()
        if baozi is None:
            print("Everything has been eaten")
            break
        print(baozi + " was eaten")
        q.task_done()  # signal the queue that the retrieved task has been processed, decrementing the counter by one


if __name__ == '__main__':
    # q = Queue(10)
    # Likewise a queue of length 10, but a JoinableQueue keeps something like a
    # counter: putting an item increments it by one, and every task_done signal
    # decrements it by one. Once all tasks/data have been put into q, the
    # producer can call q.join() to wait until the counter drops back to zero
    # before the program continues.
    q = JoinableQueue(10)
    pro_p = Process(target=producter, args=(q, ))
    con_p = Process(target=consumer, args=(q, ))
    pro_p.start()
    con_p.daemon = True  # when the producer finishes, the consumer should end with it, so make it a daemon process
    con_p.start()

    pro_p.join()  # wait for the producer to finish
Пример #26
0
    def _fit_mt(self,
                X_valid,
                Y_valid,
                b=0.5,
                beta=1,
                set_unlabeled_as_neg=True,
                n_threads=2,
                eval_batch_size=None):
        """Multi-threaded implementation of `GridSearch.fit`."""
        # First do a preprocessing pass over the data to make sure it is all
        # non-lazily loaded
        # TODO: Better way to go about it than this!!
        logger.info("Loading data...")
        model = self.model_class(**self.model_class_params)
        model._preprocess_data(self.X_train)
        model._preprocess_data(X_valid)

        # Create queue of hyperparameters to test
        logger.info("Launching jobs...")
        params_queue = JoinableQueue()
        param_val_sets = []
        for k, param_vals in enumerate(self.search_space()):
            param_val_sets.append(param_vals)
            hps = self.model_hyperparams.copy()
            for pn, pv in zip(self.param_names, param_vals):
                hps[pn] = pv
            params_queue.put((k, hps))

        # Create a queue to store output results
        scores_queue = JoinableQueue()

        # Start UDF Processes
        ps = []
        for i in range(n_threads):
            p = ModelTester(self.model_class,
                            self.model_class_params,
                            params_queue,
                            scores_queue,
                            self.X_train,
                            X_valid,
                            Y_valid,
                            Y_train=self.Y_train,
                            b=b,
                            save_dir=self.save_dir,
                            set_unlabeled_as_neg=set_unlabeled_as_neg,
                            eval_batch_size=eval_batch_size)
            p.start()
            ps.append(p)

        # Collect scores
        run_stats = []
        while any([p.is_alive() for p in ps]):
            while True:
                try:
                    scores = scores_queue.get(True, QUEUE_TIMEOUT)
                    k = scores[0]
                    param_vals = param_val_sets[k]
                    run_stats.append([k] + list(param_vals) + list(scores[1:]))
                    logger.info("Model {0} Done; score: {1}".format(
                        k, scores[-1]))
                    scores_queue.task_done()
                except Empty:
                    break

        # Terminate the processes
        for p in ps:
            p.terminate()

        # Load best model; first element in each row of run_stats is the model
        # index, last one is the score to sort by
        # Note: the models may be returned out of order!
        i_opt = np.argmax([s[-1] for s in run_stats])
        k_opt = run_stats[i_opt][0]
        model = self.model_class(**self.model_class_params)
        model.load('{0}_{1}'.format(model.name, k_opt), save_dir=self.save_dir)

        # Also save the best model as separate file
        model.save(model_name="{0}_best".format(model.name),
                   save_dir=self.save_dir)

        # Return model and DataFrame of scores
        # Test for categorical vs. binary in hack-ey way for now...
        f_score = 'F-{0}'.format(beta)
        categorical = (len(scores) == 2)
        labels = ['Acc.'] if categorical else ['Prec.', 'Rec.', f_score]
        sort_by = 'Acc.' if categorical else f_score
        self.results = DataFrame.from_records(
            run_stats, columns=["Model"] + self.param_names +
            labels).sort_values(by=sort_by, ascending=False)
        return model, self.results
Пример #27
0
def Boruvka_parallel_queue(g, lista_pesi_condivisi, lista_connessioni,
                           dict_edge):
    grafoB = Graph()

    lista_nodi = g.vertices()

    for node in lista_nodi:
        grafoB.insert_vertex(node.element())

    peso_albero = 0
    parent = Array("i", g.vertex_count(), lock=False)
    successor_next = Array("i", g.vertex_count(), lock=False)

    result = Queue()
    jobs_min = JoinableQueue()
    processes_minimo = minimo_paralelo(parent, successor_next,
                                       lista_pesi_condivisi, lista_connessioni,
                                       jobs_min, result)

    lista_nodi_boruvka = grafoB.vertices()

    t312 = time()

    while len(lista_nodi) > 1:
        lista_divisa_interi = dividi_gruppi(lista_nodi, 8)
        add_jobs(jobs_min, lista_divisa_interi, 0)
        jobs_min.join()

        while result.qsize() > 0:
            lista_result = result.get()
            for node, edge_r in lista_result:
                edge = dict_edge[edge_r]
                n1, n2 = edge.endpoints_posizione()
                e = grafoB.insert_edge(lista_nodi_boruvka[n1],
                                       lista_nodi_boruvka[n2], edge.element())
                if e is not None:
                    peso_albero += edge.element()

        for node in lista_nodi:
            i = node.element()
            parent_opposto = parent[parent[i]]
            if i == parent_opposto:
                if i < parent[i]:
                    parent[i] = i
                else:
                    parent[parent[i]] = parent[i]

        while True:
            converged = True

            add_jobs(jobs_min, lista_divisa_interi, 1)
            jobs_min.join()

            while result.qsize() > 0:

                lista_return = result.get()
                for i, element in enumerate(lista_return):
                    if element is not None:
                        successor_next[i] = element

            for x, y in zip(parent, successor_next):
                if x != y:
                    converged = False
                    break

            if converged:
                break

            add_jobs(jobs_min, lista_divisa_interi, 2)
            jobs_min.join()

        for node in lista_nodi:
            node.root = parent[node.element()]
            node.setElement(node.root)

        dict_merge = {}

        for node in lista_nodi:
            if node.root == node.posizione:
                dict_merge[node.root] = []

        add_jobs(jobs_min, lista_divisa_interi, 3)
        jobs_min.join()

        lista = [x for x in lista_nodi]
        lista_nodi = []
        for node in lista:
            if node.posizione != node.root:
                dict_merge[node.root].append(node.posizione)
            else:
                lista_nodi.append(node)

        if len(lista_nodi) <= 1:
            break

        add_jobs(jobs_min, dict_merge, 4)
        jobs_min.join()

    for pr in processes_minimo:
        pr.terminate()

    return (grafoB, peso_albero)
Пример #28
0
 def __init__(self, cache=10):
     super(StoreWorker, self).__init__()
     self.store_q = JoinableQueue()
     self.cache = cache
Пример #29
0
                        type=nonexistant_file,
                        help='Output file in json format')
    parser.add_argument('--n-readers',
                        type=int,
                        default=2,
                        help='Number of reader processes to start')
    parser.add_argument('--n-workers',
                        type=int,
                        default=1,
                        help='Number of worker processes to start')

    args = parser.parse_args()

    # Setup queues and the results dictionary
    # Queue for input files in HDF5 format
    inputfile_queue = JoinableQueue()
    # These variables are required to track the results at one place and collect error messages
    manager = MyManager()
    manager.start()
    progressbar = manager.tqdm(total=len(args.timeseries_files))
    results = manager.dict()
    error_list = manager.list()

    logging = logging.getLogger(os.path.basename(__file__))

    # Setup readers, workers and aggregators according to command line parameters
    pipeline = ParallelPipeline(
        steps=[
            (HDF5Reader, {
                'hdf5_group': 'imputed',
                'progressbar': progressbar,
Пример #30
0
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model',
                        required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument(
        '--alphabet',
        required=True,
        help=
        'Path to the configuration file specifying the alphabet used by the network'
    )
    parser.add_argument('--lm',
                        required=True,
                        help='Path to the language model binary file')
    parser.add_argument(
        '--trie',
        required=True,
        help=
        'Path to the language model trie file created with native_client/generate_trie'
    )
    parser.add_argument('--csv',
                        required=True,
                        help='Path to the csv source file')
    parser.add_argument(
        '--proc',
        required=False,
        default=cpu_count(),
        type=int,
        help='Number of processes to spawn, defaulting to number of CPUs')
    parser.add_argument(
        '--dump',
        required=False,
        action='store_true',
        default=False,
        help=
        'Dump the results as text file, with one line for each wav: "wav transcription"'
    )
    args = parser.parse_args()

    memory_limit(0.5)

    manager = Manager()
    work_todo = JoinableQueue(
    )  # this is where we are going to store input data
    work_done = manager.Queue()  # this where we are gonna push them out

    #tf.get_default_graph().as_default()
    #tf.reset_default_graph()
    #tfconfig = tf.ConfigProto()
    #tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.1
    #tfconfig.gpu_options.allow_growth = True
    #tfconfig.allow_soft_placement=True

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm,
                                       args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()  # Launch reader() as a separate python process
        processes.append(worker_process)

    print([x.name for x in processes])

    wavlist = []
    predictions = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        count = 0
        for row in csvreader:
            count += 1
            work_todo.put({'filename': row['wav_filename']})
    print('Found %d wav entries in the csv in total\n' % count)
    work_todo.join()
    num_work = work_done.qsize()
    print('\nTranscribed %d wav files in total' % num_work)

    while not work_done.empty():
        msg = work_done.get()
        predictions.append(msg['prediction'])
        wavlist.append(msg['wav'])
    print('prediction= ')
    for i in range(num_work):
        print("\n-----Test %d-----\n" % i)
        print(predictions[i])
        print()

    if args.dump:
        with open(args.csv + '.txt',
                  'w') as ftxt, open(args.csv + '.out', 'w') as fout:
            for wav, txt, out in zip(wavlist, ground_truths, predictions):
                ftxt.write('%s %s\n' % (wav, txt))
                fout.write('%s %s\n' % (wav, out))
            print('Reference texts dumped to %s.txt' % args.csv)
            print('Transcription   dumped to %s.out' % args.csv)