Example #1
def test_api(client_key):

    # 1. create test image data and both processing and result queues
    urls = ['https://demo.restb.ai/images/demo/demo-1.jpg',
            'https://demo.restb.ai/images/demo/demo-2.jpg',
            'https://demo.restb.ai/images/demo/demo-3.jpg',
            'https://demo.restb.ai/images/demo/demo-4.jpg',
            'https://demo.restb.ai/images/demo/demo-5.jpg',
            'https://demo.restb.ai/images/demo/demo-6.jpg']
    queue = mp.Queue()
    image_id = 1
    for url in urls:
        for model in __MODELS.keys():
            queue.put(dict(id=image_id, url=url, model=model))
        image_id += 1
    results = mp.Queue()

    # 2. Pick which API endpoint to use (US vs. EU)
    url = __URL_US

    # 3. Define concurrency specific objects
    # stats objects
    lock_stats = mp.Lock()
    counter = mp.Value('i', 0)
    avg_req_time = mp.Value('f', 0)
    time_start = mp.Value('f', 999999999999999)
    time_end = mp.Value('f', 0)

    # 4. Spawn processes/threads to process the images in the queue
    pool = []
    for i in range(__requests_per_second):
        # pass in necessary parameters to thread, including client key, etc.
        p = mp.Process(target=image_process_thread,
                       args=(url, client_key, queue, results,
                             lock_stats, counter, avg_req_time, time_start, time_end))
        pool.append(p)
        p.start()

    # 5. clean-up after queue has been processed with "poison pill"
    while not queue.empty():
        # wait for queue to be processed
        time.sleep(1)
    for i in pool:
        # seed shutdown messages / poison pills
        queue.put(dict(id=-1, url='shutdown', model='shutdown'))
    for p in pool:
        # enforce clean shutdown of threads
        p.join()

    # 6. finally, return accumulated results
    total = time_end.value - time_start.value
    print('[{requests}] requests processed in [{seconds}] seconds with average time [{time}] ms, total throughput: [{throughput}] rps'.format(
        requests=counter.value,
        seconds=str(round(total / 1000.0, 1)),
        time=str(round(avg_req_time.value / counter.value, 0)),
        throughput=str(round(counter.value / (total / 1000.0), 2))
    ))
    return results
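
Example #1 assumes a module-level image_process_thread worker that drains the shared queue until it receives the shutdown sentinel and updates the shared statistics under the lock. A minimal sketch of such a worker, assuming the requests library; the HTTP call and payload below are purely illustrative and are not the actual restb.ai request format:

import time

import requests


def image_process_thread(api_url, client_key, queue, results, lock_stats,
                         counter, avg_req_time, time_start, time_end):
    """Hypothetical worker matching the arguments passed in Example #1."""
    while True:
        task = queue.get(block=True)
        if task['url'] == 'shutdown':
            # poison pill seeded by the parent process
            break

        t0 = time.time() * 1000.0
        # illustrative request only; the real endpoint parameters differ
        response = requests.get(api_url, params=dict(client_key=client_key,
                                                     image_url=task['url'],
                                                     model=task['model']))
        t1 = time.time() * 1000.0

        results.put(dict(id=task['id'], model=task['model'],
                         response=response.json()))

        # update the shared counters under the lock
        with lock_stats:
            counter.value += 1
            avg_req_time.value += t1 - t0  # summed here, divided by count in the caller
            time_start.value = min(time_start.value, t0)
            time_end.value = max(time_end.value, t1)
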
Example #2
def main(argv):
    parse_args(argv)
    init_config()
    get_password()
    authenticate()

    q = multiprocessing.Queue()
    # if the input path is a single json file, queue just that file;
    # otherwise queue every file in the directory
    if os.path.isfile(config['inputpath']):
        q.put(config['inputpath'])
        config['num_threads'] = 1
    else:
        for f in os.listdir('.'):
            q.put(f)

    threads = []

    for i in range(config['num_threads']):
        t = multiprocessing.Process(target=upload_dispatcher, args=(q, ))
        t.daemon = True  # multiprocessing.Process has no setDaemon(); set the attribute
        threads.append(t)
        t.start()
        q.put(None)

    for t in threads:
        t.join()
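
The upload_dispatcher target in Example #2 is not shown; given how the queue is filled, it presumably pulls file names until it hits the None sentinel. A hypothetical sketch, with the upload step reduced to a placeholder:

def upload_dispatcher(q):
    """Hypothetical worker for Example #2: drain the filename queue."""
    while True:
        path = q.get()
        if path is None:
            # one None per worker acts as the shutdown signal
            break
        # placeholder for the real upload logic (e.g. POSTing the JSON file)
        print('uploading {0}'.format(path))
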
Example #3
    def __init__(self):
        self.cellDict = {}
        self.queue = multiprocessing.Queue()
        logger.info('start process for parse Msg')
        self.process = multiprocessing.Process(target=parseMsg,
                                               args=(self, self.queue, logger))
        self.process.start()
Example #4
    def __init__(self, cases=None):
        """

        :param cases: list of cases
        """
        super(Runner, self).__init__()
        self.__cases = cases
        self.__tasks = multithreading.JoinableQueue()
        self.__results = multithreading.Queue()
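
Example #4 only constructs the task/result queue pair (multithreading here is presumably that project's alias for multiprocessing). A JoinableQueue consumer normally pairs every get() with task_done() so that a producer can join() the queue; a generic sketch, not taken from the original Runner class:

import multiprocessing as mp


def case_worker(tasks, results):
    # generic JoinableQueue consumer: acknowledge every item with task_done()
    while True:
        case = tasks.get()
        try:
            if case is None:  # sentinel: stop this worker
                break
            results.put(('ok', case))  # placeholder for actually running the case
        finally:
            tasks.task_done()


if __name__ == '__main__':
    tasks = mp.JoinableQueue()
    results = mp.Queue()
    worker = mp.Process(target=case_worker, args=(tasks, results))
    worker.start()
    for case in ['case_a', 'case_b']:
        tasks.put(case)
    tasks.put(None)
    tasks.join()  # returns once every put() has been matched by task_done()
    while not results.empty():
        print(results.get())
    worker.join()
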
Example #5
    def __init__(self,
                 seeds=2,
                 token_file_name="tokens.csv",
                 seed_list=None,
                 following_pages_limit=0):

        # Get seeds from seeds.csv
        self.seed_pool = FileImport().read_seed_file()

        # Create seed_list if none is given by sampling from the seed_pool
        if seed_list is None:

            self.number_of_seeds = seeds
            try:
                self.seeds = self.seed_pool.sample(n=self.number_of_seeds)
            except ValueError:  # seed pool too small
                stderr.write(
                    "WARNING: Seed pool smaller than number of seeds.\n")
                self.seeds = self.seed_pool.sample(n=self.number_of_seeds,
                                                   replace=True)

            self.seeds = self.seeds[0].values
        else:
            self.number_of_seeds = len(seed_list)
            self.seeds = seed_list

        self.seed_queue = mp.Queue()

        for seed in self.seeds:
            self.seed_queue.put(seed)

        # Get authorized user tokens for app from tokens.csv
        self.tokens = FileImport().read_token_file(token_file_name)

        # and put them in a queue
        self.token_queue = mp.Queue()

        for token, secret in self.tokens.values:
            self.token_queue.put((token, secret, {}, {}))

        # Initialize DataBaseHandler for DB communication
        self.dbh = DataBaseHandler()
        self.following_pages_limit = following_pages_limit
Example #6
def main():

    db = sys.argv[1]
    conn = sqlite3.connect(db, detect_types=sqlite3.PARSE_DECLTYPES)
    cursor = conn.cursor()
    cursor.execute("SELECT MIN(date) FROM input")
    for mindate, in cursor.fetchall():
        mindate = datetime.strptime(mindate, "%Y-%m-%d %H:%M:%S")

    cursor.execute("SELECT MAX(date) FROM input")
    for maxdate, in cursor.fetchall():
        maxdate = datetime.strptime(maxdate, "%Y-%m-%d %H:%M:%S")

    days = (maxdate - mindate).days
    t_queue = []
    for i in range(days + 2):
        t_queue.append((i, mindate + timedelta(i), mindate + timedelta(i + 1)))

    cursor.execute("DELETE FROM temporary_label_clusters")
    result_queue = mp.Queue()
    pool = mp.Pool(1)
    robj = pool.imap_unordered(lambda x: process(x, db, result_queue), t_queue)

    output_file = open('output.tmp', 'w')

    # TODO: write result processing
    max_cluster_id = 0
    for x in robj:
        tmp_cluster_id = 0
        for line in x.split("\n"):
            line = line.strip()
            if len(line) == 0:
                continue
            identifier, _, cluster = line.partition(' ')
            identifier = int(identifier)
            cluster = int(cluster)
            if cluster > tmp_cluster_id:
                tmp_cluster_id = cluster
            if cluster == 0:
                continue
            cluster += max_cluster_id
            print >> output_file, identifier, cluster

        max_cluster_id += tmp_cluster_id
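
A note on the Pool call in Example #6: a standard multiprocessing.Pool pickles the callable it sends to workers, so the lambda (and the bare mp.Queue it closes over) cannot be shipped that way and the run would normally fail with a pickling error. A picklable variant in Python 3 syntax, with a stand-in for the module-level process() used by the example, might look like this sketch:

import functools
import multiprocessing as mp


def process(task, db, result_queue):
    # stand-in for the module-level process() used in Example #6
    result_queue.put(task[0])
    return ""


def run_pool(db, t_queue):
    # a Manager-backed queue is a picklable proxy, unlike a bare mp.Queue
    manager = mp.Manager()
    result_queue = manager.Queue()
    # functools.partial of a module-level function is picklable and replaces the lambda
    work = functools.partial(process, db=db, result_queue=result_queue)
    with mp.Pool(1) as pool:
        return list(pool.imap_unordered(work, t_queue))
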
Example #7
def get_dbs():
    q = multiprocessing.Queue()
    unique = set()

    # if a checkpoint exists, run _db_updates with since parameter, otherwise just run _db_updates
    if config['incremental'] and os.path.exists(config['checkpointpath']):
        f = open(config['checkpointpath'], 'r')
        checkpoint_seq = f.read()
        seq_obj = requests.get('{0}_db_updates?since={1}'.format(
            config['baseurl'], checkpoint_seq),
                               headers=config['authheader']).json()
        # write the latest checkpoint
        write_checkpoint(seq_obj)

        for db_object in seq_obj['results']:
            db = db_object['dbname']
            if db not in ['_replicator', 'metrics', 'dbs'
                          ] and db not in unique:
                unique.add(db)
                q.put(db)

    else:
        # we need to get the latest sequence number to prepare for future incrementals
        r = requests.get('{0}_db_updates?limit=0&descending=true'.format(
            config['baseurl']),
                         headers=config['authheader'])
        if r.status_code != 200:
            print 'Warning:  Failed to retrieve a sequence number.  Please ensure the global changes feed is enabled.'
        else:
            # write the latest checkpoint
            write_checkpoint(r.json())

        # now we grab all the databases in the account.
        dbs = requests.get('{0}_all_dbs'.format(config['baseurl']),
                           headers=config['authheader']).json()
        for db in dbs:
            if db not in ['_replicator', 'metrics', 'dbs']:
                q.put(db)

    # return the queue of databases
    return q
Example #8
    def __init__(self, token_file_name="tokens.csv", token_queue=None):
        self.credentials = FileImport().read_app_key_file()

        self.ctoken = self.credentials[0]
        self.csecret = self.credentials[1]

        if token_queue is None:
            self.tokens = FileImport().read_token_file(token_file_name)

            self.token_queue = mp.Queue()

            for token, secret in self.tokens.values:
                self.token_queue.put((token, secret, {}, {}))
        else:
            self.token_queue = token_queue

        self.token, self.secret, self.reset_time_dict, self.calls_dict = \
            self.token_queue.get()
        self.auth = tweepy.OAuthHandler(self.ctoken, self.csecret)
        self.auth.set_access_token(self.token, self.secret)
        self.api = tweepy.API(self.auth,
                              wait_on_rate_limit=False,
                              wait_on_rate_limit_notify=False)
Example #9
def main():
    print 'Starting bulk loading program...'
    print 'List of test runs: {0}'.format(runs)
    print 'Output file: {0}'.format(output_file)

    with open(output_file, 'w') as title:  # start a fresh output file with the header row
        title.write('Time,Threads,Request Size,Requests/Second,Docs Written')

    for run in runs:
        body_size = run['body_size']
        trials = run['trials']

        for trial in range(trials):
            # read the external json file
            print 'Reading {0} for test document to post...'.format(filename)
            with open(filename) as fh:
                doc = json.load(fh)
            time.sleep(1)

            # build the request body we want to send
            print 'Building a {0} document request body...'.format(body_size)
            request_body = {'docs': []}
            for i in range(body_size):
                request_body['docs'].append(doc)
            time.sleep(1)

            # delete the database if it exists and then recreate it
            print 'Re-initializing the testing database...'
            r = requests.get(base_url + '_all_dbs',
                             auth=(user, pwd),
                             verify=False).json()
            if db in r:
                requests.delete(base_url + db, auth=(user, pwd), verify=False)
            requests.put(base_url + db, auth=(user, pwd), verify=False)

            # insert ddocs if needed
            if ddoc_filename != '':
                print 'Posting ddocs to the database...'
                with open(ddoc_filename) as df:
                    ddocs = json.load(df)
                    requests.post(bulk_url,
                                  data=json.dumps(ddocs),
                                  headers={'content-type': 'application/json'},
                                  auth=(user, pwd),
                                  verify=False)
            time.sleep(1)

            # start to create threads
            print 'Initializing {0} threads...'.format(num_threads)
            threads = []
            q = multiprocessing.Queue()
            for j in range(num_threads):
                t = multiprocessing.Process(target=execute,
                                            args=(q, request_body))
                threads.append(t)

            print '{0} threads created...'.format(num_threads)

            # start the threads
            for thread in threads:
                thread.start()

            print 'All threads started...'

            # wait until they are done
            for k in threads:
                k.join()

            # sum total docs written from the queue
            total_requests = 0
            while not q.empty():
                total_requests += q.get()

            # compute metrics
            requests_per_second = float(total_requests) / interval
            docs_written = total_requests * body_size

            with open(output_file, 'a') as oh:
                oh.write('\n{0},{1},{2},{3},{4}'.format(
                    interval, num_threads, body_size, requests_per_second,
                    docs_written))

            print 'Trial complete...'
            time.sleep(120)

        print 'TEST RUN COMPLETE'
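
Example #9 leans on module-level settings (base_url, bulk_url, db, user, pwd, interval, num_threads, runs, filename, ddoc_filename, output_file) and an execute target that is not shown; judging from how the queue totals are summed and divided by interval, each worker reports how many bulk requests it completed within the test window. A hypothetical sketch of such a worker:

import json
import time

import requests


def execute(q, request_body):
    """Hypothetical Example #9 worker: POST bulk requests for `interval` seconds."""
    # bulk_url, user, pwd and interval are module-level settings in the original script
    completed = 0
    deadline = time.time() + interval
    while time.time() < deadline:
        r = requests.post(bulk_url,
                          data=json.dumps(request_body),
                          headers={'content-type': 'application/json'},
                          auth=(user, pwd),
                          verify=False)
        if r.status_code < 300:
            completed += 1
    # report this worker's request count; the parent sums these from the queue
    q.put(completed)
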
Example #10
            for page in range(1, 50 + 1):
                pool.apply_async(self._get_urls,
                                 args=(page, city),
                                 callback=self._gu_callback)
        pool.close()
        pool.join()

        pool = mpd.Pool()
        for url in self.urls:
            pool.apply_async(self._get_detail, (url, ),
                             callback=self._gd_callback)
        pool.close()
        pool.join()


SPIPQ = mpd.Queue()
SPIQ = mpd.Queue()


def run() -> None:
    """Run a spider."""
    carspi = CarSpi()
    carspi.run()
    datas = list(carspi.details)
    SPIQ.put(datas)


def w_data() -> int:
    """Write datas."""
    while True:
        try:
Example #11
def main(subreddits):
    """
    Get comment stream for subreddits and process them. Will continue
    until interrupted.

    Parameters
    ----------
    subreddits : list[str]
        The subreddits to process comments from

    """

    logger = resources.LOGGER

    reddit = resources.load_reddit()
    username = reddit.user.me().name
    comments, submissions = get_streams(subreddits)

    main_queue = mp.Queue(1024)
    stop_event = mp.Event()  # for stopping workers

    # save the reply dict when the script exits
    atexit.register(lambda: logger.info("Exited nbviewerbot"))

    # create workers to add praw objects to the queue
    workers = []
    comments_worker = mp.DummyProcess(
        name="CommentWorker",
        target=utils.load_queue,
        args=(main_queue, comments, stop_event),
    )
    workers.append(comments_worker)

    submissions_worker = mp.DummyProcess(
        name="SubmissionWorker",
        target=utils.load_queue,
        args=(main_queue, submissions, stop_event),
    )
    workers.append(submissions_worker)

    # make sure workers end on main thread end
    atexit.register(lambda e: e.set(), stop_event)

    # let's get it started in here
    [w.start() for w in workers]
    logger.info("Started nbviewerbot, listening for new comments...")

    while not stop_event.is_set():
        try:
            praw_obj = main_queue.get(timeout=1)
            process_praw_object(praw_obj, username)
        except queue.Empty:
            pass  # no problems, just nothing in the queue
        except KeyboardInterrupt:
            stop_event.set()
            logger.warn("Stopping nbviewerbot...")
        except:
            stop_event.set()
            logger.exception(
                "Uncaught exception on object, skipping. Details:")
            raise

        if not all([w.is_alive() for w in workers]):
            stop_event.set()
            raise InterruptedError("Praw worker died unexpectedly")
Example #12
def ParallelIterator(keras_sequence,
                     epochs,
                     shuffle,
                     use_on_epoch_end,
                     workers=4,
                     queue_size=10):
    sourceQueue = mp.Queue()  # queue for getting batch indices
    batchQueue = mp.Queue(
        maxsize=queue_size)  # queue for getting actual batches
    indices = np.arange(len(keras_sequence))  # array of indices to be shuffled

    use_on_epoch_end = 'on_epoch_end' in dir(
        keras_sequence) if use_on_epoch_end == True else False
    batchesLeft = 0

    #     printQueue = mp.Queue()                   # queue for printing messages
    #     import threading
    #     screenLock = threading.Semaphore(value=1)
    #     totalWorkers= 0

    #     def printer():
    #         nonlocal printQueue, printing
    #         while printing:
    #             while not printQueue.empty():
    #                 text = printQueue.get(block=True)
    #                 screenLock.acquire()
    #                 print(text)
    #                 screenLock.release()

    # fills the batch indices queue (called when sourceQueue is empty -> a few batches before an epoch ends)
    def fillSource():
        nonlocal batchesLeft

        #         printQueue.put("Iterator: fill source - source qsize = " + str(sourceQueue.qsize()))
        if shuffle == True:
            np.random.shuffle(indices)

        # puts the indices in the indices queue
        batchesLeft += len(indices)
        #         printQueue.put("Iterator: batches left:" + str(batchesLeft))
        for i in indices:
            sourceQueue.put(i)

    # function that will load batches from the Keras Sequence
    def worker():
        nonlocal sourceQueue, batchQueue, keras_sequence, batchesLeft
        #         nonlocal printQueue, totalWorkers
        #         totalWorkers += 1
        #         thisWorker = totalWorkers

        while True:
            #             printQueue.put('Worker: ' + str(thisWorker) + ' will try to get item')
            index = sourceQueue.get(block=True)  # get index from the queue
            #             printQueue.put('Worker: ' + str(thisWorker) + ' got item ' +  str(index) + " - source q size = " + str(sourceQueue.qsize()))

            if index is None:
                break

            item = keras_sequence[index]  # get batch from the sequence
            batchesLeft -= 1
            #             printQueue.put('Worker: ' + str(thisWorker) + ' batches left ' + str(batchesLeft))

            batchQueue.put((index, item),
                           block=True)  # puts batch in the batch queue

    #             printQueue.put('Worker: ' + str(thisWorker) + ' added item ' + str(index) + ' - queue: ' + str(batchQueue.qsize()))

    #         printQueue.put("hitting end of worker" + str(thisWorker))

    #       #printing pool that will print messages from the print queue
    #     printing = True
    #     printPool = mp.Pool(1, printer)

    # creates the thread pool that will work automatically as we get from the batch queue
    pool = mp.Pool(workers, worker)
    fillSource(
    )  # at this point, data starts being taken and stored in the batchQueue

    # generation loop
    for epoch in range(epochs):

        # if not waiting for epoch end synchronization, always keeps 1 epoch filled ahead
        if (use_on_epoch_end == False):
            if epoch + 1 < epochs:  # only fill if not last epoch
                fillSource()

        for batch in range(len(keras_sequence)):

            # if waiting for epoch end synchronization, wait for workers to have no batches left to get, then call epoch end and fill
            if use_on_epoch_end == True:
                if batchesLeft == 0:
                    keras_sequence.on_epoch_end()
                    if epoch + 1 < epochs:  # only fill if not last epoch
                        fillSource()
                    else:
                        batchesLeft = -1  # in the last epoch, prevents from calling epoch end again and again

            # yields batches for the outside loop that is using this generator
            originalIndex, batchItems = batchQueue.get(block=True)
            yield epoch, batch, originalIndex, batchItems

    #         print("iterator epoch end")
    #     printQueue.put("closing threads")

    # terminating the pool - add None to the queue so any blocked worker gets released
    for i in range(workers):
        sourceQueue.put(None)
    pool.terminate()
    pool.close()
    pool.join()
    #     printQueue.put("terminated")

    #     printing = False
    #     printPool.terminate()
    #     printPool.close()
    #     printPool.join()

    del pool, sourceQueue, batchQueue
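
A short usage sketch for the generator above, using a dummy stand-in for a Keras Sequence; it assumes ParallelIterator and its module-level imports (mp, np) are available, and is illustrative only:

class DummySequence:
    """Minimal Sequence-like object: 5 batches of shape (2, 3)."""

    def __init__(self):
        self.data = np.random.rand(10, 3)

    def __len__(self):
        return 5

    def __getitem__(self, idx):
        return self.data[idx * 2:(idx + 1) * 2]

    def on_epoch_end(self):
        pass


seq = DummySequence()
for epoch, batch, original_index, items in ParallelIterator(
        seq, epochs=2, shuffle=True, use_on_epoch_end=True,
        workers=2, queue_size=5):
    print(epoch, batch, original_index, items.shape)
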
Example #13
def main(args):
    cap = cv2.VideoCapture(args.INPUT)

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    reported_fps = int(cap.get(cv2.CAP_PROP_FPS))
    reported_bitrate = int(cap.get(cv2.CAP_PROP_BITRATE))

    print("Total frames: {0}".format(total_frames))
    print("Reported FPS: {0}".format(reported_fps))
    print("Reported Bitrate: {0}kbps".format(reported_bitrate))

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (width, height)

    original_path = None
    if args.OUTPUT:
        original_path = Path(args.OUTPUT)
    else:
        original_path = Path(args.INPUT)
    original_name = original_path.stem
    original_parent = original_path.parent

    # Video with duplicated frames removed
    result_queue = None
    result_proc = None
    if args.SAVE in (1, 3):
        result_name = Path(original_parent).joinpath(original_name +
                                                     "_result.avi")
        result_name.unlink(missing_ok=True)
        result_queue = mp.Queue()
        result_proc = mp.Process(target=writer_create,
                                 args=(
                                     str(result_name),
                                     reported_fps,
                                     size,
                                     result_queue,
                                 ))
        result_proc.start()

    # Video with difference blend mode between original and result video
    diff_queue = None
    diff_proc = None
    if args.SAVE in (2, 3):
        diff_name = Path(original_parent).joinpath(original_name + "_diff.avi")
        diff_name.unlink(missing_ok=True)
        diff_queue = mp.Queue()
        diff_proc = mp.Process(target=writer_create,
                               args=(
                                   str(diff_name),
                                   reported_fps,
                                   size,
                                   diff_queue,
                               ))
        diff_proc.start()

    frames = []
    frame_number = -1
    prev_frame = None
    # with tqdm(total=total_frames, unit="frames") as prog_bar:
    time_start = time.time()
    prog_bar = tqdm(total=total_frames, unit="frames", leave=True)
    while (cap.isOpened()):
        frame_number += 1
        prog_bar.set_description(
            "Processing frame number {}".format(frame_number))
        prog_bar.update(1)

        ret, frame = cap.read()

        if frame_number == 0:
            prev_frame = frame.copy()
            continue

        try:
            # frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
            # frame_diff = cv2.absdiff(frame_gray, prev_frame_gray)
            frame_diff = cv2.absdiff(frame, prev_frame)
            if diff_queue is not None:
                # diff_writer.write(frame_diff)
                diff_queue.put(frame_diff)
            mean = frame_diff.mean()

            if mean > args.THRESHOLD:
                frames.append(True)
                if result_queue is not None:
                    # result_writer.write(frame)
                    result_queue.put(frame)
            else:
                frames.append(False)

            prev_frame = frame.copy()
        except KeyboardInterrupt:
            exit(1)
        except Exception as e:
            # print("\r\n{0}".format(e))
            if frame_number > total_frames:
                break
            else:
                continue

        if frame_number > total_frames:
            break

    time_stop = time.time()
    n = prog_bar.n
    prog_bar.close()
    msg = "Calculations "

    cap.release()
    if result_queue is not None:
        print("Finishing writing to result video file.")
        result_queue.put("STOP")
        result_proc.join()
        msg += "and writing to the result file "
    if diff_queue is not None:
        print("Finishing writing to difference video file.")
        diff_queue.put("STOP")
        diff_proc.join()
        msg += "and writing to the difference file "
    cv2.destroyAllWindows()

    time_total = time_stop - time_start
    app_fps = n / time_total
    msg += "took {0} seconds to complete.\n"
    msg += "Average frames calculated per second is {1}."
    print(msg.format(time_total, app_fps))

    res = [i for i in frames if i]

    times = dict()

    base = float((1 / reported_fps) * 1000)
    for i in range(len(res)):
        if i == 0:
            times[i] = base
        else:
            times[i] = base + (base * (res[i] - res[i - 1]))

    data = dict()
    data["frame number"] = []
    data["frametime"] = []
    data["framerate"] = []
    for k, v in times.items():
        data["frame number"].append(k)
        data["frametime"].append(v)
        if v == 0:
            data["framerate"].append("INF")
        else:
            data["framerate"].append(1 / (v / 1000))

    df = pd.DataFrame.from_dict(data)

    frametime_stats = pd.DataFrame(df, columns=["frametime"])
    framerate_stats = pd.DataFrame(df, columns=["framerate"])

    stats_basic = dict()
    stats_frametime_dict = dict()
    stats_framerate_dict = dict()

    stats_basic["Number of Unique Frames"] = [int(sum(frames))]
    stats_basic["Number of Duplicated Frames"] = [
        int(len(frames) - sum(frames))
    ]
    if len(frames) == 0:
        stats_basic["Percentage of Unique Frames"] = ["0 %"]
        stats_basic["Percentage of Duplicated Frames"] = ["0 %"]
    else:
        stats_basic["Percentage of Unique Frames"] = [
            "{} %".format(sum(frames) / len(frames) * 100)
        ]
        stats_basic["Percentage of Duplicated Frames"] = [
            "{} %".format(stats_basic["Number of Duplicated Frames"][0] /
                          len(frames) * 100)
        ]

    stats_frametime_dict["Lowest"] = dict(frametime_stats.min(axis=0))
    stats_frametime_dict["Highest"] = dict(frametime_stats.max(axis=0))
    stats_frametime_dict["Mean"] = dict(frametime_stats.mean(axis=0))
    stats_frametime_dict["Median"] = dict(frametime_stats.median(axis=0))
    stats_frametime_dict["0.1 Percent Lows"] = dict(
        frametime_stats.quantile(q=0.001, axis=0))
    stats_frametime_dict["1 Percent Lows"] = dict(
        frametime_stats.quantile(q=0.01, axis=0))
    stats_frametime_dict["99 Percent Lows"] = dict(
        frametime_stats.quantile(q=0.99, axis=0))
    stats_frametime_dict["99.9 Percent Lows"] = dict(
        frametime_stats.quantile(q=0.999, axis=0))

    stats_framerate_dict["Lowest"] = dict(framerate_stats.min(axis=0))
    stats_framerate_dict["Highest"] = dict(framerate_stats.max(axis=0))
    stats_framerate_dict["Mean"] = dict(framerate_stats.mean(axis=0))
    stats_framerate_dict["Median"] = dict(framerate_stats.median(axis=0))
    stats_framerate_dict["0.1 Percent Lows"] = dict(
        framerate_stats.quantile(q=0.001, axis=0))
    stats_framerate_dict["1 Percent Lows"] = dict(
        framerate_stats.quantile(q=0.01, axis=0))
    stats_framerate_dict["99 Percent Lows"] = dict(
        framerate_stats.quantile(q=0.99, axis=0))
    stats_framerate_dict["99.9 Percent Lows"] = dict(
        framerate_stats.quantile(q=0.999, axis=0))

    stats_basic_df = pd.DataFrame.from_dict(stats_basic)
    stats_frametime_df = pd.DataFrame.from_dict(stats_frametime_dict)
    stats_framerate_df = pd.DataFrame.from_dict(stats_framerate_dict)

    stats_joined = pd.concat([stats_frametime_df, stats_framerate_df], axis=0)

    print("\nStatistics")
    print(stats_basic_df.transpose().to_string(header=False))
    print("\n", stats_joined.transpose().to_string())

    csv_name = Path(original_parent).joinpath(original_name + "_report.csv")
    csv_name.unlink(missing_ok=True)
    df.to_csv(csv_name, index=False)
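
The writer_create target in Example #13 is not shown; given its arguments and the "STOP" sentinel, it presumably wraps cv2.VideoWriter and drains its queue in a separate process. A hypothetical sketch (the MJPG codec is an assumption, chosen to match the .avi file names):

import cv2


def writer_create(filename, fps, size, frame_queue):
    """Hypothetical Example #13 writer process: consume frames until "STOP"."""
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")  # codec choice is an assumption
    writer = cv2.VideoWriter(filename, fourcc, fps, size)
    while True:
        frame = frame_queue.get()
        if isinstance(frame, str) and frame == "STOP":
            break
        writer.write(frame)
    writer.release()
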
Example #14
    def next(self, nsolutions=None):

        Log('=' * 80)
        Log('Simplex Random Walk')
        Log('=' * 80)

        Log("    %i equations" % len(self.eq_list))

        Log("%6s %6s %6s\n%6i %6i %6i" %
            (">=", "<=", "=", self.geq_count, self.leq_count, self.eq_count))

        if nsolutions == 0: return

        assert nsolutions is not None

        dim = self.nVars
        dof = dim - self.eq_count

        burnin_len = max(10, int(self.burnin_factor * dof))
        redo = max(100, int((dof**self.redo_exp) * self.redo_factor))

        nmodels = nsolutions
        nthreads = self.nthreads

        self.stride = int(dim + 1)

        n_stored = 0
        self.dim = dim
        self.dof = dof
        self.redo = redo

        self.burnin_len = burnin_len

        accept_rate = self.accept_rate
        accept_rate_tol = self.accept_rate_tol

        store = np.zeros((dim, 1 + burnin_len),
                         order='Fortran',
                         dtype=np.float64)
        newp = np.zeros(dim, order='C', dtype=np.float64)
        eval = np.zeros(dim, order='C', dtype=np.float64)
        evec = np.zeros((dim, dim), order='F', dtype=np.float64)

        self.eqs = np.zeros((self.eqn_count + dim, dim + 1),
                            order='C',
                            dtype=np.float64)
        for i, [c, e] in enumerate(self.eq_list):
            self.eqs[i, :] = e
        for i in xrange(dim):
            self.eqs[self.eqn_count + i, 1 + i] = 1

        self.dist_eqs = np.zeros((self.eqn_count - self.eq_count, dim + 1),
                                 order='C',
                                 dtype=np.float64)
        i = 0
        for c, e in self.eq_list:
            if c == 'eq':
                continue
            elif c == 'leq':
                p = e
            elif c == 'geq':
                p = -e
            self.dist_eqs[i, :] = p
            i += 1

        Log('Using lpsolve %s' % lpsolve('lp_solve_version'))
        Log("random seed = %s" % self.random_seed)
        Log("threads = %s" % self.nthreads)
        Log("acceptence rate = %s" % self.accept_rate)
        Log("acceptence rate tolerance = %s" % self.accept_rate_tol)
        Log("dof = %s" % self.dof)
        Log("sample distance = max(100,%s * %s^%s) = %s" %
            (self.redo_factor, self.dof, self.redo_exp, redo))
        Log("starting twiddle = %s" % self.twiddle)
        Log("burn-in length = %s" % burnin_len)

        time_begin_next = time.clock()

        #-----------------------------------------------------------------------
        # Create pseudo inverse matrix to reproject samples back into the
        # solution space.
        #-----------------------------------------------------------------------
        P = np.eye(dim)
        if self.eq_count > 0:
            self.A = np.zeros((self.eq_count, dim),
                              order='C',
                              dtype=np.float64)
            self.b = np.zeros(self.eq_count, order='C', dtype=np.float64)
            for i, [c, e] in enumerate(self.eq_list[:self.eq_count]):
                self.A[i] = e[1:]
                self.b[i] = e[0]
            self.Apinv = pinv(self.A)
            P -= np.dot(self.Apinv, self.A)
        else:
            self.A = None
            self.b = None
            self.Apinv = None

        ev, evec = eigh(P)
        #-----------------------------------------------------------------------

        #-----------------------------------------------------------------------
        # Find a point that is completely inside the simplex
        #-----------------------------------------------------------------------
        Log('Finding first inner point')
        time_begin_inner_point = time.clock()
        self.inner_point(newp)
        time_end_inner_point = time.clock()
        ok, fail_count = self.in_simplex(newp, eq_tol=1e-12, tol=0, verbose=1)
        assert ok

        self.avg0 = newp

        #       eqs  = self.eqs.copy('A')
        #       eqs[:,1:] = np.dot(self.eqs[:,1:], evec)

        #       print newp

        #       S = zeros(self.eqs.shape[0])
        #       newp[:] = np.dot(evec.T, newp)
        #       newp0 = newp.copy()
        #       steps = newp.copy()
        #       for q in range(100):
        #           csamplex.refine_center(self, eqs, newp, ev, S, steps)
        #           d = newp - newp0
        #           #print d
        #           print norm(d)
        #           #print
        #           newp0 = newp.copy()

        #       #assert 0
        #       newp[:] = np.dot(evec, newp)

        store[:, 0] = newp
        n_stored = 1

        #-----------------------------------------------------------------------
        # Estimate the eigenvectors of the simplex
        #-----------------------------------------------------------------------
        Log('Estimating eigenvectors')
        time_begin_est_eigenvectors = time.clock()
        self.measured_ev(newp, ev, eval, evec)
        time_end_est_eigenvectors = time.clock()

        #-----------------------------------------------------------------------
        # Now we can start the random walk
        #-----------------------------------------------------------------------

        Log("Getting solutions")

        q = MP.Queue()

        ran_set_seed(self.random_seed)
        seeds = np.random.choice(1000000 * nthreads, nthreads, replace=False)

        #-----------------------------------------------------------------------
        # Launch the threads
        #-----------------------------------------------------------------------
        threads = []
        models_per_thread = nmodels // nthreads
        models_under = nmodels - nthreads * models_per_thread
        id, N = 0, 0
        while id < nthreads and N < nmodels:
            n = models_per_thread
            if id < models_under:
                n += 1
            assert n > 0
            Log('Thread %i gets %i' % (id, n))
            cmdq = MP.Queue()
            ackq = MP.Queue()
            thr = MP.Process(target=rwalk_burnin,
                             args=(id, n, int(np.ceil(burnin_len / nthreads)),
                                   self, q, cmdq, ackq, newp, self.twiddle,
                                   eval.copy('A'), evec.copy('A'), seeds[id]))
            threads.append([thr, cmdq, ackq])
            N += n
            id += 1

        assert N == nmodels

        for thr, cmdq, _ in threads:
            thr.daemon = True
            thr.start()
            cmdq.put(['CONT'])

        def drainq(q):
            try:
                while True:
                    q.get(block=False)
            except QueueEmpty:
                pass

        def pause_threads(threads):
            for _, cmdq, ackq in threads:
                cmdq.put(['WAIT'])
                assert ackq.get() == 'OK'

        def adjust_threads(i, cont_cmd):
            pause_threads(threads)
            drainq(q)
            Log('Computing eigenvalues... [%i/%i]' % (i, burnin_len))
            self.compute_eval_evec(store, eval, evec, n_stored)

            # new twiddle <-- average twiddle
            t = 0
            for _, cmdq, ackq in threads:
                cmdq.put(['REQ TWIDDLE'])
                t += ackq.get()
            t /= len(threads)

            Log('New twiddle %f' % t)
            for _, cmdq, _ in threads:
                cmdq.put(['NEW DATA', [eval.copy('A'), evec.copy('A'), t]])
                cmdq.put([cont_cmd])

        #-----------------------------------------------------------------------
        # Burn-in
        #-----------------------------------------------------------------------
        time_begin_burnin = time.clock()
        compute_eval_window = 2 * self.dof
        j = 0
        for i in xrange(burnin_len):
            j += 1
            k, vec = q.get()

            store[:, n_stored] = vec
            n_stored += 1

            if j == compute_eval_window:
                j = 0
                adjust_threads(i + 1, 'CONT')
                compute_eval_window = int(0.1 * burnin_len + 1)
            elif len(threads) < compute_eval_window:
                threads[k][1].put(['CONT'])
        time_end_burnin = time.clock()

        #-----------------------------------------------------------------------
        # Actual random walk
        #-----------------------------------------------------------------------
        time_begin_get_models = time.clock()
        adjust_threads(burnin_len, 'RWALK')
        i = 0
        while i < nmodels:
            k, vec = q.get()
            t = np.zeros(dim + 1, order='Fortran', dtype=np.float64)
            t[1:] = vec
            i += 1
            Log('%i models left to generate' % (nmodels - i),
                overwritable=True)
            yield t

        time_end_get_models = time.clock()

        #-----------------------------------------------------------------------
        # Stop the threads and get their running times.
        #-----------------------------------------------------------------------
        time_threads = []
        for thr, cmdq, ackq in threads:
            cmdq.put(['STOP'])
            m, t = ackq.get()
            assert m == 'TIME'
            time_threads.append(t)
            #thr.terminate()

        time_end_next = time.clock()

        max_time_threads = np.amax(time_threads) if time_threads else 0
        avg_time_threads = np.mean(time_threads) if time_threads else 0

        Log('-' * 80)
        Log('SAMPLEX TIMINGS')
        Log('-' * 80)
        Log('Initial inner point    %.2fs' %
            (time_end_inner_point - time_begin_inner_point))
        Log('Estimate eigenvectors  %.2fs' %
            (time_end_est_eigenvectors - time_begin_est_eigenvectors))
        Log('Burn-in                %.2fs' %
            (time_end_burnin - time_begin_burnin))
        Log('Modeling               %.2fs' %
            (time_end_get_models - time_begin_get_models))
        Log('Max/Avg thread time    %.2fs %.2fs' %
            (max_time_threads, avg_time_threads))
        Log('Total wall-clock time  %.2fs' % (time_end_next - time_begin_next))
        Log('-' * 80)