Example #1
def default(pf, args, start_t):
    """Evaluates candidates, highest signature similarity first.

    Results are written to the file given by args.results (--results
    option).
    """

    print("Getting all candidates...")
    t = time.time()
    candidates = list(pf.candidates())
    print("Done, got {} candidates in {}s".format(len(candidates),
                                                  time.time() - t))

    print("Sorting candidates...")
    t = time.time()
    candidates = sorted(candidates, key=lambda c: -c[1])
    print("Done in {}s".format(time.time() - t))

    csv_file = args.results if 'results' in args else 'results.txt'
    append_results = not args.dont_append_results if 'dont_append_results' in args else True
    extended_results = args.extended if 'extended' in args else False

    csv = CsvWriter(csv_file, append=append_results)

    print("Verifying candidates...")
    found = 0
    found_times = np.array([])
    for i, ((c1, c2), sim) in enumerate(candidates):
        jac_sim = pf.jaccard_similarity(c1, c2)
        elapsed_t = time.time() - start_t

        if jac_sim > .5:
            c1, c2 = min(c1, c2) + 1, max(c1, c2) + 1
            if extended_results:
                csv.write([c1, c2, sim, jac_sim, i, elapsed_t])
            else:
                csv.write([c1, c2])
            found += 1
            found_times = np.append(found_times, time.time() - start_t)
            print("Found {} (at signature similarity {}, after {}s)".format(
                found, sim, elapsed_t),
                  end='\r')

        # stop just short of the 30-minute budget (2 s safety margin)
        if elapsed_t > 1800 - 2:
            print("\nTime's up, stopping.")
            break

        # every 100 candidates, check whether rate is so low we should stop
        if found >= 100 and i % 100 == 0:
            if elapsed_t - found_times[-10] > 60:  # less than 10 per minute
                print("\nRate is slowing down, stopping.")
                break

    print("Finished in {}s.".format(elapsed_t))
    print("Found {} pairs, {} per minute.".format(found,
                                                  found / elapsed_t * 60))

    return True
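
The CsvWriter used in Examples #1, #4, #6, #9 and #11 is project-specific and its definition is not shown. A minimal stand-in consistent with the constructor and the write/write_header calls in those examples might look like this (a sketch under those assumptions, not the project's actual implementation):

import csv

class CsvWriter:
    """Hypothetical stand-in matching the calls seen in the examples."""

    def __init__(self, path, append=False):
        # append=True keeps results from earlier runs
        self._file = open(path, 'a' if append else 'w', newline='')
        self._writer = csv.writer(self._file)

    def write_header(self, columns):
        self.write(columns)

    def write(self, row):
        self._writer.writerow(row)
        self._file.flush()  # results should survive an interrupted run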
Example #2
def escrever_casas(casas, arquivo):
    with open(arquivo, "wb") as f:
        writer = CsvWriter(f)
        writer.writerow([
            "casa: sigla UF", "casa: nome UF", "casa: NOME",
        ])
        for casa in casas:
            writer.writerow([
                casa.municipio.uf.sigla, casa.municipio.uf, casa.nome,
            ])
Example #3
def inner_simulation(rounds, lower, upper, step, debug, logging, k, delta):
    """
    This is the function that manages the different values for which
    we should run the simulation for and also writes results to file
    as they come in
    """

    # Run the simulation for all correlation coefficients supplied
    for d in delta:
        if debug == 0 or debug == 1:
            print(f"\nRunning for ρ={d}")

        # Run the simulation for all preference ordering lengths supplied
        for pref_length in k:
            if debug == 0 or debug == 1:
                print(f"\nRunning for k={pref_length}")

            # Initialize file writer if enabled (default)
            if logging:
                writer = CsvWriter(d, pref_length, rounds, lower, upper)

            # Initialize while loop that runs the simulation for the specified amount of rounds
            current_round = 0
            while rounds == -1 or rounds > current_round:

                if debug != -1:
                    print(
                        f"\nStarting round {current_round + 1} out of {rounds}"
                    )

                # Run the simulation for ρ and k decided by outer loop, for all n
                for n in range(max(lower, pref_length), upper, step):

                    # Create the preferences for all agents
                    male_prefs, female_prefs = get_preferences(
                        n, pref_length, d)

                    # Run deferred acceptance and look for deviations
                    useful_deviators_cnt = count_useful_deviatiors(
                        male_prefs, female_prefs)

                    # Calculate ratio of agents with useful deviations
                    ratio = useful_deviators_cnt / float(n)

                    if debug == 1 or debug == 0:
                        print(f"d={d} k={pref_length}: result n={n}: ", ratio)

                    if logging:
                        writer.write(n, ratio)

                current_round += 1
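
A call consistent with inner_simulation's signature might look as follows; every parameter value here is illustrative, not taken from the project:

# Illustrative invocation; all values are made up for demonstration:
inner_simulation(rounds=10, lower=10, upper=110, step=10, debug=1,
                 logging=True, k=[5, 10], delta=[0.0, 0.5, 1.0])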
Example #4
def get_jaccard_distribution(pf, args, start_t):
    """Samples random pairs and saves their Jaccard similarity to CSV.

    The result is used by jaccard_distribution.R to fit a
    distribution to the data.
    """
    csv = CsvWriter(args.results, append=True)
    csv.write_header(['u1', 'u2', 'jac_sim', 'sig_sim'])

    for i, (u1, u2) in enumerate(
            zip(np.random.permutation(pf.n_docs),
                np.random.permutation(pf.n_docs))):
        if u1 == u2:
            continue  # skip self-pairs
        print("Wrote {} similarities".format(i), end='\r')
        csv.write([u1, u2, pf.jaccard_similarity(u1, u2), pf.sig_sim(u1, u2)])

    return True
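
Zipping two independent permutations of range(n_docs) is what produces the random pairs above; a self-contained illustration of the trick:

import numpy as np

n_docs = 5
pairs = list(zip(np.random.permutation(n_docs),
                 np.random.permutation(n_docs)))
# e.g. [(3, 1), (0, 4), (2, 2), (1, 0), (4, 3)]; pairs where u1 == u2
# are skipped by the loop above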
Example #5
def csv_export(request, username, id_string):
    owner = get_object_or_404(User, username=username)
    xform = get_object_or_404(XForm, id_string=id_string, user=owner)
    if not has_permission(xform, owner, request):
        return HttpResponseForbidden('Not shared.')
    valid, dd = dd_for_params(id_string, owner, request)
    if not valid: return dd
    writer = CsvWriter(dd, dd.get_data_for_excel(), dd.get_keys(),
                       dd.get_variable_name)
    file_path = writer.get_default_file_path()
    writer.write_to_file(file_path)
    if request.GET.get('raw'):
        id_string = None
    response = response_with_mimetype_and_name('application/csv', id_string,
        extension='csv',
        file_path=file_path, use_local_filesystem=True)
    return response
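
For context, a Django 1.x-style URLconf entry consistent with this view's (request, username, id_string) signature could look like the following; the pattern itself is hypothetical, only the captured argument names are fixed by the view:

# Hypothetical urls.py entry (argument names must match the view signature):
from django.conf.urls import url

urlpatterns = [
    url(r'^(?P<username>[^/]+)/forms/(?P<id_string>[^/]+)/csv/$', csv_export),
]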
Example #6
def get_candidate_distribution(pf, args, start_t):
    """Samples pairs from candidates, stratified by signature similarity.

    The result is stored in a CSV, including the signature similarity,
    Jaccard similarity, and weight (> 1 if there were more samples in
    the stratum than max_per_step, to get approximately accurate
    frequencies).
    """
    candidates = list(pf.candidates())

    csv_file = args.results
    csv = CsvWriter(csv_file, append=True)
    csv.write_header([
        'run_id', 'u1', 'u2', 'sig_sim', 'jac_sim', 'sig_len', 'bands',
        'max_buckets', 'used_buckets', 'weight'
    ])

    run_id = datetime.now().isoformat()

    lim = 0
    step = 0.05
    max_per_step = 100
    while lim < 1:
        cand = [(c, sim) for c, sim in candidates
                if lim <= sim < lim + step]
        n = len(cand)
        print("{} candidates between {} and {}".format(n, lim, lim + step))

        weight = max(n, max_per_step) / max_per_step
        for i in np.random.permutation(n)[:max_per_step]:
            (c1, c2), sim = cand[i]
            csv.write([
                run_id, c1, c2, sim,
                pf.jaccard_similarity(c1, c2), pf.sig_len, pf.n_bands,
                pf.max_buckets,
                len(pf.buckets), weight
            ])

        lim += step

    return True
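
The weight column compensates for capping each stratum at max_per_step rows; a worked example of the formula:

# Worked example of the per-stratum weight (counts are illustrative):
max_per_step = 100
for n in (40, 100, 250):
    print(n, max(n, max_per_step) / max_per_step)
# 40 -> 1.0, 100 -> 1.0, 250 -> 2.5: each of the 100 rows sampled from the
# 250-candidate stratum stands for 2.5 candidates.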
Example #7
    def write_csv_file(self, driver):
        historical_data_btn = driver.find_element_by_css_selector(
            "#quote-nav > ul > li:nth-child(6) > a")
        driver.get(historical_data_btn.get_attribute("href"))

        try:
            date_range_btn = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "dateRangeBtn")))
            date_range_btn.click()
        except TimeoutException as exc:  # raised by WebDriverWait.until on timeout
            print(exc)

        driver.find_element_by_css_selector('[data-value="MAX"]').click()
        download_btn = driver.find_element_by_css_selector(
            f'[download="{self.company_name}.csv"]')
        download_link = download_btn.get_attribute("href")

        r = requests.get(download_link)

        csv_writer = CsvWriter(r.text, self.company_name)
        csv_writer.run()
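
Here CsvWriter takes the raw CSV payload and a company name rather than a file path. A minimal stand-in consistent with those two calls (hypothetical, assuming run() simply persists the downloaded text):

class CsvWriter:
    """Hypothetical stand-in: writes raw CSV text to <company_name>.csv."""

    def __init__(self, text, company_name):
        self.text = text
        self.company_name = company_name

    def run(self):
        with open("{}.csv".format(self.company_name), "w", newline="") as f:
            f.write(self.text)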
Example #8
File: run.py Project: 2e3s/KijijiRentals

try:
    os.makedirs('cache/pages')
    os.makedirs('cache/subpages')
except OSError as e:
    if e.errno != errno.EEXIST:
        raise

descriptions = dict()

page = Page(1)

f = open("cache/result.txt", 'w+', encoding='utf-8')
counter = Counter(f)
csv_writer = CsvWriter()
csv_writer.init()
db = Db()

while page.has_next_page():
    page.load_full()
    for ad in page.get_ads():
        counter.increase_all()
        if not ad.is_montreal():
            continue
        ad.load_full()

        validator = AdValidator(ad, counter, descriptions)

        if not validator.validate():
Example #9
import numpy as np
import time
import subprocess
import os
import gc

import data
from csv_writer import CsvWriter
from util import ensure_directory
"""
This file simulates the evaluation environment and stores results
as a CSV.
"""

csv = CsvWriter("diagnostics/out/evaluation.csv", append=True)
csv.write_header([
    'note', 'batch', 'run', 'found', 'incorrect', 'time', 'ppm', 'terminated'
])


def jaccard_sim(data, u1, u2):
    m1 = data.movie[data.user == u1]
    m2 = data.movie[data.user == u2]
    return len(np.intersect1d(m1, m2)) / len(np.union1d(m1, m2))
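
# jaccard_sim computes |M1 ∩ M2| / |M1 ∪ M2| over the movie sets of two users.
# Quick sanity check with stand-in arrays (illustrative only):
#   m1, m2 = np.array([1, 2, 3]), np.array([2, 3, 4])
#   len(np.intersect1d(m1, m2)) / len(np.union1d(m1, m2))  # 2 / 4 = 0.5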


def run_evaluation(note, batch=0, runs=5):
    for run in range(runs):
        cmd = [
            "python3",
            "main.py",
Example #10
# settings
input_file_name = 'read.html'
cache_dir_name = 'cache'
out_file_name = 'out.csv'
min_delay = 90
max_delay = 120

print('Load books from file: "%s"' % input_file_name)
read_parser = ReadParser()
if not read_parser.load_from_file(input_file_name):
    exit(1)
print('Books loaded.')

print('Parse books from summary.')
books = read_parser.parse_books()
print('Books parsed: %s.' % len(books))

print('Start download detailed book pages.')
cache = CacheManager(cache_dir_name)
loader = PageLoader(cache, min_delay, max_delay)
loader.download(books)
print('Detailed book pages downloaded.')

print('Prepare books for export.')
details_parser = DetailsParser(cache)
ready_books = details_parser.parse(books)
print('Books ready to export: %s.' % len(ready_books))

writer = CsvWriter()
writer.save(ready_books, out_file_name)
print('Books saved to "%s"' % out_file_name)
Example #11
import numpy as np
import multiprocessing as mp
from sklearn.model_selection import ParameterGrid
from datetime import datetime
import time
import subprocess
import os

from csv_writer import CsvWriter
from util import ensure_directory

csv = CsvWriter("diagnostics/out/experiments.csv", append=True)
csv.write_header(
    ['batch_id', 'run_id', 'bands', 'rows', 'max_buckets', 'time', 'count'])

batch_id = datetime.now().isoformat()


def count_lines_in_file(filename):
    if not os.path.exists(filename):
        return 0

    count = 0
    with open(filename) as f:
        for l in f:
            if l.strip() != '':
                count += 1
    return count


def perform(params):
Example #12
    #             min_pos = 0
    #
    #         if max_pos == count_period + 1:
    #             csv_array[i + 1]["<POSITION>"] = 0
    #             csv_array[i + 1]["<LONG_EXIT>"] = 1
    #             max_pos = 0
    #
    #     return csv_array


if __name__ == "__main__":
    csvArray = csv_worker.CsvReader('TATN_151029_201029.csv').csv_to_dict()
    csvArray = csv_worker.CsvDictEditor(csvArray).transformed_csv()
    csvAlgorithm = Algorithm(csvArray)
    csv = csvAlgorithm.short_position()
    csvAlgorithm = Algorithm(csv)
    csv = csvAlgorithm.long_position()
    csvAlgorithm = Algorithm(csv)
    csv = csvAlgorithm.fill_with_zeros()
    csvAlgorithm = Algorithm(csv)
    csv = csvAlgorithm.fill_with_procents()
    csvWriter = CsvWriter(csv)
    xlsx_path = '1.xlsx'
    csvWriter.writeToXls(xlsx_path, csv)

    # csvAlgorithm = Algorithm(csv)
    # csv = csvAlgorithm.short_position()


    for row in csv:
        print(row)
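
The repeated Algorithm(csv) re-wrapping in the __main__ block can be collapsed by iterating over the step names; a behavior-preserving sketch, assuming each method returns the transformed rows:

# Equivalent chaining of the four transformation steps:
csv = csv_worker.CsvDictEditor(
    csv_worker.CsvReader('TATN_151029_201029.csv').csv_to_dict()
).transformed_csv()
for step in ('short_position', 'long_position',
             'fill_with_zeros', 'fill_with_procents'):
    csv = getattr(Algorithm(csv), step)()
CsvWriter(csv).writeToXls('1.xlsx', csv)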
Example #13
def main():
    log = logging.getLogger('main')

    # creating exit mask from points, where we will be counting our vehicles
    base = np.zeros(SHAPE + (3, ), dtype='uint8')
    exit_mask = cv2.fillPoly(base, EXIT_PTS, (255, 255, 255))[:, :, 0]
    stream = None
    # produce a stabilized video
    if args.stabilize_video == 'yes':
        cap = cv2.VideoCapture(args.video_source)
        stabilize_frames(cap, log)
        return
    else:
        stream = cv2.VideoCapture(args.video_source)
        stream.set(cv2.CAP_PROP_FRAME_WIDTH, SHAPE[1])
        stream.set(cv2.CAP_PROP_FRAME_HEIGHT, SHAPE[0])

    writer = VideoWriter('detected.mp4', (SHAPE[1], SHAPE[0]))

    bg_subtractor = cv2.createBackgroundSubtractorMOG2(history=500,
                                                       detectShadows=True)
    # skipping 500 frames to train bg subtractor
    train_bg_subtractor(bg_subtractor, stream, num=500)

    pipeline = PipelineRunner(
        pipeline=[
            ContourDetection(bg_subtractor=bg_subtractor,
                             save_image=False,
                             image_dir=IMAGE_DIR),
            # we use y_weight == 2.0 because traffic is moving vertically in the video;
            # use x_weight == 2.0 for horizontal traffic.
            # VehicleCounter(exit_masks=[exit_mask], y_weight=2.0),
            VehicleCounter(),
            Visualizer(image_dir=IMAGE_DIR),
            CsvWriter(path='./', name='report.csv')
        ],
        log_level=logging.DEBUG)

    _frame_number = -1
    frame_number = -1

    while True:
        (grabbed, frame) = stream.read()

        if not grabbed:
            log.error("Frame capture failed, stopping...")
            break

        # real frame number
        _frame_number += 1

        # skip every 2nd frame to speed up processing
        if _frame_number % 2 != 0:
            continue

        # frame number that will be passed to the pipeline;
        # needed to rebuild the video from the kept frames
        frame_number += 1

        pipeline.set_context({
            'frame': frame,
            'frame_number': frame_number,
        })
        new_context = pipeline.run()

        cv2.imshow('Video', new_context['frame'])
        writer(new_context['frame'])

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example #14
    def generate_csv_reports(test_runner, is_cert=False):
        """Generate CSV reports with the results of the test.

        Generate an entry in the weekly CSV report and another one in the daily CSV report
        if required. If is_cert is True, it will use different values for device, branch
        and buildname than the ones detected automatically.
        """
        is_ci_server = os.getenv("ON_CI_SERVER")
        # Provide a default value so that the call to find() works even if the variable is unset
        is_by_test_suite = os.getenv("RUN_ID",
                                     "").find("testing_by_TEST_SUITE")
        if (not is_ci_server or is_by_test_suite != -1) and not is_cert:
            return
        fieldnames = [
            'START_TIME', 'DATE', 'TEST_SUITE', 'TEST_CASES_PASSED',
            'FAILURES', 'AUTOMATION_FAILURES', 'UNEX_PASSES', 'KNOWN_BUGS',
            'EX_PASSES', 'IGNORED', 'UNWRITTEN', 'PERCENT_FAILED', 'DEVICE',
            'VERSION', 'BUILD', 'TEST_DETAILS'
        ]
        passes = test_runner.passed + test_runner.unexpected_passed
        failures = test_runner.unexpected_failures + test_runner.automation_failures
        totals = passes + failures
        error_rate = float(failures * 100) / totals
        device = os.getenv(
            "DEVICE"
        ) if not is_cert else test_runner.runner.testvars["device_cert"]
        branch = os.getenv(
            "BRANCH"
        ) if not is_cert else test_runner.runner.testvars["branch_cert"]
        run_id = os.getenv("RUN_ID")
        buildname_var = os.getenv(
            "DEVICE_BUILDNAME"
        ) if not is_cert else test_runner.runner.testvars["buildname_cert"]
        print "Device: {}   Branch: {}  Run id: {}  Device buildname: {}".format(
            device, branch, run_id, buildname_var)
        index = buildname_var.find(".Gecko")
        buildname = buildname_var[:index]
        index = -1
        filedir = ""
        html_webdir = test_runner.runner.testvars['output'][
            'webdir'] + "/{}/{}/{}".format(device, branch, run_id)
        on_ci_server = os.getenv("ON_CI_SERVER")
        if on_ci_server:
            index = html_webdir.find("owd_tests")
            filedir = html_webdir[index + 9:]
        else:
            filedir = test_runner.runner.testvars['output']['html_output']
        values = [
            test_runner.start_time.strftime("%d/%m/%Y %H:%M"),
            test_runner.end_time.strftime("%d/%m/%Y %H:%M"), run_id,
            "{} / {}".format(passes, totals),
            str(test_runner.unexpected_failures),
            str(test_runner.automation_failures),
            str(test_runner.unexpected_passed),
            str(test_runner.expected_failures),
            str(test_runner.passed),
            str(test_runner.skipped), "0", "{:.2f}".format(error_rate), device,
            branch, buildname, filedir
        ]
        weekly_file = '/var/www/html/owd_tests/total_csv_file.csv'
        daily_file = '/var/www/html/owd_tests/{}/{}/partial_csv_file_NEW.csv'.format(
            device, branch)
        csv_writer = CsvWriter(device, branch)
        csv_writer.create_report(fieldnames, dict(zip(fieldnames, values)),
                                 weekly_file, False)
        csv_writer.create_report(fieldnames, dict(zip(fieldnames, values)),
                                 daily_file)
        if on_ci_server:
            Utilities.persist_result_files(test_runner.runner.testvars, device,
                                           branch, run_id)
Example #15
def monitor(share_queue, quit_event, email_event, config_dict=dict()):
    try:
        target_process_name_list = config_dict["target_process"]
        interval = config_dict["interval"]
        log_path = config_dict["log_path"]
        email_context = config_dict["email_context"]
        email_length = config_dict["email_length"]
        memory_limit = config_dict["memory_limit"]
        log_interval = config_dict["log_interval"]
    except Exception as e:
        print("Error at configDict, monitor exit.", e)
        sys.exit(-1)

    line_number = 0
    continue_flag = True
    emailMessageQueue = deque(maxlen=email_length)
    if log_interval == 7:
        log_opt = 'week'
    elif log_interval == 30:
        log_opt = 'month'
    else:
        log_opt = 'day'
    t1 = time.localtime()
    logFileName = log_file_name.log_file_name(opt=log_opt)
    csv_f = CsvWriter(logFileName, path=log_path)

    while continue_flag is True:
        # td holds the fields of a single log record
        td = OrderedDict()
        line_number += 1
        td['LineNumber'] = line_number
        # timestamp
        td['Time'] = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())
        # global CPU and memory state
        currentCpuState = get_cpu_state(interval=interval)
        td['CPU_percent'] = currentCpuState
        global_memory_state = get_memory_state()
        td['memory_percent'], td['memory_used'], td[
            'memory_total'] = global_memory_state
        # get network I/O state
        network_status = get_network_state()
        td['bytes_sent'] = network_status[0]
        td['bytes_recv'] = network_status[1]
        # target process state
        refresh_target_processes(target_process_name_list)
        process_status_dict, is_exceed = process_state(
            target_process_name_list, memory_limit)
        if process_status_dict['process_total_numbers'] > 0:
            td['process_name'] = process_status_dict['process_name']
            td['process_memory_occupied'] = process_status_dict[
                'memory_occupied']
            td['process_total_numbers'] = process_status_dict[
                'process_total_numbers']
            td['process_read_cnt'] = process_status_dict['p_read_cnt']
            td['process_write_cnt'] = process_status_dict['p_write_cnt']
        else:
            td['process_name'] = process_status_dict['process_name']
            td['process_memory_occupied'], td['process_total_numbers'], td[
                'process_read_cnt'], td['process_write_cnt'] = 0, 0, 0, 0

        if share_queue is not None:
            share_queue.append(
                (td['LineNumber'], td['CPU_percent'], td['memory_percent'],
                 td['process_memory_occupied'],
                 td['bytes_recv'] + td['bytes_sent']))

        # append this record to the bounded-length queue
        emailMessageQueue.append(td)

        # ----------write dict to csv file----------------
        # print("Now writing at line: %d" % line_number)
        csv_f.dict_to_csv(td)
        t2 = time.localtime()

        # if line_number % 120 == 0:
        #     csv_f.flush()

        if is_exceed:
            try:
                csv_email_f = CsvWriter(email_context)
                csv_email_f.queue_dict_to_csv(emailMessageQueue)
                csv_email_f.close()
            except Exception as e:
                print("Error at generate Email file!", e)
            if email_event is None or not email_event.is_set():
                # print("email_event is None:", email_event is None)
                thd = threading.Thread(target=wrapped_email_sender,
                                       kwargs={
                                           'config_dict': config_dict,
                                           'xls_format': True
                                       })
                # wrapped_email_sender(configDict=config_dict, xls_format=True)
                thd.start()  # send the email from a separate thread
                if email_event is not None:
                    email_event.set()
        if quit_event is not None and quit_event.is_set():
            continue_flag = False  # <==> break
            csv_f.close()
        if need_to_switch(t1, t2, log_interval):
            f_name = csv_f.filename
            csv_f.close()
            zip_the_old_file(f_name.replace("csv", "zip"), f_name, log_path)
            t1 = t2  # Update t1
            logFileName = log_file_name.log_file_name(opt=log_opt)
            csv_f = CsvWriter(logFileName, path=log_path)
    return 0