def getOverLap(keyword, setups):
    """Build checkbox options for the ``dynamic_<keyword>`` values that occur
    in the results of *both* selected setups (the overlap); every overlapping
    value starts out selected.

    Returns a tuple ``(checkboxes, selected)``; both empty when nothing is
    selected yet.
    """
    print(f'\n[CALLBACK] Checking Types for {keyword}')
    print('\tSETUPS', setups)

    # Guard: dash fires callbacks before any setup is picked
    if setups is None or len(setups) == 0:
        return [], []

    conf0, conf1 = getConfigs(setups)

    print(f'\tGetting {keyword} for:\n'
          f'\t{conf0.name}\n'
          f'\t{conf1.name}\n')

    # TODO: Watch out for keyword change, as this is a dynamic key
    distinct0 = Result.objects(config=conf0).distinct(f'dynamic_{keyword}')
    distinct1 = Result.objects(config=conf1).distinct(f'dynamic_{keyword}')
    common = set(distinct0) & set(distinct1)

    checkboxes = [{'label': v, 'value': v} for v in common]
    return sorted(checkboxes, key=lambda c: c['label']), sorted(common)
def _retrieveDataAndBuildSpeedupTable(setups):
    """Aggregate the results of the two selected configs and build a speedup
    table comparing them.

    Args:
        setups: list with the two selected config ids (dash callback state)

    Returns:
        ``[table_json, setups]`` on success, ``[None, None]`` when nothing
        is selected.
    """
    print('\n[CALLBACK] Retrieving Data')

    if setups is None or setups == []:
        return [None, None]

    conf0, conf1 = getConfigs(setups)
    # Get all results for both configs
    results0 = Result.objects(config=conf0).hint('config_hashed')
    results1 = Result.objects(config=conf1).hint('config_hashed')

    start = time.time()

    df0 = aggregate_results(results0)
    df1 = aggregate_results(results1)

    print(f'\tAggregated in {(time.time() - start)} seconds')

    def calculate_speedup(data0: pd.DataFrame, data1: pd.DataFrame) -> pd.DataFrame:
        """
        Return Dataframe containing all matched configs and the speedup

        Args:
            data0: aggregated result dataframe commit0
            data1: aggregated result dataframe commit1

        Returns:
            table: dataframe with speedup table
        """

        quantity = 'minTime'
        # TODO: watch out for no matches when adding other timing quants into dataframe
        all_data1_configs = data1.drop(columns=quantity)  # Drop column for matching
        table = all_data1_configs.copy()

        for i_search in range(len(data0)):
            # Row of data0 without the time column
            search_config = data0.loc[i_search, data0.columns != quantity]
            # Marks every data1 row whose full config matches the data0 row
            full_match = (all_data1_configs == search_config).all(axis=1)
            # First full match in data1 (raises IndexError if none — see TODO)
            i_match = full_match[full_match].index[0]

            # BUGFIX: indices were swapped here (data0 was indexed with
            # i_match and data1 with i_search, inconsistent with the matching
            # above); speedup = time(commit0) / time(commit1)
            speedup = data0.loc[i_search, quantity] / data1.loc[i_match, quantity]
            label = ''.join([f'{str(v):>10} ' for v in data1.loc[i_match, :].values])
            table.loc[i_match, 'quantity'] = data1.loc[i_match, quantity]
            table.loc[i_match, 'speedup'] = speedup
            table.loc[i_match, 'label'] = label

        return table

    speedupTable = calculate_speedup(df0, df1).sort_values('speedup')

    return [speedupTable.to_json(), setups]
# --- Пример (Example) #3 — scraper snippet separator, commented out (was not valid Python) ---
    def generatePlot(self):
        """
        Quick overview plot for commit: one horizontal bar chart of mean/min
        runtimes per config of this commit, each uploaded to imgur and the
        link stored on the config.
        :return: True (errors are reported via self.updateStatus, not raised)
        """

        try:

            imgur = ImgurUploader()

            confs = Config.objects(commitSHA=self.sha)

            # Multiple Plots if more than one config was run
            conf: Config
            for conf in confs:
                results = Result.objects(config=conf)

                means = np.array([r.meanTime for r in results])
                mins = np.array([r.minTime for r in results])

                header, all_keys = get_dyn_keys(results)
                header_string = r'$\bf{' + header + '}$'
                labels = generate_label_table(results, all_keys)

                # Sort by minimum time (descending)
                sort_keys = np.argsort(mins)[::-1]
                sorted_means = means[sort_keys]
                sorted_mins = mins[sort_keys]
                sorted_labels = labels[sort_keys]
                sorted_labels = np.append(sorted_labels, header_string)

                fig = plt.figure(figsize=(15, len(means) / 4))
                plt.gca().set_title(conf)
                plt.barh(np.arange(len(means)), sorted_means, label='mean')
                plt.barh(np.arange(len(means)), sorted_mins, label='min')
                plt.legend()
                plt.xlabel('nanoseconds')
                plt.xscale('log')
                plt.yticks(np.arange(len(sorted_labels)), sorted_labels)
                plt.grid(which='both', axis='x')
                plt.tight_layout()

                # Upload figure
                buf = io.BytesIO()
                fig.savefig(buf, format='png')
                buf.seek(0)
                # FIX: don't shadow the builtin `hash`
                link, delete_hash = imgur.upload(buf.read())
                conf.perfImgurLink = link
                conf.perfDeleteHash = delete_hash
                conf.save()

                # FIX: close the figure — matplotlib keeps every figure alive
                # otherwise, leaking memory when many configs are plotted
                plt.close(fig)

                self.updateStatus(1, "PLOTTING", "PLOTTING succeeded\n", link)

        except Exception as e:
            self.updateStatus(-1, "PLOTTING", f"PLOTTING failed\n{e}")

        os.chdir(self.baseDir)
        return True
# --- Пример (Example) #4 — scraper snippet separator, commented out (was not valid Python) ---
def singleResults(setup):
    """Fetch and aggregate all results of one config.

    Returns ``[df_json, '<sha8>: <commit message>']`` for display.
    """
    print('\n[CALLBACK] Single Results')

    # Retrieve and aggregate data, timing the round trip
    t0 = time.time()

    conf = Config.objects().get(id=setup)
    df = aggregate_results(Result.objects(config=conf).hint('config_hashed'))
    print(f'\tAggregated singles in {(time.time() - t0)} seconds')

    title = f'{conf.commitSHA[0:8]}: {conf.commitMessage}'
    return [df.to_json(), title]
# --- Пример (Example) #5 — scraper snippet separator, commented out (was not valid Python) ---
def getOptions(keyword, setup):
    """Build checkbox options for every distinct ``dynamic_<keyword>`` value
    of the given setup; all values start out selected.

    Returns a tuple ``(checkboxes, selected)``.
    """
    print(f'\n[CALLBACK] Checking Types for {keyword}')
    print('\tSETUPS', setup)

    conf = Config.objects().get(id=setup)
    values = Result.objects(config=conf).distinct(f'dynamic_{keyword}')

    options = [{'label': v, 'value': v} for v in values]
    return sorted(options, key=lambda o: o['label']), sorted(values)
def _aggregateResults(config, sliderDict, sliderPos):
    """Aggregate results for the base config plus every commit inside the
    slider range (matched on the same setup).

    Returns ``([base_json, [comparison_jsons]], [sliderPos, config])``, or
    ``(None, None)`` when no config is selected.
    """
    print('[CALLBACK] Getting Results')

    if config is None or config == []:
        return None, None

    start = time.time()

    parsedSlider = pd.read_json(sliderDict)

    baseConf = Config.objects().get(id=config)
    base_df = aggregate_results(Result.objects(config=baseConf))

    compData = []

    lo, hi = sliderPos
    for idx in range(lo + 1, hi + 1):

        # Find the config with the same setup for this commit's SHA
        sha = parsedSlider.iloc[idx].SHA
        try:
            conf = Config.objects().get(commitSHA=sha, setup=baseConf.setup)
        except me.MultipleObjectsReturned:
            # Several runs for this commit: take the newest one
            conf = Config.objects(
                commitSHA=sha, setup=baseConf.setup).order_by('-id').first()
        except me.DoesNotExist:
            continue

        # Aggregate this commit's results; skip commits with no data
        df = aggregate_results(Result.objects(config=conf))
        print(f'\t{len(df)}')
        if len(df) != 0:
            compData.append(df.to_json())

    print(f'\tAggregated all results: {time.time() - start} seconds')

    return [base_df.to_json(), compData], [sliderPos, config]
    def _dynFunction(setup):
        """Checkbox options for the ``dynamic_<keyword>`` values of one
        setup (``keyword`` is a free variable from the enclosing scope);
        every value starts out selected.
        """
        if setup is None or setup == []:
            return [], []

        conf = Config.objects.get(id=setup)
        values = Result.objects(config=conf).distinct(f'dynamic_{keyword}')

        boxes = [{'label': v, 'value': v} for v in values]
        return sorted(boxes, key=lambda b: b['label']), sorted(values)
# --- Пример (Example) #8 — scraper snippet separator, commented out (was not valid Python) ---
    def parse_and_upload(self):
        """Parse md-flexible tuning-phase output and upload it to the DB.

        Creates one ``Config`` document for this run, then one ``Result``
        per tuning configuration found in the measurement stdout.

        Returns:
            (True, message) on success, (False, message) on any failure;
            failures are also reported via ``self.updateStatus``.
        """

        print("uploading", self.mdFlexDir)

        # CPU brand is informational only; fall back to "N/A" if cpuinfo
        # fails (e.g. unsupported platform).
        # NOTE(review): newer py-cpuinfo versions renamed this key to
        # "brand_raw" — verify against the pinned dependency version.
        try:
            cpu = get_cpu_info()["brand"]
        except Exception as e:
            print(f"Couldn't determine CPU brand: {e}")
            cpu = "N/A"
        run_timestamp = datetime.utcnow()

        # Matches lines like: "Collected times for {<config>} : [<times>]"
        # group 1 = config key/value list, group 2 = comma-separated times
        coarse_pattern = re.compile(
            r'Collected times for\s+{(.*)}\s:\s\[(.*)\]')
        config_pattern = re.compile(r'([^,]+): ([^,]+)')
        times_pattern = re.compile(r'(\d+)')
        config_runs = coarse_pattern.findall(
            self.measure_output.stdout.decode('utf-8'))

        # One Config document describes this whole measurement run
        db_entry = Config()
        db_entry.name = 'performance via single tuning phase'  # TODO: Keep name field?
        db_entry.date = run_timestamp
        db_entry.commitSHA = self.sha
        db_entry.commitMessage = self.repo.commit(self.sha).message
        db_entry.commitDate = self.repo.commit(self.sha).authored_datetime
        db_entry.mergedBaseSHA = self.baseSHA

        # Assumes tests were run on this system
        db_entry.system = cpu

        # Saving Setup used in perf script
        db_entry.setup = self.perfSetup

        # TODO: Decide if uniqueness is enforced (Change spare in models to False)
        # db_entry.unique = db_entry.name + db_entry.commitSHA + db_entry.system + str(db_entry.date)
        # try:
        #     db_entry.save()
        # except NotUniqueError:
        #     print("Exact Configuration for system and commit + date already saved!")
        #     continue
        try:
            db_entry.save()
        except Exception as e:
            self.updateStatus(-1, "UPLOAD", str(e))
            return False, f'Upload of config to DB failed {e}'
        print(db_entry)

        # One Result document per tuning configuration in the output
        for run in config_runs:

            results = Result()
            results.config = db_entry

            # Filter all config parameters ("key: value" pairs)
            config = config_pattern.findall(run[0])

            # Parsing output
            try:
                # Parsing Config keys and values
                for pair in config:
                    key = pair[0].replace(' ', '')  # Replace spaces
                    key = 'dynamic_' + key  # Adding prefix to clearly show dynamic field creation in DB
                    quantity = pair[1].replace(' ', '')  # Replace spaces

                    try:  # Try converting to float if appropriate
                        quantity = float(quantity)
                    except ValueError:
                        pass

                    print(key, quantity)
                    results[key] = quantity

                # Parsing times (integer nanosecond samples)
                times = times_pattern.findall(run[1])
                times = [float(t) for t in times]
                results.measurements = times
                results.meanTime = np.mean(times)  # Mean running Time
                results.minTime = np.min(times)  # Min running Time
            except Exception as e:
                print(f'Parsing of measurement failed {e}')
                self.updateStatus(-1, "PARSING", str(e))
                return False, f'Parsing failed with {e}'

            try:
                results.save()
            except Exception as e:
                self.updateStatus(-1, "UPLOAD", str(e))
                return False, f'Upload of Result failed with {e}'
            print(results)

        os.chdir(self.baseDir)
        self.updateStatus(1, "UPLOAD", "RESULT UPLOAD succeeded\n")
        return True, 'Upload succeeded'
# --- Пример (Example) #9 — scraper snippet separator, commented out (was not valid Python) ---
    def _compareConfigs(self, base: Config, test: Config):
        """
        Given two configs, find all overlapping results and compare them

        Matches each base result to a test result with identical dynamic_*
        fields, computes per-pair speedups, and renders a horizontal bar
        chart of the speedups.

        :param base: PR Base SHA config
        :param test: Commit in PR to compare to base
        :return: (figure, sorted min speedups, sorted mean speedups,
                 count of base results with no matching test result)
        """

        # Use base as common denominator and look for results containing the keys in base
        baseResults = Result.objects(config=base)
        testResults = Result.objects(config=test)

        missing_results_counter = 0
        minSpeeds = []
        meanSpeeds = []

        matchedResults = []

        for baseRes in baseResults:
            # Build dynamic keys dict: every dynamic_* field of the base
            # result becomes an exact-match filter for the test results
            dynamicFields = [
                key for key in baseRes.__dict__['_fields_ordered']
                if 'dynamic_' in key
            ]
            query = dict()
            for field in dynamicFields:
                query[field] = baseRes[field]

            # TODO: Change above to similar dict comprehension
            # dynamicQuery = {k: r0[k] for k in r0.__dict__['_fields_ordered'] if 'dynamic_' in k}

            # Get Results with matching settings (filter existing queryset)
            testRes = testResults.filter(**query)
            if len(testRes) == 0:
                missing_results_counter += 1
                continue
            # NOTE(review): ordering key is the raw Mongo '_id' field here,
            # while other snippets use '-id' — confirm both resolve the same
            testRes = testRes.order_by(
                '-_id').first()  # Get newest matching if there's more than one

            minSpeedup, meanSpeedup = self._compareResults(baseRes, testRes)
            minSpeeds.append(minSpeedup)
            meanSpeeds.append(meanSpeedup)
            matchedResults.append(testRes)

        # Build y-axis labels from the dynamic keys of the matched results;
        # the bold header row is appended as the last tick label
        header, all_keys = get_dyn_keys(matchedResults)
        header_string = r'$\bf{' + header + '}$'
        labels = generate_label_table(matchedResults, all_keys)

        # Sort ascending by min-runtime speedup so the worst cases plot first
        sort_keys = np.argsort(minSpeeds)
        sorted_min_speedsup = np.array(minSpeeds)[sort_keys]
        sorted_mean_speedsup = np.array(meanSpeeds)[sort_keys]
        sorted_labels = labels[sort_keys]
        sorted_labels = np.append(sorted_labels, header_string)

        # Green = passes the perf threshold, red = regression
        colors = [
            'g' if speed >= CheckFlow.PERF_THRESHOLD else 'r'
            for speed in sorted_min_speedsup
        ]

        fig = plt.figure(figsize=(15, len(labels) / 4))
        plt.title('Speedup')
        plt.barh(np.arange(len(sort_keys)),
                 sorted_min_speedsup,
                 color=colors,
                 alpha=.5,
                 label='Speedup: minimum runtime')
        plt.barh(np.arange(len(sort_keys)),
                 sorted_mean_speedsup,
                 color='gray',
                 alpha=.5,
                 label='Speedup: mean runtime')
        plt.axvline(1, c='k', label='no change')
        plt.axvline(CheckFlow.PERF_THRESHOLD, c='r', label='passing threshold')
        plt.yticks(np.arange(len(sorted_labels)), sorted_labels)
        plt.legend(loc='lower right')
        plt.grid(which='both', axis='x')
        plt.xlim(0, 2)
        plt.tight_layout()
        # NOTE(review): plt.show() blocks with interactive backends and the
        # figure is never closed — confirm this only runs with a headless
        # (Agg) backend in CI
        plt.show()

        print(
            f"{missing_results_counter} not matched out of {len(baseResults)}")
        return fig, sorted_min_speedsup, sorted_mean_speedsup, missing_results_counter
# --- Пример (Example) #10 — scraper snippet separator, commented out (was not valid Python) ---
               password=os.environ['PASSWORD'])

    # New setups with more tuning-samples and changed rebuild frequency
    homoID = '5f44050def458403b65f97fa'
    imhomoID = '5f44050def458403b65f97f9'
    sha = '20382287f7f3d1ff2aa8414891ea657245670c80'

    h**o = Setup.objects().get(id=homoID)
    inhomo = Setup.objects().get(id=imhomoID)

    for s_name, setup in zip(['h**o', 'inhomo'], [h**o, inhomo]):
        configs = Config.objects(setup=setup, commitSHA=sha)

        # TODO: Remove limit here
        for conf in configs:
            results = list(Result.objects(config=conf))  # [:10]
            data = []
            labels = []
            for res in results:
                res: Result
                resDict = res.__dict__
                keys = [
                    k for k in resDict.keys()
                    if 'dynamic' in k and '_dynamic_lock' not in k
                ]
                labels.append(''.join([f'{str(resDict[k])} ' for k in keys]))
                data.append(res.measurements)
            data = np.array(data)

            # TODO: Watch out for rebuild freq change
            rebuild_freq = 4