Example #1
def append_predictions(user: User) -> None:
    """ Appends predictions for each issue.

    Parameters:
    ----------
    user:
        User instance.
    """

    bugs = pd.DataFrame(
        get_issues(fields=["Key", "Description_tr"],
                   filters=[UNRESOLVED_BUGS_FILTER]))

    # Split the DataFrame into chunks to process them in parallel;
    # np.array_split() takes the number of chunks, not the chunk size.
    chunk_count = ceil(len(bugs) / multiprocessing.cpu_count())

    archive_path = get_archive_path(user)
    training_parameters = read_from_archive(archive_path,
                                            TRAINING_PARAMETERS_FILENAME)

    with multiprocessing.Pool() as pool:
        async_results = [
            pool.apply_async(
                calculate_predictions,
                args=(chunk, training_parameters, archive_path),
            ) for chunk in np.array_split(bugs, chunk_count)
        ]
        # Block until every chunk has been processed.
        df_predictions = [result.get() for result in async_results]

    df_predictions = pd.concat(df_predictions)
    # The translated description was only needed to calculate predictions.
    del df_predictions["Description_tr"]

    update_issues(df_predictions.T.to_dict().values())
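A self-contained sketch of the same split-and-pool pattern (double_chunk is a hypothetical stand-in for calculate_predictions):

import multiprocessing

import numpy as np
import pandas as pd


def double_chunk(chunk: pd.DataFrame) -> pd.DataFrame:
    # Hypothetical stand-in for calculate_predictions.
    return chunk * 2


if __name__ == "__main__":
    df = pd.DataFrame({"value": range(10)})
    with multiprocessing.Pool() as pool:
        results = [
            pool.apply_async(double_chunk, args=(chunk,))
            for chunk in np.array_split(df, multiprocessing.cpu_count())
        ]
        parts = [result.get() for result in results]
    print(pd.concat(parts))  # the same rows as df, each value doubled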
Example #2
    def post(self, request):
        highlighted_terms = []
        user = request.user
        metric = (
            request.data.get("metric")
            if request.data.get("metric") != "Areas of testing"
            else "areas_of_testing"
        )
        value = request.data.get("value")
        probabilities = loads(redis_conn.get(f"probabilities:{user.id}"))

        # Highlight terms only when the predicted probability is significant.
        if probabilities[metric][value] > 0.05:
            archive_path = get_archive_path(user)
            description = loads(redis_conn.get(f"description:{user.id}"))

            index = metric
            if metric != "areas_of_testing":
                index = f"{metric}_{value}"
            top_terms = (
                read_from_archive(archive_path, TOP_TERMS_FILENAME)[index]
                .dropna()
                .tolist()
            )
            tfidf = StemmedTfidfVectorizer(stop_words=STOP_WORDS)
            tfidf.fit_transform([description])
            # Keep only the description's terms that rank among the top terms.
            highlighted_terms = [
                term for term in tfidf.get_feature_names()
                if term in top_terms
            ]

        context = {"terms": highlighted_terms}

        return Response(context)
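The highlighting step boils down to intersecting the description's tf-idf vocabulary with the stored top terms. A sketch with scikit-learn's plain TfidfVectorizer standing in for the project's StemmedTfidfVectorizer (which additionally stems tokens):

from sklearn.feature_extraction.text import TfidfVectorizer

description = "Application crash after unicode input in the search field"
top_terms = {"crash", "unicode", "timeout"}  # made-up example terms

vectorizer = TfidfVectorizer(stop_words="english")
vectorizer.fit_transform([description])

highlighted_terms = [
    term for term in vectorizer.get_feature_names_out()
    if term in top_terms
]
print(highlighted_terms)  # ['crash', 'unicode']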
Example #3
    def post(self, request):
        user = request.user

        cache = redis_conn.get(f"analysis_and_training:{user.id}")
        filters = loads(cache)["filters"] if cache else None
        fields = get_issues_fields(user)
        df = pd.DataFrame(get_issues(filters=filters, fields=fields))

        # New predictions will be appended after training.
        delete_old_predictions()

        settings = get_training_settings(user)

        if settings["mark_up_source"] not in df.columns:
            raise InvalidMarkUpSource

        resolutions = [
            resolution["value"] for resolution in settings["bug_resolution"]
        ]

        areas_of_testing = []

        if settings["mark_up_source"]:
            areas_of_testing = [
                area["area_of_testing"]
                for area in settings["mark_up_entities"]
            ] + ["Other"]
            for area in settings["mark_up_entities"]:
                df = mark_up_series(
                    df,
                    settings["mark_up_source"],
                    area["area_of_testing"],
                    area["entities"],
                )
            df = mark_up_other_data(df, areas_of_testing)

        delete_training_data(get_archive_path(user))

        train(
            user,
            df,
            areas_of_testing,
            resolutions,
        )

        context = {
            "result": "success",
        }

        process = Process(target=append_predictions, args=(user,))
        process.start()

        return Response(context, status=200)
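The view responds while append_predictions keeps running in a detached process. The pattern in isolation (job is a hypothetical stand-in):

import time
from multiprocessing import Process


def job(user_id: int) -> None:
    # Hypothetical long-running stand-in for append_predictions.
    time.sleep(1)
    print(f"predictions appended for user {user_id}")


if __name__ == "__main__":
    process = Process(target=job, args=(42,))
    process.start()         # returns immediately
    print("response sent")  # the view returns its Response at this point
    process.join()          # the view skips this; joined here so the demo exits cleanly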
Example #4
def check_training_files(user: User) -> None:
    """ Raises warning if models don't exist.

    Parameters:
    ----------
    user:
        User instance.
    """
    archive_path = get_archive_path(user)
    with ZipFile(archive_path, "r") as archive:
        if TRAINING_PARAMETERS_FILENAME not in archive.namelist():
            raise ModelsNotTrainedWarning
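The check relies on ZipFile.namelist(); a quick self-contained demonstration (archive and member names are made up):

from zipfile import ZipFile

with ZipFile("archive.zip", "w") as archive:
    archive.writestr("training_parameters.pkl", b"stub")

with ZipFile("archive.zip", "r") as archive:
    print("training_parameters.pkl" in archive.namelist())  # True
    print("top_terms.pkl" in archive.namelist())            # False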
Example #5
    def post(self, request):
        def _convert_to_integer(value):
            # Round-half-up of a 0..1 probability to a whole percentage.
            return int(floor((value * 100) + 0.5))

        description = clean_text(request.data.get("description"))

        if not description.strip():
            raise DescriptionCantAnalyzedWarning

        archive_path = get_archive_path(request.user)
        training_parameters = read_from_archive(
            archive_path, TRAINING_PARAMETERS_FILENAME
        )
        probabilities = {}
        probabilities["resolution"] = calculate_resolution_predictions(
            description, training_parameters["Resolution"], archive_path
        )
        probabilities["areas_of_testing"] = (
            calculate_area_of_testing_predictions(
                description,
                training_parameters["areas_of_testing"],
                archive_path,
            )
        )

        for metric in ["Time to Resolve", "Priority"]:
            probabilities[metric] = get_probabilities(
                description,
                training_parameters[metric],
                read_from_archive(archive_path, metric + ".sav"),
            )

        # Convert every probability to an integer percentage for the response.
        for metric_name, values in probabilities.items():
            if metric_name == "resolution":
                for resolution_obj in values.values():
                    for metric in resolution_obj:
                        resolution_obj[metric] = _convert_to_integer(
                            resolution_obj[metric]
                        )
            else:
                for metric in values:
                    values[metric] = _convert_to_integer(values[metric])

        redis_conn.set(f"description:{request.user.id}", dumps(description))
        redis_conn.set(
            f"probabilities:{request.user.id}", dumps(probabilities)
        )

        context = {"probabilities": probabilities}

        return Response(context)
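_convert_to_integer is a round-half-up conversion to whole percentages, which differs from the built-in round() exactly at halves:

from math import floor


def _convert_to_integer(value):
    return int(floor((value * 100) + 0.5))


assert _convert_to_integer(0.25) == 25
assert _convert_to_integer(0.125) == 13  # round(12.5) would give 12 (banker's rounding)
assert _convert_to_integer(0.005) == 1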
Example #6
def get_training_settings(user: Model) -> dict:
    """ Prepares training settings data for serializing.

    Parameters:
    ----------
    user:
        User instance.

    Returns:
    ----------
        Training settings read from the user's archive.
    """
    archive_path = get_archive_path(user)
    training_settings = read_from_archive(archive_path,
                                          TRAINING_SETTINGS_FILENAME)

    return training_settings
Example #7
    def post(self, request):
        user = request.user
        filters = request.data.get("filters", [])
        offset = DEFAULT_OFFSET
        limit = DEFAULT_LIMIT

        # Fail early if models aren't trained or predictions are missing.
        check_training_files(user)
        check_predictions()

        archive_path = get_archive_path(user)
        training_parameters = read_from_archive(archive_path,
                                                TRAINING_PARAMETERS_FILENAME)

        predictions_table_settings = get_predictions_table_settings(user)
        predictions = get_predictions_table(predictions_table_settings,
                                            filters, None, None)

        if predictions.empty:
            return Response({})

        predictions_page = paginate_bugs(predictions, offset, limit)

        areas_of_testing_percentage = calculate_aot_percentage(
            predictions["Area of Testing"])
        priority_percentage = calculate_priority_percentage(
            predictions["Priority"], training_parameters["Priority"])
        ttr_percentage = calculate_ttr_percentage(
            predictions["Time to Resolve"],
            training_parameters["Time to Resolve"],
        )

        resolution_percentage = calculate_resolution_percentage(
            predictions, training_parameters["Resolution"])

        result = {
            "predictions_table": prediction_table.T.to_dict().values(),
            "prediction_table_rows_count": len(predictions),
            "areas_of_testing_chart": areas_of_testing_percentage,
            "priority_chart": priority_percentage,
            "ttr_chart": ttr_percentage,
            "resolution_chart": resolution_percentage,
        }

        return Response(result)
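paginate_bugs is project-internal; an offset/limit page over the predictions DataFrame plausibly reduces to a positional slice (a sketch, not the project's implementation):

import pandas as pd


def paginate(df: pd.DataFrame, offset: int, limit: int) -> pd.DataFrame:
    # Positional slice: rows [offset, offset + limit).
    return df.iloc[offset:offset + limit]


predictions = pd.DataFrame({"Key": [f"BUG-{i}" for i in range(100)]})
page = paginate(predictions, offset=20, limit=10)
print(page["Key"].tolist())  # ['BUG-20', ..., 'BUG-29']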
Example #8
    def get(self, request):
        archive_path = get_archive_path(request.user)
        if not is_file_in_archive(archive_path, TRAINING_PARAMETERS_FILENAME):
            raise DescriptionAssessmentUnavailableWarning
        settings = get_training_settings(request.user)

        resolutions = [
            resolution["value"] for resolution in settings["bug_resolution"]
        ]

        training_parameters = read_from_archive(
            archive_path, TRAINING_PARAMETERS_FILENAME
        )
        context = {
            "priority": training_parameters.get("Priority"),
            "resolution": resolutions,
            "areas_of_testing": training_parameters.get("areas_of_testing"),
        }

        return Response(context)
Example #9
def update_training_settings(training_settings: dict, user: Model) -> None:
    """ Updates training settings.

    Parameters:
    ----------
    training_settings:
        Training settings.
    user:
        User instance
    """
    def _parse() -> None:
        """ Converts nested serializer objects to plain dicts.
        """
        for key, value in training_settings.items():
            if isinstance(value, list):
                training_settings[key] = [dict(item) for item in value]

    def _check_by_changing():
        """ Deletes existing training data if the settings have changed.
        """
        current_settings = read_from_archive(archive_path,
                                             TRAINING_SETTINGS_FILENAME)

        is_changed = False

        for key, obj in current_settings.items():
            if key == "mark_up_source":
                if obj != training_settings[key]:
                    is_changed = True
                    break
            elif key == "bug_resolution":
                current_metrics = {resolution["value"] for resolution in obj}
                new_metrics = {
                    resolution["value"]
                    for resolution in training_settings["bug_resolution"]
                }
                if current_metrics.difference(new_metrics):
                    is_changed = True
                    break
            else:
                old_areas_of_testing = {
                    entity["area_of_testing"]: entity["entities"]
                    for entity in obj
                }
                new_areas_of_testing = {
                    entity["area_of_testing"]: entity["entities"]
                    for entity in training_settings[key]
                }
                for key_ in old_areas_of_testing:
                    if key_ not in new_areas_of_testing or set(
                        old_areas_of_testing[key_]
                    ).difference(new_areas_of_testing[key_]):
                        is_changed = True
                        break

        if is_changed:
            delete_training_data(archive_path)

    _parse()

    archive_path = get_archive_path(user)
    training_data = pickle.dumps(training_settings)

    # Drop trained models if the settings changed, then persist the new ones.
    _check_by_changing()
    update_training_config(archive_path, training_data)
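The corrected _parse converts list values to plain dicts in place; its effect in isolation (OrderedDict stands in for what DRF serializers typically hand over):

from collections import OrderedDict

training_settings = {
    "mark_up_source": "Description",
    "bug_resolution": [OrderedDict(metric="Resolution", value="Rejected")],
}

for key, value in training_settings.items():
    if isinstance(value, list):
        training_settings[key] = [dict(item) for item in value]

print(training_settings["bug_resolution"])
# [{'metric': 'Resolution', 'value': 'Rejected'}]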