Пример #1
0
def get_similar_reviews_for_app(app_config_file, query, num_results):
    """Rank an app's processed reviews against ``query`` and save the matches.

    The processed reviews are loaded from disk, filtered by review channel
    and by a cut-off time, scored with ``get_similar_reviews`` and written as
    JSON to a results file whose path embeds a hash of the query.

    Args:
        app_config_file: Path of the JSON file describing the app.
        query: Query forwarded to ``get_similar_reviews``; it is also hashed
            to build the results file path.
        num_results: Forwarded unchanged to ``get_similar_reviews``.
    """
    app_config = AppConfig(utils.open_json(app_config_file))
    app_name = app_config.app.name

    # Announce the start of the query.
    logging.info(logs.QUERY_START, FawkesActions.QUERY_SIMILAR_REVIEWS, "ALL",
                 app_name)

    data_config = app_config.fawkes_internal_config.data

    # Load the processed reviews and turn every JSON entry into a Review.
    reviews_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
        base_folder=data_config.base_folder,
        dir_name=data_config.processed_data_folder,
        app_name=app_name,
    )
    reviews = [
        Review.from_review_json(raw_review)
        for raw_review in utils.open_json(reviews_path)
    ]

    # Apply the channel filter first, then the time cut-off
    # (now, in UTC, minus the configured number of days).
    channel_filter = filter_utils.filter_disabled_review_channels(app_config)
    channel_filtered = filter_utils.filter_reviews_by_channel(
        reviews, channel_filter)
    cutoff = datetime.now(timezone.utc) - timedelta(
        days=app_config.algorithm_config.algorithm_days_filter)
    reviews = filter_utils.filter_reviews_by_time(channel_filtered, cutoff)

    similar_reviews = get_similar_reviews(reviews, query, num_results)

    # Announce the end of the query.
    logging.info(logs.QUERY_END, FawkesActions.QUERY_SIMILAR_REVIEWS, "ALL",
                 app_name)

    # Make sure the folder for the results file exists before writing.
    results_path = constants.QUERY_RESULTS_FILE_PATH.format(
        base_folder=data_config.base_folder,
        dir_name=data_config.query_folder,
        app_name=app_name,
        query_hash=utils.calculate_hash(query))
    pathlib.Path(os.path.dirname(results_path)).mkdir(parents=True,
                                                      exist_ok=True)

    payload = [
        {"score": score, "review": review.to_dict()}
        for score, review in similar_reviews
    ]
    utils.dump_json(payload, results_path)
Пример #2
0
def generate_keyword_weights(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Write the keyword-weight files for every configured app.

    For each app config listed in the Fawkes config, the category keywords
    file and the bug/feature keywords file are run through
    ``parse_keywords_file`` and the results are dumped as JSON to the
    corresponding weights files named in the app's algorithm config.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))

    # Handle each registered app in turn.
    for app_config_file in fawkes_config.apps:
        algorithm_config = AppConfig(
            utils.open_json(app_config_file)).algorithm_config

        # Category keywords come first.
        category_weights = parse_keywords_file(
            algorithm_config.category_keywords_file)
        utils.dump_json(category_weights,
                        algorithm_config.category_keywords_weights_file)

        # Bug/feature keywords are parsed with the second argument False.
        bug_feature_weights = parse_keywords_file(
            algorithm_config.bug_feature_keywords_file, False)
        utils.dump_json(bug_feature_weights,
                        algorithm_config.bug_feature_keywords_weights_file)
Пример #3
0
def run_algo(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Run the review-processing algorithms for every configured app.

    For each app listed in the Fawkes config, the parsed reviews are loaded,
    filtered by channel and by a time cut-off, and then enriched in turn by
    ``add_review_sentiment_score``, ``text_match_categortization``,
    ``bug_feature_classification`` and, when the LSTM algorithm is selected,
    ``lstm_classification``. The result is dumped as JSON to the processed
    reviews file.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        algorithm_config = app_config.algorithm_config
        data_config = app_config.fawkes_internal_config.data
        app_name = app_config.app.name

        # Load the parsed reviews and turn every JSON entry into a Review.
        parsed_user_reviews_file_path = constants.PARSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.parsed_data_folder,
            app_name=app_name,
        )
        reviews = [
            Review.from_review_json(review)
            for review in utils.open_json(parsed_user_reviews_file_path)
        ]

        # Apply the channel filter first, then the time cut-off
        # (now, in UTC, minus the configured number of days).
        reviews = filter_utils.filter_reviews_by_time(
            filter_utils.filter_reviews_by_channel(
                reviews,
                filter_utils.filter_disabled_review_channels(app_config),
            ),
            datetime.now(timezone.utc) -
            timedelta(days=algorithm_config.algorithm_days_filter))

        # os.cpu_count() may return None when the count cannot be
        # determined; fall back to one worker instead of letting min() raise.
        num_processes = min(constants.PROCESS_NUMBER, os.cpu_count() or 1)

        # When the CIRCLECI variable is set, always use two workers.
        if constants.CIRCLECI in os.environ:
            num_processes = 2

        # Adding sentiment
        with Pool(num_processes) as pool:
            reviews = pool.map(add_review_sentiment_score, reviews)

        if (algorithm_config.categorization_algorithm is not None
                and algorithm_config.category_keywords_weights_file is not None):
            topics = utils.open_json(
                algorithm_config.category_keywords_weights_file)

            # Adding text-match categorization
            with Pool(num_processes) as pool:
                reviews = pool.map(
                    partial(text_match_categortization,
                            app_config=app_config,
                            topics=topics), reviews)

        if algorithm_config.bug_feature_keywords_weights_file is not None:
            topics = utils.open_json(
                algorithm_config.bug_feature_keywords_weights_file)

            # Adding bug/feature classification
            with Pool(num_processes) as pool:
                reviews = pool.map(
                    partial(bug_feature_classification, topics=topics),
                    reviews)

        if algorithm_config.categorization_algorithm == CategorizationAlgorithms.LSTM_CLASSIFICATION:
            # The model and both tokenizers are addressed with the same
            # path arguments, so build them once.
            model_path_args = {
                "base_folder": data_config.base_folder,
                "dir_name": data_config.models_folder,
                "app_name": app_name,
            }

            # Load the TensorFlow model
            model = tf.keras.models.load_model(
                constants.LSTM_CATEGORY_MODEL_FILE_PATH.format(
                    **model_path_args))

            # Load the article tokenizer file
            article_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
                utils.open_json(
                    constants.LSTM_CATEGORY_ARTICLE_TOKENIZER_FILE_PATH.format(
                        **model_path_args)))

            # Load the label tokenizer file
            label_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
                utils.open_json(
                    constants.LSTM_CATEGORY_LABEL_TOKENIZER_FILE_PATH.format(
                        **model_path_args)))

            # Map each category, stripped of non-word characters and
            # lower-cased, back to the original category label.
            cleaned_labels = {}
            for review in reviews:
                label = review.derived_insight.category
                cleaned_labels[re.sub(r'\W+', '', label).lower()] = label

            # Adding LSTM categorization
            reviews = lstm_classification(reviews, model, article_tokenizer,
                                          label_tokenizer, cleaned_labels)

        # Make sure the output folder exists, then write the processed reviews.
        processed_user_reviews_file_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.processed_data_folder,
            app_name=app_name,
        )
        pathlib.Path(os.path.dirname(processed_user_reviews_file_path)).mkdir(
            parents=True, exist_ok=True)

        utils.dump_json(
            [review.to_dict() for review in reviews],
            processed_user_reviews_file_path,
        )
Пример #4
0
def parse_reviews(fawkes_config_file = constants.FAWKES_CONFIG_FILE):
    """Parse the raw reviews of every configured app into one file per app.

    For each app, the raw review file of every enabled, non-blank review
    channel is parsed with the parser matching the channel's file type. The
    optional custom post-parse hook is then applied, and the combined result
    is dumped as JSON to the parsed reviews file.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.

    Raises:
        ValueError: If an enabled channel declares a file type other than
            ``constants.JSON``, ``constants.CSV`` or ``constants.JSON_LINES``.
    """
    fawkes_config = FawkesConfig(
        utils.open_json(fawkes_config_file)
    )

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(
            utils.open_json(
                app_config_file
            )
        )
        parsed_reviews = []

        for review_channel in app_config.review_channels:
            # Only enabled channels with a real (non-blank) type are parsed.
            if not review_channel.is_channel_enabled or review_channel.channel_type == ReviewChannelTypes.BLANK:
                continue

            raw_user_reviews_file_path = constants.RAW_USER_REVIEWS_FILE_PATH.format(
                base_folder=app_config.fawkes_internal_config.data.base_folder,
                dir_name=app_config.fawkes_internal_config.data.raw_data_folder,
                app_name=app_config.app.name,
                channel_name=review_channel.channel_name,
                extension=review_channel.file_type
            )

            # Pick the parser from the channel's declared file type.
            if review_channel.file_type == constants.JSON:
                parser = parse_json
            elif review_channel.file_type == constants.CSV:
                parser = parse_csv
            elif review_channel.file_type == constants.JSON_LINES:
                parser = parse_json_lines
            else:
                # Raising a bare string is itself a TypeError in Python 3
                # and would hide this message, so raise a real exception.
                raise ValueError(
                    "Format not supported exception. Check your file-type key in your config."
                )

            parsed_reviews += parser(
                raw_user_reviews_file_path,
                review_channel,
                app_config
            )

        # Executing custom code after parsing.
        if app_config.custom_code_module_path is not None:
            custom_code_module = importlib.import_module(app_config.custom_code_module_path, package=None)
            parsed_reviews = custom_code_module.run_custom_code_post_parse(
                parsed_reviews)

        # After parsing the reviews of all channels, dump them into one file:
        # {base_folder}/{dir_name}/{app_name}/parsed-user-feedback.{extension}
        parsed_user_reviews_file_path = constants.PARSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=app_config.fawkes_internal_config.data.base_folder,
            dir_name=app_config.fawkes_internal_config.data.parsed_data_folder,
            app_name=app_config.app.name,
        )

        # Create the intermediate folders
        dir_name = os.path.dirname(parsed_user_reviews_file_path)
        pathlib.Path(dir_name).mkdir(parents=True, exist_ok=True)

        utils.dump_json(
            [parsed_review.to_dict() for parsed_review in parsed_reviews],
            parsed_user_reviews_file_path
        )
Пример #5
0
def run_algo(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Run the review-processing pipeline for every configured app.

    For each app listed in the Fawkes config, the parsed reviews are loaded,
    filtered by channel and by a time cut-off, and passed in turn through
    ``run_sentiment_analysis``, ``run_categorization``,
    ``run_bug_feature_categorization`` and ``run_review_text_encoding``.
    The result is dumped as JSON to the processed reviews file.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        app_name = app_config.app.name

        # Log the current operation which is being performed.
        logging.info(logs.OPERATION, FawkesActions.RUN_ALGO, "ALL", app_name)

        data_config = app_config.fawkes_internal_config.data

        # Load the parsed reviews and turn every JSON entry into a Review.
        parsed_user_reviews_file_path = constants.PARSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.parsed_data_folder,
            app_name=app_name,
        )
        reviews = [
            Review.from_review_json(review)
            for review in utils.open_json(parsed_user_reviews_file_path)
        ]

        # Apply the channel filter first, then the time cut-off
        # (now, in UTC, minus the configured number of days).
        reviews = filter_utils.filter_reviews_by_time(
            filter_utils.filter_reviews_by_channel(
                reviews,
                filter_utils.filter_disabled_review_channels(app_config),
            ),
            datetime.now(timezone.utc) -
            timedelta(days=app_config.algorithm_config.algorithm_days_filter))

        # Log the number of reviews we got.
        logging.info(logs.NUM_REVIEWS, len(reviews), "ALL", app_name)

        # os.cpu_count() may return None when the count cannot be
        # determined; fall back to one worker instead of letting min() raise.
        num_processes = min(constants.PROCESS_NUMBER, os.cpu_count() or 1)

        # When the CIRCLECI variable is set, always use two workers.
        if constants.CIRCLECI in os.environ:
            num_processes = 2

        # Running sentiment analysis
        reviews = run_sentiment_analysis(reviews, app_config, num_processes)

        # Running categorization
        reviews = run_categorization(reviews, app_config, num_processes)

        # Running bug/feature categorization
        reviews = run_bug_feature_categorization(reviews, app_config,
                                                 num_processes)

        # Running the message encoding
        reviews = run_review_text_encoding(reviews, app_config, num_processes)

        # Make sure the output folder exists, then write the processed reviews.
        processed_user_reviews_file_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.processed_data_folder,
            app_name=app_name,
        )
        pathlib.Path(os.path.dirname(processed_user_reviews_file_path)).mkdir(
            parents=True, exist_ok=True)

        utils.dump_json(
            [review.to_dict() for review in reviews],
            processed_user_reviews_file_path,
        )
Пример #6
0
def generate_summary(fawkes_config_file=constants.FAWKES_CONFIG_FILE):
    """Build per-category review summaries and write them to disk.

    The processed reviews are loaded and grouped with
    ``queries.getVocByCategory``. Each category's reviews are preprocessed
    into sentences, clustered with ``k_means_classification``, and every
    cluster holding at least ``constants.minimum_reviews_per_cluster``
    sentences is summarized with ``summarize_text``. Categories with too few
    sentences for the configured number of clusters get an empty summary
    list and an informational log entry.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.

    Returns:
        A mapping from category to the list of generated summaries, or
        ``None`` when the config lists no apps.
    """
    fawkes_config = FawkesConfig(utils.open_json(fawkes_config_file))

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(utils.open_json(app_config_file))
        data_config = app_config.fawkes_internal_config.data

        # Load the processed reviews and turn every JSON entry into a Review.
        reviews_path = constants.PROCESSED_USER_REVIEWS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.processed_data_folder,
            app_name=app_config.app.name,
        )
        loaded_reviews = [
            Review.from_review_json(raw_review)
            for raw_review in utils.open_json(reviews_path)
        ]

        reviews_by_category = queries.getVocByCategory(loaded_reviews)
        summarized_reviews = {}

        # Produce one list of summaries per category.
        for category in reviews_by_category:
            sentences = preprocess_review(reviews_by_category[category])

            # Clustering needs at least as many sentences as clusters.
            if (len(sentences) <=
                    app_config.algorithm_config.summarization.num_clusters -
                    1):
                logging.info(logs.INSUFFICIENT_DATA, category)
                summarized_reviews[category] = []
                continue

            clustered_sentences = k_means_classification(
                sentences,
                app_config.algorithm_config.summarization.num_clusters)

            # Clusters below the minimum size are skipped.
            summarized_reviews[category] = [
                summarize_text(
                    ". ".join(cluster),
                    app_config.algorithm_config.summarization.
                    summary_length_per_cluster,
                )
                for cluster in clustered_sentences.values()
                if len(cluster) >= constants.minimum_reviews_per_cluster
            ]

        # Make sure the output folder exists, then write the summaries.
        results_path = constants.REVIEW_SUMMARY_RESULTS_FILE_PATH.format(
            base_folder=data_config.base_folder,
            dir_name=data_config.query_folder,
            app_name=app_config.app.name,
        )
        pathlib.Path(os.path.dirname(results_path)).mkdir(parents=True,
                                                          exist_ok=True)

        utils.dump_json([{"summarized_reviews": summarized_reviews}],
                        results_path)

        # NOTE(review): this return sits inside the per-app loop, so only the
        # first app in the config is ever summarized - confirm whether that
        # is intended before relying on multi-app configs.
        return summarized_reviews
Пример #7
0
def fetch_reviews(fawkes_config_file = constants.FAWKES_CONFIG_FILE):
    """Fetch raw reviews for every enabled channel of every configured app.

    For each app, every enabled, non-blank review channel is fetched with
    the fetcher matching its channel type and the result is written to the
    channel's raw reviews file. JSON channels are dumped with
    ``utils.dump_json``; any other file type is written as-is. Channels of
    an unrecognised type are skipped. After all channels of an app are
    handled, the optional custom post-fetch hook is invoked.

    Args:
        fawkes_config_file: Path to the Fawkes JSON config listing the
            per-app config files.
    """
    fawkes_config = FawkesConfig(
        utils.open_json(fawkes_config_file)
    )

    for app_config_file in fawkes_config.apps:
        app_config = AppConfig(
            utils.open_json(
                app_config_file
            )
        )

        # Each app has a list of review channels from which the user reviews are fetched.
        for review_channel in app_config.review_channels:
            # Only enabled channels with a real (non-blank) type are fetched.
            if not review_channel.is_channel_enabled or review_channel.channel_type == ReviewChannelTypes.BLANK:
                continue

            # Log the current operation which is being performed.
            logging.info(logs.OPERATION, FawkesActions.FETCH, review_channel.channel_name, app_config.app.name)

            # Depending on the channel type, we have different "fetchers" to get the data.
            if review_channel.channel_type == ReviewChannelTypes.TWITTER:
                reviews = tweets.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.SALESFORCE:
                reviews = salesforce.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.SPREADSHEET:
                reviews = spreadsheet.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.CSV:
                reviews = comma_separated_values.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.ANDROID:
                reviews = playstore.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.IOS:
                reviews = appstore.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.SPLUNK:
                reviews = splunk.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.REMOTE_FILE:
                reviews = remote.fetch(review_channel)
            elif review_channel.channel_type == ReviewChannelTypes.VERTICA:
                reviews = vertica.fetch(review_channel)
            else:
                # No fetcher for this channel type: nothing to write.
                continue

            # Log the number of reviews we got.
            logging.info(logs.NUM_REVIEWS, len(reviews), review_channel.channel_name, app_config.app.name)

            # After fetching the review for that particular channel, we dump it into a file.
            # The file has a particular format.
            # {base_folder}/{dir_name}/{app_name}/{channel_name}-raw-feedback.{extension}
            raw_user_reviews_file_path = constants.RAW_USER_REVIEWS_FILE_PATH.format(
                base_folder=app_config.fawkes_internal_config.data.base_folder,
                dir_name=app_config.fawkes_internal_config.data.raw_data_folder,
                app_name=app_config.app.name,
                channel_name=review_channel.channel_name,
                extension=review_channel.file_type)

            # Create the intermediate folders
            dir_name = os.path.dirname(raw_user_reviews_file_path)
            pathlib.Path(dir_name).mkdir(parents=True, exist_ok=True)

            if review_channel.file_type == constants.JSON:
                utils.dump_json(reviews, raw_user_reviews_file_path)
            else:
                # Non-JSON payloads are written verbatim.
                # presumably the fetcher returned a str here - verify.
                with open(raw_user_reviews_file_path, "w") as file:
                    file.write(reviews)

        # There are lot of use-cases where we need to execute custom code after the data is fetched.
        # This might include data-transformation, cleanup etc.
        # This is the right place to do that.
        if app_config.custom_code_module_path is not None:
            custom_code_module = importlib.import_module(app_config.custom_code_module_path, package=None)
            # The hook's return value was never used, so it is not bound.
            custom_code_module.run_custom_code_post_fetch()