Exemplo n.º 1
0
    def execute(self, arguments=None, persist=True, check_license=False):
        """Build companion and alternate package recommendations per manifest.

        For every entry of ``arguments['result']`` the resolved packages of
        ``details[0]`` are sent to the insights recommender. The companion and
        alternate packages it returns are enriched with version information
        from ``GraphDB``, version-filtered against the combined input stack,
        optionally license-filtered, and stored in a per-manifest
        recommendation dict.

        :param arguments: dict with ``result`` (list of per-manifest results,
            each carrying ``details[0]`` with ``_resolved``,
            ``manifest_file_path`` and ``ecosystem``) and
            ``external_request_id``. ``None`` or a missing ``result`` is
            treated as "no manifests".
        :param persist: when true, hand the task result to
            ``persist_data_in_db`` and return whatever it returns.
        :param check_license: when true, run
            ``License.perform_license_analysis`` on the version-filtered
            packages.
        :return: the value of ``persist_data_in_db`` when ``persist`` is true,
            otherwise a dict with ``recommendation`` set to ``'success'`` and
            the task result under ``result``. If the recommender returns
            ``None`` for a manifest, a ``'pgm_error'`` dict is returned
            immediately and nothing is persisted.
        """
        started_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")

        # The signature allows ``arguments=None``; treat it (and a missing
        # 'result') as an empty request instead of raising.
        arguments = arguments or {}
        results = arguments.get('result') or []
        external_request_id = arguments.get('external_request_id')

        recommendations = []

        # package -> version over *all* manifests; used by the version filters
        # and for resolving the version an alternate package replaces.
        input_stack = {}
        for result in results:
            input_stack.update({
                dep["package"]: dep["version"]
                for dep in result.get("details", [])[0].get("_resolved")
            })

        for result in results:
            details = result['details'][0]
            resolved = details['_resolved']
            manifest_file_path = details['manifest_file_path']

            recommendation = {
                'companion': [],
                'alternate': [],
                'usage_outliers': [],
                'manifest_file_path': manifest_file_path
            }

            package_names = [dep['package'] for dep in resolved]
            if not package_names:
                # Nothing to analyse: report the manifest with empty lists.
                recommendations.append(recommendation)
                continue

            manifest_ecosystem = details['ecosystem']
            insights_payload = {
                'ecosystem': manifest_ecosystem,
                'unknown_packages_ratio_threshold':
                    float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
                'package_list': package_names,
                'comp_package_count_threshold':
                    int(os.environ.get('MAX_COMPANION_PACKAGES', 5))
            }
            if manifest_ecosystem in self.kronos_ecosystems:
                insights_payload.update({
                    'alt_package_count_threshold':
                        int(os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
                    'outlier_probability_threshold':
                        float(os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                    'user_persona': "1",  # TODO - remove janus hardcoded value
                })

            # Call the recommender with this manifest only and time the call.
            start = datetime.datetime.utcnow()
            insights_response = self.call_insights_recommender([insights_payload])
            elapsed_seconds = (datetime.datetime.utcnow() - start).total_seconds()
            logger.info(
                "It took %s seconds to get insight's response "
                "for external request %s.",
                elapsed_seconds, external_request_id)

            if insights_response is None:
                return {
                    'recommendation': 'pgm_error',
                    'external_request_id': external_request_id,
                    'message': 'PGM Fetching error'
                }

            # TODO - implement multiple manifest file support for below loop
            for insights_result in insights_response:
                ecosystem = insights_result['ecosystem']

                # Usage based outliers, topics of the user stack and the
                # packages the recommender does not know about.
                recommendation['usage_outliers'] = \
                    insights_result.get('outlier_package_list', [])
                recommendation['input_stack_topics'] = \
                    insights_result.get('package_to_topic_dict', {})
                recommendation['missing_packages_pgm'] = \
                    insights_result.get('missing_packages', [])

                companion_packages = [
                    pkg['package_name']
                    for pkg in insights_result['companion_packages']
                ]

                # Companion packages: graph lookup, then version filter.
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)
                filtered_comp_packages_graph, filtered_list = GraphDB().filter_versions(
                    comp_packages_graph, input_stack, external_request_id,
                    rec_type="COMPANION")

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                logger.info(
                    "Companion Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_companion_packages)

                # Topmost alternate package for each input package.
                alternate_packages, final_dict = GraphDB.get_topmost_alternate(
                    insights_result=insights_result, input_stack=input_stack
                )

                # Skip the graph query when there is nothing to look up.
                alt_packages_graph = []
                if alternate_packages:
                    alt_packages_graph = GraphDB().get_version_information(
                        alternate_packages, ecosystem)

                filtered_alt_packages_graph, filtered_list = GraphDB().filter_versions(
                    alt_packages_graph, input_stack, external_request_id,
                    rec_type="ALTERNATE")

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                logger.info(
                    "Alternate Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_alternate_packages)

                if check_license:
                    lic_filtered_alt_graph, lic_filtered_comp_graph = \
                        License.perform_license_analysis(
                            resolved=resolved, ecosystem=ecosystem,
                            filtered_alt_packages_graph=filtered_alt_packages_graph,
                            filtered_comp_packages_graph=filtered_comp_packages_graph,
                            filtered_alternate_packages=filtered_alternate_packages,
                            filtered_companion_packages=filtered_companion_packages,
                            external_request_id=external_request_id
                        )
                else:
                    lic_filtered_alt_graph = filtered_alt_packages_graph
                    lic_filtered_comp_graph = filtered_comp_packages_graph

                # Companion block: add topics, build dicts, then apply
                # set_valid_cooccurrence_probability.
                topics_comp_packages_graph = GraphDB().get_topics_for_comp(
                    lic_filtered_comp_graph,
                    insights_result.get('companion_packages', []))
                comp_packages = create_package_dict(topics_comp_packages_graph)
                recommendation['companion'] = \
                    set_valid_cooccurrence_probability(comp_packages)

                # Alternate block: add topics, then build dicts.
                topics_alt_packages_graph = GraphDB().get_topics_for_alt(
                    lic_filtered_alt_graph,
                    insights_result.get('alternate_packages', {}))
                recommendation['alternate'] = create_package_dict(
                    topics_alt_packages_graph, final_dict)

            recommendations.append(recommendation)

        ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
        audit = {'started_at': started_at, 'ended_at': ended_at, 'version': 'v1'}

        task_result = {
            'recommendations': recommendations,
            '_audit': audit,
            '_release': 'None:None:None'
        }

        if persist:
            return persist_data_in_db(external_request_id=external_request_id,
                                      task_result=task_result)
        return {'recommendation': 'success',
                'external_request_id': external_request_id,
                'result': task_result}
Exemplo n.º 2
0
    def execute(self, arguments=None, persist=True, check_license=False):
        """Build companion and alternate package recommendations per manifest.

        For every entry of ``arguments['result']`` the resolved packages of
        ``details[0]`` are sent to the insights recommender. The companion
        packages and the top-scoring alternate package per input package are
        enriched with version information from ``GraphDB``, version-filtered
        against the combined input stack, optionally license-filtered, and
        stored in a per-manifest recommendation dict.

        :param arguments: dict with ``result`` (list of per-manifest results,
            each carrying ``details[0]`` with ``_resolved``,
            ``manifest_file_path`` and ``ecosystem``) and
            ``external_request_id``. ``None`` or a missing ``result`` is
            treated as "no manifests".
        :param persist: when true, store the task result as a
            ``WorkerResult`` row before returning.
        :param check_license: when true, apply ``apply_license_filter`` to the
            version-filtered packages.
        :return: dict whose ``recommendation`` key is ``'success'`` (with the
            task result under ``result``), ``'pgm_error'`` when the
            recommender returned ``None`` (returned immediately, nothing is
            persisted), or ``'database error'`` when storing raised
            ``SQLAlchemyError``.
        """
        # TODO: reduce cyclomatic complexity
        started_at = datetime.datetime.utcnow().strftime(
            "%Y-%m-%dT%H:%M:%S.%f")

        # The signature allows ``arguments=None``; treat it (and a missing
        # 'result') as an empty request instead of raising.
        arguments = arguments or {}
        results = arguments.get('result') or []
        external_request_id = arguments.get('external_request_id')

        recommendations = []

        # package -> version over *all* manifests; used by the version filters
        # and to record which version an alternate package would replace.
        input_stack = {}
        for result in results:
            input_stack.update({
                dep["package"]: dep["version"]
                for dep in result.get("details", [])[0].get("_resolved")
            })

        for result in results:
            details = result['details'][0]
            resolved = details['_resolved']
            manifest_file_path = details['manifest_file_path']

            recommendation = {
                'companion': [],
                'alternate': [],
                'usage_outliers': [],
                'manifest_file_path': manifest_file_path
            }

            package_names = [dep['package'] for dep in resolved]
            if not package_names:
                # Nothing to analyse: report the manifest with empty lists.
                recommendations.append(recommendation)
                continue

            manifest_ecosystem = details['ecosystem']
            insights_payload = {
                'ecosystem': manifest_ecosystem,
                'unknown_packages_ratio_threshold':
                    float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
                'package_list': package_names,
                'comp_package_count_threshold':
                    int(os.environ.get('MAX_COMPANION_PACKAGES', 5))
            }
            if manifest_ecosystem in self.kronos_ecosystems:
                insights_payload.update({
                    'alt_package_count_threshold':
                        int(os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
                    'outlier_probability_threshold':
                        float(os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                    'user_persona': "1",  # TODO - remove janus hardcoded value
                })

            # Send only the current manifest. Accumulating payloads across
            # iterations re-submitted (and re-processed) every earlier
            # manifest on each call.
            start = datetime.datetime.utcnow()
            insights_response = self.call_insights_recommender(
                [insights_payload])
            elapsed_seconds = (datetime.datetime.utcnow() -
                               start).total_seconds()
            logger.info(
                "It took %s seconds to get insight's response "
                "for external request %s.",
                elapsed_seconds, external_request_id)

            if insights_response is None:
                return {
                    'recommendation': 'pgm_error',
                    'external_request_id': external_request_id,
                    'message': 'PGM Fetching error'
                }

            # TODO - implement multiple manifest file support for below loop
            for insights_result in insights_response:
                ecosystem = insights_result['ecosystem']

                # Usage based outliers, topics of the user stack and the
                # packages the recommender does not know about.
                recommendation['usage_outliers'] = \
                    insights_result.get('outlier_package_list', [])
                recommendation['input_stack_topics'] = \
                    insights_result.get('package_to_topic_dict', {})
                recommendation['missing_packages_pgm'] = \
                    insights_result.get('missing_packages', [])

                companion_packages = [
                    pkg['package_name']
                    for pkg in insights_result['companion_packages']
                ]

                # Companion packages: graph lookup, then version filter.
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)
                filtered_comp_packages_graph, filtered_list = \
                    GraphDB().filter_versions(comp_packages_graph,
                                              input_stack,
                                              external_request_id,
                                              rec_type="COMPANION")

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                logger.info(
                    "Companion Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_companion_packages)

                # For each input package keep only the alternate with the
                # highest similarity score.
                final_dict = {}
                alternate_packages = []
                for pkg_name, candidates in insights_result.get(
                        'alternate_packages', {}).items():
                    scores = {
                        cand['package_name']: cand['similarity_score']
                        for cand in candidates
                    }
                    for alt_pkg, sim_score in Counter(scores).most_common(1):
                        final_dict[alt_pkg] = {
                            'version': input_stack[pkg_name],
                            'replaces': pkg_name,
                            'similarity_score': sim_score
                        }
                        alternate_packages.append(alt_pkg)

                # Alternate packages: graph lookup, then version filter.
                alt_packages_graph = GraphDB().get_version_information(
                    alternate_packages, ecosystem)
                filtered_alt_packages_graph, filtered_list = \
                    GraphDB().filter_versions(alt_packages_graph,
                                              input_stack,
                                              external_request_id,
                                              rec_type="ALTERNATE")

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                logger.info(
                    "Alternate Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_alternate_packages)

                if check_license:
                    # Filter recommendations against the user stack licenses.
                    list_user_stack_comp = extract_user_stack_package_licenses(
                        resolved, ecosystem)
                    license_filter_output = apply_license_filter(
                        list_user_stack_comp, filtered_alt_packages_graph,
                        filtered_comp_packages_graph)

                    lic_filtered_alt_graph = license_filter_output[
                        'filtered_alt_packages_graph']
                    lic_filtered_comp_graph = license_filter_output[
                        'filtered_comp_packages_graph']
                    lic_filtered_list_alt = license_filter_output[
                        'filtered_list_pkg_names_alt']
                    lic_filtered_list_com = license_filter_output[
                        'filtered_list_pkg_names_com']
                else:
                    lic_filtered_alt_graph = filtered_alt_packages_graph
                    lic_filtered_comp_graph = filtered_comp_packages_graph
                    lic_filtered_list_alt = []
                    lic_filtered_list_com = []

                if lic_filtered_list_alt:
                    logger.info(
                        "Alternate Packages filtered (licenses) for "
                        "external_request_id %s %s",
                        external_request_id,
                        set(filtered_alternate_packages).difference(
                            set(lic_filtered_list_alt)))

                if lic_filtered_list_com:
                    logger.info(
                        "Companion Packages filtered (licenses) for "
                        "external_request_id %s %s",
                        external_request_id,
                        set(filtered_companion_packages).difference(
                            set(lic_filtered_list_com)))

                # Companion block: add topics, then build dicts.
                topics_comp_packages_graph = GraphDB().get_topics_for_comp(
                    lic_filtered_comp_graph,
                    insights_result.get('companion_packages', []))
                recommendation['companion'] = create_package_dict(
                    topics_comp_packages_graph)

                # Alternate block: add topics, then build dicts.
                topics_alt_packages_graph = GraphDB().get_topics_for_alt(
                    lic_filtered_alt_graph,
                    insights_result.get('alternate_packages', {}))
                recommendation['alternate'] = create_package_dict(
                    topics_alt_packages_graph, final_dict)

            recommendations.append(recommendation)

        ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
        audit = {
            'started_at': started_at,
            'ended_at': ended_at,
            'version': 'v1'
        }

        task_result = {
            'recommendations': recommendations,
            '_audit': audit,
            '_release': 'None:None:None'
        }

        if persist:
            # Store the result in RDS; roll back and report on failure.
            try:
                insert_stmt = insert(WorkerResult).values(
                    worker='recommendation_v2',
                    worker_id=None,
                    external_request_id=external_request_id,
                    analysis_id=None,
                    task_result=task_result,
                    error=False)
                do_update_stmt = insert_stmt.on_conflict_do_update(
                    index_elements=['id'], set_=dict(task_result=task_result))
                session.execute(do_update_stmt)
                session.commit()
            except SQLAlchemyError as e:
                session.rollback()
                return {
                    'recommendation': 'database error',
                    'external_request_id': external_request_id,
                    'message': '%s' % e
                }

        return {
            'recommendation': 'success',
            'external_request_id': external_request_id,
            'result': task_result
        }
Exemplo n.º 3
0
    def execute(self, arguments=None):
        """Build companion and alternate package recommendations and store them.

        For every entry of ``arguments['result']`` the resolved packages of
        ``details[0]`` are sent to PGM. The companion packages and the
        top-scoring alternate package per input package are enriched with
        version information from ``GraphDB``, version-filtered against the
        combined input stack, license-filtered, and stored in a per-manifest
        recommendation dict. The collected result is then saved as a
        ``WorkerResult`` row.

        :param arguments: dict with ``result`` (list of per-manifest results,
            each carrying ``details[0]`` with ``_resolved``,
            ``manifest_file_path`` and ``ecosystem``) and
            ``external_request_id``. ``None`` or a missing ``result`` is
            treated as "no manifests".
        :return: dict whose ``recommendation`` key is ``'success'``, or
            ``'database error'`` (with the error text under ``message``) when
            storing raised ``SQLAlchemyError``.
        """
        started_at = datetime.datetime.utcnow().strftime(
            "%Y-%m-%dT%H:%M:%S.%f")

        # The signature allows ``arguments=None``; treat it (and a missing
        # 'result') as an empty request instead of raising.
        arguments = arguments or {}
        results = arguments.get('result') or []
        external_request_id = arguments.get('external_request_id')

        recommendations = []

        # package -> version over *all* manifests; used by the version filters
        # and to record which version an alternate package would replace.
        input_stack = {}
        for result in results:
            input_stack.update({
                dep["package"]: dep["version"]
                for dep in result.get("details", [])[0].get("_resolved")
            })

        for result in results:
            details = result['details'][0]
            resolved = details['_resolved']
            manifest_file_path = details['manifest_file_path']

            recommendation = {
                'companion': [],
                'alternate': [],
                'usage_outliers': [],
                'manifest_file_path': manifest_file_path
            }
            pgm_payload = {
                'ecosystem': details['ecosystem'],
                'comp_package_count_threshold':
                    int(os.environ.get('MAX_COMPANION_PACKAGES', 5)),
                'alt_package_count_threshold':
                    int(os.environ.get('MAX_ALTERNATE_PACKAGES', 2)),
                'outlier_probability_threshold':
                    float(os.environ.get('OUTLIER_THRESHOLD', 0.6)),
                'unknown_packages_ratio_threshold':
                    float(os.environ.get('UNKNOWN_PACKAGES_THRESHOLD', 0.3)),
                # TODO - remove janus hardcoded value completely and assign a
                # category here
                'user_persona': "1",
                'package_list': [dep['package'] for dep in resolved]
            }

            # Send only the current manifest. Accumulating payloads across
            # iterations re-submitted (and re-processed) every earlier
            # manifest on each call.
            start = datetime.datetime.utcnow()
            pgm_response = self.call_pgm([pgm_payload])
            elapsed_seconds = (datetime.datetime.utcnow() -
                               start).total_seconds()
            # Logged through the module logger like every other message here,
            # rather than printed to stdout.
            _logger.info(
                'It took %s seconds to get response from PGM '
                'for external request %s.',
                elapsed_seconds, external_request_id)

            # A ``None`` response leaves the recommendation with its empty
            # defaults; it is still reported for this manifest.
            # TODO - implement multiple manifest file support for below loop
            for pgm_result in (pgm_response if pgm_response is not None else ()):
                ecosystem = pgm_result['ecosystem']

                # Usage based outliers and topics of the user stack.
                recommendation['usage_outliers'] = \
                    pgm_result['outlier_package_list']
                recommendation['input_stack_topics'] = pgm_result.get(
                    'package_to_topic_dict', {})

                companion_packages = [
                    pkg['package_name']
                    for pkg in pgm_result['companion_packages']
                ]

                # Companion packages: graph lookup, then version filter.
                comp_packages_graph = GraphDB().get_version_information(
                    companion_packages, ecosystem)
                filtered_comp_packages_graph, filtered_list = \
                    GraphDB().filter_versions(comp_packages_graph, input_stack)

                filtered_companion_packages = \
                    set(companion_packages).difference(set(filtered_list))
                _logger.info(
                    "Companion Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_companion_packages)

                # For each input package keep only the alternate with the
                # highest similarity score.
                final_dict = {}
                alternate_packages = []
                for pkg_name, candidates in \
                        pgm_result['alternate_packages'].items():
                    scores = {
                        cand['package_name']: cand['similarity_score']
                        for cand in candidates
                    }
                    for alt_pkg, sim_score in Counter(scores).most_common(1):
                        final_dict[alt_pkg] = {
                            'version': input_stack[pkg_name],
                            'replaces': pkg_name,
                            'similarity_score': sim_score
                        }
                        alternate_packages.append(alt_pkg)

                # Alternate packages: graph lookup, then version filter.
                alt_packages_graph = GraphDB().get_version_information(
                    alternate_packages, ecosystem)
                filtered_alt_packages_graph, filtered_list = \
                    GraphDB().filter_versions(alt_packages_graph, input_stack)

                filtered_alternate_packages = \
                    set(alternate_packages).difference(set(filtered_list))
                _logger.info(
                    "Alternate Packages Filtered for external_request_id %s %s",
                    external_request_id, filtered_alternate_packages)

                # Filter recommendations against the user stack licenses.
                list_user_stack_comp = extract_user_stack_package_licenses(
                    resolved, ecosystem)
                license_filter_output = apply_license_filter(
                    list_user_stack_comp, filtered_alt_packages_graph,
                    filtered_comp_packages_graph)

                lic_filtered_alt_graph = license_filter_output[
                    'filtered_alt_packages_graph']
                lic_filtered_comp_graph = license_filter_output[
                    'filtered_comp_packages_graph']
                lic_filtered_list_alt = license_filter_output[
                    'filtered_list_pkg_names_alt']
                lic_filtered_list_com = license_filter_output[
                    'filtered_list_pkg_names_com']

                if len(lic_filtered_list_alt) > 0:
                    _logger.info(
                        "Alternate Packages filtered (licenses) for "
                        "external_request_id %s %s",
                        external_request_id,
                        set(filtered_alternate_packages).difference(
                            set(lic_filtered_list_alt)))

                if len(lic_filtered_list_com) > 0:
                    _logger.info(
                        "Companion Packages filtered (licenses) for "
                        "external_request_id %s %s",
                        external_request_id,
                        set(filtered_companion_packages).difference(
                            set(lic_filtered_list_com)))

                # Companion block: add topics, then build dicts.
                topics_comp_packages_graph = GraphDB().get_topics_for_comp(
                    lic_filtered_comp_graph,
                    pgm_result['companion_packages'])
                recommendation['companion'] = create_package_dict(
                    topics_comp_packages_graph)

                # Alternate block: add topics, then build dicts.
                topics_alt_packages_graph = GraphDB().get_topics_for_alt(
                    lic_filtered_alt_graph,
                    pgm_result['alternate_packages'])
                recommendation['alternate'] = create_package_dict(
                    topics_alt_packages_graph, final_dict)

            recommendations.append(recommendation)

        ended_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")
        audit = {
            'started_at': started_at,
            'ended_at': ended_at,
            'version': 'v1'
        }

        task_result = {
            'recommendations': recommendations,
            '_audit': audit,
            '_release': 'None:None:None'
        }

        # TEST_REQUEST_ID, when set in the environment, overrides the request
        # id stored with the row (the returned id is left untouched).
        wr = WorkerResult(worker='recommendation_v2',
                          worker_id=None,
                          external_request_id=os.getenv(
                              'TEST_REQUEST_ID', external_request_id),
                          analysis_id=None,
                          task_result=task_result,
                          error=False)

        # Store the result in RDS; roll back and report on failure.
        try:
            session.add(wr)
            session.commit()
        except SQLAlchemyError as e:
            session.rollback()
            return {
                'recommendation': 'database error',
                'external_request_id': external_request_id,
                'message': '%s' % e
            }

        return {
            'recommendation': 'success',
            'external_request_id': external_request_id
        }