def get_names(self):
        """
        List the names of the configurations supported by the GSM.

        The configurations are requested from ``Publisher.all`` sorted
        ascending on ``publisher_name.exact``; the ``publisher_name`` of each
        one is returned, in that order.
        """
        ordering = [{"publisher_name.exact": {"order": "asc"}}]
        return [publisher['publisher_name'] for publisher in Publisher.all(sort=ordering)]
    def license_detect(self, record):
        """
        Try to detect a license for ``record`` from the content of its
        provider URLs.

        Publisher configurations whose URL is a prefix of one of the record's
        cleaned provider URLs are tried first, longest configuration URL
        first.  The flat list of ``LicenseStatement`` entries is only tried
        when no configuration matched at all.  The actual matching is
        delegated to ``self.simple_extract``; a detection counts as
        successful when ``record.license`` has grown.

        :param record: the record being processed.  Its ``provider_urls`` and
            ``license`` attributes are read here, and the record itself is
            handed to ``self.simple_extract``.
        :return: a ``(handler, version)`` tuple.  ``handler`` is the
            ``publisher_name`` of the configuration that found a license,
            ``self._short_name`` when the flat statement list found it, or
            ``'oag'`` when nothing was found.  ``version`` is always
            ``self.__version__``.
        """
        # get all the URL-s from ES into a list
        #     need some way of getting facets from the DAO, ideally
        #     directly in list form as well as the raw form
        all_configs = Publisher.all(sort=[{'publisher_name': 'asc'}])  # always get them in the same order relative to each other
        url_index = self._generate_publisher_config_index_by_url(all_configs)
        # .items() gives the same result as .iteritems() on Python 2 and
        # still exists on Python 3
        url_index = OrderedDict(sorted(url_index.items(), key=lambda x: len(x[0]), reverse=True))  # longest url-s first
        id_index = self._generate_publisher_config_index_by_id(all_configs)

        # get all the configs that match, each one only once: several
        # incoming URL-s (or several config URL-s) can point at the same
        # config, and re-running an identical config against the same
        # prefetched content cannot produce a different outcome
        matching_configs = []
        seen_config_ids = set()
        work_on = self.clean_urls(record.provider_urls, strip_leading_www=True)

        for config_url, config_id in url_index.items():
            if config_id in seen_config_ids:
                continue
            if any(incoming_url.startswith(config_url) for incoming_url in work_on):
                seen_config_ids.add(config_id)
                matching_configs.append(id_index[config_id])
        # future:
        # use tries to prefix match them to the incoming URL
        #     if the results of this could be ordered by URL length that
        #     would be great, or stop at first match option

        # prefetch the content, we'll be reusing it a lot
        urls_contents = {}
        for incoming_url in record.provider_urls:
            unused_response, content, unused_content_length = util.http_stream_get(incoming_url)
            urls_contents[incoming_url] = content

        successful_config = None
        current_licenses_count = len(record.license)
        for config in matching_configs:
            # order the config's license statements by version and then by
            # length
            matching_config_licenses = sorted(
                config['licenses'],
                key=lambda lic: (
                    lic.get('version'),  # with reverse=True, this will actually sort licenses in REVERSE ALPHABETICAL order of their versions, blank versions go last
                    len(lic['license_statement'])  # longest first with reverse=True
                ),
                reverse=True
            )

            # a missing version is treated as blank, exactly as the flat
            # statement list below does, instead of raising KeyError
            lic_statements = [
                {lic['license_statement']: {'type': lic['license_type'], 'version': lic.get('version', '')}}
                for lic in matching_config_licenses
            ]

            # if we find a license, stop trying the configs and record which config found it
            if self._try_statements_on_urls(lic_statements, record, urls_contents,
                                            current_licenses_count, handler=config.publisher_name):
                successful_config = config
                break

        # if no config exists which can match the license, then try the flat list
        # do not try the flat list of statements if a matching config has been found
        # this keeps these "virtual" plugins, i.e. the configs, consistent with how
        # the rest of the system operates
        flat_license_list_success = False
        if not matching_configs:
            # NOTE(review): within each group this sorts the SHORTEST
            # statement first, whereas the per-publisher ordering above puts
            # the longest first - confirm that this is intended, given that
            # first_match=True is passed to simple_extract.
            all_statements = sorted(
                LicenseStatement.all(),
                key=lambda lic: (
                    lic.get('version', '') == '',  # does it NOT have a version? last!
                    # see http://stackoverflow.com/questions/9386501/sorting-in-python-and-empty-strings

                    len(lic['license_statement'])  # length of license statement
                )
            )

            lic_statements = [
                {lic['license_statement']: {'type': lic['license_type'], 'version': lic.get('version', '')}}
                for lic in all_statements
            ]

            # no handler given: simple_extract's default handler is used
            flat_license_list_success = self._try_statements_on_urls(
                lic_statements, record, urls_contents, current_licenses_count
            )

        if successful_config:
            return successful_config.publisher_name, self.__version__
        elif flat_license_list_success:
            return self._short_name, self.__version__

        # in case everything fails, return 'oag' as the handler to
        # be consistent with the failure handler in the workflow module
        # so that way, all "completely failed" licenses will have 'oag'
        # on them, except that the GSM ones will have the GSM's current
        # version
        return 'oag', self.__version__

    def _try_statements_on_urls(self, lic_statements, record, urls_contents, baseline_count, **extract_kwargs):
        """
        Run ``self.simple_extract`` with ``lic_statements`` over each
        prefetched URL until ``record.license`` grows.

        :param lic_statements: list of one-entry dicts mapping a license
            statement to its ``type`` and ``version``
        :param record: the record passed through to ``self.simple_extract``
        :param urls_contents: dict mapping each URL to its prefetched content
        :param baseline_count: ``len(record.license)`` before detection began
        :param extract_kwargs: extra keyword arguments forwarded unchanged to
            ``self.simple_extract`` (e.g. ``handler``)
        :return: True as soon as ``record.license`` holds more entries than
            ``baseline_count``, False if no URL produced a new license
        """
        for incoming_url, content in urls_contents.items():
            self.simple_extract(lic_statements, record, incoming_url, first_match=True, content=content, **extract_kwargs)
            # if we find a license, stop trying the different URL-s
            if len(record.license) > baseline_count:
                return True
        return False