Exemplo n.º 1
0
  def find_clusters(self, method, n_clusters = NUM_OF_CLUSTERS):
      """Cluster the stored snippets with the requested algorithm.

      Args:
          method: Algorithm name, matched case-insensitively. Recognised
              values are "stc", "fca", "ward", and "k-means" / "kmeans".
          n_clusters: Forwarded unchanged to the clustering object's
              ``find_clusters`` call.

      Returns:
          Whatever ``self.get_clusters()`` returns after clustering, or an
          empty dict when there are no snippets or the algorithm name is
          not recognised (a message is printed in both cases).
      """
      self.method = method
      if len(self.snippets) == 0:
          print("Sorry. There is nothing to cluster. Firstly, search for something.")
          # Return the same empty value as the unknown-algorithm branch so
          # callers see one consistent "no clusters" result, never None.
          return {}

      # Normalise once instead of lower-casing in every comparison.
      algorithm = method.lower()
      if algorithm == "stc":
          self.clustering = SuffixTreeClustering(self.snippets)
      elif algorithm == "fca":
          self.clustering = FCAClustering(self.snippets)
      elif algorithm == "ward":
          self.clustering = HierarchicalClustering(self.snippets)
      elif algorithm in ("k-means", "kmeans"):
          self.clustering = kMeansClustering(self.snippets)
      else:
          print("Sorry, unknown clustering algorithm.")
          return {}

      self.clustering.find_clusters(n_clusters)

      return self.get_clusters()
Exemplo n.º 2
0
    def find_clusters(self, method, n_clusters=NUM_OF_CLUSTERS):
        """Cluster the stored snippets with the requested algorithm.

        Args:
            method: Algorithm name, matched case-insensitively. Recognised
                values are "stc", "fca", "ward", and "k-means" / "kmeans".
            n_clusters: Forwarded unchanged to the clustering object's
                ``find_clusters`` call.

        Returns:
            Whatever ``self.get_clusters()`` returns after clustering, or an
            empty dict when there are no snippets or the algorithm name is
            not recognised (a message is printed in both cases).
        """
        self.method = method
        if len(self.snippets) == 0:
            print(
                "Sorry. There is nothing to cluster. Firstly, search for something."
            )
            # Return the same empty value as the unknown-algorithm branch so
            # callers see one consistent "no clusters" result, never None.
            return {}

        # Normalise once instead of lower-casing in every comparison.
        algorithm = method.lower()
        if algorithm == "stc":
            self.clustering = SuffixTreeClustering(self.snippets)
        elif algorithm == "fca":
            self.clustering = FCAClustering(self.snippets)
        elif algorithm == "ward":
            self.clustering = HierarchicalClustering(self.snippets)
        elif algorithm in ("k-means", "kmeans"):
            self.clustering = kMeansClustering(self.snippets)
        else:
            print("Sorry, unknown clustering algorithm.")
            return {}

        self.clustering.find_clusters(n_clusters)

        return self.get_clusters()
Exemplo n.º 3
0
class Newster:
    """Search for articles and cluster the returned snippets.

    The search results are kept as four parallel lists (snippets, sources,
    links, titles); the clustering object built by the last successful
    ``find_clusters`` call is kept in ``self.clustering``.
    """

    def __init__(self, api_urls, api_keys, query = ''):
        """Store the API settings and, if ``query`` is non-empty, search.

        Args:
            api_urls: Passed through to ``search_articles``.
            api_keys: Passed through to ``search_articles``.
            query: Optional initial search query; skipped when empty.
        """
        self.API_URLs = api_urls
        self.API_KEYs = api_keys
        self.snippets = []
        self.sources = []
        self.links = []
        self.titles = []
        # Defined up front so get_number_of_good_clusters() is safe to call
        # before any clustering has been run.
        self.method = None
        self.clustering = None # stores clustering object
        if len(query) > 0:
            self._store_results(search_articles(self.API_URLs, self.API_KEYs, query))

    def _store_results(self, result):
        """Unpack one ``search_articles`` result into the parallel lists."""
        self.snippets = result['snippets']
        self.sources = result['sources']
        self.links = result['links']
        self.titles = result['titles']

    def search(self, query):
        """Run a new search for a non-empty ``query`` and return the snippets.

        The result is unpacked exactly as in ``__init__`` so that sources,
        links and titles stay aligned with the new snippets. An empty query
        leaves the stored results untouched.
        """
        if len(query) > 0:
            self._store_results(search_articles(self.API_URLs, self.API_KEYs, query))
        return self.snippets

    def find_clusters(self, method, n_clusters = NUM_OF_CLUSTERS):
        """Cluster the stored snippets with the requested algorithm.

        Args:
            method: Algorithm name, matched case-insensitively. Recognised
                values are "stc", "fca", "ward", and "k-means" / "kmeans".
            n_clusters: Forwarded unchanged to the clustering object's
                ``find_clusters`` call.

        Returns:
            The result of ``get_clusters()`` after clustering, or an empty
            dict when there are no snippets or the algorithm name is not
            recognised (a message is printed in both cases).
        """
        if len(self.snippets) == 0:
            print("Sorry. There is nothing to cluster. Firstly, search for something.")
            # Same empty value as the unknown-algorithm branch, never None.
            return {}

        algorithm = method.lower()
        if algorithm == "stc":
            clustering = SuffixTreeClustering(self.snippets)
        elif algorithm == "fca":
            clustering = FCAClustering(self.snippets)
        elif algorithm == "ward":
            clustering = HierarchicalClustering(self.snippets)
        elif algorithm in ("k-means", "kmeans"):
            clustering = kMeansClustering(self.snippets)
        else:
            print("Sorry, unknown clustering algorithm.")
            return {}

        # Update both attributes together, and only on success, so that
        # self.method always names the algorithm behind self.clustering.
        self.method = method
        self.clustering = clustering
        self.clustering.find_clusters(n_clusters)

        return self.get_clusters()

    def get_snippets(self):
        """Return the stored snippets."""
        return self.snippets

    def get_links(self):
        """Return the stored links."""
        return self.links

    def get_sources(self):
        """Return the stored sources."""
        return self.sources

    def get_titles(self):
        """Return the stored titles."""
        return self.titles

    @staticmethod
    def _print_numbered(label_format, items):
        """Print each item after its numbered label, then a separator line.

        ``label_format`` is a %-format string taking the item index.
        """
        for num, item in enumerate(items):
            print(label_format % num, end = ' ')
            print(item)
            print("--------------------------------")

    def print_snippets(self):
        """Print every stored snippet with its index."""
        self._print_numbered("Snippet #%i: ", self.snippets)

    def print_links(self):
        """Print every stored link with its index."""
        self._print_numbered("URL for #%i: ", self.links)

    def print_sources(self):
        """Print every stored source with its index."""
        self._print_numbered("Source for #%i: ", self.sources)

    def print_titles(self):
        """Print every stored title with its index."""
        self._print_numbered("Title for #%i: ", self.titles)

    def print_search_results(self):
        """Print title, snippet, URL and source for every search result."""
        results = zip(self.titles, self.snippets, self.links, self.sources)
        for item, (title, snippet, link, source) in enumerate(results):
            print("Search result #%i" % item)
            print("Title: ", title, sep = '')
            print("Snippet: ", snippet, sep = '')
            print("URL: ", link, sep = '')
            print("Source: ", source, sep = '')
            print('-------------------------------')

    def get_clusters(self):
        """Return the current clusters, or an empty dict if none exist."""
        if self.clustering:
            return self.clustering.get_clusters()
        return {}

    def get_common_tags(self, num = 2):
        """Return ``get_common_phrases(num)`` from the clustering object.

        NOTE(review): no guard here; calling this before a successful
        ``find_clusters`` fails because ``self.clustering`` is still None.
        """
        return self.clustering.get_common_phrases(num)

    def get_number_of_good_clusters(self):
        """Return the clustering's good-cluster count for the "stc" method.

        Returns None when nothing has been clustered yet or when the last
        clustering used another method. The method name is compared
        case-insensitively, matching how ``find_clusters`` dispatches.
        """
        if self.clustering is None or self.method is None:
            return None
        if self.method.lower() == 'stc':
            return self.clustering.get_number_of_good_clusters()
        return None

    def print_clusters(self):
        """Delegate to the clustering object's ``print_clusters`` if set."""
        if self.clustering:
            return self.clustering.print_clusters()
Exemplo n.º 4
0
class Newster:
    """Search for articles and cluster the returned snippets.

    The search results are kept as four parallel lists (snippets, sources,
    links, titles); the clustering object built by the last successful
    ``find_clusters`` call is kept in ``self.clustering``.
    """

    def __init__(self, api_urls, api_keys, query=''):
        """Store the API settings and, if ``query`` is non-empty, search.

        Args:
            api_urls: Passed through to ``search_articles``.
            api_keys: Passed through to ``search_articles``.
            query: Optional initial search query; skipped when empty.
        """
        self.API_URLs = api_urls
        self.API_KEYs = api_keys
        self.snippets = []
        self.sources = []
        self.links = []
        self.titles = []
        # Defined up front so get_number_of_good_clusters() is safe to call
        # before any clustering has been run.
        self.method = None
        self.clustering = None  # stores clustering object
        if len(query) > 0:
            self._store_results(
                search_articles(self.API_URLs, self.API_KEYs, query))

    def _store_results(self, result):
        """Unpack one ``search_articles`` result into the parallel lists."""
        self.snippets = result['snippets']
        self.sources = result['sources']
        self.links = result['links']
        self.titles = result['titles']

    def search(self, query):
        """Run a new search for a non-empty ``query`` and return the snippets.

        The result is unpacked exactly as in ``__init__`` so that sources,
        links and titles stay aligned with the new snippets. An empty query
        leaves the stored results untouched.
        """
        if len(query) > 0:
            self._store_results(
                search_articles(self.API_URLs, self.API_KEYs, query))
        return self.snippets

    def find_clusters(self, method, n_clusters=NUM_OF_CLUSTERS):
        """Cluster the stored snippets with the requested algorithm.

        Args:
            method: Algorithm name, matched case-insensitively. Recognised
                values are "stc", "fca", "ward", and "k-means" / "kmeans".
            n_clusters: Forwarded unchanged to the clustering object's
                ``find_clusters`` call.

        Returns:
            The result of ``get_clusters()`` after clustering, or an empty
            dict when there are no snippets or the algorithm name is not
            recognised (a message is printed in both cases).
        """
        if len(self.snippets) == 0:
            print(
                "Sorry. There is nothing to cluster. Firstly, search for something."
            )
            # Same empty value as the unknown-algorithm branch, never None.
            return {}

        algorithm = method.lower()
        if algorithm == "stc":
            clustering = SuffixTreeClustering(self.snippets)
        elif algorithm == "fca":
            clustering = FCAClustering(self.snippets)
        elif algorithm == "ward":
            clustering = HierarchicalClustering(self.snippets)
        elif algorithm in ("k-means", "kmeans"):
            clustering = kMeansClustering(self.snippets)
        else:
            print("Sorry, unknown clustering algorithm.")
            return {}

        # Update both attributes together, and only on success, so that
        # self.method always names the algorithm behind self.clustering.
        self.method = method
        self.clustering = clustering
        self.clustering.find_clusters(n_clusters)

        return self.get_clusters()

    def get_snippets(self):
        """Return the stored snippets."""
        return self.snippets

    def get_links(self):
        """Return the stored links."""
        return self.links

    def get_sources(self):
        """Return the stored sources."""
        return self.sources

    def get_titles(self):
        """Return the stored titles."""
        return self.titles

    @staticmethod
    def _print_numbered(label_format, items):
        """Print each item after its numbered label, then a separator line.

        ``label_format`` is a %-format string taking the item index.
        """
        for num, item in enumerate(items):
            print(label_format % num, end=' ')
            print(item)
            print("--------------------------------")

    def print_snippets(self):
        """Print every stored snippet with its index."""
        self._print_numbered("Snippet #%i: ", self.snippets)

    def print_links(self):
        """Print every stored link with its index."""
        self._print_numbered("URL for #%i: ", self.links)

    def print_sources(self):
        """Print every stored source with its index."""
        self._print_numbered("Source for #%i: ", self.sources)

    def print_titles(self):
        """Print every stored title with its index."""
        self._print_numbered("Title for #%i: ", self.titles)

    def print_search_results(self):
        """Print title, snippet, URL and source for every search result."""
        results = zip(self.titles, self.snippets, self.links, self.sources)
        for item, (title, snippet, link, source) in enumerate(results):
            print("Search result #%i" % item)
            print("Title: ", title, sep='')
            print("Snippet: ", snippet, sep='')
            print("URL: ", link, sep='')
            print("Source: ", source, sep='')
            print('-------------------------------')

    def get_clusters(self):
        """Return the current clusters, or an empty dict if none exist."""
        if self.clustering:
            return self.clustering.get_clusters()
        return {}

    def get_common_tags(self, num=2):
        """Return ``get_common_phrases(num)`` from the clustering object.

        NOTE(review): no guard here; calling this before a successful
        ``find_clusters`` fails because ``self.clustering`` is still None.
        """
        return self.clustering.get_common_phrases(num)

    def get_number_of_good_clusters(self):
        """Return the clustering's good-cluster count for the "stc" method.

        Returns None when nothing has been clustered yet or when the last
        clustering used another method. The method name is compared
        case-insensitively, matching how ``find_clusters`` dispatches.
        """
        if self.clustering is None or self.method is None:
            return None
        if self.method.lower() == 'stc':
            return self.clustering.get_number_of_good_clusters()
        return None

    def print_clusters(self):
        """Delegate to the clustering object's ``print_clusters`` if set."""
        if self.clustering:
            return self.clustering.print_clusters()