Пример #1
0
    def get_keyword_relevancy_map(self, name_list, n_list, terms, sortcontext,
                                  enginetype='BING',
                                  license=None):
        """Rank ``terms`` by their relevancy to ``sortcontext``.

        Thin wrapper around ``pattern.web.sort``; see
        http://www.clips.ua.ac.be/pages/pattern-web#sort

        Args:
            name_list: Stored on the instance as ``self.name_list``; not
                otherwise used by this method.
            n_list: Not used by this method.
            terms: Search terms to rank.
            sortcontext: Context term the ranking is relative to.
            enginetype: Search service identifier, forwarded to ``sort`` as
                ``service``.
                NOTE(review): pattern.web exposes service constants (GOOGLE,
                YAHOO, BING, ...); confirm the string 'BING' is accepted.
            license: API license key for the service, forwarded to ``sort``.

        Returns:
            A dict with two keys: ``'data'`` is a list of
            ``(percentage_string, term)`` tuples in the order yielded by
            ``sort``; ``'summary'`` is always ``None``.
        """
        self.name_list = name_list
        results = sort(
            terms=terms,          # Previously hard-coded to [], which
                                  # ignored the caller's terms entirely.
            context=sortcontext,  # Term used for sorting.
            service=enginetype,   # GOOGLE, YAHOO, BING, ...
            license=license,      # You should supply your own API license key
                                  # for the given service.
            strict=True,          # Wraps the query in quotes, i.e.'mac sweet'.
            reverse=True,         # Reverses term and context: 'sweet mac' <=>
                                  # 'mac sweet'.
            cached=True)

        # Collect into a separate list: appending to `results` while iterating
        # it (and with two positional arguments) was a TypeError.
        results_list = [
            ("%5.2f%%" % (weight * 100), term) for weight, term in results
        ]

        return {
            'data': results_list,
            'summary': None
        }
Пример #2
0
import os, sys; sys.path.insert(0, os.path.join("..", ".."))

from pattern.web import GOOGLE, YAHOO, BING, sort

# The pattern.web module includes an interesting sort() algorithm.
# It classifies search terms according to a search engine's total results count.
# When a context is defined, it sorts according to relevancy to the context:
# sort(terms=["black", "green", "red"], context="Darth Vader") =>
# yields "black" as the best candidate, 
# because "black Darth Vader" yields more search results.

# Rank a handful of names by how strongly the search engine associates
# them with the context word.
results = sort(
    terms=[
        "arnold schwarzenegger",
        "chuck norris",
        "dolph lundgren",
        "steven seagal",
        "sylvester stallone",
        "mickey mouse",
    ],
    context="dangerous",  # Term used for sorting.
    service=BING,         # GOOGLE, YAHOO, BING, ...
    license=None,         # You should supply your own API license key for the given service.
    strict=True,          # Wraps the query in quotes, i.e. 'mac sweet'.
    reverse=True,         # Reverses term and context: 'sweet mac' instead of 'mac sweet'.
    cached=True,
)

# One line per term: the weight as a percentage, then the term.
# A single parenthesised argument keeps this valid on Python 2 and 3 alike.
for weight, term in results:
    print("%5.2f%% %s" % (weight * 100, term))
Пример #3
0
# Load every line of the dictionary file into memory.
with open(fname) as f:
    diction = f.readlines()

# Keep only the entries longer than `lengthmin`.  The length is measured on
# the raw line (before the newline is stripped off).
for term in diction:
    if len(term) <= lengthmin:
        continue
    subset.append(term.strip('\n'))


def rando(listofterms, num):
    """Append ``num`` randomly chosen terms to the global ``searchlist``.

    Every pick is an independent ``random.choice`` from ``listofterms``, so
    the same term may be selected more than once.  Returns nothing.
    """
    for _ in range(num):
        searchlist.append(random.choice(listofterms))


# Terms chosen by rando() accumulate in this list.
searchlist = []
# Set up the default search terms: draw `numterms` random entries from
# `subset`.
rando(subset, numterms)
from pattern.web import sort

# Rank the chosen terms against the context and print each one as
# "<percentage>% <term>".
results = sort(terms=searchlist, context=contexter, prefix=True)
for weight, term in results:
    # Single parenthesised argument: valid on both Python 2 and Python 3.
    print("%.2f%% %s" % (weight * 100, term))
exit()
Пример #4
0
sys.path.insert(0, os.path.join("..", ".."))

from pattern.web import GOOGLE, YAHOO, BING, sort

# The pattern.web module includes an interesting sort() algorithm.
# It classifies search terms according to a search engine's total results count.
# When a context is defined, it sorts according to relevancy to the context:
# sort(terms=["black", "green", "red"], context="Darth Vader") =>
# yields "black" as the best candidate,
# because "black Darth Vader" yields more search results.

# Rank a handful of names by how strongly the search engine associates
# them with the context word.
results = sort(
    terms=[
        "arnold schwarzenegger",
        "chuck norris",
        "dolph lundgren",
        "steven seagal",
        "sylvester stallone",
        "mickey mouse",
    ],
    # Term used for sorting.
    context="dangerous",
    # GOOGLE, YAHOO, BING, ...
    service=BING,
    # You should supply your own API license key for the given service.
    license=None,
    # Wraps the query in quotes, i.e. 'mac sweet'.
    strict=True,
    # Reverses term and context: 'sweet mac' instead of 'mac sweet'.
    reverse=True,
    cached=True,
)

# One line per term: the weight as a percentage, then the term.
# A single parenthesised argument keeps this valid on Python 2 and 3 alike.
for weight, term in results:
    print("%5.2f%% %s" % (weight * 100, term))
Пример #5
0
from pattern.web import sort

# Rank a few candidate terms against the context word 'joy'.
results = sort(
    terms=[
        'sunset',
        'puppies',
        'kittens',
        'babies',
        'hedgehogs',
        'birthday',
        'bunnies',
    ],
    context='joy',
    prefix=True,
)

# One line per term: the weight as a percentage, then the term.
# A single parenthesised argument keeps this valid on Python 2 and 3 alike.
for weight, term in results:
    print("%.2f%% %s" % (weight * 100, term))
# Script settings.
fname = 'assets/dictionary-list.html'
lengthmin = 6
numterms = 10

# Working lists: every line of the dictionary file, and the filtered terms.
subset = []
diction = []

with open(fname) as f:
    diction = f.readlines()

# Keep only the entries longer than `lengthmin`.  The length is measured on
# the raw line (before the newline is stripped off).
for term in diction:
    if len(term) <= lengthmin:
        continue
    subset.append(term.strip('\n'))

def rando(listofterms, num):
    """Append ``num`` randomly chosen terms to the global ``searchlist``.

    Every pick is an independent ``random.choice`` from ``listofterms``, so
    the same term may be selected more than once.  Returns nothing.
    """
    for _ in range(num):
        searchlist.append(random.choice(listofterms))

# Terms chosen by rando() accumulate in this list.
searchlist = []
# Set up the default search terms: draw `numterms` random entries from
# `subset`.
rando(subset, numterms)
from pattern.web import sort

# Rank the chosen terms against the context and print each one as
# "<percentage>% <term>".
results = sort(terms=searchlist, context=contexter, prefix=True)
for weight, term in results:
    # Single parenthesised argument: valid on both Python 2 and Python 3.
    print("%.2f%% %s" % (weight * 100, term))
exit()
Пример #7
0
from pattern.web import Google, plaintext, sort
from optparse import OptionParser
import codecs  # used below for UTF-8 file access; was missing

# Command-line interface: a file of search words, an output file and a
# context term.
parser = OptionParser()
parser.add_option("-q", "--query", dest="query",
                  help="txt file with searchwords")
parser.add_option("-f", "--filename", dest="filename",
                  help="specify filename to save data too")
parser.add_option("-c", "--context", dest="context",
                  help="specify context for search")
(options, args) = parser.parse_args()

# Fail with a usage message instead of a TypeError from open(None).
if not options.query or not options.filename:
    parser.error("both --query and --filename are required")

# Read the query words, one per line; blank lines are skipped.  The context
# manager closes the input file (it used to be left open and then shadowed).
with codecs.open(options.query, encoding='utf-8') as query_file:
    terms = [line.strip() for line in query_file if line.strip()]

# Rank the terms relative to the requested context (empty when not given).
results = sort(terms=terms, context=options.context or "")

# `sort` is iterated as (weight, term) pairs; an empty result is falsy.
# (The previous `results.count()` call raised TypeError on a list.)
if results:
    # Open the output once, in append mode, rather than once per row.
    with codecs.open(options.filename, "a", "utf-8") as out:
        for weight, term in results:
            try:
                # The codecs writer encodes to UTF-8 itself; pre-encoding the
                # term was what triggered UnicodeDecodeError on Python 2.
                out.write(u"%s,%5.2f\n" % (term, weight * 100))
            except UnicodeError as e:
                # Best effort: report the bad term and carry on.
                print(e)
else:
    print("No results. Sorry.")