Пример #1
0
    return [item['link'] for item in res['items']][:10]

def GetLinksForQueries(queries):
    """Return the distinct links found by searching for each query in turn.

    Every query is passed to GetLinksForQuery and the results are merged.
    A link that shows up more than once is kept only at its first position,
    so the returned list follows the order in which the results were
    produced instead of the arbitrary iteration order of a set.

    Args:
        queries: iterable of query values accepted by GetLinksForQuery.

    Returns:
        A list of unique links; an empty list when `queries` is empty.
    """
    seen = set()
    links = []
    for query in queries:
        for link in GetLinksForQuery(query):
            # The set makes the duplicate check O(1) per link.
            if link not in seen:
                seen.add(link)
                links.append(link)
    return links


# Shared resources come from common_lib.Init().
parser, st, stop = common_lib.Init()

# The query text is the first command-line argument.
query = sys.argv[1]

# The same text is passed for both text arguments of buildFullQuery.
keyword_query = common_lib.buildFullQuery(query, query, parser, stop)

links_keywords = GetLinksForQueries([keyword_query])

# Running total and count of the scores of the passages that were found.
sum_score, len_score = 0, 0

for link in links_keywords:
    passage, _ = passage_retrieval.GetTopPassageFromLink(keyword_query, link)
    if not passage:
        # Nothing usable was returned for this link.
        continue
    score = passage_retrieval.ScorePassage(keyword_query, passage)
    sum_score += score
    len_score += 1
    # A single parenthesised argument prints identically under the
    # Python 2 print statement.
    print("======================================================")
    print(passage.encode('utf-8'))
    print("score ----------> " + str(score))
Пример #2
0
# Per-document pass over the parsed XML tree `e`, writing results to `f`
# (both are created earlier, outside this fragment).
# `i` is the running 1-based document number.
i = 1
all_scores = 0
top_scores = 0
for ves in e.findall('vespaadd'):
    for doc in ves.findall('document'):
        # The opening <doc> tag is written before the filter below, so it is
        # emitted for skipped documents as well.
        f.write("<doc number = \"" + str(i) + "\">\n")
        try:
            # adding links for keyword query
            # Only document number 396 gets past this check; every other
            # document just advances the counter and is skipped.
            # NOTE(review): looks like a leftover debugging filter - confirm.
            if (i != 396):
                i += 1
                continue
            print "doc " + str(i)
            # Use an empty string when the document has no <content> element.
            cont = ""
            if doc.find('content') != None:
                cont = doc.find('content').text
            keyword_query = common_lib.buildFullQuery(
                doc.find('subject').text, cont, parser, stop)
            # Both searches are issued with the query as built above.
            links_bing = GetLinksForQueryBing(keyword_query)
            links_google = GetLinksForQueryGoogle(keyword_query)
            f.write("<keyword_query>" + escape(keyword_query) +
                    "</keyword_query>\n")
            f.write("<links>\n")
            # Per-document accumulators, reset for every processed document.
            sum_score = 0
            len_score = 0
            j = 1
            max_score = 0
            passages = []
            scores = {}
            # keyword_query is replaced by the result of
            # RemoveSynonymsFromKeywords only after the original value has
            # been used for both searches and written to the output.
            keyword_query = passage_retrieval.RemoveSynonymsFromKeywords(
                keyword_query)
            print keyword_query
            for link in links_bing:
Пример #3
0

def GetLinksForQueries(queries):
    """Return the distinct links found by searching for each query in turn.

    Every query is passed to GetLinksForQuery and the results are merged.
    A link that shows up more than once is kept only at its first position,
    so the returned list follows the order in which the results were
    produced instead of the arbitrary iteration order of a set.

    Args:
        queries: iterable of query values accepted by GetLinksForQuery.

    Returns:
        A list of unique links; an empty list when `queries` is empty.
    """
    seen = set()
    links = []
    for query in queries:
        for link in GetLinksForQuery(query):
            # The set makes the duplicate check O(1) per link.
            if link not in seen:
                seen.add(link)
                links.append(link)
    return links


# Shared resources come from common_lib.Init().
parser, st, stop = common_lib.Init()

# The query text is the first command-line argument.
query = sys.argv[1]

# The same text is passed for both text arguments of buildFullQuery.
keyword_query = common_lib.buildFullQuery(query, query, parser, stop)

links_keywords = GetLinksForQueries([keyword_query])

# Running total and count of the scores of the passages that were found.
sum_score, len_score = 0, 0

for link in links_keywords:
    passage, _ = passage_retrieval.GetTopPassageFromLink(keyword_query, link)
    if not passage:
        # Nothing usable was returned for this link.
        continue
    score = passage_retrieval.ScorePassage(keyword_query, passage)
    sum_score += score
    len_score += 1
    # A single parenthesised argument prints identically under the
    # Python 2 print statement.
    print("======================================================")
    print(passage.encode('utf-8'))
    print("score ----------> " + str(score))
Пример #4
0
import xml.etree.ElementTree
from xml.sax.saxutils import escape
import common_lib

# Converts every <document> found under the <vespaadd> elements of
# small_sample.xml into a <doc> record in output_small.xml that holds the
# subject, the content and the keyword query built from both.
parser, st, stop = common_lib.Init()

# Parse the input first so that a broken input file does not truncate an
# existing output file.
e = xml.etree.ElementTree.parse('small_sample.xml').getroot()
i = 1
# The context manager closes the output file even when a document fails.
with open("output_small.xml", "wb") as f:
    f.write("<data>\n")
    for ves in e.findall('vespaadd'):
        for doc in ves.findall('document'):
            subject = doc.find('subject').text
            # findtext yields '' for a missing or empty <content> element,
            # where find(...).text would raise or hand None to escape().
            content = doc.findtext('content', '')
            f.write("<doc number = \"" + str(i) + "\">\n")
            i += 1
            f.write("<subject>" + escape(subject) + "</subject>\n")
            f.write("<content>" + escape(content) + "</content>\n")
            #f.write("<category>" + escape(subject) + "</category>\n")
            keywords = common_lib.buildFullQuery(subject, content, parser, stop)
            f.write("<keywords>" + escape(keywords) + "</keywords>\n")
            f.write("</doc>\n")
    f.write("</data>\n")

#print buildFullQuery("long-distance trail throughout CA", parser)
Пример #5
0
      ).execute()

    results = [item['link'] for item in res['items']]
    results = results[:min(20, len(results))]
    return [r for r in results if r.find("youtube") == -1]

# Script body: builds a keyword query from the command line, collects
# search links and gathers the top passage of each Bing link.
parser, st, stop = common_lib.Init()

# NOTE(review): title and body both read sys.argv[1] - confirm that the
# body is not meant to come from a separate argument.
title = sys.argv[1]
body = sys.argv[1]

# Install signal_handler (defined elsewhere) for SIGALRM and schedule the
# alarm, so the work below is interrupted if it runs too long.
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(51)   # 51 seconds

try:
    keyword_query = common_lib.buildFullQuery(title, body, parser, stop)

    # The first two results of the "answers.yahoo.com"-prefixed query are
    # placed ahead of the results of the plain query.
    links_bing_yahoo = GetLinksForQueryBing("answers.yahoo.com " + keyword_query)[:2]
    links_bing = links_bing_yahoo + GetLinksForQueryBing(keyword_query)
    links_google = GetLinksForQueryGoogle(keyword_query)

    # From here on keyword_query holds the result of
    # RemoveSynonymsFromKeywords; the searches above used the original value.
    keyword_query =  passage_retrieval.RemoveSynonymsFromKeywords(keyword_query)

    sum_score = 0
    len_score = 0
    results = []
    rank = 1
    for link in links_bing:
        passage, score = passage_retrieval.GetTopPassageFromLinkWithML(keyword_query, link)
        if passage:
            # Entry layout: [link, whether the link is also in links_google,
            # passage, 0, rank]. The fourth field is the literal 0, not the
            # score returned above.
            results.append([link, link in links_google, passage, 0, rank])
Пример #6
0
    results = [item['link'] for item in res['items']]
    results = results[:min(20, len(results))]
    return [r for r in results if r.find("youtube") == -1]


# Script body: builds a keyword query from the command line, collects
# search links and fetches the top passage of each Bing link.
parser, st, stop = common_lib.Init()

# NOTE(review): title and body both read sys.argv[1] - confirm that the
# body is not meant to come from a separate argument.
title = sys.argv[1]
body = sys.argv[1]

# Install signal_handler (defined elsewhere) for SIGALRM and schedule the
# alarm, so the work below is interrupted if it runs too long.
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(51)  # 51 seconds

try:
    keyword_query = common_lib.buildFullQuery(title, body, parser, stop)

    # The first two results of the "answers.yahoo.com"-prefixed query are
    # placed ahead of the results of the plain query.
    links_bing_yahoo = GetLinksForQueryBing("answers.yahoo.com " +
                                            keyword_query)[:2]
    links_bing = links_bing_yahoo + GetLinksForQueryBing(keyword_query)
    links_google = GetLinksForQueryGoogle(keyword_query)

    # From here on keyword_query holds the result of
    # RemoveSynonymsFromKeywords; the searches above used the original value.
    keyword_query = passage_retrieval.RemoveSynonymsFromKeywords(keyword_query)

    sum_score = 0
    len_score = 0
    results = []
    rank = 1
    for link in links_bing:
        # Each Bing link is passed to GetTopPassageFromLinkWithML together
        # with the rewritten query; it returns a (passage, score) pair.
        passage, score = passage_retrieval.GetTopPassageFromLinkWithML(
            keyword_query, link)
Пример #7
0
      ).execute()

    return [item['link'] for item in res['items']][:20]

def GetLinksForQueryBing(query, limit=20, api_key=None):
    """Return the URLs of the first Bing results for `query`.

    Args:
        query: search string handed to PyBingSearch.search_all.
        limit: maximum number of URLs to request and return. Defaults to
            20, the value that was previously hard-coded.
        api_key: Bing API key. When omitted, the key that used to be
            hard-coded in this function is used, so existing callers are
            unaffected.

    Returns:
        A list of at most `limit` result URLs.
    """
    if api_key is None:
        # NOTE(review): this credential is committed to source control; it
        # should be rotated and supplied by the caller or by configuration.
        api_key = '3Bybyj2qcK/w5FXbBqBUjI9MajN51efC2uYldmzvvnY'
    bing = PyBingSearch(api_key)
    result_list = bing.search_all(query, limit=limit, format='json')

    # The slice keeps the contract even if more results come back.
    return [result.url for result in result_list][:limit]

# Collects, for every <document> in small_sample.xml, the Google and Bing
# links returned for its keyword query and dumps them as JSON.
parser, st, stop = common_lib.Init()
#f = open("links_results", "wb")
#f.write("<data>\n")
e = xml.etree.ElementTree.parse('small_sample.xml').getroot()

# output maps the running document index i to [google_links, bing_links].
output = {}
i = 0
for ves in e.findall('vespaadd'):
    for doc in ves.findall('document'):
        # adding links for keyword query
        print("next doc")
        keyword_query = common_lib.buildFullQuery(
            doc.find('subject').text,
            doc.find('content').text,
            parser,
            stop)
        google_links = GetLinksForQueryGoogle(keyword_query)
        bing_links = GetLinksForQueryBing(keyword_query)

        links = [google_links, bing_links]
        output[i] = links
        i += 1

with open('output_links.txt', 'w') as f:
    json.dump(output, f, ensure_ascii=False)