示例#1
0
def indico_batch_ner():
    """Another ONE-OFF method to call the indico.io API to batch NER 18192 texts.

    Reads the list of filenames from texts/filenames.txt, loads each file via
    get_texts(), runs indico named_entities() over every message body, and
    appends one CSV row per message to sentiments.csv.  API failures are
    logged to indico_ner_errors.txt and the row is still written with a None
    result so the CSV stays in one-to-one correspondence with the input.
    """
    with open('sentiments.csv', 'wb') as f:
        texts = []
        writer = csv.writer(f)
        with open('texts/filenames.txt', 'r') as filenames:
            # Build 'texts/texts/<name>' paths directly; the original
            # chained two map() calls over a comprehension for the same effect.
            fn_list = ['texts/texts/' + filename.strip() for filename in filenames]
            for fn in fn_list:
                texts.append(get_texts(fn))  # returns TextMessage objects
        # Flatten the per-file lists into one flat list of messages.
        texts = [item for sublist in texts for item in sublist]
        with open('indico_ner_errors.txt', 'w') as error_log:
            for text in texts:
                sentiment_result = None
                try:
                    sentiment_result = named_entities(text.body.encode(),
                                                      api_key=INDICO_API_KEY)
                except Exception as e:
                    # BUG FIX: was `except BaseException`, which also swallowed
                    # KeyboardInterrupt/SystemExit and made the batch
                    # impossible to abort cleanly.
                    # BUG FIX: append a newline so logged errors don't run
                    # together on a single line.
                    error_log.write(str(e) + '\n')
                finally:
                    # Always emit the row (result stays None on failure) so
                    # output rows align with input messages.
                    writer.writerow([
                        unicode(s).encode('utf-8') for s in [
                            text.msg_id, text.posix,
                            repr(text.sent), text.body,
                            repr(text.mentions), sentiment_result
                        ]
                    ])
示例#2
0
def analysis(data):
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    names = sort(ind.named_entities(data))

    print "Sentiment", sentiment

    print "\n\n\nTags"
    for t in tags:
        print t[0], float(t[1]) * 100

    print "\n\n\nLanguages"
    for l in languages:
        print l[0], float(l[1]) * 100

    print "\n\n\nPolitical"
    for p in politics:
        print p[0], float(p[1]) * 100
    
    print "\n\nkeywords"
    for k in keywords:
        print k[0], float(k[1]) * 100
示例#3
0
 def test_named_entities(self):
     """named_entities() should find each expected entity and expose exactly
     the documented keys ("categories", "confidence") for it."""
     text = "London Underground's boss Mike Brown warned that the strike ..."
     expected_entities = ("London Underground", "Mike Brown")
     expected_keys = set(["categories", "confidence"])
     entities = named_entities(text)
     for entity in expected_entities:
         # BUG FIX: previously asserted `entity in expected_entities`, which
         # is trivially true and never actually checked the API result.
         assert entity in entities
         # The per-entity dict must carry no keys beyond the documented set.
         assert not (set(entities[entity]) - expected_keys)
示例#4
0
def entityMatch(message):
	"""Scan *message* for a known organization entity.

	Returns the first recognized entity that is both high-confidence
	(> 0.8), likely an organization (> 0.5), and present in the global
	`entities` collection; returns the string "None" otherwise.
	"""
	recognized = indicoio.named_entities(message)
	for name in recognized.keys():
		info = recognized[name]
		is_confident = info["confidence"] > .8
		if is_confident and info["categories"]["organization"] > .5 and name in entities:
			return name
	return "None"
示例#5
0
 def score(self, slide_length, window_length, AItype='tags'):
     """Parse the text into windows, then score every window string with the
     requested indico analysis, storing results in self.scores[AItype].

     Raises:
         Exception: if AItype is not one of 'tags', 'keywords', 'names'.
     """
     self.parse(slide_length, window_length)
     if AItype == 'tags':
         self.scores['tags'] = [indicoio.text_tags(i) for i in self.strings]
     elif AItype == 'keywords':
         self.scores['keywords'] = [indicoio.keywords(i) for i in self.strings]
     elif AItype == 'names':
         self.scores['names'] = [indicoio.named_entities(i) for i in self.strings]
     else:
         # BUG FIX: the message referenced the undefined name `category`,
         # so the else branch raised NameError instead of the intended error.
         raise Exception('Warning: {} not a valid category'.format(AItype))
示例#6
0
def indico_batch_ner():
    """Another ONE-OFF method to call the indico.io API to batch NER 18192 texts.

    Loads every file listed in texts/filenames.txt via get_texts(), runs
    indico named_entities() over each message body, and writes one CSV row
    per message to sentiments.csv.  Errors are logged to
    indico_ner_errors.txt; the row is still written (result None) so output
    rows stay aligned with input messages.
    """
    with open('sentiments.csv', 'wb') as f:
        texts = []
        writer = csv.writer(f)
        with open('texts/filenames.txt', 'r') as filenames:
            # Build prefixed paths in one pass (was two chained map() calls).
            fn_list = ['texts/texts/' + filename.strip() for filename in filenames]
            for fn in fn_list:
                texts.append(get_texts(fn))  # returns TextMessage objects
        # Flatten the per-file lists into one flat list.
        texts = [item for sublist in texts for item in sublist]
        with open('indico_ner_errors.txt', 'w') as error_log:
            for text in texts:
                sentiment_result = None
                try:
                    sentiment_result = named_entities(text.body.encode(), api_key=INDICO_API_KEY)
                except Exception as e:
                    # BUG FIX: was `except BaseException` (also caught
                    # KeyboardInterrupt/SystemExit); newline added so log
                    # entries don't run together.
                    error_log.write(str(e) + '\n')
                finally:
                    # Row is always written, even on failure.
                    writer.writerow([unicode(s).encode('utf-8') for s in
                                     [text.msg_id, text.posix, repr(text.sent),
                                      text.body, repr(text.mentions), sentiment_result]])
 def entity_extraction(self, text):
     """Run indico named-entity recognition over *text* (threshold=0 keeps
     every candidate) and stash the raw result on the instance."""
     result = named_entities(text, threshold=0)
     self.named_entities = result
示例#8
0
# NOTE: input may alternatively be read from BBC.txt into string1.
# Normalize UTF-8 "smart" punctuation to plain ASCII quotes/apostrophes,
# applying the replacements in the same order as before.
for fancy, plain in (("\xe2\x80\x9c", "\""),
                     ("\xe2\x80\x9d", "\""),
                     ("\xe2\x80\x99", "\'")):
    string1 = string1.replace(fancy, plain)

keywordList = []
tagList = []
entityList = []

# Split into at most size+1 chunks; only the first `size` are analyzed.
myList = string1.split("\n", size)

# One indico call of each kind per chunk (keywords, then tags, then entities).
for idx in range(size):
    chunk = myList[idx]
    keywordList.append(indicoio.keywords(chunk, top_n=10, independent=True))
    tagList.append(indicoio.text_tags(chunk, threshold=.05))
    entityList.append(indicoio.named_entities(chunk))

## build 2-d array of weights
matrix = [[0] * size for _ in range(size)]

# Pairwise similarity: scaled product of keyword, tag and entity similarity.
for i in range(size):
    for j in range(size):
        kw_sim = compareKeywords(keywordList[i], keywordList[j])
        tag_sim = compareTags(tagList[i], tagList[j])
        ent_sim = compareEntities(entityList[i], entityList[j])
        matrix[i][j] = 1000 * kw_sim * tag_sim * ent_sim

示例#9
0
def findNames(inputString):
    """Thin proxy over indicoio named-entity recognition for *inputString*."""
    found = indicoio.named_entities(inputString)
    return found
 def fetch_named_entities(self):
     """Pull named entities for self.data and merge the five most confident
     entity names into self.final_json['keywords']."""
     properNouns = indicoio.named_entities(self.data)
     print("\nproperNouns: ", properNouns)
     # Map each entity name to its confidence score.
     confidence_by_name = {name: details['confidence']
                           for name, details in properNouns.items()}
     top_names = sorted(confidence_by_name,
                        key=confidence_by_name.get, reverse=True)[:5]
     self.final_json['keywords'].update(top_names)
示例#11
0
def get_entities():
    """Flask view: run indico NER over the posted `data_to_analyze` field and
    return the result as a JSON string under the 'keywords' key.

    Non-POST requests fall through and yield an implicit None, exactly as the
    original handler did.
    """
    if request.method != 'POST':
        return
    payload = dict(request.form)['data_to_analyze']
    result = {'keywords': indicoio.named_entities(payload)}
    return json.dumps(result)
 def fetch_named_entities(self):
     """Collect named entities from self.data and add the top five names,
     ranked by confidence, to self.final_json['keywords']."""
     found = indicoio.named_entities(self.data)
     print("\nproperNouns: ", found)
     # Score each entity name by the API-reported confidence.
     scores = dict((entity, meta['confidence']) for entity, meta in found.items())
     ranked = sorted(scores, key=scores.get, reverse=True)
     self.final_json['keywords'].update(ranked[:5])