Пример #1
0
def main():
    global POPULATION_COUNT, ITERATIONS, MAX_FUNC_DEPTH
    file = open('data.txt', 'w')
    if len(sys.argv) == 3: ITERATIONS = int(sys.argv[1])
    if len(sys.argv) == 3: POPULATION_COUNT = int(sys.argv[2])

    f, data = create_test_data()
    write_init_data(file, f, data)

    population = create_population(POPULATION_COUNT, MAX_FUNC_DEPTH)
    normalizator = Normalizator(data, mistake)

    functions = FunctoionContainer(choose_parent_pairs2, choose_to_kill,
                                   create_children)
    options = Options(0.4)
    mutator = Mutator(population, normalizator, options, functions)

    print 'population: '
    #for s in population: print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f
    for i in range(ITERATIONS):
        print 'population ' + str(i) + ' : '
        for j in range(5):
            s = population[(j + 1) * (-1)]
            print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f
            if j == 1:  # ITERATION_NUMBER:MISTAKE:FUNCTION
                write_population_data(file, i, s, mistake(s.f, data))
        mutator.make_children()
        mutator.kill_unused()
        #	mutator.change_random()
        mutator.change_age()
        mutator.mutate()
        normalizator.compute_distribuante(population)
        population.sort()
    file.close()
Пример #2
0
def main():
    global POPULATION_COUNT, ITERATIONS, MAX_FUNC_DEPTH
    file = open('data.txt', 'w')
    if len(sys.argv) == 3: ITERATIONS = int(sys.argv[1])
    if len(sys.argv) == 3: POPULATION_COUNT = int(sys.argv[2])

    f, data = create_test_data()
    write_init_data(file, f, data)

    population = create_population(POPULATION_COUNT, MAX_FUNC_DEPTH)
    normalizator = Normalizator(data, mistake)

    functions = FunctoionContainer(choose_parent_pairs2, choose_to_kill, create_children)
    options = Options(0.4)	
    mutator = Mutator(population, normalizator, options, functions)


    print 'population: '
    #for s in population: print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f
    for i in range(ITERATIONS):
        print 'population ' + str(i) + ' : '
        for j  in range(5):
            s = population[(j+1)*(-1)]
            print 'm: ', mistake(s.f, data), '\t\ts.f: ', s.f
            if j == 1: # ITERATION_NUMBER:MISTAKE:FUNCTION
                write_population_data(file, i, s, mistake(s.f, data))
        mutator.make_children()
        mutator.kill_unused()
    #	mutator.change_random()
        mutator.change_age()
        mutator.mutate()
        normalizator.compute_distribuante(population)
        population.sort()
    file.close()
Пример #3
0
def saveTweetsOf(screen_name, category):
  for i in range(1,4):
    statuses = api.GetSearch(term=settings_local.USERTRACK, lang='es', page=i,per_page=100)
    for s in statuses:
      if(s.text.find('RT',0,2)==-1):
        screen_name = s._user._screen_name
        t = Normalizator.normalize(s.text)
        em = Normalizator.usedPattern(Patterns.DUPLICATED_LETTERS)
        print Normalizator.usedPattern(Patterns.SPECIALCHARS_AND_MENTIONS)
        print smart_str(s.text.lower())
        if (not(t.isspace())):
          selected = raw_input('Es un reclamo?')
          tweets = db[settings_local.CATEGORIES[int(selected)]]          
          tweet = {'normalized_tweet': t,
                   'tweet': s.text,
                   'screen_name': screen_name,
                   'emphatized': em
                   }
          tweets.insert(tweet)
Пример #4
0
def testTweets():
    for i in range(1, 8):
        statuses = api.GetSearch(term=settings_local.USERTRACK, lang="es", page=i, per_page=100)
        for s in statuses:
            t = Normalizator.normalize(s.text)
            if not (t.isspace()):
                if classifier.classify(extract_features(t.split())) == "reclamo":
                    print "\033[91m" + t
                else:
                    print "\033[94m" + t
Пример #5
0
import etc
from normalizator import Normalizator


normalizator = Normalizator()

# Keep only the id and raw attribute dict for each candidate.
# Comprehension replaces the original list(map(lambda ...)) construct.
candidates_attr = [{'id': data['id'], 'attributes': data['attributes']}
                   for data in etc.candidates]

# Filled below: one [id, salary, gpa, dependant] row per candidate.
candidates_attr_normalized = []

def get_spesific_values(a_list, key):
    """Return the value of *key* from each entry's 'attributes' dict.

    a_list: iterable of dicts shaped like {'attributes': {key: value, ...}}.
    key: attribute name to extract.
    Returns a list of the extracted values, in input order.
    (Function name — including the 'spesific' typo — kept for callers.)
    """
    return [entry['attributes'][key] for entry in a_list]



# Hoist the per-attribute columns out of the loop: the original rebuilt
# each full column for every candidate, scanning the list O(n^2) times.
all_salaries = get_spesific_values(candidates_attr, 'salary')
all_gpas = get_spesific_values(candidates_attr, 'gpa')
all_dependants = get_spesific_values(candidates_attr, 'dependant')

# One [id, salary, gpa, dependant] normalized row per candidate.
for candidate in candidates_attr:
    attrs = candidate['attributes']
    candidates_attr_normalized.append([
        candidate['id'],
        normalizator.normalize_salary(all_salaries, attrs['salary']),
        normalizator.normalize_gpa(all_gpas, attrs['gpa']),
        normalizator.normalize_dependant(all_dependants, attrs['dependant']),
    ])


# Create a simple weight
Пример #6
0
        for s in statuses:
            t = Normalizator.normalize(s.text)
            if not (t.isspace()):
                if classifier.classify(extract_features(t.split())) == "reclamo":
                    print "\033[91m" + t
                else:
                    print "\033[94m" + t


# Collect every stored tweet, re-normalize it, and tag it with its
# category; whitespace-only normalizations are discarded.
allTweets = []
for c in settings_local.CATEGORIES:
    for s in db[c].find():
        t = Normalizator.normalize(s["normalized_tweet"])
        if not t.isspace():
            allTweets.append((t, c))

# Tokenize each tweet: keep lower-cased words of length >= 3.
# Comprehension replaces the original append loop.
tweets = [([w.lower() for w in words.split() if len(w) >= 3], sentiment)
          for (words, sentiment) in allTweets]


def get_words_in_tweets(tweets):
    """Flatten (words, sentiment) pairs into a single list of words.

    tweets: iterable of (word_list, sentiment) pairs; sentiment is ignored.
    Returns all words concatenated in input order.
    """
    return [word for (words, _sentiment) in tweets for word in words]