Python TFIDF.process примеры использования

Язык программирования: Python

Пространство имен/Пакет: tfidf

Класс/Тип: TFIDF

Метод/Функция: process

Примеров на hotexamples.com: 4

Python TFIDF.process — найдено 4 примера. Это лучшие примеры кода на Python для tfidf.TFIDF.process, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

TFIDF(30)

transform(4)

highest(2)

get_tfidf(2)

process(2)

tf_idf(2)

done(2)

docHandler(2)

to_array(1)

tfidf(1)

similarity(1)

similar_docs(1)

save_to_pickle(1)

relevancy(1)

prepare_data(1)

train_from_text(1)

normalized_additive_idf_ignore_common_words(1)

get_tfidf_dataframe(1)

get_tfs(1)

calc_cosine_similarity(1)

get_from_pickle(1)

getTFIDF(1)

getOnlyXData(1)

getOnlyX(1)

getIDF(1)

gen_vector(1)

from_array(1)

fit_transform(1)

findNumDocs(1)

create(1)

calculate_tfidf_document(1)

update(1)

Пример #1

Показать файл

Файл: get_tfidf_words.py Проект: angadgadre/DataInColour

def wordcount(filename, ent_file, tfidf, text, id):
    """Write the top TF-IDF entries of a CSV file to the ``tfidf`` path.

    The first line of ``filename`` is skipped as a header. The remaining
    rows are grouped with ``groupby`` using ``id`` as the key function, and
    the lower-cased text of each group is passed to ``TFIDF.process`` as a
    single string.

    Args:
        filename: Path of the input CSV file.
        ent_file: Passed to ``get_entities``; its result is given to the
            ``TFIDF`` constructor.
        tfidf: Path of the output file. One ``word<TAB>score`` line is
            written for each entry yielded by ``highest(200)``.
        text: Callable mapping one CSV row to the string to analyse.
        id: Key callable used for grouping rows. NOTE(review): if
            ``groupby`` is ``itertools.groupby``, only consecutive rows
            with equal keys are merged, so the input presumably has to be
            sorted by this key -- confirm against the data.
    """
    # ``with`` guarantees the input handle is released even if processing
    # raises part-way through the file.
    with open(filename) as resources:
        resources.readline()  # header
        counter = TFIDF(get_entities(ent_file))
        # The group key itself is not needed; use a throwaway name instead
        # of rebinding the ``id`` parameter.
        for _key, lines in groupby(csv.reader(resources), id):
            maintext = ' '.join(text(line).lower() for line in lines)
            counter.process(maintext)
    counter.done()

    # Closing the output file explicitly ensures buffered lines are flushed.
    with open(tfidf, 'w') as out:
        for word, _, _, score in counter.highest(200):
            out.write('%s\t%f\n' % (word, score))

Пример #2

Показать файл

Файл: get_tfidf_words.py Проект: aravindanbalan/DataInColour

def wordcount(filename, ent_file, tfidf, text, id):
  """Write the top TF-IDF entries of a CSV file to the ``tfidf`` path.

  The header line of ``filename`` is skipped. The remaining rows are grouped
  with ``groupby`` keyed by ``id``; each group's lower-cased text is joined
  with spaces and passed to ``TFIDF.process``.

  Args:
    filename: Path of the input CSV file.
    ent_file: Passed to ``get_entities``; its result is given to the
      ``TFIDF`` constructor.
    tfidf: Path of the output file; receives ``word<TAB>score`` lines for
      the entries yielded by ``highest(200)``.
    text: Callable mapping one CSV row to the string to analyse.
    id: Key callable used to group rows. NOTE(review): presumably the
      input is already ordered by this key -- confirm.
  """
  # Context managers close both files deterministically, including on error.
  with open(filename) as resources:
    resources.readline() # header
    counter = TFIDF(get_entities(ent_file))
    # Throwaway key name avoids rebinding the ``id`` parameter.
    for _key, lines in groupby(csv.reader(resources), id):
      maintext = ' '.join(text(line).lower() for line in lines)
      counter.process(maintext)
  counter.done()

  # Closing the output file makes sure everything written is flushed.
  with open(tfidf, 'w') as out:
    for word, _, _, score in counter.highest(200):
      out.write('%s\t%f\n' % (word, score))

Пример #3

Показать файл

Файл: data_2011.py Проект: angadgadre/DataInColour

def count(district,
          type='essays',
          extract_text=lambda line: ' '.join(line[3:10]),
          id=lambda line: line[0]):
    """Write TF-IDF statistics for the texts of projects posted in 2011.

    Reads ``../data/projects.<district>csv`` to collect the ids (column 0)
    of projects whose ``date_posted`` column starts with ``'2011'``, then
    feeds the text of the matching groups from
    ``../data/<type>.<district>csv`` to a ``TFIDF`` instance and writes the
    entries yielded by ``highest(0)`` to ``../data/wc_<type><district>csv``.

    Relies on a module-level ``ent_file`` name, which is passed to
    ``get_entities``; it is not a parameter of this function.

    Args:
        district: String interpolated into the file names directly before
            ``csv``. NOTE(review): presumably empty or ending with a dot
            -- confirm against the data directory.
        type: Base name of the text file to read and of the output file.
        extract_text: Callable mapping one CSV row to its text.
        id: Key callable used to group rows of the text file; the key is
            compared against the collected project ids.
    """
    # Column indices of the projects CSV. Only ``date_posted`` is used
    # below; the remaining names document the column layout.
    (_projectid, _teacher_acctid, _schoolid, school_ncesid, school_latitude,
     school_longitude, school_city, school_state, school_zip, school_metro,
     school_district, school_county, school_charter, school_magnet,
     school_year_round, school_nlns, school_kipp, school_charter_ready_promise,
     teacher_prefix, teacher_teach_for_america, teacher_ny_teaching_fellow,
     primary_focus_subject, primary_focus_area, secondary_focus_subject,
     secondary_focus_area, resource_usage, resource_type, poverty_level,
     grade_level, vendor_shipping_charges, sales_tax,
     payment_processing_charges, fulfillment_labor_materials,
     total_price_excluding_optional_support,
     total_price_including_optional_support, students_reached,
     used_by_future_students, total_donations, num_donors,
     eligible_double_your_impact_match, eligible_almost_home_match,
     funding_status, date_posted, date_completed, date_thank_you_packet_mailed,
     date_expiration) = range(46)

    # ``with`` closes each file even when reading or processing raises.
    with open('../data/projects.%scsv' % district) as projects:
        projects.readline()  # header
        proj_ids = frozenset(
            proj[0] for proj in csv.reader(projects)
            if proj[date_posted].startswith('2011'))

    wordcount = TFIDF(get_entities(ent_file))
    with open('../data/%s.%scsv' % (type, district)) as essays:
        essays.readline()  # header
        for proid, lines in groupby(csv.reader(essays), id):
            if proid in proj_ids:
                text = ' '.join(extract_text(line) for line in lines).lower()
                wordcount.process(text)
    wordcount.done()

    # The output file was previously never closed; closing it here makes
    # sure all buffered lines reach the disk.
    with open('../data/wc_%s%scsv' % (type, district), 'w') as out:
        for word, tf, df, tfidf in wordcount.highest(0):
            out.write('%s\t%f\t%f\t%f\n' % (word, tf, df, tfidf))

Пример #4

Показать файл

Файл: data_2011.py Проект: aravindanbalan/DataInColour

def count(district, type='essays',
          extract_text=lambda line: ' '.join(line[3:10]),
          id=lambda line: line[0]):
  """Write TF-IDF statistics for the texts of projects posted in 2011.

  Collects from ``../data/projects.<district>csv`` the ids (column 0) of
  projects whose ``date_posted`` column starts with ``'2011'``. Groups of
  rows in ``../data/<type>.<district>csv`` whose key is one of those ids are
  lower-cased and passed to ``TFIDF.process``. The entries yielded by
  ``highest(0)`` are written to ``../data/wc_<type><district>csv`` as
  tab-separated ``word, tf, df, tfidf`` lines.

  Relies on a module-level ``ent_file`` name, which is passed to
  ``get_entities``; it is not a parameter of this function.

  Args:
    district: String interpolated into the file names directly before
      ``csv``. NOTE(review): presumably empty or ending with a dot --
      confirm against the data directory.
    type: Base name of the text file to read and of the output file.
    extract_text: Callable mapping one CSV row to its text.
    id: Key callable used to group rows of the text file.
  """
  # Column indices of the projects CSV. Only ``date_posted`` is used below;
  # the remaining names document the column layout.
  (_projectid, _teacher_acctid, _schoolid, school_ncesid, school_latitude,
   school_longitude, school_city, school_state, school_zip, school_metro,
   school_district, school_county, school_charter, school_magnet,
   school_year_round, school_nlns, school_kipp, school_charter_ready_promise,
   teacher_prefix, teacher_teach_for_america, teacher_ny_teaching_fellow,
   primary_focus_subject, primary_focus_area, secondary_focus_subject,
   secondary_focus_area, resource_usage, resource_type, poverty_level,
   grade_level, vendor_shipping_charges, sales_tax,
   payment_processing_charges, fulfillment_labor_materials,
   total_price_excluding_optional_support,
   total_price_including_optional_support, students_reached,
   used_by_future_students, total_donations, num_donors,
   eligible_double_your_impact_match, eligible_almost_home_match,
   funding_status, date_posted, date_completed, date_thank_you_packet_mailed,
   date_expiration) = range(46)

  # Context managers release every file handle, including on exceptions.
  with open('../data/projects.%scsv' % district) as projects:
    projects.readline() # header
    proj_ids = frozenset(
      proj[0] for proj in csv.reader(projects)
      if proj[date_posted].startswith('2011'))

  wordcount = TFIDF(get_entities(ent_file))
  with open('../data/%s.%scsv' % (type, district)) as essays:
    essays.readline() # header
    for proid, lines in groupby(csv.reader(essays), id):
      if proid in proj_ids:
        text = ' '.join(extract_text(line) for line in lines).lower()
        wordcount.process(text)
  wordcount.done()

  # The output file was previously left open; closing it flushes the data.
  with open('../data/wc_%s%scsv' % (type, district), 'w') as out:
    for word, tf, df, tfidf in wordcount.highest(0):
      out.write('%s\t%f\t%f\t%f\n' % (word, tf, df, tfidf))