def get_me_some_test_samples(file_path, prob_to_take=1.0):
    """
    :param file_path: the path of the file we load the data from
    :param prob_to_take: the probability of taking a certain news item as a test sample
    :return: returns X_test and Y_test.
        X_test is a list of lists, where each inner list is a list of features.
        Y_test holds the class id for each list of features in X_test.
    """
    cat_to_ind, ind_to_cat = Utility.load_categories()
    vectorizer = get_persisted_vectorizer(vectorizer_file_path)
    X_test = []
    Y_test = []
    for line in open(file_path):
        # each line is "<category>\t<text>"
        parts = line.decode(encoding="utf-8").strip().split("\t")
        category = parts[0]
        text = parts[1]
        if random.random() < prob_to_take:
            X_test.append(text)
            Y_test.append(cat_to_ind[category])
    X_test = vectorizer.transform(X_test)
    return X_test, Y_test
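
# A minimal usage sketch (kept commented so the module imports cleanly):
# evaluating the persisted classifier on held-out samples. get_persisted_model()
# and Utility.test_file_path are assumptions here, mirroring
# get_persisted_vectorizer(); model.score() is the standard scikit-learn
# accuracy helper.
#
#   model = get_persisted_model()  # hypothetical loader, not defined in this section
#   X_test, Y_test = get_me_some_test_samples(Utility.test_file_path, prob_to_take=0.2)
#   print "Accuracy: %.3f" % model.score(X_test, Y_test)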
def create_model(model_file_name, vectorizer_file_name, training_percentage=0.93, max_iter=300, print_logs=False):
    """
    creates the model for logistic regression and stores it for later use
    :param model_file_name: the name of the file where the model will be stored
    :param vectorizer_file_name: the name of the file where the vectorizer will be stored
    :param training_percentage: how much of the training set we should use
    :param max_iter: the maximum number of iterations for training
    :param print_logs: whether we should print the logs (such as the time needed for training) or not
    :return: returns the model and the vectorizer
    """
    begin = time.time()
    t0 = time.time()
    X_train = []
    Y_train = []
    # cat_to_ind maps a category to its id; ind_to_cat maps an id back to its category
    cat_to_ind, ind_to_cat = Utility.load_categories()
    for line in open(Utility.training_file_path):
        # each line is "<category>\t<text>"
        parts = line.decode(encoding="utf-8").strip().split("\t")
        category = parts[0]
        text = parts[1]
        if random.random() < training_percentage:
            X_train.append(text)
            Y_train.append(cat_to_ind[category])
    if print_logs:
        print "Read the dataset for %d seconds" % (time.time() - t0)
    t0 = time.time()
    vectorizer = TfidfVectorizer(encoding="utf-8", lowercase=True, stop_words=Utility.stop_words)
    X_train = vectorizer.fit_transform(X_train)
    if print_logs:
        print "Vectorized the training set for %d seconds" % (time.time() - t0)
    classifier = linear_model.LogisticRegression(max_iter=max_iter)
    t0 = time.time()
    classifier.fit(X_train, Y_train)
    if print_logs:
        print "Trained the model for %d seconds" % (time.time() - t0)
    t0 = time.time()
    # pickle the classifier and the vectorizer (binary mode for pickled data)
    file_to_write = open(model_file_name, "wb")
    Pickler(file_to_write).dump(classifier)
    file_to_write.close()
    file_to_write = open(vectorizer_file_name, "wb")
    Pickler(file_to_write).dump(vectorizer)
    file_to_write.close()
    if print_logs:
        print "Dumped the items for %d seconds." % (time.time() - t0)
        print "Total time: %d seconds" % (time.time() - begin)
    return classifier, vectorizer
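
# A minimal usage sketch (commented out); the file names below are illustrative
# assumptions, not fixed by this module.
#
#   classifier, vectorizer = create_model('logreg.model', 'tfidf.vectorizer',
#                                         training_percentage=0.9, print_logs=True)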
def parse_rss_feed(rss_feed_url, stop_after=None):
    feedback = ''
    #model = get_persisted_model()
    _, ind_to_cat = Utility.load_categories()
    #vectorizer = get_persisted_vectorizer()
    try:
        # opening the url and reading the content
        c = urlopen(rss_feed_url)
        content = c.read()
        soup = BeautifulSoup(content)
        ind = 0
        tuples = []
        for item in soup.findAll('item'):
            ind += 1
            if (stop_after is not None) and ind > stop_after:
                break
            try:
                title = item.find('title').string
                link_url = item.find('link').string
                description_object = item.find('description')
                description = ''
                for p in BeautifulSoup(description_object.string).findAll('p'):
                    description += '%s ' % p.text
                feedback += 'title: %s\n' % title.strip()
                feedback += 'link_url: %s\n' % link_url
                feedback += 'description: %s\n' % description
                pub_date = item.find('pubdate')
                if pub_date is not None:
                    pub_date = pub_date.string
                    datetime_obj = parse(pub_date, ignoretz=True)
                    feedback += 'pub_date: %s\n' % (datetime_obj.strftime('%B %d %Y %H:%M'))
                    # milliseconds since the epoch
                    date_milli = (datetime_obj - epoch).total_seconds() * 1000.0
                    feedback += 'milli: %f\n' % date_milli
                else:
                    feedback += 'pub_date: None\n'
                text = ''
                img_url = None
                # we try to fetch the photo url directly from the rss feed; if that
                # is not possible, we will try again later from the article page
                if (item.description is not None) and (item.description.string is not None):
                    img_obj = BeautifulSoup(item.description.string).find('img')
                    if img_obj is not None:
                        img_url = img_obj['src']
                elif item.description is not None:
                    img_obj = item.description.find('img')
                    if img_obj is not None:
                        img_url = img_obj['src']
                # here we get the content of the news article
                link_content = urlopen(link_url).read()
                innerSoup = BeautifulSoup(link_content)
                # drop scripts and styles so only the visible text remains
                for script in innerSoup(['script', 'style']):
                    script.extract()
                if rss_feed_url in Utility.fetch_text_specifications:
                    specifications = Utility.fetch_text_specifications[rss_feed_url]
                    if isinstance(specifications, list):
                        # we take the paragraphs in the [start, end) range
                        start = specifications[0]
                        end = len(innerSoup.findAll('p'))
                        if len(specifications) > 1:
                            end = specifications[1]
                        for p in innerSoup.findAll('p')[start:end]:
                            text += p.text
                    else:
                        tag_type = specifications['tag_type']
                        attr_type = specifications['attribute_type']
                        attr_value = specifications['attribute_value']
                        sections = innerSoup.findAll(tag_type, {attr_type: attr_value})
                        # if we need to go deeper into nested tags
                        if 'nested_tag_type' in specifications:
                            nested_tag_type = specifications['nested_tag_type']
                            nested_attr_type = specifications['nested_attribute_type']
                            nested_attr_value = specifications['nested_attribute_value']
                            limit = specifications.get('limit', 1000)
                            recursive = specifications.get('recursive', True)
                            new_sections = []
                            for section in sections:
                                new_sections.extend(section.findAll(nested_tag_type, {nested_attr_type: nested_attr_value}, limit=limit, recursive=recursive))
                            sections = new_sections
                        for section in sections:
                            feedback += 'tag name: %s\n' % section.name
                            text += section.text
                # we are trying to get the image from the news page itself
                if img_url is None:
                    imgs = innerSoup.findAll('img')
                    img_url = ''
                    if imgs is not None and len(imgs) > 0:
                        img_url = imgs[0]['src']
                feedback += 'img_url: %s\n' % img_url
                feedback += 'text: %s\n' % text.strip()
                #X_train = vectorizer.transform([text])
                #cat_ind = model.predict(X_train)
                #feedback += 'CATEGORY: %s' % ind_to_cat[cat_ind[0]]
                feedback += '------------------------------\n'
                tuples.append((title, link_url, Utility.getWords(text)))
            except Exception as inst:
                feedback += 'Inner Exception type: %s\n' % str(type(inst))
                feedback += 'Inner Exception message: %s\n' % inst.message
        feedback += 'Number of posts: %d\n' % ind
        # go from the front: scan the first 100 word positions and record where
        # the posts stop sharing the same word, i.e. where the boilerplate
        # prefix common to every post ends
        front = -1
        for ind in xrange(100):
            everywhere = True
            for i in xrange(1, len(tuples)):
                if not tuples[i - 1][2][ind] == tuples[i][2][ind]:
                    everywhere = False
                    break
            if not everywhere:
                front = 1 + ind
                break
        return feedback
    except Exception as inst:
        feedback += 'Exception type: %s\n' % type(inst)
        feedback += 'Exception message: %s\n' % inst.message
        # if there is an exception, we return an empty list of news posts
        return feedback
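
# A minimal usage sketch (commented out); the feed URL below is illustrative.
# parse_rss_feed() returns the accumulated feedback string describing each
# parsed post.
#
#   print parse_rss_feed('http://example.com/news/rss', stop_after=5)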