class Summarizer:
    """Extract keyword statistics from a catchphrase (title) and body sentences."""

    def __init__(self):
        # Parser provides getKeywords(text) -> (keywords, word_count),
        # where keywords is a list of {'word': ..., 'count': ...} dicts.
        self.parser = Parser()

    def summarize(self, text, title):
        """Return keyword statistics for the title and each body sentence.

        :param text: list of body sentences (strings)
        :param title: the catchphrase — a single sentence (string)
        :return: list of ``(keywords, word_count)`` tuples; the first entry
                 is for ``title``, followed by one entry per sentence of
                 ``text``, in order.
        """
        # Step 1: term list of the catchphrase (title) goes first.
        result = [self.parser.getKeywords(title)]
        # Step 2: term list of every body sentence, in order.
        # (The original also built per-call word lists for debugging;
        # they were never used and have been removed.)
        for sentence in text:
            result.append(self.parser.getKeywords(sentence))
        return result
def __init__(self):
    """Register this client on a Pyro4 daemon and start serving remote calls."""
    # Expose this object over the network so servers can call back into it
    # (e.g. report()); client_uri is what remote peers use to reach us.
    daemon = Pyro4.Daemon(host=others.get_ip())
    self.client_uri = daemon.register(self)
    print(self.client_uri)
    self.parser = Parser()
    self.main_server = None  # stores the URI of the main server
    self.current_request_id = 0
    self.start_time_current_request = None
    self.current_request_reports = []  # replies collected for the request in flight
    self.expected_replies = 0
    self.id_lock = threading.Lock()  # guards current_request_id and expected_replies
    self.list_lock = threading.Lock()  # guards adds/removes on current_request_reports
    # Serve incoming Pyro calls on a background thread.
    # NOTE(review): thread is not daemonized, so it keeps the process alive.
    threading.Thread(target=daemon.requestLoop).start()
def make_topic(ref, title, desc):
    """Create a new Topic row plus an Article row for each of its articles.

    Also records per-article and per-topic word-length / word-frequency
    statistics (JSON-serialized into the rows) and stores the articles' tags.

    :param ref: link to the topic page
    :param title: topic name
    :param desc: topic description
    """
    print('new topic')
    all_topic_text = ''
    topic_words_len = defaultdict(int)
    topic_words_freq = defaultdict(int)

    articles = Parser(ref)
    times_articles = articles.get_time()
    a_titles, a_description, a_refs = articles.get_titles()

    for j in range(len(a_titles)):
        print('new article')
        article_words_len = defaultdict(int)
        article_words_freq = defaultdict(int)

        article = Parser(a_refs[j])
        # Fetch the article body once and reuse it below — the original
        # called get_paragraphs() a second time for the Article row,
        # re-parsing the page.
        all_article_text = article.get_paragraphs()
        all_topic_text += ' ' + all_article_text

        # Per-article word statistics.
        fill_words(all_article_text.split(), article_words_freq,
                   article_words_len)

        new_article = Article(topic=title,
                              name=a_titles[j],
                              href=a_refs[j],
                              text=all_article_text,
                              upd=dateparser.parse(times_articles[j].text),
                              stat_words_len=json.dumps(article_words_len),
                              stat_words_freq=json.dumps(article_words_freq))
        new_article.save()
        make_tags(article.get_tags(), a_titles[j])

    # Topic-level statistics over the concatenated text of all articles.
    fill_words(all_topic_text.split(), topic_words_freq, topic_words_len)
    new_topic = Topic(name=title,
                      description=desc,
                      href=ref,
                      upd=dateparser.parse(times_articles[0].text),
                      stat_words_len=json.dumps(topic_words_len),
                      stat_words_freq=json.dumps(topic_words_freq))
    new_topic.save()
@author: xuzairong """ from urlsMan import StockListUrlMan from config import stockListUrl, stockUrl from downLoader import Downloader from myParser import Parser from mongoDbMan import MongoMan if __name__ == "__main__": #url管理器 urlsManObject = StockListUrlMan() stockListUrls = urlsManObject.getStockListUrl(stockListUrl) #下载器 downloaderObject = Downloader() #解析器 parserObject = Parser() #mongo管理器 mongoManObject = MongoMan() #结果 result = [] count = 0 for url in stockListUrls: print(url) try: driver = downloaderObject.getStockInfo(url) json = parserObject.parseCoreData(driver) json["url"] = url result.append(json) count = count + 1 print(count) if count % 10 == 0:
all_topic_text += ' ' + article.text fill_words(all_topic_text.split(), topic_words_freq, topic_words_len) return json.dumps(topic_words_len), json.dumps(topic_words_freq) while True: try: db.close() db.connect() for index in range(len(titles)): if len(Topic.select().where(Topic.name == titles[index])) == 0: make_topic(refs[index], titles[index], description[index]) else: cur_topic = Topic.get(Topic.name == titles[index]) last_upd = cur_topic.upd articles = Parser(refs[index]) times_articles = articles.get_time() cur_topic.upd = dateparser.parse(times_articles[0].text) cur_topic.save() a_titles, a_description, a_refs = articles.get_titles() have_new = False for j in range(len(times_articles)): if dateparser.parse(times_articles[j].text) > last_upd: have_new = True print('new article') article = Parser(a_refs[j]) article_words_len = defaultdict(int) article_words_freq = defaultdict(int) all_article_text = article.get_paragraphs() fill_words(all_article_text.split(), article_words_freq, article_words_len) new_article = Article(topic=titles[index], name=a_titles[j],
class Client:
    """Console client of a distributed tag-based file system (Pyro4).

    Broadcasts on UDP to discover the main server, sends user commands to
    it, collects asynchronous replies via report(), and transfers file
    contents in fixed-size parts.
    """
    UPDATE_SERVERS_TIMEOUT = 2   # seconds to wait for a broadcast reply
    UPDATE_SERVERS_TIME = 2      # seconds between discovery attempts
    WAIT_ANSWERS_TIME = 60       # seconds to wait for command replies
    WAIT_AND_TRY_AGAIN_TIME = 5  # back-off before retrying a transfer
    MAX_ATTEMPT_NUMBER = 2
    FILE_PART_SIZE = 10000000  # 10mb
    DOWNLOAD_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'downloads')

    def __init__(self):
        """Register this client on a Pyro4 daemon and start serving remote calls."""
        daemon = Pyro4.Daemon(host=others.get_ip())
        self.client_uri = daemon.register(self)
        print(self.client_uri)
        self.parser = Parser()
        self.main_server = None  # stores the URI of the main server
        self.current_request_id = 0
        self.start_time_current_request = None
        self.current_request_reports = []  # replies for the request in flight
        self.expected_replies = 0
        self.id_lock = threading.Lock()  # guards current_request_id and expected_replies
        self.list_lock = threading.Lock()  # guards adds/removes on current_request_reports
        threading.Thread(target=daemon.requestLoop).start()

    def call_exec_cmd(self, request_id, server, command, params):
        """Forward *command* with *params* to *server*, tagged with *request_id*.

        The server answers asynchronously by calling back report() on
        self.client_uri.
        """
        server = Pyro4.Proxy(server)
        server.exec_cmd(request_id, command.value, params, self.client_uri)

    def create_filename(self, tags, filename):
        """Build the storage name '<tag1>_<tag2>_...()<filename>'."""
        s = ''
        for t in tags:
            s += t
            s += '_'
        s += '()'
        s += filename
        return s

    def start_client(self):
        """Main REPL: read a command, send it to the main server, print replies."""
        self.update_main_server()
        while True:
            try:
                s = input()
                command, params = self.parser.parse(s)
                if self.main_server is not None:
                    if command is not None:
                        if command == command.cp:
                            # cp must verify locally that the file exists and
                            # attach its size to the request params.
                            path, tags, filename = params
                            if os.path.exists(path):
                                fd = os.stat(path)
                                size = fd.st_size
                                params = (path, tags, filename, size)
                            else:
                                print(Strings.FILE_NOT_FOUND.format(path))
                                continue
                        # Start a fresh request: new id, empty reply list.
                        with self.id_lock:
                            self.current_request_id = self.current_request_id + 1
                        with self.list_lock:
                            self.current_request_reports = []
                        future = time.time() + self.WAIT_ANSWERS_TIME
                        attemp = 1
                        with self.id_lock:
                            self.expected_replies = 0
                        # Try to deliver the command, rediscovering the main
                        # server on failure, up to MAX_ATTEMPT_NUMBER times.
                        while attemp <= self.MAX_ATTEMPT_NUMBER:
                            try:
                                with self.id_lock:
                                    self.expected_replies += 1
                                self.call_exec_cmd(self.current_request_id,
                                                   self.main_server, command,
                                                   params)
                                break
                            except:
                                with self.id_lock:
                                    self.expected_replies -= 1
                                print(
                                    Strings.UNREACHEABLE_SERVER_ERROR.format(
                                        self.main_server))
                                self.main_server = None
                                self.update_main_server()
                                attemp += 1
                        # NOTE(review): pure busy-wait — spins the CPU until
                        # all replies arrive or the deadline passes.
                        while True:
                            if self.expected_replies <= 0 or time.time() > future:
                                break
                        # Close the request window before printing results.
                        with self.id_lock:
                            self.current_request_id = 0
                            self.expected_replies = 0
                        if command == command.ls:
                            if len(self.current_request_reports) > 0:
                                print('Archivos hallados')
                                for f in self.current_request_reports:
                                    print(f)
                            else:
                                print(
                                    'No se encontraron archivos con las caracteristicas definidas'
                                )
                            print()
                        if command == command.info:
                            if len(self.current_request_reports) > 0:
                                if self.current_request_reports[0] is not None:
                                    for f in self.current_request_reports:
                                        print(f)
                                else:
                                    print('No se encontro el archivo solicitado')
                            print()
                        if command == command.rm:
                            if len(self.current_request_reports
                                   ) > 0 and self.current_request_reports[
                                       0] is not None:
                                print('Archivos eliminados')
                                for t in self.current_request_reports:
                                    print(t)
                            else:
                                print('No se encontraron archivos para eliminar')
                            print()
                        if command == command.get:
                            if len(self.current_request_reports) > 0:
                                if self.current_request_reports[0] is not None:
                                    # Some server reported having the file:
                                    # download from the reported servers on a
                                    # background thread.
                                    tags, filename = params
                                    servers = []
                                    for s in self.current_request_reports:
                                        servers.append(s)
                                    params = (tags, filename, servers)
                                    threading.Thread(target=self.client_get,
                                                     args=params).start()
                                else:
                                    print('No se encontro el archivo {0}'.format(
                                        params))
                        if command == command.cp:
                            if len(self.current_request_reports) > 0:
                                if self.current_request_reports[0] is not None:
                                    # The main server found a server willing to
                                    # receive the file: upload in background.
                                    path, tags, filename, size = params
                                    params = (
                                        self.current_request_reports[0],
                                        path,
                                        tags,
                                        filename,
                                    )
                                    threading.Thread(target=self.client_cp,
                                                     args=params).start()
                                else:
                                    print('El archivo {0} ya existe'.format(
                                        path))
                    else:
                        print('comando invalido')
                else:
                    print('No se encuentran servidores')
            except:
                # NOTE(review): bare except swallows every error (including
                # KeyboardInterrupt) and restarts the loop.
                continue

    def client_cp(self, server_uri, path, tags, filename, offset=0, attempt=0):
        """Upload *path* to *server_uri* in FILE_PART_SIZE chunks.

        Sends each chunk via server.fill_file(...); a final call with
        offset -1 signals end-of-file.
        """
        # copy the file part by part
        completed = False
        while attempt < self.MAX_ATTEMPT_NUMBER:
            if os.path.exists(path):
                try:
                    fd = open(path, 'a+b')
                    fd.seek(offset)
                    content = fd.read(self.FILE_PART_SIZE)
                    fd.close()
                except:
                    print(Strings.FILE_LOST_SUDDENLY.format(path))
                    break
                if len(content) > 0:
                    try:
                        server = Pyro4.Proxy(server_uri)
                        print(
                            'Enviando offset{0}, tags:{1}, filename{2}'.format(
                                offset, tags, filename))
                        server.fill_file(tags, filename, content, offset)
                        offset += len(content)
                    except:
                        time.sleep(self.WAIT_AND_TRY_AGAIN_TIME)
                        print(Strings.TRYING_AGAIN.format('copiar', path))
                        attempt += 1
                        continue
                else:
                    # Empty read => EOF: tell the server the upload is done.
                    try:
                        server = Pyro4.Proxy(server_uri)
                        server.fill_file(tags, filename, content, -1)
                        print('Se termino de enviar tags:{0}, filename{1}'.
                              format(tags, filename))
                    except:
                        pass
                    print(Strings.SUCCESFUL_OPERATION.format('copiar', path))
                    completed = True
                    break
            else:
                # NOTE(review): neither attempt nor offset changes here, so a
                # vanished file makes this loop print forever.
                print(Strings.FILE_LOST_SUDDENLY.format(path))
        # NOTE(review): condition looks inverted — OPERATION_FAIL is printed
        # exactly when the upload COMPLETED; likely meant `if not completed:`.
        if completed:
            print(Strings.OPERATION_FAIL.format('copiar ', path))

    def client_get(self, tags, filename, servers):
        """Download (tags, filename) into DOWNLOAD_PATH, trying *servers* in turn."""
        # pick a server to copy from
        attempt = -1
        completed = False
        offset = 0
        # create the file in the predefined downloads folder
        path = os.path.join(self.DOWNLOAD_PATH, filename)
        if offset == 0:
            try:
                if os.path.exists(path):
                    print(Strings.FILE_ALREADY_EXISTS.format(path))
                    return
                # 'x' mode fails if the file already exists (race-safe create).
                fd = open(path, 'x')
                fd.close()
                print('Creando archivo tags:{0}, filename:{1}'.format(
                    tags, filename))
            except:
                print(Strings.FILE_LOST_SUDDENLY.format(path))
                print(Strings.OPERATION_FAIL.format('get ', (tags, filename)))
                return
        i = 0
        # start downloading the file
        while i != len(servers):
            if os.path.exists(path):
                try:
                    server = Pyro4.Proxy(servers[i])
                    correct, content = server.get_part(tags, filename, offset,
                                                       self.FILE_PART_SIZE)
                    print(
                        'Recibiendo offset:{0}, tags:{1}, filename:{2}'.format(
                            offset, tags, filename))
                    if correct:
                        content = b64decode(content['data'])
                        offset += len(content)
                    else:
                        break
                except:
                    # Retry this server up to MAX_ATTEMPT_NUMBER times, then
                    # move on to the next one.
                    time.sleep(self.WAIT_AND_TRY_AGAIN_TIME)
                    print(Strings.TRYING_AGAIN.format('get', path))
                    if attempt < self.MAX_ATTEMPT_NUMBER:
                        attempt += 1
                    else:
                        attempt = 0
                        i += 1
                    continue
                if len(content) > 0:
                    try:
                        size = os.stat(path).st_size
                        if size <= offset:
                            # NOTE(review): offset was already advanced past
                            # this chunk above, so seek(offset) appears to
                            # write the chunk one chunk-length too far —
                            # confirm against the server's get_part contract.
                            fd = open(path, 'a+b')
                            fd.seek(offset)
                            fd.write(content)
                            fd.close()
                    except:
                        print(
                            Strings.FILE_LOST_SUDDENLY.format(
                                (tags, filename)))
                        break
                else:
                    # Empty part => EOF: download finished.
                    print(
                        Strings.SUCCESFUL_OPERATION.format(
                            'copiar', (tags, filename)))
                    completed = True
                    break
            else:
                print(Strings.FILE_LOST_SUDDENLY.format(path))
                break
        if completed is False:
            print(Strings.OPERATION_FAIL.format('get ', (tags, filename)))

    def report(self, request_id, command, output):
        """Remote callback: a server delivers *output* for *request_id*.

        Replies for stale or already-closed requests are ignored.
        """
        if request_id == self.current_request_id and self.expected_replies > 0:
            command = Command[command]
            if command == Command.ls or command == Command.info or command == Command.rm or command == Command.cp or command == Command.get:
                with self.list_lock:
                    if len(output) > 0:
                        self.current_request_reports.extend(output)
            with self.id_lock:
                self.expected_replies -= 1

    def update_main_server(self):
        """Block until a main server is discovered via broadcast."""
        while self.main_server is None:
            self.scan_loop()
            time.sleep(self.UPDATE_SERVERS_TIME)

    def scan_loop(self):
        """Broadcast a discovery probe and take the first URI that answers."""
        scanner = socketutil.createBroadcastSocket()
        scanner.settimeout(self.UPDATE_SERVERS_TIMEOUT)
        main_server = None
        try:
            scanner.sendto(b'get_uri_client', ('255.255.255.255', 1212))
        except:
            print('Error al hacer broadcast')
        # Only one datagram is read: both paths break out immediately.
        while True:
            try:
                data, address = scanner.recvfrom(512)
                main_server = data.decode()
                break
            except:
                break
        self.main_server = main_server
        print('Servidor Principal:{0}'.format(self.main_server))
def predictBayesianModel(sentenceList=[
    'hello world occupation lease', 'machine learning board',
    'machine learning lease occupation'
],
                         input_path="./model/train_model.npz",
                         word_index_file="./model/word_index.npz"):
    """Score each body sentence of *sentenceList* against its catchphrase.

    sentenceList[0] is the catchphrase; the remaining entries are body
    sentences.  Loads the trained model (pi, theta) and the catchword /
    bodyword vocabularies from the given .npz files and returns a list
    with one score per body sentence (indices 1..end of sentenceList).

    NOTE(review): the default sentenceList is a mutable default argument —
    harmless here since it is never mutated, but fragile.
    """
    # Model 1: priors (pi) and weight matrix (theta).
    npzfile = np.load(input_path)
    pi_bar = npzfile['arr_0']  # NOTE(review): loaded but never used below
    theta_bar = npzfile['arr_1']

    # Model 2: vocabularies mapping words to matrix positions.
    npzfile2 = np.load(word_index_file)
    catchword_index = npzfile2['arr_0']
    bodyword_index = npzfile2['arr_1']

    scoreRecord = []
    parser = Parser()
    catchword_list = catchword_index.tolist()
    bodyword_list = bodyword_index.tolist()

    # --------------------------------------------------
    # Positions (in the catchword vocabulary) of the
    # catchphrase's keywords.
    # --------------------------------------------------
    catchwords = sentenceList[0]
    (keywords, wordCount) = parser.getKeywords(catchwords)
    catchword_positionList = []
    for elem in keywords:
        word = elem['word']
        count = elem['count']  # NOTE(review): unused in this loop
        idx = catchword_list.index(word) if word in catchword_list else -1
        if (idx != -1):
            catchword_positionList.append(idx)

    # --------------------------------------------------
    # Score each body sentence.  Algorithm:
    # 1) for each keyword of the sentence that exists in the body
    #    vocabulary, sum theta over the catchphrase word positions;
    # 2) weight that sum by the keyword's count in the sentence;
    # 3) the sentence score is the total over its keywords.
    # --------------------------------------------------
    for idx in range(1, len(sentenceList)):  # sentence 0 is the catchphrase
        (keywords, wordCount) = parser.getKeywords(sentenceList[idx])
        sentence_score = 0
        for elem in keywords:
            word = elem['word']
            count = elem['count']
            word_score = 0
            wordInSentence_idx = bodyword_list.index(
                word) if word in bodyword_list else -1
            if (wordInSentence_idx != -1):
                for catchwordIdx in catchword_positionList:
                    # NOTE(review): theta_bar is indexed by the SENTENCE
                    # index `idx`, while `wordInSentence_idx` (the word's
                    # vocabulary position) is computed but never used.
                    # This looks like it should be
                    # theta_bar[wordInSentence_idx][catchwordIdx] —
                    # confirm against the training code before changing.
                    word_score += theta_bar[idx][catchwordIdx]
            sentence_score += word_score * count
        scoreRecord.append(sentence_score)  # NB: sentence ith, from 1 to end

    return scoreRecord
def __init__(self):
    """Create the single Parser instance reused by this object."""
    self.parser = Parser()
# Compile every .jack file in the target directory, preceded by the OS classes.
filesToCompile = getFilesInDirectory(directory)
# The Jack OS classes are always compiled alongside the user program.
osFiles = [
    'Jack_Programs/jack-os/Array.jack', 'Jack_Programs/jack-os/Keyboard.jack',
    'Jack_Programs/jack-os/Math.jack', 'Jack_Programs/jack-os/Memory.jack',
    'Jack_Programs/jack-os/Output.jack', 'Jack_Programs/jack-os/Screen.jack',
    'Jack_Programs/jack-os/String.jack', 'Jack_Programs/jack-os/Sys.jack'
]
filesToCompile = osFiles + filesToCompile

displayMessage("\n\n\nPrograms to Compile")
for file in filesToCompile:
    print(file)
print("\n\n")
time.sleep(0.3)

for file in filesToCompile:
    # parse() returns, per file:
    # [0] class symbol table, [1] method table, [2] assertions,
    # [3] class methods, [4] generated VM code.
    parser = Parser(file, exitOnErrors)
    symbolTables = parser.parse()
    # update the dictionaries for the symbol tables,
    # keyed by the bare class name (strip path and .jack extension)
    globalClassTable = symbolTables[0]
    globalMethodTable[file.split("/")[-1].split(".")[0]] = symbolTables[1]
    assertions = assertions + symbolTables[2]
    classMethods = symbolTables[3]
    vmCode[file.split("/")[-1].split(".")[0]] = symbolTables[4]

# Cross-file checks and output, after every file has been parsed.
checkAssertions(assertions, globalClassTable, globalMethodTable, classMethods)
vmCode = fixVMCode(vmCode)
printoutTables()
saveFiles()
displayMessage("\n\n\nDone")
import numpy as np
import tensorflow as tf
import random
import sys, os
import json
import argparse
from myParser import Parser
#import parser
from datamanager import DataManager
from actor import ActorNetwork
from LSTM_critic import LSTM_CriticNetwork

# Silence TensorFlow logging on both the Python and the C++ side.
tf.logging.set_verbosity(tf.logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Parse command-line arguments (unknown args are ignored).
argv = sys.argv[1:]
parser = Parser().getParser()
args, _ = parser.parse_known_args(argv)
random.seed(args.seed)

# Load dataset splits and pretrained word vectors.
#dataManager = DataManager(args.dataset)
dataManager = DataManager('../AGnews')  # NOTE(review): dataset path hard-coded, args.dataset ignored
train_data, dev_data, test_data = dataManager.getdata(args.grained,
                                                      args.maxlenth)
word_vector = dataManager.get_wordvector(args.word_vector)

# Fast-test mode: shrink the splits for a quick smoke run.
if args.fasttest == 1:
    train_data = train_data[:1000]
    dev_data = dev_data[:200]
    test_data = test_data[:200]
def main():
    """Assemble the Hack .asm source into binary using the classic two passes.

    Pass 1 records the ROM address of every (LABEL) pseudo-command; pass 2
    emits binary for A- and C-commands, allocating RAM slots for any symbol
    seen for the first time.
    """
    if noCommandLineArguements():
        print("ERROR: No source file specified")
        exit()

    asm = Parser(SOURCE_FILE_NAME)
    bin_out = Output(SOURCE_FILE_NAME)
    symbols = SymbolTable()
    translator = Code()

    # Pass 1: collect label addresses only.
    while asm.hasMoreCommands():
        asm.advance()  # read next command from source file
        if asm.commandType() == "L_COMMAND":
            label = asm.symbol()
            if not symbols.contains(label):
                symbols.addEntry(label, asm.getAddress())

    asm.resetFile()  # rewind to the first line for the second pass

    # Pass 2: translate A- and C-commands to binary.
    while asm.hasMoreCommands():
        asm.advance()  # read next command from source file
        if asm.commandType() == "A_COMMAND":  # @value or @symbol
            if asm.isConstant():
                bin_out.writeToBin(asm.getConstant())
            else:
                sym = asm.symbol()
                if not symbols.contains(sym):
                    symbols.addEntry(sym, "new")  # allocate a fresh RAM slot
                bin_out.writeToBin(symbols.getAddress(sym))
        elif asm.commandType() == "C_COMMAND":  # dest=comp;jump
            word = (C_COMMAND_PREFIX + translator.comp(asm.comp()) +
                    translator.dest(asm.dest()) + translator.jump(asm.jump()))
            bin_out.writeToBin(word)
        elif asm.commandType() != "L_COMMAND":
            print("ERROR: Unexpected command")
            exit()

    asm.closeFile()
    bin_out.closeFile()
from collections import defaultdict import json import requests from myParser import Parser from bd import Topic, Article, Tag, db import dateparser import config session = requests.Session() session.max_redirects = config.MAX_REDIRECTS my_site = Parser(config.MY_SITE) titles, description, refs = my_site.get_titles() all_titles = set(titles) db.connect() def make_tags(tags, title): """ Заполняет таблицу с тегами. :param tags: лист тегов :param title: статья, откуда мы взяли теги """ for tag in tags: new_tag = Tag(name=tag.text, article=title, href=tag['href']) new_tag.save() def fill_words(text, words_freq, words_len): """ Заполняет данные словари для статистики словами :param text: слова :param words_freq: словарь для сохранения частот :param words_len: словарь для сохранения длин