def main():
    '''Generate index files.

    Reads CLI options via docopt, builds an Index over the given root
    directory, and writes the resulting manifest to manifest.json under
    PROJECT_DIR.
    '''
    options = docopt(__doc__)
    base_url = options['--url']  # TODO: make sure this is a safe url
    property_name = options['<propertyName>']
    root_dir = options['<root>']
    include_in_global_search = options['--includeInGlobalSearch']
    intro_message = '''
    Indexing {property_name}
    Manifest will {is_global}be included in global property searches.
    url: {base_url}
    root: {root_dir}
    '''.format(property_name=property_name,
               base_url=base_url,
               root_dir=root_dir,
               is_global=('' if include_in_global_search else 'NOT '))
    print(intro_message)
    # Build the index and manifest.
    index = Index(base_url, property_name, root_dir, include_in_global_search)
    index.build()
    manifest = json.dumps(index.manifest)
    print('Exporting index...')
    # Export the manifest. Mode 'w' truncates and writes in one step
    # (the original opened in 'a' and then seeked/truncated by hand);
    # also avoid shadowing the builtin name `file`.
    with open(PROJECT_DIR + 'manifest.json', 'w') as manifest_file:
        manifest_file.write(manifest)
    print('Index written to manifest.json!\n')
def __init__(self):
    """Crawl the configured links, compute PageRank, build the index,
    and run a set of sample scoring queries."""
    crawler = Crawler(self.LINKS)
    visited = crawler.getVisited()
    structure = crawler.getLinkStructure()
    print("Link-Struktur:\n")
    crawler.printLinkStructure()

    ranker = PageRank(structure)
    ranks = ranker.getPageRank()
    print("\n\nPageRanks:\n")
    ranker.printPageRank()

    indexer = Index(self.STOPWORDS, visited)
    inverted = indexer.getIndex()
    print("\n\nIndex:\n")
    indexer.printIndex()

    scorer = Scorer(ranks, inverted, structure)
    # scorer.usePageRank(True)
    print("\n\nDokumentenlängen:\n")
    scorer.printDocumentLengths()
    print("\n\nSuchergebnisse:\n")
    for query in (["tokens"], ["index"], ["classification"],
                  ["tokens", "classification"]):
        scorer.calculateScores(query)
def extract_text(docno, index_path, collection_type='html'):
    '''Return the extracted text for a document.

    collection_type: 'html' or 'text'. For 'html' the raw content is
    dumped to a temp file, run through the external extract script, and
    the resulting text file is read back; the temp files are removed.
    Returns '' (and logs to stderr) on any failure.
    '''
    text = ''
    try:
        index = Index(index_path)
        content = index.get_doc_content(docno)
        if collection_type == 'html':
            html_path, text_path, title_path = map(
                lambda suffix: '%s.%s' % (docno, suffix), suffixes)
            # Use context managers so handles are closed even on error.
            with open(html_path, 'w') as f:
                f.write(content)
            subprocess.call(['python', extract_script,
                             html_path, text_path, title_path])
            # title_f = open(title_path)  # first line is the title
            # text = ' '.join(map(str.strip, title_f.readlines())) + '\n'
            with open(text_path) as text_f:
                text += ''.join(text_f.readlines())
            os.remove(html_path)
            os.remove(text_path)
            os.remove(title_path)
        elif collection_type == 'text':
            text = content
    except Exception as e:  # modern syntax, valid on Python 2.6+ and 3
        sys.stderr.write('error at docno %s\n' % docno)
    # Bug fix: the extracted text was computed but never returned.
    return text
class PositionalIndexer:
    """Builds a positional inverted index over a directory of documents,
    persists it to disk, and answers simple token queries against it."""

    def __init__(self):
        self.index_db = Index()            # term -> positional postings
        self.preprocessor = PreProcessor() # token normalization
        self.id_manager = IdManager()      # document path <-> id mapping

    def index(self, files_parent_directory):
        """Index every word of every file under files_parent_directory."""
        for word, source_file, position in get_word_by_word(files_parent_directory):
            tokens = self.preprocessor.pre_process(word)
            doc_path = combine_path(files_parent_directory, source_file)
            doc_id = self.id_manager.get_document_id(doc_path)
            for token in tokens:
                self.index_db.index(token, doc_id, position)

    def save(self, compress: bool):
        """Serialize the id map and the index under Index/."""
        Serialization.write_ids(self.id_manager, 'Index/ids.mir')
        Serialization.write_to_file(self.index_db, 'Index/index.mir', compress)

    def load(self, compress: bool):
        """Restore the id map and the index from Index/."""
        self.id_manager = Serialization.load_ids('Index/ids.mir')
        self.index_db = Serialization.read_from_file('Index/index.mir', compress)

    def search(self, query: str):
        """Look up every preprocessed term of the whitespace-split query."""
        for raw_token in query.split(' '):
            for term in self.preprocessor.pre_process(raw_token):
                posting_list = self.index_db.find(term)
                # NOTE(review): computed but never used or returned — confirm intent.
                term_frequency = len(posting_list)
def main(self):
    """Print every stock whose PER is at or below its business-category
    average, together with its rate-of-change figures."""
    index = Index()
    stock = Stock()
    sise = Sise()
    per_by_category = self.read_average_per_file()
    stock_items = stock.get_all_stock_item_list()
    print("종목명/PER/업종PER/상승률/상승하락/2019시초가/현재가/업종코드/업종설명")
    for item in stock_items:
        code = item[0]
        per_pair = index.get_stock_item_per(code)
        item_per = per_pair[0].replace(",", "")
        category_per = per_pair[1].replace(",", "")
        # Skip entries with no PER data or whose PER exceeds the category average.
        if item_per == '-' or category_per == '-':
            continue
        if float(item_per) > float(category_per):
            continue
        rate = sise.get_increase_rate_by_code(code, None)
        if rate is None:
            continue
        print("/".join([item[1], item_per, category_per,
                        str(rate[0]), rate[1], str(rate[2]), str(rate[3]),
                        item[2], per_by_category[item[2]][1]]))
def __init__(self, db, cdb):
    """Keep a handle to the main database and warm an in-memory cache
    index from the cache database's full cursor."""
    self.db = db
    self.numDocs = 127000  # fixed collection-size constant
    cache = Index()
    cache.createFromCursor(cdb.findAll())
    self.cacheIndex = cache
def selector(self, location):
    """Match the grammar of a selector.

    Repeatedly consumes `[expression_list]` (array indexing) and
    `.identifier` (record field access) suffixes, building Index/FieldAST
    nodes chained onto `location`. Reports InvalidArray/InvalidRecord via
    invalid_token rather than propagating. Returns the last node built,
    or None if no pattern matched.
    """
    ret = None
    # 0 to many patterns.
    while True:
        if self.current.kind == "[":
            self.match("[")
            self.stack.append("ExpressionList")
            expressions = self.expression_list()
            self.match("]")
            for expression in expressions:
                if not isinstance(location, Location) or not isinstance(location.type, Array):
                    # Not an array variable.
                    try:
                        raise InvalidArray(location.type, self.last)
                    except InvalidArray as e:
                        self.invalid_token(e)
                cur = Index(location, expression)
                cur.token = self.last
                ret = cur
                location = cur
        elif self.current.kind == ".":
            self.match(".")
            name = self.match("identifier")
            if name is None:  # idiom fix: identity test for None
                continue
            if self.record_scope is None:  # idiom fix: identity test for None
                # Not a record variable.
                try:
                    raise InvalidRecord(location.type, self.last)
                except InvalidRecord as e:
                    self.invalid_token(e)
                break
            # Access the record's scope and find the variable with name.
            self.current_scope = self.record_scope
            entry = self.find(name.value)
            var = VariableAST(name.value, entry)
            self.set_record_scope(entry)
            cur = FieldAST(location, var)
            ret = cur
            location = cur
        else:
            # Pattern ended.
            break
    self.current_scope = self.program_scope
    return ret
def read_from_file(filename: str, compress: bool):
    """Deserialize an Index from a binary file.

    File layout per term: 4-byte big-endian word length, UTF-8 word,
    4-byte posting count, then `count` (document_id, position) pairs —
    either variable-byte encoded (when `compress`) or two fixed 4-byte
    big-endian ints each.

    Raises ValueError if the directory does not exist or the file is
    truncated mid-record.
    """
    result = Index()
    if not os.path.exists(os.path.dirname(filename)):
        raise ValueError('The path for loading the index does not exist.')

    with open(filename, 'rb') as inp:

        def read_varint() -> int:
            # Variable-byte decoding: accumulate bytes until the
            # continuation bit (>= 0x80) terminates the value.
            chunks = []
            while True:
                b = inp.read(1)
                if b == b'':
                    raise ValueError('Malformed index file.')
                chunks.append(b)
                if b >= b'\x80':
                    break
            return VariableByteUtility.decode(b''.join(chunks))

        def read_u32() -> int:
            b = inp.read(4)
            if b == b'':
                raise ValueError('Malformed index file.')
            return int.from_bytes(b, 'big')

        while True:
            byte = inp.read(4)
            if byte == b'':
                break  # clean end of file
            word_length = int.from_bytes(byte, 'big')
            if word_length == 0:
                break
            byte = inp.read(word_length)
            if byte == b'':
                raise ValueError('Malformed index file.')
            word = byte.decode('UTF-8')
            posting_list_length = read_u32()
            for _ in range(posting_list_length):
                if compress:
                    document_id = read_varint()
                    position = read_varint()
                else:
                    document_id = read_u32()
                    position = read_u32()
                result.index(word, document_id, position)
    return result
def __init__(self, filename, stopfile):
    """Load the commented-JSON schema and build the type index.

    :param filename: path to the schema file (commentjson format)
    :param stopfile: stopword file handed to the Index
    """
    # Use a context manager so the schema file handle is closed
    # (the original open() leaked it).
    with open(filename, 'r') as schema_file:
        self.schema = commentjson.loads(schema_file.read())
    # Just convenience aliases into the parsed schema.
    self.ents = self.schema['ents']
    self.types = self.schema['types']
    self.ind = Index(self.types, stopfile)
    self.indexFile = 'index'
    # Optional sections: only present when the schema declares links.
    if 'links' in self.schema:
        self.defaults = self.schema['defaults']
        self.links = self.schema['links']
def get_index(self, id):
    """Synchronize the index for `id` with the remote peer.

    Sends the md5 of our index XML; if the peer answers \\x04 it follows
    with a fresh copy which replaces ours, \\x05 means already in sync.
    Raises Exception on any other reply.
    """
    index = Index(id)
    buffer = index.get_xml().encode('utf-8')
    hash = md5(buffer).digest()
    self.ssend('\x03' + hash)
    flag = self.srecvall(1)
    if flag == "\x04":
        buffer = self.recv_file()
        index.set_xml(buffer)
    elif flag == "\x05":
        pass  # hashes matched: nothing to update
    else:
        # Call-form raise (valid on Python 2 and 3; the original
        # used the Python-2-only comma syntax).
        raise Exception("Protocol Error")
def index_scripts(self, script_paths: list, index: Index) -> None:
    """Record each script (by resolved absolute path) in the given index."""
    for relative_path in script_paths:
        try:
            source_path = self._get_absolute_script_path(relative_path)
        # NOTE(review): catching FileExistsError here looks odd —
        # FileNotFoundError seems more likely intended; confirm what
        # _get_absolute_script_path actually raises.
        except FileExistsError:
            # Fall back to scanning all known script paths for a match.
            source_path = ''.join([
                x for x in self.get_script_paths(True)
                if x.endswith(relative_path)
            ])
        if source_path == '' or not os.path.exists(source_path):
            # NOTE(review): bare `raise` with no active exception raises
            # RuntimeError in Python 3 — presumably a specific error
            # (e.g. FileNotFoundError) was intended; confirm.
            raise
        index.write_file(source_path)
def get_index(self, id):
    """Synchronize the index for `id` with the remote peer.

    Sends the md5 of our index XML; \\x04 reply means the peer sends a
    replacement copy, \\x05 means already in sync; anything else is a
    protocol error.
    """
    index = Index(id)
    buffer = index.get_xml().encode('utf-8')
    hash = md5(buffer).digest()
    self.ssend('\x03' + hash)
    flag = self.srecvall(1)
    if flag == "\x04":
        buffer = self.recv_file()
        index.set_xml(buffer)
    elif flag == "\x05":
        pass  # hashes matched: nothing to update
    else:
        # Call-form raise (valid on Python 2 and 3; the original
        # used the Python-2-only comma syntax).
        raise Exception("Protocol Error")
def test_weighter(): parser = ParserCACM() textRepresenter = PorterStemmer() fname = "data/cacm/cacm.txt" I = Index(parser, textRepresenter) I.indexation(fname) weighters = [Binary(I), TF(I), TF_IDF(I), Log(I), Log_plus(I)] for i, w in enumerate(weighters): print "Test of weighter" + str(i) print "getDocWeightsForDoc" print w.getDocWeightsForDoc("20") print "getDocWeightsForStem" print w.getDocWeightsForStem("accelerat") print "getDocWeightsForQuery" print w.getWeightsForQuery(I.getTfsForDoc("20"))
def __init__(self, settings, screen, button):
    """Set up the drawing surface, sprite images and font for this widget."""
    self.screen = screen
    self.screen_rect = screen.get_rect()
    # NOTE(review): this stores the Settings *class*, not the `settings`
    # instance passed in — confirm whether `settings` was intended.
    self.theSettings = Settings
    # First frame of each of the three alien sprite sets.
    # NOTE(review): three separate Index() objects are built here —
    # confirm whether one shared instance would do.
    self.image = Index().a1[0]
    self.image2 = Index().a2[0]
    self.image3 = Index().a3[0]
    self.rect = self.image.get_rect()
    self.text_color = (0, 255, 0)
    self.font = pygame.font.SysFont(None, 48)
    # Re-create the window surface at the configured size and clear it.
    # (Rebinds the local name only; self.screen keeps the original surface.)
    screen = pygame.display.set_mode(
        (settings.screen_width, settings.screen_height))
    screen.fill(settings.bg_color)
def calculateScores(self, query):
    """Score and print all indexed sites for the given query terms.

    Uses lnc-style tf weighting with idf, normalized by document length
    and query length, optionally multiplied by PageRank, and prints the
    sites in descending score order.
    """
    scores = {}
    # Bug fix: the original looped `for term in query: term = Index.normalize(term)`
    # which discarded every normalized value. Normalize the terms for real.
    query = [Index.normalize(term) for term in query]
    queryLength = self.__calculateQueryLength(query)
    for term in query:
        if term in self.__index:
            postingList = self.__index[term]
            df = len(postingList) + 0.0
            wtq = math.log10(self.__N / df)  # inverse document frequency
            for site in postingList:
                if site not in scores:
                    scores[site] = 0
                tf = postingList[site]
                wtd = (1 + math.log10(tf)) * wtq
                # NOTE(review): wtd already contains wtq, so this weights
                # by wtq twice — preserved as-is; confirm intended.
                scores[site] += wtd * wtq
    # Cosine-style normalization by document and query length.
    for site in scores:
        scores[site] /= self.__documentLengths[site] * queryLength
    if self.__pageRankIsOn:
        for site in scores:
            scores[site] *= self.__pageRanks[site]
    scores = sorted(scores.items(), key=operator.itemgetter(1))
    scores.reverse()
    scores = collections.OrderedDict(scores)
    print(query)
    for site in scores:
        print(site + ': ' + str(format(scores[site], '.6F')))
def reset(self):
    """Wipe the database and message store, then rebuild the minimal
    game state: lookup indexes, the admin player and the universe."""
    # cleanup database
    self.db.clear()
    self.msgMngr.clear()
    # create indexes (login -> oid and name -> oid lookup tables)
    self.db.create(Index(), OID_I_LOGIN2OID)
    self.db.create(Index(), OID_I_NAME2OID)
    # create admin
    self.registerPlayer('admin', self.createAdmin(), OID_ADMIN)
    # create universe object, then populate it
    self.db.create(self.cmdPool[T_UNIVERSE].new(T_UNIVERSE), OID_UNIVERSE)
    self.createUniverse()
    # save all informations (flush every store to disk)
    self.db.checkpoint()
    self.msgMngr.checkpoint()
    self.clientMngr.checkpoint()
def __init__(self):
    """Assemble the tools notebook: Index, Alignment and Viewer tabs."""
    super(Tools, self).__init__()

    def boxed(widget):
        # Wrap a widget centered inside an alignment inside a vbox.
        box = gtk.VBox(False, 5)
        align = gtk.Alignment(0.5, 0.25, 0, 0)
        align.add(widget)
        box.pack_start(align)
        return box

    page_index = boxed(Index())
    page_align = boxed(Alignment())
    page_viewer = boxed(Viewer())

    self.set_tab_pos(gtk.POS_TOP)
    self.append_page(page_index)
    self.set_tab_label_text(page_index, config.LOC["tools_index"])
    self.append_page(page_align)
    self.set_tab_label_text(page_align, config.LOC["tools_align"])
    self.append_page(page_viewer)
    self.set_tab_label_text(page_viewer, config.LOC["tools_viewer"])
def initializePortfolio():
    """Load holdings ('item') and indices ('index') from the XML config
    into the global portfolio."""
    write("Initializing Portfolio... ")
    flush()

    def text_of(node, tag):
        # First text child of the first element with the given tag.
        return node.getElementsByTagName(tag)[0].firstChild.data

    items = config.getElementsByTagName('item')
    indices = config.getElementsByTagName('index')
    for item in items:
        symbol = text_of(item, 'ticker')
        averagePrice = float(text_of(item, 'averagePrice'))
        shareCount = int(text_of(item, 'shareCount'))
        portfolio.addAsset(
            Asset(symbol, item.getAttribute('name'), averagePrice, shareCount))
    for index in indices:
        portfolio.addIndex(
            Index(text_of(index, 'ticker'), index.getAttribute('name')))
    write("done")
    flush()
def __init__(self, ai_settings, screen):
    """Create a ship sprite positioned at the bottom center of the screen."""
    super(Ship, self).__init__()
    self.screen = screen
    self.ai_settings = ai_settings
    # Sprite frame sets: s1 is the ship, E1 the explosion.
    ims = Index()
    self.images = ims.s1
    self.explode = ims.E1
    self.index = 0  # current animation frame index into self.images
    self.image = self.images[self.index]
    self.rect = self.image.get_rect()
    self.screen_rect = screen.get_rect()
    self.timer = pygame.time.get_ticks()  # tick timestamp for animation
    # Start each new ship at the bottom center of the screen.
    self.rect.centerx = self.screen_rect.centerx
    self.rect.bottom = self.screen_rect.bottom
    # Death-animation bookkeeping.
    self.death_index = None
    self.last_frame = None
    self.dieing = False
    self.dead = False
    # Store a decimal value for the ship's center.
    self.center = float(self.rect.centerx)
    # Movement flags.
    self.moving_right = False
    self.moving_left = False
def index():
    """Run the Index statistics pipeline on bitstamp btcusd 12-hour candles."""
    idx = Index("bitstamp", "btcusd", "12-h")
    candle_matrix, closePrice_matrix = idx.create_matrix()
    openPrice_ar, closePrice_ar = idx.create_np_array(candle_matrix, closePrice_matrix)
    # Sanity checks (element-wise array comparisons are printed).
    print("Check openPrice_ar and candle_matrix: ",
          openPrice_ar == candle_matrix[:, 2])  #EXPECTED true
    print("Check closePrice_ar and closePrice_matrix: ",
          closePrice_ar == closePrice_matrix)  #EXPECTED true
    print("openPrice_ar shape: ", openPrice_ar.shape)
    print("closePrice_ar: ", closePrice_ar.shape)
    # Means, quartiles (q0..q4) per series, plus the correlation matrix.
    (op_mean, cp_mean,
     op_q0, op_q1, op_q2, op_q3, op_q4,
     cp_q0, cp_q1, cp_q2, cp_q3, cp_q4,
     corrMatrix) = idx.index(openPrice_ar, closePrice_ar)
    idx.print_dotplot(openPrice_ar, closePrice_ar, op_mean, cp_mean,
                      op_q0, op_q1, op_q2, op_q3, op_q4,
                      cp_q0, cp_q1, cp_q2, cp_q3, cp_q4)
    idx.scatterplot(openPrice_ar, closePrice_ar)
    idx.correlationMatrix(corrMatrix)
def initIndex(database_file):
    """Init Index or load it if previously computed.

    Loads a pickled index from Index.p when present; otherwise builds it
    from database_file and caches the pickle for next time.
    """
    sys.stdout.write("Indexing database...")
    sys.stdout.flush()
    if os.path.isfile('Index.p'):
        # Context manager closes the pickle file (the original leaked it).
        with open("Index.p", "rb") as f:
            I = pickle.load(f)
    else:
        parser = ParserCLEF08()
        textRepresenter = PorterStemmer()
        I = Index(parser, textRepresenter)
        I.indexation(database_file)
        # Drop the parser before pickling — presumably it is not
        # picklable or simply not needed afterwards; confirm.
        I.parser = None
        with open("Index.p", "wb") as f:
            pickle.dump(I, f)
    sys.stdout.write("Done!\n")
    sys.stdout.flush()
    return I
def main():
    """Determine arguments and pass arguments to compiler"""
    # NOTE(review): namedtuple() returns a *class*; the attributes below
    # are set on that class object, which then acts as a plain attribute
    # bag for Project. Works, but types.SimpleNamespace would be the
    # conventional choice — confirm before changing.
    _options = namedtuple(
        'ProjectOptions',
        'game_type input_path disable_anonymizer disable_bsarch disable_indexer'
    )
    _options.disable_anonymizer = _args.disable_anonymizer
    _options.disable_bsarch = _args.disable_bsarch
    _options.disable_indexer = _args.disable_indexer
    _options.game_type = GameType.from_str(_args.game)
    _options.input_path = _args.input
    _project = Project(_options)
    time_elapsed = TimeElapsed()
    ppj = PapyrusProject(_project)
    # the index is used to exclude unchanged scripts from compilation
    absolute_script_paths = ppj.get_script_paths(absolute_paths=True)
    file_name, file_extension = os.path.splitext(
        os.path.basename(ppj.input_path))
    project_index = Index(file_name, absolute_script_paths)
    ppj.compile_custom(project_index, time_elapsed)
    no_scripts_modified = False
    missing_scripts_found = False
    if _options.disable_indexer:
        pex_paths = ppj.get_script_paths_compiled()
    else:
        # Validation reports, per script, whether it changed or is missing.
        pex_paths, validation_states = ppj.validate_project(
            project_index, time_elapsed)
        no_scripts_modified = len(
            validation_states
        ) == 1 and ValidationState.FILE_NOT_MODIFIED in validation_states
        missing_scripts_found = ValidationState.FILE_NOT_EXIST in validation_states
    # Anonymization step (skipped if disabled or nothing was rebuilt).
    if _options.disable_anonymizer:
        log.warn('Anonymization disabled by user.')
    elif no_scripts_modified:
        log.error(
            'Cannot anonymize compiled scripts because no source scripts were modified'
        )
    else:
        ppj.anonymize_scripts(pex_paths, ppj.output_path)
    # Archive packing step (skipped when any script is missing).
    if missing_scripts_found:
        log.error('Cannot pack archive because there are missing scripts')
    else:
        ppj.pack_archive()
    time_elapsed.print()
def main(args):
    """CLI entry point: optionally (re)build the index, then look up the
    last argument as a word and print its occurrences."""
    global korpus
    help_flags = ["-h", "--help"]
    build_flags = ["-b", "--build"]
    n_flag = ["-n"]

    def has_flag(flags):
        return any(flag in args for flag in flags)

    # No arguments or explicit help request.
    if len(args) == 1 or has_flag(help_flags):
        print_usage()
        return

    building = has_flag(build_flags) or not os.path.isfile(INDEX_FILENAME)
    n = None
    word = args[-1]
    if has_flag(n_flag):
        param = args[args.index("-n") + 1]
        if not param.isdigit() or int(param) < 0:
            print("Fel parameter till -n: ", param)
            return
        n = int(param)

    with Korpus(KORPUS_PATH) as korpus:
        index = Index()
        if building:
            print("Bygger index.")
            index.build(korpus)
            print("Index färdigbyggt.")
            if word in build_flags:
                # Build-only invocation: nothing to look up.
                return
        try:
            indices = index[word]
        except Exception as e:
            print("\nOjdå, det här gick inge bra: ", e, "\n")
            print_usage()
        else:
            offset = 30 + len(word)
            print_results(indices, n, offset)
def __init__(self, server, lst): """ We create a new Database instance by giving it the server instance (usefull to get the server name from the database) and the list of the content of the tbschema formatted file. :param server: The server instance :param lst: The list of the tbschema content """ # Server instance self.server = server # Database's name for i in get_all_keyword_position_in_list('database ', lst): self.database_name = lst[i].split()[1][:-1] # Dictionnary of tables self.tables_dictionnary = {} for i in get_all_keyword_position_in_list('create table ', lst): table_name = get_table_name(lst[i]) self.tables_dictionnary[table_name] = Table(self, lst[i][:-1]) # Dictionnary of indexes self.indexes_dictionnary = {} for i in get_all_keyword_position_in_list('create index ', lst): index_name = get_index_name(lst[i], False) self.indexes_dictionnary[index_name] = Index(self, lst[i][:-1]) for i in get_all_keyword_position_in_list('create unique index ', lst): index_name = get_index_name(lst[i], True) self.indexes_dictionnary[index_name] = Index(self, lst[i][:-1]) # Dictionnary of grants self.grants_dictionnary = {} for i in get_all_keyword_position_in_list('grant ', lst): grant_key = get_grant_key(lst[i]) self.grants_dictionnary[grant_key] = Grant(self, lst[i][:-1]) # Dictionnary of revokes self.revokes_dictionnary = {} for i in get_all_keyword_position_in_list('revoke ', lst): revoke_key = get_revoke_key(lst[i]) self.revokes_dictionnary[revoke_key] = Revoke(self, lst[i][:-1])
def doc_to_vector_all(self, filename, feature_filename, all_feature):
    """use features to make doc to vector"""
    docsvector = self.count_tfidf_all(all_feature)
    # Dump the sorted feature vocabulary, one word per line.
    with open(feature_filename, 'w') as featurefile:
        for word in sorted(self.feature):
            featurefile.write('%s\n' % word)
    my_index = Index(self.name)
    # Write one sparse tf-idf vector per document and record the
    # (feature, doc) pairs in the index.
    with open(filename, 'w') as traintext:
        for doc in self.docs:
            traintext.write('%d:' % doc)
            for feature_pos, tfidf in enumerate(docsvector[doc]):
                if tfidf:
                    my_index.create_Index(feature_pos, doc)
                    traintext.write('\t<%d,%f>' % (feature_pos, tfidf))
            traintext.write('\n')
    my_index.record_Index()
def load_indexes(self):
    'adds indexes specified in the Config/study.cfg file'
    # These are Index objects, NOT directories. A list of directories
    # lives in self.MainConfig.
    self.Indexes = {}
    self.IndexList = []
    for name, rel_path in self.config.indexes.iteritems():
        full_path = os.path.join(self.path, rel_path)
        cfg_path = os.path.join(self.path, 'Config', 'Indexes', name + '.cfg')
        idx = Index(self, name, full_path, cfg_path)
        self.Indexes[name] = idx
        self.IndexList.append(idx)
def build_ii(self):
    """Build the inverted index and report the elapsed wall-clock time."""
    t0 = time.time()
    idx = Index(self._config, callback=self.print_callback)
    idx.process_blocks()
    idx.merge_blocks()
    t1 = time.time()
    print("La construcción del índice demoró %s segundos" % (t1 - t0))
def extract_text(docno, index_path):
    """Return title + body text for a document, via the external extract script.

    Dumps the document's raw HTML to a temp file, runs the extractor, and
    reads back the title (first line) and body text. Returns '' (and logs
    to stderr) on any failure. NOTE: temp files are deliberately left on
    disk (the removals below were commented out in the original).
    """
    text = ''
    try:
        index = Index(index_path)
        content = index.get_doc_content(docno)
        html_path, text_path, title_path = map(
            lambda suffix: '%s.%s' % (docno, suffix), suffixes)
        # Context managers close every handle (the original leaked them).
        with open(html_path, 'w') as f:
            f.write(content)
        subprocess.call(['python', extract_script,
                         html_path, text_path, title_path])
        with open(title_path) as title_f:  # first line is the title
            text = ' '.join(map(str.strip, title_f.readlines())) + '\n'
        with open(text_path) as text_f:
            text += ''.join(text_f.readlines())
        # os.remove(html_path)
        # os.remove(text_path)
        # os.remove(title_path)
    except Exception as e:  # modern syntax, valid on Python 2.6+ and 3
        sys.stderr.write('error at docno %s\n' % docno)
    # Bug fix: the extracted text was computed but never returned.
    return text
def reset(self):
    """Wipe the database and message store, then rebuild the minimal
    game state: login index, admin player and universe."""
    # cleanup database
    self.db.clear()
    self.msgMngr.clear()
    # create indexes (login -> oid lookup table)
    self.db.create(Index(), Const.OID_I_LOGIN2OID)
    # create admin
    self.registerPlayer(Const.ADMIN_LOGIN, self.createAdmin(), Const.OID_ADMIN)
    # create universe
    self.createUniverse()
    # save all informations (flush every store to disk)
    self.db.checkpoint()
    self.msgMngr.checkpoint()
    self.clientMngr.checkpoint()
def main(): crawler = Crawler([ "http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d01.html", "http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d06.html", "http://people.f4.htw-berlin.de/fileadmin/user_upload/Dozenten/WI-Dozenten/Classen/DAWeb/smdocs/d08.html" ]) crawler.crawl() bank = crawler.get_bank() bank.sortBank() print '\nLinkstruktur: \n' bank.printOutgoing() print '\nPageRanks:' rank = PageRank(bank, 0.95, 0.04) rank.calculate() print '\n\nIndex: \n' i = Index( bank ) i.printIndex() s = Scorer( 'tokens', i ) print '\nDokumentenlaenge: \n' s.printDocumentLength() print '\nSuchergebnisse: \n' s.printScoring() s = Scorer( 'index', i ) s.printScoring() s = Scorer( 'classification', i ) s.printScoring() s = Scorer( 'tokens classification', i ) s.printScoring()
def __init__(self, ai_settings, screen, alien):
    """Spawn a lazer directly below the given alien."""
    super(Lazer, self).__init__()
    self.screen = screen
    # Lazer animation frames.
    ims = Index()
    self.images = ims.L1
    self.index = 0  # current animation frame
    self.image = self.images[self.index]
    self.rect = self.image.get_rect()
    # Position at the alien's horizontal center, just under its bottom edge.
    self.rect.centerx = alien.rect.centerx
    self.rect.top = alien.rect.bottom
    self.timer = pygame.time.get_ticks()  # tick timestamp for animation
    # Track vertical position as a float for smooth movement.
    self.y = float(self.rect.y)
    self.speed_factor = ai_settings.lazer_speed
def __init__(self, ai_settings, screen):
    """Create a type-0 alien positioned near the top left of the screen."""
    super(Alien, self).__init__()
    self.screen = screen
    self.ai_settings = ai_settings
    self.type = 0
    # Load the alien image, and set its rect attribute.
    ims = Index()
    self.images = ims.a1
    self.timer = 0
    self.index = 0  # current animation frame
    self.image = self.images[self.index]
    # Reuse the same Index instance for the explosion frames (the
    # original built a second throwaway Index) and drop the duplicated
    # image/rect assignments.
    self.Explode = ims.E1
    self.rect = self.image.get_rect()
    self.rect.x = self.rect.width
    self.rect.y = self.rect.height
    # Track horizontal position as a float for smooth movement.
    self.x = float(self.rect.x)
    # Death-animation bookkeeping.
    self.dead = False
    self.death_index = None
    self.last_frame = None
def add_index(self, name=None, kind=None, columns=None, json=None):
    """Validate and register an Index on this table.

    Raises CSVTableError for an unknown kind or columns not belonging to
    this table. A PRIMARY index is stored under the fixed key "PRIMARY"
    so the table keeps exactly one.
    """
    if kind is not None and kind not in ("PRIMARY", "UNIQUE", "INDEX"):
        raise CSVTableError("Index must be of the following: "
                            "PRIMARY,UNIQUE,INDEX")
    if columns is not None and not set(columns) <= set(self._columns):
        raise CSVTableError("Index must use columns of the "
                            "containing database")
    idx = Index(name, self, kind, columns, json)
    key = "PRIMARY" if kind == "PRIMARY" else name
    self._indexes[key] = idx
def populate_web(self):
    """
    Populates the web and the inverted index keyword dictionary with the
    urls provided
    """
    occurrences = {}
    for url in self.urls:
        page = scrape(url)
        words = get_keywords(page.text)
        doc_id = len(self.web)  # position the page will occupy in self.web
        self.web.append(Index(doc_id, page.title, page.url, page.links_to))
        for word in words:
            occ = OccurrenceList()
            occ.append(doc_id)
            key = word.lower()
            occurrences[key] = occ
            self.keywords.add(key, occ)
    self.rank_page(occurrences, len(self.web))
def save_text(page_url):
    """Fetch a page, strip scripts/styles and blank lines, and persist
    the visible text with its URL via write_a_csv."""
    with urllib.request.urlopen(page_url) as url:
        s = url.read()
    soup = BeautifulSoup(s, "html.parser")
    for script in soup(["script", "style"]):  # kill all script and style elements
        script.extract()  # rip it out
    text = soup.get_text()  # get text
    lines = (line.strip() for line in text.splitlines()
             )  # break into lines and remove leading and trailing space
    # on each
    # NOTE(review): the classic version of this recipe splits on a
    # DOUBLE space ("  ") to break multi-headlines — this splits on a
    # single space, putting every word on its own line. Confirm intent.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" ")
              )  # break multi-headlines into a
    # line each
    text = '\n'.join(chunk for chunk in chunks if chunk)  # drop blank lines
    write_a_csv(Index(text, page_url))
def __init__(self, ai_settings, screen):
    """Create a type-1 alien near the top left of the screen."""
    Alien.__init__(self, ai_settings, screen)
    self.type = 1
    self.screen = screen
    self.ai_settings = ai_settings
    # presumably the score awarded for this alien type — confirm caller usage
    self.value = 10
    # Animation frames for this alien type.
    ims = Index()
    self.images = ims.a1
    self.index = 0  # current animation frame
    self.image = self.images[self.index]
    self.rect = self.image.get_rect()
    self.timer = pygame.time.get_ticks()
    # Start each new alien near the top left of the screen.
    self.rect.x = self.rect.width
    self.rect.y = self.rect.height
    # Store the alien's exact position.
    self.x = float(self.rect.x)
def __init__(self, host, port, server_key, authorized_keys, host_keys=None, incoming=None):
    """Set up the SSH sync server thread: listening socket, keys, file
    index, filesystem observer and the responder wiring."""
    threading.Thread.__init__(self)
    self.server_socket = self.create_socket(host, port)
    # Shared inbound message queue (caller may supply one).
    self.incoming = (incoming if incoming else queue.Queue())
    self.private_key = paramiko.RSAKey(filename=server_key)
    self.authorized_keys = authorized_keys
    self.host_keys = host_keys
    self.selector = selectors.DefaultSelector()
    self.num_conn = 0  # count of accepted connections
    self.stop_event = threading.Event()  # set to request shutdown
    self.index = Index()
    # Filesystem observer
    self.observer = Observer()
    # Server functionality
    self.channels = {}
    self.paramiko_server = SSHServer(self.authorized_keys)
    # Client functionality
    self.clients = []
    # NOTE(review): purpose unclear in the original (marked "???") —
    # presumably paths queued for the observer to watch; confirm.
    self.to_watch = queue.Queue()
    # Responder threads: react to filesystem events and peer messages.
    handler = FileSystemEventHandler(self.index, self.incoming)
    self.responder = Responder(index=self.index,
                               channels=self.channels,
                               incoming=self.incoming,
                               observer=self.observer,
                               handler=handler)
def __init__(self, ai_settings, screen):
    """Create a type-4 alien near the top left of the screen."""
    Alien.__init__(self, ai_settings, screen)
    self.type = 4
    self.screen = screen
    self.ai_settings = ai_settings
    # state toggle for this alien type — TODO confirm what `on` gates
    self.on = False
    # Load the alien image, and set its rect attribute.
    ims = Index()
    self.images = ims.a4
    self.index = 0  # current animation frame
    self.image = self.images[self.index]
    self.rect = self.image.get_rect()
    self.timer = pygame.time.get_ticks()
    # Start each new alien near the top left of the screen.
    self.rect.x = self.rect.width
    self.rect.y = self.rect.height
    # Store the alien's exact position.
    self.x = float(self.rect.x)
def _build_commands(self, index: Index) -> list:
    """Build one compiler command line per script that needs compiling.

    When the indexer is enabled, scripts whose index entry is unchanged
    are skipped. Fallout 4 projects additionally honour the Release /
    Final / Optimize attributes on the project root node.
    """
    commands = []
    unique_imports = self._get_imports_from_script_paths()
    arguments = Arguments()
    for script_path in self.get_script_paths():
        if not self.project.options.disable_indexer and index.compare(script_path):
            continue  # unchanged since last run
        arguments.clear()
        arguments.append_quoted(self.compiler_path)
        arguments.append_quoted(script_path)
        arguments.append_quoted(self.output_path, 'o')
        arguments.append_quoted(';'.join(unique_imports), 'i')
        arguments.append_quoted(self.flags_path, 'f')
        if self.project.is_fallout4:
            for attr, flag in (('Release', '-release'),
                               ('Final', '-final'),
                               ('Optimize', '-op')):
                value = self.root_node.get(attr)
                if value and value.casefold() == 'true':
                    arguments.append(flag)
        commands.append(arguments.join())
    return commands
def runner(
    PATH_DATA, RATIO_TEST_DATA, RATIO_SPECIFICITY, RATIO_CONFIDENCE,
    EXPERIMENTS, fe, setting_name
):
    """Run EXPERIMENTS rounds of the question-matching evaluation.

    Each round rebuilds the dataset and index, classifies the test set,
    accumulates tp/tn/fp/fn plus precision/recall/F1, and logs error
    cases. Results, errors and question-type stats are written as CSVs
    under RESULTS_FOLDER; returns the summary row, or None if no round
    completed.
    """
    results = []
    errors = Counter()
    qtypes = QuestionTypes()
    for e in range(1, EXPERIMENTS + 1):
        start = time.time()
        dataset = Dataset(PATH_DATA)
        dataset.load()
        invprob = InverseProbabilities(dataset)
        index = Index(invprob)
        # Training bags-of-words. NOTE(review): bow() is fed `text` here
        # but `label` in the test sets below — confirm that asymmetry.
        train = [
            # (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob) + bow(fe, text, prob_filter=invprob), label, mark)
            (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.train()
        ]
        train = train * 4  # oversample the training data 4x
        # Positive (marked) test items, capped relative to train size...
        test = [
            (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            # (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.test() if mark
        ][:int(len(train) * RATIO_TEST_DATA)]
        # ...then an equal number of negative (unmarked) items.
        test += [
            (bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            # (bow(fe, text, RATIO_SPECIFICITY, prob_filter=invprob), label, mark)
            for text, label, mark in dataset.test() if not mark
        ][:len(test)]
        for tbow, label, mark in train:
            index.update(tbow)
            index.add(label)
        tp, tn, fp, fn, prec, rec, f, duration = 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0
        marked = sum([1 for _, _, mark in test if mark])
        for tbow, label, mark in test:
            qtypes.increment(label)
            # Expected similarity mass of the true label's bag of words.
            expectation = sum([
                invprob[w]
                for w in set(bow(fe, label, RATIO_SPECIFICITY, prob_filter=invprob))
            ])
            matches = index(tbow)
            if not matches and not mark:
                tn += 1
                continue
            elif not matches and mark:
                fn += 1
                errors[('fn', '', label)] += 1
                qtypes.update('fn', None, label)
                continue
            best_match = matches[0]
            guess = best_match[2]
            sim = best_match[0]
            # Confidence: similarity relative to expectation (+0.1 to
            # avoid division by zero).
            ratio = sim / (expectation + 0.1)
            if ratio <= RATIO_CONFIDENCE:
                if not mark:
                    tn += 1
                    continue
                else:
                    fn += 1
                    errors[('fn', '', label)] += 1
                    qtypes.update('fn', None, label)
                    continue
            else:
                if mark and guess == label:
                    tp += 1
                else:
                    fp += 1
                    _qtype = '_'.join(guess.lower().split()[:2])
                    errors[('fp', guess, label)] += 1
                    qtypes.update('fp', guess, label)
        duration = time.time() - start
        if tp:
            prec = tp / float(tp + fp)
            rec = tp / float(tp + fn)
            f = f1(prec, rec)
        else:
            prec, rec, f = 0.0, 0.0, 0.0
        vector = (e, _r(tp), _r(tn), _r(fp), _r(fn), _r(prec), _r(rec), _r(f), _r(duration))
        results.append(vector)
        print '%d, tp: %d, tn: %d, fp: %d, fn: %d, all: %d, prec: %.2f, rec: %.2f, f1: %.2f, time=%.2f' % (e, tp, tn, fp, fn, sum([tp, tn, fp, fn]), prec, rec, f, duration)
        # Running averages over the prec/rec/f1 columns so far.
        precs, recs, fs = zip(*results)[-4:-1]
        print e, avg(precs), avg(recs), avg(fs)
        print '---'
    if not results:
        return None
    # Summary row: averaged counts (int columns) and rounded averages
    # (float columns), labeled 'all'.
    cols = columns(results)
    columns_int = [avg(col) for col in cols[:4]]
    columns_float = [_r(avg(col)) for col in cols[4:]]
    summary_row = [
        tuple(['all'] + columns_int + columns_float)
    ]
    create_folder(RESULTS_FOLDER)
    to_csv(
        RESULTS_KEYS + results + summary_row,
        '%ssecond_task.%s.results.csv' % (RESULTS_FOLDER, setting_name)
    )
    to_csv(
        [tuple([f] + list(key)) for key, f in errors.most_common()],
        '%ssecond_task.%s.errors.csv' % (RESULTS_FOLDER, setting_name)
    )
    to_csv(
        qtypes.dump(),
        '%ssecond_task.error.%s.question_types.csv' % (RESULTS_FOLDER, setting_name)
    )
    return summary_row[0]
def init(self):
    """Initialise via the Index base with the "info" name and flag False."""
    Index.init(self, "info", False)
class QueryManagerITC:
    """Ranked (tf-idf scored) query execution over a cached index with a db fallback."""

    def __init__(self, db, cdb):
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db
        # NOTE(review): hard-coded collection size used for idf; keep in sync with corpus
        self.numDocs = 127000

    def toArray(self, query):
        """Split the query string into words on single spaces."""
        return query.split(' ')

    def getArray(self, word):
        """Postings list for `word`: cache first, then db; [] when unknown."""
        arr = self.cacheIndex.find(word)
        if not arr:
            arr = self.db.find(word)
        if not arr:
            arr = []
        return arr

    def intersect(self, lstA, lstB):
        """Merge-intersect two sorted id lists; an empty side acts as identity.

        NOTE: appends a None sentinel to both inputs, i.e. mutates them.
        FIX: removed two unreachable `elif lstA/lstB is None` branches -- the
        loop condition guarantees both current elements are not None, and the
        ==/</> arms are exhaustive for comparable ids.
        """
        if not lstA and not lstB:
            return []
        if not lstA:
            return lstB
        if not lstB:
            return lstA
        result = []
        i = 0
        j = 0
        lstA.append(None)
        lstB.append(None)
        while lstA[i] is not None and lstB[j] is not None:
            if lstA[i] == lstB[j]:
                result.append(lstA[i])
                j += 1
                i += 1
            elif lstA[i] < lstB[j]:
                i += 1
            elif lstA[i] > lstB[j]:
                j += 1
        return result

    def execute(self, query):
        """Evaluate `query`; returns [(docId, score), ...] sorted by score descending."""
        qArr = self.toArray(query)
        colRes = {}
        result = []
        wt = {}
        val = 0
        # fetch postings for every query word
        for word in qArr:
            colRes[word] = self.getArray(word)
        qw = WordsList(1)
        qw.insertList(qArr)
        # query-side tf-idf weights (log tf * idf), then cosine-normalise
        for key in qw.list():
            tf = round(1 + math.log10(qw.getWord(key)), 2)
            # NOTE(review): ZeroDivisionError when a word has no postings
            idf = round(math.log10(self.numDocs/len(colRes[key])), 2)
            wt[key] = tf*idf
            val += tf*idf*tf*idf
        val = math.sqrt(val)
        for key in qw.list():
            wt[key] = wt[key]/val
        # documents containing *all* query words (postings entries are (docId, weight))
        arrIntersect = []
        for i in range(len(qArr)):
            tmpArr = []
            for j in range(len(colRes[qArr[i]])):
                tmpArr.append(colRes[qArr[i]][j][0])
            arrIntersect = self.intersect(tmpArr, arrIntersect)
        arrIntersect.append(None)
        # per-word postings restricted to the intersection, aligned by position
        dictIntersect = {}
        for word in qArr:
            niza = colRes[word]
            dictIntersect[word] = []
            i = 0
            j = 0
            while arrIntersect[j] is not None:
                if niza[i][0] == arrIntersect[j]:
                    dictIntersect[word].append(niza[i])
                    i += 1
                    j += 1
                elif niza[i][0] > arrIntersect[j]:
                    j += 1
                elif niza[i][0] < arrIntersect[j]:
                    i += 1
        # score = dot product of query weights and document term weights
        for i in range(len(arrIntersect) - 1):
            value = 0
            for key in dictIntersect.keys():
                value += dictIntersect[key][i][1] * wt[key]
            result.append((arrIntersect[i], round(value, 2)))
        return sorted(result, key=itemgetter(1), reverse=True)
query = [x.rstrip() for x in open(args["query"][0])] else: query = args['query'] large = args['large'] feature = args["feature"] typepost = args["typepost"] lstparam = args["postparam"] req = Requete() restotal = [] nbrestotal = 0 try: for f in index: if verb: sys.stderr.write(f+'\n') idx = Index(f,"",verb) idx.lectureBase() if verb: sys.stderr.write('fin initialisation base\n') for q in query: req.putIndex(idx) req.putRequete(q) if verb: sys.stderr.write("Requête : "+q+"\n") res = req.calculRequete() nbrestotal += len(res) if verb: sys.stderr.write('resultat : '+str(len(res))+'\n') restotal.append([idx,res,q]) if large: idx.close()
class Program:
    '''Btrfs deduplication-size analyser over lz4-compressed allocation dumps.

    Entries are cached per subvolume under .btrfs/<sha1>.lz4; each cached file
    holds newline-separated lines parsed by Entry. Python 2 code (print stmts).
    NOTE(review): several builtins are shadowed below (list, sum, hex, file, str).
    '''
    def __init__(self):
        self.index = Index()
        self.folder = ".btrfs"
        # parses `btrfs` allocation dump lines: inode, offsets, length, disk start, gen, flags, path
        self.matchAllocation = re.compile("^inode (\d+) file offset (\d+) len (\d+) disk start (\d+) offset (\d+) gen (\d+) flags (\w+) (.*)$")
        self.subvolume = Subvolume()

    def start(self):
        # build and populate the index
        self.index.create()
        self.index.scan()

    def logic(self, list):
        '''Sum the unique (by sha1) file sizes across the given cached subvolume dumps.'''
        print list
        sum = 0
        lookup = {}
        for item in list:
            hex = hashlib.sha1(item).hexdigest()
            file = self.folder + "/" + hex + ".lz4"
            print "processing: " + file
            with open(file, "r") as fd:
                compressed = fd.read()
                decompressed = lz4.decompress(compressed)
                lines = decompressed.split('\n')
                for line in lines:
                    # NOTE(review): `file` is rebound from path to Entry here
                    file = Entry(line)
                    if file.sha1 in lookup:
                        continue  # count each distinct content hash once
                    lookup[file.sha1] = True
                    sum += file.size
        print "Accumulated size is: ", self.printSize(sum)

    def remove(self, list):
        '''Report how much space removing the given subvolumes would free.'''
        omega, crap = self.buildLookup(self.subvolume.listSubvolumes())
        exclude, files = self.buildLookup(list)
        result = self.blabla(omega, exclude, files)
        print "Accumulated freed size is: ", self.printSize(result)
        #print "Files to remove: ", result

    def blabla(self, omega, exclude, files):
        '''Sum sizes of hashes whose every reference lies inside `exclude`.

        omega/exclude map sha1 -> reference count (global vs. candidate set);
        equal counts mean no reference survives outside the candidate set.
        '''
        sum = 0
        for key in exclude:
            if omega[key] == exclude[key]:
                sum += files[key].size
            pass
        return sum

    def buildLookup(self, subvolumes):
        '''Return (sha1 -> refcount, sha1 -> Entry) over the cached subvolume dumps.'''
        lookup = {}
        files = {}
        for subvolume in subvolumes:
            print "* " + subvolume
            hex = hashlib.sha1(subvolume).hexdigest()
            file = self.folder + "/" + hex + ".lz4"
            with open(file, "r") as fd:
                compressed = fd.read()
                decompressed = lz4.decompress(compressed)
                lines = decompressed.split('\n')
                for line in lines:
                    entry = Entry(line)
                    files[entry.sha1] = entry
                    if entry.sha1 in lookup:
                        lookup[entry.sha1] += 1
                    else:
                        lookup[entry.sha1] = 1
        return lookup, files

    def printSize(self, size):
        '''Human-readable byte count (b/kb/mb/gb, 3 decimal places).'''
        kb = 1024
        mb = kb*kb
        gb = kb*mb
        resolution = 3
        if (size >= gb):
            return str(round(size / float(gb), resolution)) + " gb"
        elif (size >= mb):
            return str(round(size / float(mb), resolution)) + " mb"
        elif (size >= kb):
            return str(round(size / float(kb), resolution)) + " kb"
        return str(size) + " b"

    def indexAll(self, param):
        '''Build a word index over all subvolume entry paths.

        NOTE(review): `param` is never used; the trailing search for "review"
        looks like leftover debugging.
        '''
        wordlist = Wordlist()
        lookup = Lookup()
        for key in self.subvolume.listSubvolumes():
            print key
            for file in self.subvolume.read(key):
                e = Entry(file)
                # split the lowercased path into alphanumeric runs
                # (pre-3.7 re.split skips the empty matches this pattern allows)
                for str in re.split("[^a-z0-9]*", e.path.lower()):
                    ix = wordlist.add(str)
                    lookup.add(ix, e.path)
        lookup.search(wordlist.add("review"))
def __init__(self): self.index = Index() self.folder = ".btrfs" self.matchAllocation = re.compile("^inode (\d+) file offset (\d+) len (\d+) disk start (\d+) offset (\d+) gen (\d+) flags (\w+) (.*)$") self.subvolume = Subvolume()
class QueryManager:
    """Boolean (AND/OR, parenthesised) query execution over a cached index with a db fallback."""

    def __init__(self, db, cdb):
        self.cacheIndex = Index()
        self.cacheIndex.createFromCursor(cdb.findAll())
        self.db = db

    def toArray(self, query):
        """Tokenise `query` into words and '('/')' symbols.

        FIX: dropped unused locals (j, level) and the always-true
        `i < len(query)` guards from the original character loop.
        """
        strArr = []
        tmpStr = ''
        for ch in query:
            if ch == '(':
                if tmpStr != '':
                    strArr.append(tmpStr)
                    tmpStr = ''
                strArr.append('(')
            elif ch == ')':
                if tmpStr != '':
                    strArr.append(tmpStr)
                    tmpStr = ''
                strArr.append(')')
            elif ch == ' ':
                if tmpStr != '':
                    strArr.append(tmpStr)
                    tmpStr = ''
            else:
                tmpStr += ch
        if tmpStr != '':
            strArr.append(tmpStr)
        return strArr

    def _postings(self, term):
        """Resolve an operand to a postings list (lists pass through; words hit cache, then db).

        Raises Exception when the word is unknown. (Shared by andQ/orQ; the
        original duplicated this lookup, each raise followed by dead `return None`.)
        """
        if isinstance(term, list):
            return term
        lst = self.cacheIndex.find(term)
        if not lst:
            lst = self.db.find(term)
        if not lst:
            raise Exception('Зборот "' + term + '" не постои.')
        return lst

    def andQ(self, A, B):
        """Intersect two operands via a sorted merge join.

        NOTE: appends a None sentinel to both resolved lists (mutates list inputs).
        """
        if not A or not B:
            raise Exception('Грешка.')
        lstA = self._postings(A)
        lstB = self._postings(B)
        result = []
        i = 0
        j = 0
        lstA.append(None)
        lstB.append(None)
        while lstA[i] is not None and lstB[j] is not None:
            if lstA[i] == lstB[j]:
                result.append(lstA[i])
                j += 1
                i += 1
            elif lstA[i] < lstB[j]:
                i += 1
            elif lstA[i] > lstB[j]:
                j += 1
        return result

    def orQ(self, A, B):
        """Union of two operands. NOTE(review): set() drops ordering and duplicates."""
        lstA = self._postings(A)
        lstB = self._postings(B)
        return list(set(lstA + lstB))

    def buildTree(self, query):
        """Build the OR-rooted expression tree for the token list."""
        tree = Tree.buildOrTree(query)
        return tree

    def execute(self, query):
        """Parse and evaluate `query`; single-word queries bypass the tree."""
        qArr = self.toArray(query)
        if len(qArr) == 1:
            lst = self.cacheIndex.find(qArr[0])
            if not lst:
                lst = self.db.find(qArr[0])
            return lst
        qTree = self.buildTree(qArr)
        result = self.executeQuery(qTree)
        return result

    def executeQuery(self, tree):
        """Recursively evaluate an AND/OR expression tree; prints and returns None on error."""
        lnode = tree.left
        rnode = tree.right
        if lnode and rnode:
            if tree.value == 'OR':
                try:
                    a = self.executeQuery(lnode)
                    b = self.executeQuery(rnode)
                    return self.orQ(a, b)
                except Exception as e:  # FIX: was py2-only `except Exception, e`
                    print(e.message)  # NOTE(review): .message is Python-2-only
                    return None
            elif tree.value == 'AND':
                try:
                    a = self.executeQuery(lnode)
                    b = self.executeQuery(rnode)
                    return self.andQ(a, b)
                except Exception as e:
                    print(e.message)
                    return None
else: ficlog = open(log,"w") trans = None if rules: trans = Transduction() for elt in rules: if os.path.isfile(elt): for r in open(elt): if r[0] != "#": r = r.rstrip() if verb: ficlog.write('ajout de la règle : '+r+'\n') trans.addRules(r) for file in input: if verb: ficlog.write(file+'\n') idx = Index(file,database,verb,ficlog) idx.initDB() idx.initFicDocument() idx.createBase(listfeature) if not xml: idx.initTokenizer('txt',dicts,'dico',dictc) else: idx.initTokenizer('xml','') idx.indexTexte(trans) idx.sauveBase() idx.renameFicDocument() idx.closeBase() idx.createMeta()
def run(self, clean_first): idx = Index(self.solr_service, self.site, "site_code:%s" % self.site) # if clean_first is True then wipe the index if clean_first: idx.clean() idx.commit() total = 0 for (dirpath, dirnames, filenames) in os.walk(self.input_folder): for f in filenames: file_handle = os.path.join(dirpath, f) doc = etree.parse(file_handle) idx.submit(etree.tostring(doc), file_handle) total += len(filenames) idx.commit() idx.optimize() # compare the local file count to what's in the index # log an error if not the same url ="%s/select?q=site_code:%s&rows=0&wt=json" % (self.solr_service, self.site) data = json.loads(requests.get(url).text) if total != data['response']['numFound']: log.error("Number of files in index doesn't match local count. index: %s local: %s" % (data['response']['numFound'], total))
def __init__(self, db, cdb): self.cacheIndex = Index() self.cacheIndex.createFromCursor(cdb.findAll()) self.db = db
from RequeteIndex import RequeteIndex as Requete from Post import Post from Index import Index from Dico import Dico if dic != "": dictionnaire = Dico() dictionnaire.load([dic]) else: dictionnaire = None tidx = [] for index in lstindex: if verb: print(index) idx = Index(index,"",verb) idx.lectureBase() tidx.append(idx) recom = re.compile('([A-Z]+[a-z]*) (.*)') reconf = re.compile('([a-z]+) ([0-9]+)') conf = {"range":3,"taille":100} # Classe Serveur class Server(socketserver.BaseRequestHandler): def handle_timeout(self): print("timeout") def handle(self): global tidx
def init(self): Index.init(self, "debug", False) return
import pyjd # this is dummy in pyjs. import pygwt from Index import Index if __name__ == '__main__': pyjd.setup("public/Main.html") app = Index() app.onModuleLoad() pyjd.run()
def init(self): Index.init(self, "info", False) if journal.hasProxy: self._stateFactory = self._proxyState return
def init(self): Index.init(self, "error", True) return
def init(self): Index.init(self, "warning", True) if journal.hasProxy: self._stateFactory = self._proxyState return
class Registrar(threading.Thread):
    """Threaded SSH registrar.

    Accepts incoming SSH connections, registers each negotiated channel with
    a selector, relays received (pickled) messages onto the `incoming` queue,
    and can also dial out to peer registrars via `connect`. A Responder thread
    and a filesystem Observer are started alongside the accept loop.
    """

    def __init__(self, host, port, server_key, authorized_keys, host_keys=None, incoming=None):
        threading.Thread.__init__(self)
        self.server_socket = self.create_socket(host, port)
        # an external queue may be injected; otherwise create our own
        self.incoming = (incoming if incoming else queue.Queue())
        self.private_key = paramiko.RSAKey(filename=server_key)
        self.authorized_keys = authorized_keys
        self.host_keys = host_keys
        self.selector = selectors.DefaultSelector()
        self.num_conn = 0
        self.stop_event = threading.Event()
        self.index = Index()
        # Filesystem observer
        self.observer = Observer()
        # Server functionality
        self.channels = {}
        self.paramiko_server = SSHServer(self.authorized_keys)
        # Client functionality
        self.clients = []
        self.to_watch = queue.Queue()
        # Responder thread: consumes events produced here and by the fs handler
        handler = FileSystemEventHandler(self.index, self.incoming)
        self.responder = Responder(index=self.index, channels=self.channels,
                                   incoming=self.incoming, observer=self.observer,
                                   handler=handler)

    def connect(self, host, port):
        """Open an SSH session channel to a peer and queue it for observation."""
        client = paramiko.SSHClient()
        client.load_system_host_keys(self.host_keys)
        client.set_missing_host_key_policy(paramiko.WarningPolicy())
        client.connect(host, port=port, pkey=self.private_key)
        transport = client.get_transport()
        channel = transport.open_channel(kind="session")
        self.clients.append(client)  # keep a reference so the connection stays alive
        self.to_watch.put(channel)
        return channel

    def get_incoming(self):
        """Queue carrying ("remote", channel_id, data) tuples and local events."""
        return self.incoming

    def run(self):
        self.server_socket.listen(10)
        self.selector.register(self.server_socket, selectors.EVENT_READ)
        self.responder.start()
        self.observer.start()
        print("[Server] All set, listening for SSH connections.")
        while not self.stop_event.is_set():
            events = self.selector.select(timeout=1)
            # 1 - Handle incoming client registrations, messages and disconnects
            for key, mask in events:
                channel = key.fileobj
                # 1.1 - New registration
                if channel is self.server_socket:
                    client_socket, address = self.server_socket.accept()
                    client_channel = self.negotiate_channel(client_socket)
                    if not client_channel:
                        continue
                    # Successful negotiation
                    print("[Server] Now have secure channel with " + str(address))
                    self.register_channel(client_channel)
                else:
                    channel_id = key.data["channel_id"]
                    try:
                        # FIX: was `channel.recv(1024 ^ 2)` -- `^` is XOR, so the
                        # buffer was 1026 bytes; the intent was 1024**2 (1 MiB).
                        databin = channel.recv(1024 * 1024)
                        # 1.2 - Client disconnection
                        if not databin:
                            print("[Server] Disconnection")
                            self.remove_channel(channel, channel_id)
                        # 1.3 - Client message
                        else:
                            # NOTE(review): pickle from the network -- acceptable only
                            # because peers are SSH-authenticated; do not relax that.
                            data = pickle.loads(databin)
                            self.incoming.put(("remote", channel_id, data))
                            print("[Server] Received: " + str(data))
                    except socket.error:
                        self.remove_channel(channel, channel_id)
            # 2 - Register connected server channels for observation
            while True:
                try:
                    server_channel = self.to_watch.get(block=False)
                    self.register_channel(server_channel)
                except queue.Empty:
                    break
        print("[Registrar] Stopping")

    def register_channel(self, channel):
        """Assign the next connection id to `channel` and watch it for reads."""
        self.num_conn += 1
        self.index.add_channel(self.num_conn)
        self.channels[self.num_conn] = channel
        self.selector.register(channel, selectors.EVENT_READ,
                               data={"channel_id": self.num_conn})
        print("Added channel #"+str(self.num_conn))

    def remove_channel(self, channel, channel_id):
        """Stop watching `channel` and drop it from the index."""
        self.selector.unregister(channel)
        self.index.remove_channel(channel_id)

    def negotiate_channel(self, client_socket):
        """Run the SSH server handshake; returns the channel or None after 20s."""
        handler = paramiko.Transport(client_socket)
        handler.add_server_key(self.private_key)
        handler.start_server(server=self.paramiko_server)
        return handler.accept(20)

    def create_socket(self, address, port):
        """Create and bind the listening TCP socket (listen() happens in run)."""
        server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_socket.bind((address, port))
        return server_socket

    def stop(self):
        """Signal the accept loop and the responder thread to shut down."""
        self.stop_event.set()
        self.responder.stop()
def init(self): Index.init(self, "firewall", defaultState=True, fatal=True) if journal.hasProxy: self._stateFactory = self._proxyState return
tabul = '\t' nl = '\n' if nosep: tabul = '' nl = '' # path to library sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../Corpindex") from Index import Index if verb: sys.stderr.write(index) for elt in index: idx = Index(elt,"") idx.lectureBase() op = "" div = "" if output == "xml": print("<text>",end=nl) if ident != "": print('<div id="'+ident+'">',end=nl) elif output == "json": print("[") for tok in idx.getIndexTokens(): if output == "json": print(tok.getJson(),",",end=nl) elif output == "txt": op += tok.getFeat("f")+" " if tok.getFeat("f") in [".",";","?","!"]: