def run(self):
    global threads
    docs = get_docs(CORPUS_PATH)
    mapper = Mapper()
    i = 1
    for files in self.__chunk(docs):
        doc_contents = []
        for f in files:
            with open(f, 'r') as d:
                doc_contents.append(d.read())
        doc_contents = ''.join(doc_contents)
        self.__run_batch(parse(doc_contents), i, mapper)
        i += 1
    print "Writing the mapper to file -------------------------------------"
    mapper.write(self.name)
    print "Writing DocLengths to file --------------------------------------"
    self.__writeDocLengths()
    while len(self.catalogs) != 1:
        print self.chunk_catalog()
        for pair in self.chunk_catalog():
            print pair
            if len(pair) != 2:
                break
            else:
                cat1 = self.catalogs[pair[0]]
                cat2 = self.catalogs[pair[1]]
                self.__merge(cat1, cat2, pair)
    print "Writing the catalog to file for later use -----------------------"
    Catalog.write(self.catalogs, self.name)
def get(self):
    form = MerchantForm()
    message = ''
    merchant_id = self.get_argument('nid', None)
    if not merchant_id:
        crumbs = '添加商户'  # "Add merchant"
        method = 'POST'
        print(crumbs)
    else:
        crumbs = '编辑商户'  # "Edit merchant"
        # Dependency injection
        Mapper.register(modelMerchantService, MerchantRepository())
        Mapper.register(MerchantService, modelMerchantService())
        merchant_service = MerchantService()
        # Fetch the current merchant's details
        detail = merchant_service.get_merchant_detail_by_nid(merchant_id)
        # print(detail.success)
        # print(detail.message)
        # print(detail.rows)
        # Error message, empty string by default
        message = detail.message
        county_caption = detail.rows.pop('county_caption')
        county_id = detail.rows.get('county_id')
        form.county_id.widget.choices.append({
            'value': county_id,
            'text': county_caption
        })
        method = 'PUT'  # PUT is used for update operations
        form.init_value(detail.rows)
    self.render('Merchant/MerchantEdit.html',
                form=form,
                crumbs=crumbs,
                method=method,
                summary=message,
                nid=merchant_id)
def main(args):
    # data = make_circles(100, shuffle=True)[0]
    # data = ReadPlyFile('data/bun000.ply').get_data()
    data = ReadPlyFile('data/drill_1.6mm_0_cyb.ply').get_data()
    # data = ReadPlyFile('data/dragonStandRight_0.ply').get_data()
    # print(len(data))

    def filter_norm(point):
        return np.linalg.norm(point - np.array(data).min(0))

    def filter_x(point):
        return point[0]

    mapper = Mapper(data, resolution=0.2, overlap=0.4, cluster_alg='kmeans',
                    max_clusters=5, filter=filter_norm)
    graph = mapper.run()
    print(graph)
    viz = Visualization(graph)
    viz.draw(36, 3000)
    persistence = Persistence(graph)
    persistence.draw()
def post(self):
    # Dependency injection
    Mapper.register(ModelProductService, ProductRepository())
    Mapper.register(ProductService, ModelProductService())
    product_service = ProductService()
    jd_buy_cookie = self.get_cookie('jd_buy_list')
    buy_str = escape.url_unescape(jd_buy_cookie)
    buy_list = json.loads(buy_str)
    data_list = []
    for item in buy_list:
        print(item)
        temp = {}
        product_title = item['product_title']
        product_img = item['product_img']
        count = item['count']
        price_id = item['price_id']
        temp["count"] = count
        temp["price_id"] = price_id
        result = product_service.fetch_price_detail(int(price_id))
        data_list.append(temp)
    print(data_list)
    print(result)
    self.write(json.dumps(data_list))
def main(args=None):
    if args is None:
        args = sys.argv
    robotName = 'rys'
    nodeName = 'remote'
    app = QtWidgets.QApplication(args)

    global gamepadBridge
    global rosBridge
    global mapper
    gamepadBridge = GamepadBridge(app)
    rosBridge = QTRosBridge(app, robotName, nodeName)
    mapper = Mapper(app, rosBridge, cellSize=0.04, mapSize=2.56)
    mainWindow = RysRemoteMainWindow(None, gamepadBridge, rosBridge, mapper)

    app.aboutToQuit.connect(quitEventHandler)
    gamepadBridge.start()
    rosBridge.start()
    mapper.start()
    mainWindow.show()
    sys.exit(app.exec_())
def __init__(self, worker_id, host, port):
    self.host = host
    self.port = port
    self.worker_id = worker_id
    self.mapper = Mapper()
    self.reducer = Reducer()
    self.logger = logging.getLogger('worker ' + str(self.worker_id))
    self.logger.debug('Worker connecting to %s:%d', self.host, self.port)
def export(self, basedir):
    report = codecs.open(os.path.join(basedir, "avlog.json"), "w", errors='ignore')
    json.dump(self.data, report, sort_keys=False, indent=4)
    report.close()
    m = Mapper(basedir)
    m.add_data(self.data)
    m.write_svg()
def main():
    mapper = Mapper()
    reducer = Reducer()
    arrayMap = mapper.map("Esta à Frase, fRASe tomas frase esta unica única")
    arrayMap2 = mapper.map("única Este cena\n. frase única")
    arrayMap3 = mapper.map("à frase à")
    mapFinal = reducer.reduce([arrayMap3])
    print("Reduced: ", mapFinal)
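# A minimal, self-contained sketch of the word-count behaviour the snippet
# above appears to exercise. tiny_map and tiny_reduce are illustrative
# stand-ins, not the project's Mapper/Reducer; whether the real classes also
# strip punctuation and fold accents (e.g. "unica" vs "única") is left open.
def tiny_map(sentence):
    # emit a (lower-cased word, 1) pair per token
    return [(word.lower(), 1) for word in sentence.split()]

def tiny_reduce(array_maps):
    # sum the pairs from one or more map outputs
    counts = {}
    for pairs in array_maps:
        for word, one in pairs:
            counts[word] = counts.get(word, 0) + one
    return counts

# e.g. tiny_reduce([tiny_map("à frase à")]) == {'à': 2, 'frase': 1}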
def makeClassifier():
    jiraGitMapper = Mapper()
    # Create a mapping of Jira tickets to git commits
    ticketsToCommits = jiraGitMapper.mapCommitsToTickets(gitData, jiraData, "SONAR-")
    # Take the git commits and associate them with Java class names
    ticketsAndCommitsToClasses = jiraGitMapper.mapCommitsToClasses(ticketsToCommits)
    ticketsToClasses = ticketsAndCommitsToClasses[0]
    commitsToClasses = ticketsAndCommitsToClasses[1]
    classifier = Classifier()
    results = classifier.classifyClasses(ticketsToClasses)
    # results = classifier.randomClassifyClasses(ticketsToClasses)
    print("Precision: %.3f, Recall: %.3f, Accuracy: %.3f, f1 score: %.3f, hamming loss: %.3f"
          % (results[0], results[1], results[2], results[3], results[4]))
def yardage_type(yard_tok):
    # Look left, right, and then at the parent of the word
    mapper = Mapper()
    print(yard_tok)
    for left in list(yard_tok.lefts):
        print(left)
        if left.text in mapper.passing:
            return StatType.PASS_YDS
        elif left.text in mapper.rushing:
            return StatType.RUSH_YDS
    for right in list(yard_tok.rights):
        print(right)
        if right.text in mapper.passing:
            return StatType.PASS_YDS
        elif right.text in mapper.rushing:
            return StatType.RUSH_YDS
    parent = yard_tok.head
    if parent.text in mapper.passing:
        return StatType.PASS_YDS
    elif parent.text in mapper.rushing:
        return StatType.RUSH_YDS
    return StatType.TOT_YDS
def execute_page_rank(url_set, graph_index, graph_type, out_file, reverse_map=False):
    es_util = ElasticSearchUtility()
    web_graph = es_util.get_web_graph(graph_index, graph_type)
    page_rank_dict = page_rank(url_set, web_graph)
    # clear memory
    web_graph = None
    sorted_tuples = sorted(page_rank_dict.items(), key=lambda x: x[1], reverse=True)[:OUTPUT_SIZE]
    # clear memory
    page_rank_dict = None
    if reverse_map:
        print 'getting reverse url map...'
        url_reverse_map = Mapper.fromFile(MAPPING_FILE_NAME, reverse=True).mappings
        decoded_tuples = []
        for t in sorted_tuples:
            decoded_url = url_reverse_map[t[0]]  # decode url
            score = t[1]  # score as it is
            decoded_tuples.append((decoded_url, score))
    else:
        decoded_tuples = sorted_tuples
    print 'writing pagerank results...'
    write(out_file, decoded_tuples)
def execute_hits(crawl_index_name, crawl_index_type, graph_index, graph_type):
    es_util = ElasticSearchUtility()
    web_graph = es_util.get_web_graph(graph_index, graph_type)
    link_map = Mapper.fromFile(MAPPING_FILE_NAME).mappings
    hubs, authorities = hits(crawl_index_name, crawl_index_type, web_graph,
                             QUERY_STRING, link_map)
    # clear memory
    web_graph = None
    link_map = None
    print 'sorting hubs...'
    sorted_hubs = sorted(hubs.items(), key=lambda x: x[1], reverse=True)[:OUTPUT_SIZE]
    # clear memory
    hubs = None
    print 'sorting authorities...'
    sorted_auth = sorted(authorities.items(), key=lambda x: x[1], reverse=True)[:OUTPUT_SIZE]
    # clear memory
    authorities = None
    print 'getting reverse url map...'
    url_reverse_map = Mapper.fromFile(MAPPING_FILE_NAME, reverse=True).mappings
    sorted_hubs_decoded = []
    for t in sorted_hubs:
        decoded_url = url_reverse_map[t[0]]  # decode url
        score = t[1]  # score as it is
        sorted_hubs_decoded.append((decoded_url, score))
    sorted_auth_decoded = []
    for t in sorted_auth:
        decoded_url = url_reverse_map[t[0]]  # decode url
        score = t[1]  # score as it is
        sorted_auth_decoded.append((decoded_url, score))
    print 'writing hubs...'
    write(HUBS_PATH, sorted_hubs_decoded)
    print 'writing authorities...'
    write(AUTH_PATH, sorted_auth_decoded)
def put(self):
    """ Update a merchant. """
    # Dependency injection
    Mapper.register(modelMerchantService, MerchantRepository())
    Mapper.register(MerchantService, modelMerchantService())
    merchant_service = MerchantService()
    message = ''
    form = MerchantForm()
    merchant_id = self.get_argument('nid', None)
    try:
        is_valid = form.valid(self)
        if is_valid:
            if form._value_dict['county_id'] == '0':
                form._error_dict['county_id'] = '请选择县(区)ID'  # "Please select a county/district ID"
            else:
                nid = form._value_dict.pop('nid')
                del form._value_dict['city_id']
                del form._value_dict['province_id']
                merchant_service.update_merchant(nid, **form._value_dict)
                self.redirect('MerchantManager.html')
                return
        else:
            form.init_value(form._value_dict)
    except Exception as e:
        message = str(e)
    detail = merchant_service.get_merchant_detail_by_nid(merchant_id)
    county_caption = detail.rows.pop('county_caption')
    county_id = detail.rows.get('county_id')
    form.county_id.widget.choices.append({
        'value': county_id,
        'text': county_caption
    })
    self.render('Merchant/MerchantEdit.html',
                form=form,
                crumbs='编辑商户',
                method='PUT',
                summary=message,
                nid=merchant_id)
def get(self, *args, **kwargs):
    product_id = kwargs.get('product_id', None)
    price_id = kwargs.get('price_id', None)
    if not product_id or not price_id:
        self.redirect('/Index.html')
        return
    # Dependency injection
    Mapper.register(ModelProductService, ProductRepository())
    Mapper.register(ProductService, ModelProductService())
    product_service = ProductService()
    # Fetch the product info, merchant info, price list, and images by product ID
    # p = ProductService(ProductRepository())
    product_dict = product_service.fetch_product_detail(product_id, price_id)
    self.render('Home/Detail.html', product_dict=product_dict.rows)
def get(self):
    """
    Fetch product info according to the parameters
    (type: self-operated (merchant ID); type: all products).
    After a back-office user logs in successfully, the session stores
    the self-operated merchant ID. Self-operated ID = 1
    """
    # Hard-coded: JD's self-operated merchant ID is 14
    merchant_id = 14
    page = int(self.get_argument('page', 1))
    rows = int(self.get_argument('rows', 10))
    start = (page - 1) * rows
    # Dependency injection
    Mapper.register(modelProductService, ProductRepository())
    Mapper.register(ProductService, modelProductService())
    product_service = ProductService()
    response = product_service.get_page_by_merchant_id(merchant_id, start, rows)
class TableGenerator():
    def __init__(self):
        self.mapper = Mapper()

    def generate_table(self, documents_cursor, concept_filter_list):
        columns = []
        documents = []
        for doc in documents_cursor.rewind():
            processed_document = {}
            for key, value in doc.items():
                snomed_ids_of_key = self.mapper.map_to_concept(
                    self.mapper.prepare_fieldname(key))
                for concept_filter in concept_filter_list:
                    if concept_filter in snomed_ids_of_key:
                        columns.append(concept_filter)
                        processed_document[concept_filter] = value
            documents.append(processed_document)
        return list(set(columns)), documents
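# A hypothetical usage sketch for TableGenerator.generate_table. StubCursor
# and StubMapper are stand-ins (not the real pymongo cursor or SNOMED mapper)
# so the contract is visible in isolation: unique concept columns plus one
# processed row per document. Constructing TableGenerator() still assumes the
# project's Mapper is importable; the field name and concept id below are
# illustrative only.
class StubCursor(object):
    def __init__(self, docs):
        self._docs = docs

    def rewind(self):
        return list(self._docs)

class StubMapper(object):
    def prepare_fieldname(self, key):
        return key

    def map_to_concept(self, fieldname):
        return {'heart_rate': ['364075005']}.get(fieldname, [])

generator = TableGenerator()
generator.mapper = StubMapper()  # swap in the stub for this sketch
columns, rows = generator.generate_table(StubCursor([{'heart_rate': 72}]),
                                         ['364075005'])
# columns == ['364075005'], rows == [{'364075005': 72}]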
def delete(self):
    """ Delete a merchant. """
    # Dependency injection
    Mapper.register(modelMerchantService, MerchantRepository())
    Mapper.register(MerchantService, modelMerchantService())
    merchant_service = MerchantService()
    ret = {'success': False, 'message': ''}
    merchant_id = self.get_argument('nid', None)
    # print(merchant_id)
    if not merchant_id:
        ret['message'] = '请选择要删除的行'  # "Please select a row to delete"
    else:
        rows = merchant_service.delete_merchant(int(merchant_id))
        ret = rows.__dict__
    print(ret)
    self.write(json.dumps(ret))
def post(self):
    """ Create a merchant. """
    method = self.get_argument('_method', None)
    if method == 'PUT':
        return self.put()  # put() takes no extra argument; self.put(self) would raise TypeError
    message = ''
    form = MerchantForm()
    try:
        is_valid = form.valid(self)
        if is_valid:
            if form._value_dict['county_id'] == '0':
                form._error_dict['county_id'] = '请选择县(区)ID'  # "Please select a county/district ID"
            else:
                del form._value_dict['nid']
                del form._value_dict['city_id']
                del form._value_dict['province_id']
                print(form._value_dict)
                # Dependency injection
                Mapper.register(modelMerchantService, MerchantRepository())
                Mapper.register(MerchantService, modelMerchantService())
                merchant_service = MerchantService()
                merchant_service.create_merchant(**form._value_dict)
                self.redirect('/MerchantManager.html')
                return
        else:
            form.init_value(form._value_dict)
    except IntegrityError as e:
        message = '商户名称或登陆用户必须唯一'  # "Merchant name or login user must be unique"
    except Exception as e:
        message = str(e)
    self.render('Merchant/MerchantEdit.html',
                form=form,
                crumbs='添加商户',
                method='POST',
                summary=message,
                nid=None)
def create_links_map(self, links_index, links_type):
    mapper = Mapper()
    # query scroll
    scroll = self.es.search(index=links_index,
                            doc_type=links_type,
                            scroll='10m',
                            size=10000,
                            body={"query": {"match_all": {}}})
    scroll_size = scroll['hits']['total']
    size = 0
    # retrieve results
    while scroll_size > 0:
        # scrolled data is in scroll['hits']['hits']
        hits_list = scroll['hits']['hits']
        for hit in hits_list:
            src_link = hit['_source']['SRC_LINK']
            dst_link = hit['_source']['DST_LINK']
            mapper.map(src_link)
            mapper.map(dst_link)
        # update scroll size
        scroll_size = len(scroll['hits']['hits'])
        size += scroll_size
        print "scrolled %s \n" % size
        # prepare next scroll
        scroll_id = scroll['_scroll_id']
        # perform next scroll
        scroll = self.es.scroll(scroll_id=scroll_id, scroll='10m')
    mapper.write(MAPPINGS_PATH)
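# A minimal sketch of the id-mapping contract the link-graph snippets rely
# on: map() assigns a compact integer id to each distinct URL, mappings
# exposes the table, and a reverse map decodes ids back to URLs.
# TinyLinkMapper is illustrative, not the project's actual Mapper;
# persistence via write() / fromFile() is omitted.
class TinyLinkMapper(object):
    def __init__(self):
        self.mappings = {}

    def map(self, link):
        # assign the next integer id to an unseen link
        if link not in self.mappings:
            self.mappings[link] = len(self.mappings)
        return self.mappings[link]

    def reverse(self):
        # inverted table, for decoding ids back to URLs
        return {v: k for k, v in self.mappings.items()}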
def get(self):
    # Dependency injection
    Mapper.register(modelMerchantService, MerchantRepository())
    Mapper.register(MerchantService, modelMerchantService())
    merchant_service = MerchantService()
    ret = {'success': False, 'message': ""}
    # Sent from MerchantManager.html
    req_type = self.get_argument('type', None)
    if req_type == 'pagination':
        page = int(self.get_argument('page', 1))
        rows = int(self.get_argument('rows', 10))
        start = (page - 1) * rows
        rows_list = merchant_service.get_merchant_by_page(start, rows)
        rows_count = merchant_service.get_merchant_count()
        ret['success'] = all([rows_list.success, rows_count.success])
        ret['message'] = rows_list.message + rows_count.message
        ret.update({'total': rows_count.rows, 'rows': rows_list.rows})
        # print(ret)
        self.write(json.dumps(ret))
        return
    self.render('Merchant/MerchantManager.html')
def create_encoded_graph():
    es_util = ElasticSearchUtility()
    # mapper = Mapper()
    es_util.create_index(WEB_GRAPH_INDEX, CREATE_WEB_GRAPH)
    # es_util.create_index(ENCODED_LINKS_INDEX, CREATE_ENCODED_LINKS)
    # es_util.create_links_map(LINKS_INDEX, LINKS_TYPE)
    # mapper = None
    link_map = Mapper.fromFile(MAPPING_FILE_NAME).mappings
    # es_util.encode_crawled_links(LINKS_INDEX, LINKS_TYPE, link_map, ENCODED_LINKS_INDEX, ENCODED_LINKS_TYPE)
    es_util.encoded_links_to_web_graph(LINKS_INDEX, LINKS_TYPE, WEB_GRAPH_INDEX,
                                       WEB_GRAPH_TYPE, link_map)
def start():
    mapper = Mapper()
    reducer = Reducer()
    # Count the passengers on each flight and write the totals to a CSV file
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(PassengersOnEachFlightUserCode.mapPassengerToFlight)
    reducer.setRedFunction(PassengersOnEachFlightUserCode.redCountPassengers)
    reducer.setOutputFile("./results/NumberOfPassengersOnEachFlight.csv")
    noPassengers = mapper.run()
    reducer.run(noPassengers, 'w')
    print(":: Task 2 complete")
def post(self):
    post_data = self.get_argument('post_data', None)
    post_data_dict = json.loads(post_data)
    if self.session['CheckCode'].upper() == post_data_dict.get('checkcode').upper():
        user = post_data_dict.get('username', None)
        if re.match(pattern, user):
            # The "username" field actually holds an email address
            email = user
            user = None
        else:
            email = None
        pwd = post_data_dict.get('password', None)
        # Service layer
        user_request = UserRequest(username=user, email=email, password=pwd)
        # Dependency injection wires up the matching Model (business-logic) layer
        Mapper.register(ModelUserService, UserRepository())
        Mapper.register(UserService, ModelUserService())
        user_service = UserService()
        response = user_service.check_login(user_request)
        if response.status:
            self.session['is_login'] = True
        response_str = json.dumps(response.status, cls=JsonCustomEncoder)
        self.write(response_str)
def start():
    mapper = Mapper()
    reducer = Reducer()
    # Produce a list of flights with their duration and all of their
    # passengers, written to a CSV file
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(FlightInformationUserCode.mapReOrder)
    reducer.setRedFunction(FlightInformationUserCode.redCalcFlightInfo)
    reducer.setOutputFile("./results/ListOfFlightsWithDurationAndAllPassengers.csv")
    noPassengers = mapper.run()
    reducer.run(noPassengers, 'w')
    print(":: Task 3 complete")
def get_stat_type(doc, players):
    # The players are already accounted for, but they are also nouns, so
    # filter them out of the noun chunks
    nouns = list(doc.noun_chunks)
    noun_roots = [noun.root for noun in nouns]
    players_roots = [player.root for player in players]
    filtered = [noun for noun in nouns if noun not in players]
    mapper = Mapper()
    print(list(nouns))
    for token in list(filtered):
        noun_root = token.root
        print(list(filtered))
        if noun_root.text in mapper.yards:
            return yardage_type(noun_root)
    return StatType.EMPTY
def get(self, *args, **kwargs):
    # Dependency injection
    Mapper.register(ModelCategoryService, CategoryRepository())
    Mapper.register(CategoryService, ModelCategoryService())
    category_service = CategoryService()
    # Fetch the top-level categories,
    # loop over them to fetch the second-level categories,
    # then loop over those to fetch the third-level categories
    # c = CategoryService(CategoryRepository())
    category_list = category_service.get_all_category()
    # print(category_list)
    # Dependency injection
    Mapper.register(ModelProductService, ProductRepository())
    Mapper.register(ProductService, ModelProductService())
    product_service = ProductService()
    # p = ProductService(ProductRepository())
    product_dict = product_service.fetch_index_product()
    # print(product_dict)
    self.render('Home/Index.html',
                category_list=category_list,
                product_dict=product_dict.rows)
def get_all_ids(self, index_name, index_type):
    """
    Returns all ids of the given index
    :param index_name: Name of the index
    :param index_type: Type of the index
    :return: List of ids of the entire index
    """
    # query scroll
    id_list = []
    link_map = Mapper.fromFile(MAPPING_FILE_NAME).mappings
    scroll = self.es.search(index=index_name,
                            doc_type=index_type,
                            scroll='10m',
                            size=10000,
                            fields=['_id'],
                            body={"query": {"match_all": {}}})
    scroll_size = scroll['hits']['total']
    size = 0
    # retrieve results
    while scroll_size > 0:
        # scrolled data is in scroll['hits']['hits']
        hits_list = scroll['hits']['hits']
        for hit in hits_list:
            url = hit['_id']
            encoded_id = link_map[iri_to_uri(url)]
            id_list.append(encoded_id)
        # update scroll size
        scroll_size = len(scroll['hits']['hits'])
        size += scroll_size
        print "scrolled %s \n" % size
        # prepare next scroll
        scroll_id = scroll['_scroll_id']
        # perform next scroll
        scroll = self.es.scroll(scroll_id=scroll_id, scroll='10m')
    return id_list
def start():
    mapper = Mapper()
    reducer = Reducer()
    # First pass: strip duplicate records from the raw passenger data
    mapper.setInputFile("./inputFiles/AComp_Passenger_data.csv")
    mapper.setMapFunction(StripErrorsUserCode.mapDuplicates)
    reducer.setRedFunction(StripErrorsUserCode.redWrite)
    reducer.setOutputFile("./inputFiles/PassengerData.csv")
    pairs = mapper.run()
    reducer.run(pairs, 'w')
    # Second pass: correct spelling errors in the de-duplicated file
    mapper.setInputFile("./inputFiles/PassengerData.csv")
    mapper.setMapFunction(StripErrorsUserCode.mapSpelling)
    pairs = mapper.run()
    reducer.run(pairs, 'w')
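# The start() tasks in this file only show the wiring (setInputFile,
# setMapFunction, setRedFunction, run), so the user-code shape is not
# visible. A hedged sketch of what a map/reduce pair for the Task 2
# passenger count might look like; the class name, the signatures, and the
# CSV field positions are assumptions, not the framework's documented API.
class CountPassengersUserCode(object):
    @staticmethod
    def mapPassengerToFlight(record):
        # emit a (flight_id, 1) pair for each passenger record
        passenger_id, flight_id = record.split(',')[:2]
        return (flight_id, 1)

    @staticmethod
    def redCountPassengers(pairs):
        # sum the 1s per flight id
        counts = {}
        for flight_id, one in pairs:
            counts[flight_id] = counts.get(flight_id, 0) + one
        return counts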
def plot_stations_map(self, detail_level='M'):
    from Mapper import Mapper
    mp = Mapper()
    mp.show_stations(self.stations, detail_level=detail_level,
                     show_centroids_only=False)
def __bootstrap(self):
    self.mapper = Mapper.fromFile(self.index)
    self.catalog = Catalog.fromFile(self.index)
    self.reader = InvertedIndexReader(
        self.catalog, '/Users/admin/Documents/CS6200/HW2/Index/Indices/')
from Database import Database
from Preprocessor import Preprocessor
from Mapper import Mapper
from collections import defaultdict
from Extractor import Extract  # needed below for Extract(FILE_PATH)

dictionary1 = defaultdict(list)
dictionary2 = defaultdict(list)
database_object = Database()
data_object = Preprocessor()
FILE_PATH = ['sample1.rtf', 'sample2.rtf', 'sample3.rtf',
             'sample4.rtf', 'sample5.rtf', 'sample6.rtf']
file_object = Extract(FILE_PATH)
mapper = Mapper()
value = []
var = ['unspecified', 'other', 'others']
query = "SELECT * FROM ICD_data WHERE code LIKE %s AND diagnosis LIKE %s"
query1 = "UPDATE ICD_data SET diagnosis = %s WHERE code = %s"
query2 = 'SELECT code,diagnosis FROM ICD_data'


def update_data():
    (icd, diagnosis) = data_object.get_full_data()
    for i in range(len(icd)):
        value.append((' '.join(diagnosis[i]), icd[i].upper()))
    print(database_object.insert_many(query1, value))

'''
def fetch_data():
        { 'id': (outlinks, inlinks) }
        '''
        transformed_hits = {}
        for hit in hits:
            ID = hit['_id']
            outlinks = hit['_source']['outlinks']
            inlinks = hit['_source']['inlinks']
            transformed_hits[ID] = (outlinks, inlinks)
        return transformed_hits

    def __merge_dicts(self, dict1, dict2):
        merged = dict1.copy()
        merged.update(dict2)
        return merged


if __name__ == '__main__':
    client = ESClient()
    mapper = Mapper.fromFile('link_map')
    pages = client.getCrawledPages(mapper)
    print "Writing to file"
    with open('/Users/admin/Documents/CS6200/HW4/pages', 'w') as f:
        for page in pages:
            f.write(page + '\n')
    print "DONE!"