def main():
    lib = Library()

    user1 = Reader("Djinn", "Co", 25)
    user2 = Reader("Artem", "KFC", 48)
    user3 = Reader("Suga", "Rogue", 35)

    book1 = Book("Ubik", "Philip K. Dick", 1969)
    book2 = Book("The Last Wish", "Andrzej Sapkowski", 2007)
    book3 = Book("It", "Stephen King", 1986)

    print(' ')
    lib.add_book_to_lib(book1)
    lib.add_book_to_lib(book2)
    lib.add_book_to_lib(book3)
    print(' ')
    lib.add_user_to_readerslist(user1)
    lib.add_user_to_readerslist(user2)
    lib.add_user_to_readerslist(user3)
    print(' ')
    time.sleep(3)
    lib.give_book_to_user(user1, book1)
    lib.give_book_to_user(user3, book1)
    time.sleep(3)
    print(' ')
    lib.show_books(available=True)
    print(' ')
    lib.show_books(available=False)
    print(' ')
    lib.sort_books('year')
def create_data2(self, year, tourist_file=TOURIST_FILE, weather_file=WEATHER_FILE, weather_file2=WEATHER_FILE2):
    """
    Given a year, builds the dataset from the data in tourist_file and weather_file.
    :param year: data year, numeric
    :param tourist_file: visitor-count data file, default path TOURIST_FILE
    :param weather_file: weather data file, default path WEATHER_FILE
    :param weather_file2: detailed weather data file, default path WEATHER_FILE2
    :return:
    """
    tourist_file = tourist_file.replace('YYYY', str(year))
    weather_file = weather_file.replace('YYYY', str(year))
    weather_out = self.WEATHER_OUT.replace('YYYY', str(year))
    holiday_out = self.HOLIDAY_OUT.replace('YYYY', str(year))
    file_out = self.FILE_OUT.replace('YYYY', str(year))

    if not os.path.isfile(weather_out):
        Reader().get_weather(weather_file, weather_out)
    weather_dict = Reader().read_weather(weather_out)
    weather_dict2 = Reader().read_weather2(weather_file2)

    # Fetch the year's holiday data from the API and cache it in holiday_out
    # to avoid repeated HTTP requests
    if not os.path.isfile(holiday_out):
        IfHoliday().get_year_holiday(year, holiday_out)
    holiday_dict = Reader().read_holiday(holiday_out)

    # Using a dict may break the date ordering, which can hurt feature-engineering accuracy
    tourist_dict = Reader().read_tourist(tourist_file)

    with codecs.open(file_out, 'a+', 'utf-8') as fout:
        fout.write(
            "scenic_area,date,tourist,holiday,weather,min_temperature,max_temperature,mean_temperature,"
            + "humidity,wind_speed,precipitation,cloudage" + "\n")
        for date, tourist in tourist_dict.items():
            name = "上饶灵山景区"  # scenic area name
            weather = weather_dict[date][0]
            # max_temperature = weather_dict[date][1]
            # min_temperature = weather_dict[date][2]
            if date in weather_dict2.keys():
                weather_info = str(weather_dict2[date]).strip('[]')
            else:
                weather_info = str("None," * 7).strip('[],')

            # The block below could be moved into IfHoliday()
            holiday = 0
            day = datetime.strptime(date.replace('-', ''), "%Y%m%d").date()
            if date in holiday_dict.keys():
                if holiday_dict[date]:
                    holiday = 2
                elif day.weekday() in [5, 6]:
                    holiday = 1

            text = (name + "," + date + "," + str(tourist) + "," + str(holiday) +
                    "," + weather + "," + weather_info + "\n")
            fout.write(text)
def testMakeobjectsfromxml(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    for thread in threads:
        for document in thread._documents:
            print(document.text)
def calculate(
    self,
    source_code: str,
    priority_code: str,
    buy_price_code: str,
    sell_price_code: str,
    market: Market,
    start_date: date,
    end_date: date,
):
    logging.debug(
        f"{TAG} Start calculate start_date = {start_date}, end_date = {end_date}"
    )
    compile_result = self.compiler.compile(source_code, priority_code,
                                           buy_price_code, sell_price_code)
    executor = Executor("")
    reader = Reader(executor)

    field_list = list(compile_result.fields)
    required_field_list = [Field.open, Field.close, Field.is_active]
    for field in required_field_list:
        if field not in field_list:
            field_list.append(field)

    rows = reader.get_simulating_data(
        Universe.total, field_list, start_date, end_date,
        [Field.open, Field.close, Field.ticker_id, Field.low, Field.high])
    if len(rows) == 0:
        return None

    now = time.time()
    total_df = pd.DataFrame(rows)
    total_job_count = len(compile_result.item_list)
    completed_job_count = 0
    for item in compile_result.item_list:
        # TODO: keep checking that the is_rank value is set correctly, and verify it
        # Rank functions operate per date
        if item.is_rank:
            y = total_df.groupby("date", as_index=False).apply(
                lambda df: self._calculate(item.code, df))
        else:
            # Functions other than rank functions operate per ticker
            y = total_df.groupby("ticker_id", as_index=False).apply(
                lambda df: self._calculate(item.code, df))
        different_columns = total_df.columns.symmetric_difference(y.columns)
        for column in different_columns:
            total_df.insert(0, column, y[column])
        completed_job_count += 1
        # Progress is the fraction of items processed so far
        self.progress = Decimal(completed_job_count) / Decimal(total_job_count)

    logging.debug("{} execute time : {:0.3f}s".format(TAG, time.time() - now))
    return total_df
def test_final(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    collection_tokenized = tokenizer.tokenize()
    coll_model = CollectionModel(collection_tokenized)
    doc_model = DocumentModel(collection_tokenized)
    ret_model = RetrievalModel(collection_tokenized, doc_model, coll_model)
    ret_model.calculate_relevance()
def testtokenizerfromfile(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    threads_tokenized = tokenizer.tokenize()
    for thread in threads_tokenized:
        print(thread._query._body)
        for document in thread._documents:
            print(document._text)
def testdocumentmodel(self):
    reader = Reader()
    soup = reader.readfile()
    threads = reader.makeobjectsfromxml(soup)
    tokenizer = Tokenizer(threads)
    threads_tokenized = tokenizer.tokenize()
    collection_model = CollectionModel(threads_tokenized)
    freq_collection = collection_model.calculate_frequency()
    print(freq_collection)
    document_model = DocumentModel(threads_tokenized)
    freq_document = document_model.calculate_frequency()
    print(freq_document)
def train(): print("training...") reader = Reader() with tf.Graph().as_default(): global_step = tf.train.get_or_create_global_step() with tf.device('/cpu:0'): images, boxes = reader.distorted_inputs(FLAGS.train_dir, FLAGS.batch_size) logits = None #TODO loss = None #TODO train_op = None #TODO class _LoggerHook(tf.train.SessionRunHook): def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time #loss_value = run_values.results loss_value = .1 examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step: %d, loss = %.2f (%.1f examples/sec, %.3f sec/batch)' ) print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_state_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)
def main():
    args = parse_args()
    create_dirs(args.model_name, [args.checkpoint_dir, args.log_dir])
    sess = tf.Session()
    logger = Logger(args, sess)
    model = Model(args, logger)
    reader = Reader(args, sess, logger)
    if args.action == 'train':
        trainer = Trainer(sess, model, reader, args, logger)
        trainer.train()
    else:
        predictor = Estimator(sess, model, reader, args, logger)
        predictor.predict()
def run_pipeline(self):
    customer_order = CustomerOrder()
    read = Reader(self._file)
    line_order = ValidateInput()
    self._line_item = []
    for order in read.result():
        line_order.set(order)
        if line_order.valid_types:
            customer_order.order_type = line_order.value[0]
            customer_order.cash = line_order.value[1]
            customer_order.price = line_order.value[2]
            customer_order.bonus_ratio = line_order.value[3]
            self._line_item.append(customer_order.final_order)
def ingest_data(self, filepath, dataset): """ load data from a file to a dataframe and store it on the db Parameters ---------- filepath : String file path of the .csv file for the dataset dataset: DataSet The DataSet object that holds the Session ID for HoloClean Returns ------- No Return """ # Spawn new reader and load data into dataframe fileReader = Reader(self.holoEnv.spark_session) df = fileReader.read(filepath) # Store dataframe to DB table schema = df.schema.names name_table = self._add_info_to_meta('Init', schema, dataset) self._dataframe_to_table(name_table, df) table_attribute_string = self.get_schema(dataset, "Init") count = 0 map_schema = [] attributes = table_attribute_string.split(',') for attribute in attributes: if attribute != "index": count = count + 1 map_schema.append([count, attribute]) dataframe_map_schema = self.holoEnv.spark_session.createDataFrame( map_schema, StructType([ StructField("index", IntegerType(), False), StructField("attribute", StringType(), True) ])) self.add_db_table('Map_schema', dataframe_map_schema, dataset) for tuple in map_schema: self.attribute_map[tuple[1]] = tuple[0] return
def ingest_data(self, filepath, dataset): """ Load data from a file to a dataframe and store it on the db filepath : String File path of the .csv file for the dataset dataset: DataSet The DataSet object that holds the Session ID for HoloClean """ # Spawn new reader and load data into dataframe filereader = Reader(self.holo_env.spark_session) # read with an index column df = filereader.read(filepath,1) # Store dataframe to DB table schema = df.schema.names name_table = dataset.table_specific_name('Init') self.dataframe_to_table(name_table, df) dataset.attributes['Init'] = schema count = 0 map_schema = [] attribute_map = {} for attribute in schema: if attribute != GlobalVariables.index_name: count = count + 1 map_schema.append([count, attribute]) attribute_map[attribute] = count dataframe_map_schema = self.holo_env.spark_session.createDataFrame( map_schema, dataset.attributes['Map_schema']) self.add_db_table('Map_schema', dataframe_map_schema, dataset) for table_tuple in map_schema: self.attribute_map[table_tuple[1]] = table_tuple[0] return df, attribute_map
def get_response(self):
    header = self.s.recv(7)
    packet_length = int.from_bytes(header[2:5], 'big')
    data = self.recvall(self.s, packet_length)

    r = Reader(data)
    code = r.readUInt32()
    if code == 7 or code == 8:
        self.fingerprint = json.loads(r.readFinger())  # fingerprint
        r.readInt32()
        r.readShort()
        self.assets_url = r.readString()  # assets url
        r.skip(23)
        r.readString()
        r.skip(2)
        r.readString()

        d = Downloader(self.fingerprint, self.assets_url)
        d.download()
    else:
        _(f"Received code {code} - returning!")
        return
parser.add_argument('input',
                    metavar='txt_file',
                    type=str,
                    help='The path to txt file')
args = parser.parse_args()

model_rhyme = Rhyme()
sent = SentimentExtractor()
model_rhyme.load_model()

counter = 1
src = args.input
dest = src.replace("_txt", "_labeled")
reader = Reader(src)

# Read src file line by line
with open(src, mode="r", encoding="utf-8") as src_file:
    content = src_file.readlines()

# Open dest file
dest_file = open(dest, mode="a", encoding="utf-8")

stanza = list()
header = ""
footer = ""
time_epoch = ""
sentiment = ""
tracker = 0

# Define time epoch based on name of txt file
if ("1600" in src and "1700" in src) or ("1500" in src and "1600" in src):
loss_function = configs.loss
hidden = configs.hidden
reg = configs.reg
n_neg_samples = configs.n_neg_samples
dropout = configs.dropout

if configs.debug:
    print(
        "loaded parameters dataset_name: %s, bern: %s, epochs: %d, batch_size: %d, learning_rate: %f, dim: %d, margin: %f, lr_decay: %f, loss_function: %s, hidden: %s"
        % (dataset_name, bern, epochs, batch_size, learning_rate, dim, margin,
           lr_decay, loss_function, hidden))

device = torch.device("cuda")
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

reader = Reader(configs)
n_train = reader.n_train
n_ent = reader.n_ent
n_rel = reader.n_rel
stat = reader.stat
corrupter = Corrupter(configs, n_ent, stat)


def load_model(model_name):
    loaded_dict = torch.load(
        os.path.join(configs.save_path, model_name + ".mdl"))
    if model_name == "TransE":
        model = TransE(loaded_dict["configs"], n_ent, n_rel)
    else:
        model = ComplEx(loaded_dict["configs"], n_ent, n_rel)
    # TODO = Parameters: size, board dimension, transversal
    ga.start()
    ga.report(0)


def play_with_hc(initialSate, dimension):
    hc = HillClimbing(initialSate, dimension, False)
    # TODO = Using the initial board as the state and the board dimension;
    # pass True if you want to restart once the solution is found
    hc.start()
    hc.report()


dimension = 10  # TODO = A 10 x 10 board is created
r = Reader('sample.txt')  # TODO = Sample file used to build the board
board = r.readFile()
initialSate = State(board)  # TODO = Store the board as arrays for the algorithm to work with

if PLAT_WITH == 'GA':
    play_with_hc(initialSate, dimension)
else:
    play_with_ga(dimension)

# TODO = Conclusion
# Apparently the genetic algorithm here carries out the logic of finding
# the best solution much faster; if there is a loop, restart the game.
def setUp(self):
    self.reader = Reader("")
def get_correct_array(self, current_file_path):
    my_reader = Reader()
    init_array = my_reader.read(current_file_path)
    return init_array
"batch_size": 20, "embedding_dims": 100, "nb_filter": 250, "filter_length": 20, "pool_length": 2, "hidden_size": 200, "nb_epoch": 50, "dropout": 0.5, "train_file": "data/train_pdtb_imp.json", "vocab_file": "data/vocab", "test_file": "", "valid_file": "data/dev_pdtb_imp.json", "vocab_size": 100000, } print str(conf) reader = Reader(conf) reader.get_full_train_data() reader.get_full_valid_data(get_id=True) features = [[[], []], [[], []]] targets = [] v_features = [[[], []], [[], []]] v_targets = [] v_id = [] v_im_features = [[[], []], [[], []]] v_im_targets = [] v_im_id = [] # for i in xrange(len(reader.train)): # features[0].append(reader.train[i][0][0]) # features[1].append(reader.train[i][0][1])
def run(from_date: datetime.date, to_date: datetime.date):
    executor = Executor("")
    reader = Reader(executor)
    return back_test(reader, from_date, to_date)
def read_data_from_file(self):
    my_reader = Reader()
    w_array = my_reader.read_report("generator" + "/" + self.filename + "/" +
                                    self.filename + "Output.txt")
    return w_array