Example #1
def main():
    lib = Library()
    user1 = Reader("Djinn", "Co", 25)
    user2 = Reader("Artem", "KFC", 48)
    user3 = Reader("Suga", "Rogue", 35)

    book1 = Book("Ubik", "Philip K. Dick", 1969)
    book2 = Book("The Last Wish", "Andrzej Sapkowski", 2007)
    book3 = Book("It", "Stephen King", 1986)
    print(' ')
    lib.add_book_to_lib(book1)
    lib.add_book_to_lib(book2)
    lib.add_book_to_lib(book3)
    print(' ')
    lib.add_user_to_readerslist(user1)
    lib.add_user_to_readerslist(user2)
    lib.add_user_to_readerslist(user3)
    print(' ')

    time.sleep(3)
    lib.give_book_to_user(user1, book1)
    lib.give_book_to_user(user3, book1)
    time.sleep(3)
    print(' ')
    lib.show_books(available=True)
    print(' ')
    lib.show_books(available=False)
    print(' ')
    lib.sort_books('year')
Example #2
    def create_data2(self,
                     year,
                     tourist_file=TOURIST_FILE,
                     weather_file=WEATHER_FILE,
                     weather_file2=WEATHER_FILE2):
        """
        输入年份会根据tourist_file和weather_file中的数据构造数据集
        :param year: 数据年份,数值类型
        :param tourist_file: 客流量数据文件,默认路径TOURIST_FILE
        :param weather_file:  天气数据文件,默认路径WEATHER_FILE
        :param weather_file2: 天气详细数据文件,默认路径WEATHER_FILE2
        :return:
        """
        tourist_file = tourist_file.replace('YYYY', str(year))
        weather_file = weather_file.replace('YYYY', str(year))
        weather_out = self.WEATHER_OUT.replace('YYYY', str(year))
        holiday_out = self.HOLIDAY_OUT.replace('YYYY', str(year))
        file_out = self.FILE_OUT.replace('YYYY', str(year))

        if not os.path.isfile(weather_out):
            Reader().get_weather(weather_file, weather_out)
        weather_dict = Reader().read_weather(weather_out)
        weather_dict2 = Reader().read_weather2(weather_file2)

        # Fetch the year's holiday data from the API and cache it in holiday_out, to avoid repeated HTTP requests
        if not os.path.isfile(holiday_out):
            IfHoliday().get_year_holiday(year, holiday_out)
        holiday_dict = Reader().read_holiday(holiday_out)

        tourist_dict = Reader().read_tourist(
            tourist_file)  # a plain dict may not preserve date order, which can hurt feature-engineering accuracy

        with codecs.open(file_out, 'a+', 'utf-8') as fout:
            fout.write(
                "scenic_area,date,tourist,holiday,weather,min_temperature,max_temperature,mean_temperature,"
                + "humidity,wind_speed,precipitation,cloudage" + "\n")
            for date, tourist in tourist_dict.items():
                name = "上饶灵山景区"  #景区名称

                weather = weather_dict[date][0]
                # max_temperature = weather_dict[date][1]
                # min_temperature = weather_dict[date][2]
                if date in weather_dict2.keys():
                    weather_info = str(weather_dict2[date]).strip('[]')
                else:
                    weather_info = str("None," * 7).strip('[],')
                '''The block below could arguably be moved into IfHoliday()'''
                holiday = 0
                day = datetime.strptime(date.replace('-', ''), "%Y%m%d").date()
                if date in holiday_dict.keys():
                    if holiday_dict[date]:
                        holiday = 2
                elif day.weekday() in [5, 6]:
                    holiday = 1

                text = name + "," + date + "," + str(tourist) + "," + str(
                    holiday) + "," + weather + "," + weather_info + "\n"
                fout.write(text)
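
The holiday flag written into each row above takes one of three values: 2 when the API data marks the date as an official holiday, 1 for an ordinary weekend, 0 otherwise. A minimal standalone sketch of that decision, assuming a holiday_dict keyed by 'YYYY-MM-DD' strings as in the snippet (the helper name holiday_flag is hypothetical):

from datetime import datetime

def holiday_flag(date_str, holiday_dict):
    """Return 2 for an official holiday, 1 for a weekend, 0 for a workday."""
    day = datetime.strptime(date_str.replace('-', ''), "%Y%m%d").date()
    if date_str in holiday_dict:
        # the API data decides: truthy -> holiday, falsy -> regular workday
        return 2 if holiday_dict[date_str] else 0
    if day.weekday() in (5, 6):  # Saturday or Sunday
        return 1
    return 0

# holiday_flag("2019-10-01", {"2019-10-01": True}) -> 2
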
Example #3
    def testMakeobjectsfromxml(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        for thread in threads:
            for document in thread._documents:
                print(document.text)
Example #4
    def calculate(
        self,
        source_code: str,
        priority_code: str,
        buy_price_code: str,
        sell_price_code: str,
        market: Market,
        start_date: date,
        end_date: date,
    ):
        logging.debug(
            f"{TAG} Start calculate start_date = {start_date}, end_date = {end_date}"
        )
        compile_result = self.compiler.compile(source_code, priority_code,
                                               buy_price_code, sell_price_code)

        executor = Executor("")
        reader = Reader(executor)
        field_list = list(compile_result.fields)
        required_field_list = [Field.open, Field.close, Field.is_active]
        for field in required_field_list:
            if field not in field_list:
                field_list.append(field)
        rows = reader.get_simulating_data(
            Universe.total, field_list, start_date, end_date,
            [Field.open, Field.close, Field.ticker_id, Field.low, Field.high])

        if len(rows) == 0:
            return None

        now = time.time()
        total_df = pd.DataFrame(rows)
        total_job_count = len(compile_result.item_list)
        completed_job_count = 0

        for item in compile_result.item_list:
            # TODO: keep verifying that the is_rank flag is populated correctly
            # rank expressions are evaluated per date
            if item.is_rank:
                y = total_df.groupby("date", as_index=False).apply(
                    lambda df: self._calculate(item.code, df))
            else:  # non-rank expressions are evaluated per ticker
                y = total_df.groupby("ticker_id", as_index=False).apply(
                    lambda df: self._calculate(item.code, df))

            different_columns = total_df.columns.symmetric_difference(
                y.columns)
            for column in different_columns:
                total_df.insert(0, column, y[column])
            completed_job_count += 1
            self.progress = Decimal(completed_job_count / total_job_count)  # fraction of items finished

        logging.debug("{} execute time : {:0.3f}s".format(
            TAG,
            time.time() - now))
        return total_df
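
calculate branches on item.is_rank: rank expressions are grouped by date (evaluated across tickers on the same day), everything else is grouped by ticker_id (evaluated along each ticker's history). A small pandas-only sketch of that grouping idea, with made-up data and simple stand-ins for the compiled expressions rather than the project's _calculate:

import pandas as pd

total_df = pd.DataFrame({
    "date": ["2021-01-04", "2021-01-04", "2021-01-05", "2021-01-05"],
    "ticker_id": ["A", "B", "A", "B"],
    "close": [10.0, 12.0, 11.0, 9.0],
})

# is_rank-style item: ranked within each date, across tickers
total_df["close_rank"] = total_df.groupby("date")["close"].rank(ascending=False)

# ordinary item: computed within each ticker, across dates
total_df["close_diff"] = total_df.groupby("ticker_id")["close"].diff()

print(total_df)
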
Example #5
    def test_final(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        collection_tokenized = tokenizer.tokenize()
        coll_model = CollectionModel(collection_tokenized)
        doc_model = DocumentModel(collection_tokenized)
        ret_model = RetrievalModel(collection_tokenized, doc_model, coll_model)
        ret_model.calculate_relevance()
Example #6
    def testtokenizerfromfile(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        threads_tokenized = tokenizer.tokenize()
        for thread in threads_tokenized:
            print(thread._query._body)
            for document in thread._documents:
                print(document._text)
Example #7
    def testdocumentmodel(self):
        reader = Reader()
        soup = reader.readfile()
        threads = reader.makeobjectsfromxml(soup)
        tokenizer = Tokenizer(threads)
        threads_tokenized = tokenizer.tokenize()
        collection_model = CollectionModel(threads_tokenized)
        freq_collection = collection_model.calculate_frequency()
        print(freq_collection)
        document_model = DocumentModel(threads_tokenized)
        freq_document = document_model.calculate_frequency()
        print(freq_document)
Example #8
def train():
    print("training...")
    reader = Reader()
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()
        with tf.device('/cpu:0'):
            images, boxes = reader.distorted_inputs(FLAGS.train_dir,
                                                    FLAGS.batch_size)

        logits = None  #TODO
        loss = None  #TODO
        train_op = None  #TODO

        class _LoggerHook(tf.train.SessionRunHook):
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    #loss_value = run_values.results
                    loss_value = .1
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)
                    format_str = (
                        '%s: step: %d, loss = %.2f (%.1f examples/sec, %.3f sec/batch)'
                    )
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_state_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:

            while not mon_sess.should_stop():
                mon_sess.run(train_op)
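
The throughput numbers printed by _LoggerHook are plain ratios; with assumed flag values the arithmetic works out as below:

# Assumed values, only to illustrate the arithmetic in after_run above.
log_frequency = 10   # FLAGS.log_frequency (steps between log lines)
batch_size = 128     # FLAGS.batch_size
duration = 2.0       # seconds elapsed since the previous log line

examples_per_sec = log_frequency * batch_size / duration  # 640.0
sec_per_batch = duration / log_frequency                  # 0.2
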
Example #9
def main():
    args = parse_args()
    create_dirs(args.model_name, [args.checkpoint_dir, args.log_dir])

    sess = tf.Session()

    logger = Logger(args, sess)
    model = Model(args, logger)
    reader = Reader(args, sess, logger)

    if args.action == 'train':
        trainer = Trainer(sess, model, reader, args, logger)
        trainer.train()
    else:
        predictor = Estimator(sess, model, reader, args, logger)
        predictor.predict()
Example #10
    def run_pipeline(self):
        customer_order = CustomerOrder()

        read = Reader(self._file)
        line_order = ValidateInput()

        self._line_item = []

        for order in read.result():
            line_order.set(order)
            if line_order.valid_types:
                customer_order.order_type = line_order.value[0]
                customer_order.cash = line_order.value[1]
                customer_order.price = line_order.value[2]
                customer_order.bonus_ratio = line_order.value[3]
                self._line_item.append(customer_order.final_order)
Example #11
    def ingest_data(self, filepath, dataset):
        """
        load data from a file to a dataframe and store it on the db

         Parameters
        ----------
        filepath : String
            file path of the .csv file for the dataset
        dataset: DataSet
            The DataSet object that holds the Session ID for HoloClean
        Returns
        -------
        No Return
        """
        # Spawn new reader and load data into dataframe
        fileReader = Reader(self.holoEnv.spark_session)
        df = fileReader.read(filepath)

        # Store dataframe to DB table
        schema = df.schema.names
        name_table = self._add_info_to_meta('Init', schema, dataset)
        self._dataframe_to_table(name_table, df)
        table_attribute_string = self.get_schema(dataset, "Init")
        count = 0
        map_schema = []
        attributes = table_attribute_string.split(',')
        for attribute in attributes:
            if attribute != "index":
                count = count + 1
                map_schema.append([count, attribute])

        dataframe_map_schema = self.holoEnv.spark_session.createDataFrame(
            map_schema,
            StructType([
                StructField("index", IntegerType(), False),
                StructField("attribute", StringType(), True)
            ]))
        self.add_db_table('Map_schema', dataframe_map_schema, dataset)

        for tuple in map_schema:
            self.attribute_map[tuple[1]] = tuple[0]
        return
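
The Map_schema dataframe built above is just a two-column (index, attribute) table with an explicit StructType. A minimal standalone sketch of that construction, assuming a local SparkSession instead of HoloClean's holoEnv.spark_session and made-up attribute names:

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

spark = SparkSession.builder.master("local[1]").getOrCreate()

map_schema = [[1, "city"], [2, "state"], [3, "zip"]]  # hypothetical attributes
schema = StructType([
    StructField("index", IntegerType(), False),
    StructField("attribute", StringType(), True),
])
spark.createDataFrame(map_schema, schema).show()
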
Example #12
    def ingest_data(self, filepath, dataset):
        """
        Load data from a file to a dataframe and store it on the db

        filepath : String
            File path of the .csv file for the dataset
        dataset: DataSet
            The DataSet object that holds the Session ID for HoloClean

        """

        # Spawn new reader and load data into dataframe
        filereader = Reader(self.holo_env.spark_session)

        # read with an index column
        df = filereader.read(filepath, 1)

        # Store dataframe to DB table
        schema = df.schema.names
        name_table = dataset.table_specific_name('Init')
        self.dataframe_to_table(name_table, df)
        dataset.attributes['Init'] = schema
        count = 0
        map_schema = []
        attribute_map = {}
        for attribute in schema:
            if attribute != GlobalVariables.index_name:
                count = count + 1
                map_schema.append([count, attribute])
                attribute_map[attribute] = count

        dataframe_map_schema = self.holo_env.spark_session.createDataFrame(
            map_schema, dataset.attributes['Map_schema'])
        self.add_db_table('Map_schema', dataframe_map_schema, dataset)

        for table_tuple in map_schema:
            self.attribute_map[table_tuple[1]] = table_tuple[0]

        return df, attribute_map
Example #13
    def get_response(self):
        header = self.s.recv(7)
        packet_length = int.from_bytes(header[2:5], 'big')
        data = self.recvall(self.s, packet_length)
        r = Reader(data)
        code = r.readUInt32()

        if code == 7 or code == 8:
            self.fingerprint = json.loads(r.readFinger())  # fingerprint
            r.readInt32()
            r.readShort()
            self.assets_url = r.readString()  # assets url
            r.skip(23)
            r.readString()
            r.skip(2)
            r.readString()

            d = Downloader(self.fingerprint, self.assets_url)
            d.download()
        else:
            _(f"Recived code {code} - returning!")
            return
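
The header parsing above decodes a 3-byte big-endian length field (bytes 2-4 of the 7-byte header) with int.from_bytes. A tiny self-contained check of that decoding, using a made-up header:

# Made-up 7-byte header whose length field (bytes 2..4) encodes 300.
header = bytes([0x14, 0x89, 0x00, 0x01, 0x2C, 0x00, 0x00])
packet_length = int.from_bytes(header[2:5], 'big')
print(packet_length)  # 300
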
Example #14
parser.add_argument('input',
                    metavar='txt_file',
                    type=str,
                    help='The path to txt file')

args = parser.parse_args()

model_rhyme = Rhyme()
sent = SentimentExtractor()
model_rhyme.load_model()
counter = 1

src = args.input
dest = src.replace("_txt", "_labeled")
reader = Reader(src)

# Read src file line by line
with open(src, mode="r", encoding="utf-8") as src_file:
    content = src_file.readlines()
# Open dest file
dest_file = open(dest, mode="a", encoding="utf-8")
stanza = list()
header = ""
footer = ""
time_epoch = ""
sentiment = ""
tracker = 0

# Define time epoch based on name of txt file
if ("1600" in src and "1700" in src) or ("1500" in src and "1600" in src):
Example #15
loss_function = configs.loss
hidden = configs.hidden
reg = configs.reg
n_neg_samples = configs.n_neg_samples
dropout = configs.dropout

if configs.debug:
    print(
        "loaded parameters dataset_name: %s, bern: %s, epochs: %d, batch_size: %d, learning_rate: %f, dim: %d, margin: %f, lr_decay: %f, loss_function: %s, hidden: %s"
        % (dataset_name, bern, epochs, batch_size, learning_rate, dim, margin,
           lr_decay, loss_function, hidden))

device = torch.device("cuda")
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

reader = Reader(configs)

n_train = reader.n_train
n_ent = reader.n_ent
n_rel = reader.n_rel
stat = reader.stat
corrupter = Corrupter(configs, n_ent, stat)


def load_model(model_name):
    loaded_dict = torch.load(
        os.path.join(configs.save_path, model_name + ".mdl"))
    if model_name == "TransE":
        model = TransE(loaded_dict["configs"], n_ent, n_rel)
    else:
        model = ComplEx(loaded_dict["configs"], n_ent, n_rel)
Example #16
    # TODO = Parameters: size, board dimension, transversal
    ga.start()
    ga.report(0)


def play_with_hc(initialSate, dimension):
    hc = HillClimbing(initialSate, dimension, False)
    # TODO = Using the initial board as the state and the board dimension;
    #  pass True to restart as soon as a solution is found
    hc.start()
    hc.report()


dimension = 10
# TODO = A 10 x 10 board is created
r = Reader('sample.txt')
# TODO = Sample file used to build the board
board = r.readFile()
initialSate = State(board)
# TODO = Store the board as arrays so the algorithm can work with it

if PLAT_WITH == 'GA':
    play_with_ga(dimension)
else:
    play_with_hc(initialSate, dimension)

# TODO = Conclusion
#  The genetic algorithm here seems to get through the logic of finding
#  the best solution much faster; if it hits a loop, restart the game
Example #17
    def setUp(self):
        self.reader = Reader("")
Example #18
    def get_correct_array(self, current_file_path):
        my_reader = Reader()
        init_array = my_reader.read(current_file_path)
        return init_array
Example #19
    "batch_size": 20,
    "embedding_dims": 100,
    "nb_filter": 250,
    "filter_length": 20,
    "pool_length": 2,
    "hidden_size": 200,
    "nb_epoch": 50,
    "dropout": 0.5,
    "train_file": "data/train_pdtb_imp.json",
    "vocab_file": "data/vocab",
    "test_file": "",
    "valid_file": "data/dev_pdtb_imp.json",
    "vocab_size": 100000,
}
print(str(conf))
reader = Reader(conf)
reader.get_full_train_data()
reader.get_full_valid_data(get_id=True)

features = [[[], []], [[], []]]
targets = []
v_features = [[[], []], [[], []]]
v_targets = []
v_id = []
v_im_features = [[[], []], [[], []]]
v_im_targets = []
v_im_id = []

# for i in xrange(len(reader.train)):
#     features[0].append(reader.train[i][0][0])
#     features[1].append(reader.train[i][0][1])
Example #20
def run(from_date: datetime.date, to_date: datetime.date):
    executor = Executor("")
    reader = Reader(executor)
    return back_test(reader, from_date, to_date)
Example #21
    def read_data_from_file(self):
        my_reader = Reader()
        w_array = my_reader.read_report(
            f"generator/{self.filename}/{self.filename}Output.txt")
        return w_array