Example #1
def main(argv):
    if len(argv) != 2:
        return 1

    input_file = argv[1]

    file_reader = FileReader(input_file)
    threads = []

    while True:
        try:
            raw_world = file_reader.get_world()
        except StopIteration:
            break
        except FormatError:
            continue

        world = World(len(threads) + 1, raw_world)

        thread = world.run()
        if thread:
            threads.append(thread)
            thread.start()
        else:
            Country.clean_cash(len(threads) + 1)

    for thread in threads:
        thread.join()

    res = sorted(World.result)

    for key in res:
        print("Case Number ", key)
        for country in sorted(World.result[key]):
            print(country[1], country[0])
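Example #1 uses FileReader, World, FormatError, and Country without showing them. Below is a minimal sketch of the interface the snippet appears to rely on; every name and behavior is inferred from the call sites above, not taken from the original project:

import threading


class FormatError(Exception):
    """Presumably raised by FileReader.get_world() on a malformed case."""


class FileReader:
    # the snippet treats this as a sequential reader over one input file
    def __init__(self, path):
        self._fh = open(path)

    def get_world(self):
        # return the raw data for the next test case; the caller expects
        # StopIteration at end of input and FormatError on bad input
        raise StopIteration


class World:
    # shared across threads: case number -> sortable list of two-field tuples
    result = {}

    def __init__(self, case_number, raw_world):
        self.case_number = case_number
        self.raw_world = raw_world

    def run(self):
        # expected to return an unstarted threading.Thread that the caller
        # starts and later joins, or a falsy value when no thread is needed
        return threading.Thread(target=lambda: None)


class Country:
    @staticmethod
    def clean_cash(case_number):
        # invoked when World.run() returns nothing; presumably discards
        # per-case cached state
        pass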
Example #2
async def main():
    conf = Config()

    logging.basicConfig(level=logging.DEBUG)
    logging.config.dictConfig(conf.DEFAULT_LOGGING)
    logger = logging.getLogger(__name__)

    db = ExtendedDBManager(init_db(conf))
    db.database.create_tables([Article], safe=True)

    executor = ThreadPoolExecutor(max_workers=10)
    loop = asyncio.get_running_loop()  # run blocking DB work on the pool
    loop.set_default_executor(executor)

    DATA_FOR_MATPLOTLIB = {}

    await truncate(db=db)
    await vacuum(db=db)
    await drop_index(db=db)

    for mode in ["noindex", 'index']:
        await truncate(db=db)
        await vacuum(db=db)
        if mode == 'index':
            await create_index(db=db)
        else:
            await drop_index(db=db)

        for i in range(1, 81):
            await buck_create_new(db=db, epoch_count=i, count=10**6, mode=mode)
            row1 = await db.get(Article.select().limit(1))
            row2 = await db.get(Article.select().order_by(
                Article.created_date.desc()).limit(1))

            if mode == 'noindex':
                arv_time__noindex1 = await call_avr_time(db=db, text=row1.name)
                arv_time__noindex2 = await call_avr_time(db=db, text=row2.name)
                arv_time__noindex = max(arv_time__noindex1, arv_time__noindex2)

                logger.info(f"Time NoIndex={arv_time__noindex}")
                DATA_FOR_MATPLOTLIB[str(i)] = {"noindex": arv_time__noindex}
            else:
                arv_time__index1 = await call_avr_time(db=db, text=row1.name)
                arv_time__index2 = await call_avr_time(db=db, text=row2.name)
                arv_time__index = max(arv_time__index1, arv_time__index2)

                logger.info(f"Time Index={arv_time__index}")
                DATA_FOR_MATPLOTLIB[str(i)].update({"index": arv_time__index})

            logger.info(f"")
            now_count = await db.count(Article.select())
            logger.info(f"Row in db count = {now_count}")
            logger.info(f"==  ==  " * 15)
            logger.info(f"==  ==  " * 15)

    FileReader.write_data(DATA_FOR_MATPLOTLIB)
    logger.info(f"Exit")
Example #3
def main():
    if len(sys.argv) != 2:
        print("wrong parameters")
    else:
        file = FileReader.readFile(sys.argv[1])
        r = Robots()
        for ua in file:
            r.addUserAgent(FileReader.parseUserAgent(ua))

        print(r.getUserAgentsNames())
Example #4
def main():
    reader = FileReader()
    for day in range(1, DAYS + 1):
        for task in range(1, TASKS + 1):
            if (day, task) not in SKIP_LIST:
                try:
                    task_input = reader.read_file(f"day_{day}/input.txt")
                    module = importlib.import_module(f"day_{day}.task_{task}.solution")
                    solver = getattr(module, "Solver")(task_input)
                    answer = solver.solve()
                    print(f"t{task}d{day}:", answer)
                except (FileNotFoundError, ModuleNotFoundError) as e:
                    print(e)
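The import_module call above implies a package layout of day_<N>/task_<M>/solution.py, each module exposing a Solver class. A hypothetical stub satisfying that assumed contract:

# day_1/task_1/solution.py -- hypothetical stub matching the assumed layout
class Solver:
    def __init__(self, task_input):
        self.task_input = task_input

    def solve(self):
        # a real solution would compute the answer from self.task_input
        return None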
Example #5
    def __init__(self):
        file_reader = FileReader()
        file_path = os.path.abspath('navigation.properties')
        prop = file_reader.read_file(file_path)

        # initialize instances for basic operations and location manager
        self._basic_operations = BasicOperations()
        self._location_manager = LocationManager()

        # initialize locations of navigation elements
        self._locNav = LocNav()
        self._locNav.set_loc_link_menu(prop['loc_link_menu_mobile'])
        self._locNav.set_loc_link_products_services(
            prop['loc_link_productsServices'])
        self._locNav.set_loc_link_mediaportal(prop['loc_link_mediaPortal'])
Example #6
def read_file(log_file=None, file_format="txt", spool_manager=None):
    query = """
        INSERT INTO nginx (request_id, visitor_id, user_id, process_name,
        ip, request_type, http_status, url, redirection_url,
        event_at, adposition, device_type, device_model, network,
        network_category, utm_campaign, utm_medium, utm_source,
        utm_term, keyword, gclid, creative, source, utm_content,
        created_at)
    """
    writer = RedshiftWriter(
        query,
        buffer_size=1000,
        timeout=1000,
    )

    with FileReader(log_file, "%s.spool" % log_file, "txt") as file_reader:
        for log, spool_data in file_reader.get_next_line():
            try:
                row = parser(log, format="json")
            except Exception:
                logger.error("Exception occurred while parsing log (%s)" % log,
                             exc_info=True)
                continue  # skip this line; `row` would otherwise be undefined

            writer.write(
                (row["request_id"], row["visitor_id"], row["user_id"],
                 row["process_name"], row["ip"], row["request_type"],
                 row["http_status"], row["url"], row["redirection_url"],
                 row["event_at"], row["adposition"], row["device_type"],
                 row["device_model"], row["network"], row["network_category"],
                 row["utm_campaign"], row["utm_medium"], row["utm_source"],
                 row["utm_term"], row["keyword"], row["gclid"],
                 row["creative"], row["source"], row["utm_content"],
                 row["created_at"]), spool_data)
Example #7
    def __init__(self):
        file_reader = FileReader()
        file_path = os.path.abspath('mediaportal.properties')
        prop = file_reader.read_file(file_path)

        # initialize instances for basic operations and location manager
        self._basic_operations = BasicOperations()
        self._location_manager = LocationManager()

        # initialize locations of media portal elements
        self._locMediaPortal = LocMediaPortal()
        self._locMediaPortal.set_loc_textArea_module_connect(
            prop['loc_textarea_moduleConnect'])
        self._locMediaPortal.set_loc_textArea_module_newsAnalytics(
            prop['loc_textarea_moduleNewsAnalytics'])
        self._locMediaPortal.set_loc_textArea_module_social(
            prop['loc_textarea_moduleSocial'])
        self._locMediaPortal.set_loc_modules(prop['num_of_modules'])
Example #8
def run(mini_batch):
    print(f'dataprep start: {__file__}, run({mini_batch})')
    dict_ = {
        'paper_id': [],
        'doi': [],
        'abstract': [],
        'body_text': [],
        'authors': [],
        'title': [],
        'journal': [],
        'abstract_summary': []
    }
    for entry in mini_batch:
        try:
            content = FileReader(entry)
        except Exception:
            continue  # invalid paper format, skip

        # get metadata information
        meta_data = meta_df.loc[meta_df['sha'] == content.paper_id]
        print('found meta_data', meta_data)
        # no metadata, skip this paper
        if len(meta_data) == 0:
            continue

        dict_['abstract'].append(content.abstract)
        dict_['paper_id'].append(content.paper_id)
        dict_['body_text'].append(content.body_text)

        # also create a column for the summary of abstract to be used in a plot
        if len(content.abstract) == 0:
            # no abstract provided
            dict_['abstract_summary'].append("Not provided.")
        elif len(content.abstract.split(' ')) > 100:
            # abstract provided is too long for plot, take first 100 words append with ...
            info = content.abstract.split(' ')[:100]
            summary = get_breaks(' '.join(info), 40)
            dict_['abstract_summary'].append(summary + "...")
        else:
            # abstract is short enough
            summary = get_breaks(content.abstract, 40)
            dict_['abstract_summary'].append(summary)

        try:
            # if more than one author
            authors = meta_data['authors'].values[0].split(';')
            if len(authors) > 2:
                # if more than 2 authors, take them all with html tag breaks in between
                dict_['authors'].append(get_breaks('. '.join(authors), 40))
            else:
                # authors will fit in plot
                dict_['authors'].append(". ".join(authors))
        except Exception:
            # if only one author - or a null value
            dict_['authors'].append(meta_data['authors'].values[0])

        # add the title information, add breaks when needed
        try:
            title = get_breaks(meta_data['title'].values[0], 40)
            dict_['title'].append(title)
        # if title was not provided
        except Exception:
            dict_['title'].append(meta_data['title'].values[0])

        # add the journal information
        dict_['journal'].append(meta_data['journal'].values[0])

        # add doi
        dict_['doi'].append(meta_data['doi'].values[0])

    df_covid = pd.DataFrame(dict_,
                            columns=[
                                'paper_id', 'doi', 'abstract', 'body_text',
                                'authors', 'title', 'journal',
                                'abstract_summary'
                            ])
    print('processed: ', df_covid)

    return df_covid
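get_breaks is not shown; the comments above ("html tag breaks", a 40-character budget) suggest it inserts <br> tags at word boundaries so long strings fit in a plot label. A minimal sketch consistent with those call sites:

def get_breaks(content, length):
    # insert an HTML <br> roughly every `length` characters,
    # breaking only between words (sketch, not the original)
    words = content.split(' ')
    out, chars = [], 0
    for word in words:
        chars += len(word)
        if chars > length:
            out.append('<br>' + word)
            chars = 0
        else:
            out.append(word)
    return ' '.join(out)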
Example #9
from utils import FileReader

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    DATA_FOR_MATPLOTLIB = FileReader.read_data(file_name='app/avr_time_80.csv')
    # line 1 points
    x1 = [int(data["count"]) for data in DATA_FOR_MATPLOTLIB]
    y1 = [float(data["index"]) for data in DATA_FOR_MATPLOTLIB]

    # line 2 points
    x2 = [int(data["count"]) for data in DATA_FOR_MATPLOTLIB]
    y2 = [float(data["noindex"]) for data in DATA_FOR_MATPLOTLIB]

    # plotting the line 1 - 2 points
    fig, (ax1, ax2) = plt.subplots(2)
    fig.suptitle('Index vs NoIndex')
    ax1.set_title('Index plot')
    ax1.plot(x1, y1)
    ax2.set_title('NoIndex plot')
    ax2.plot(x2, y2)

    ax1.set(xlabel='Count of rows (M)', ylabel='time, ms')
    ax2.set(xlabel='Count of rows (M)', ylabel='time, ms')
    # Display a figure.
    plt.show()
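Example #9's plotting code expects FileReader.read_data to return an iterable of dicts with "count", "index", and "noindex" keys. A minimal CSV-backed sketch of such a reader, under that assumption:

import csv


class FileReader:
    @staticmethod
    def read_data(file_name):
        # assumed columns: count, index, noindex
        with open(file_name, newline='') as fh:
            return list(csv.DictReader(fh))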
Example #10
    def initializeaza_date(self):
        # load the questions from each domain file (plus 'baza') into the store
        for d in list(DOMENII) + ['baza']:
            fr = FileReader(d + '.txt')
            date = fr.citire_date()
            for i in date:
                self.store.adauga_intrebare(i[0], i[1], d)
Example #11
#!/usr/bin/env python

import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from utils import FileReader

reader = FileReader()
files = ['./data/train.csv']
#         './data/merchants.csv',
#         './data/historical_transactions.csv',
#         './data/new_merchant_transactions.csv']

data = reader.load_file(files, is_batch=True)

train = data['train']
train['first_active_month'] = pd.to_datetime(train['first_active_month'])
train['year'] = train['first_active_month'].dt.year
train['month'] = train['first_active_month'].dt.month
train_x = train[['year', 'month', 'feature_1', 'feature_2', 'feature_3']]
train_y = train['target']

train_x, valid_x, train_y, valid_y = train_test_split(
    train_x, train_y, test_size=0.25, random_state=1234)
train_data = lgb.Dataset(train_x, label=train_y)
valid_data = lgb.Dataset(valid_x, label=valid_y, reference=train_data)

param = {
    'num_leaves': 63,
    'num_iterations': 1000,
    'learning_rate': 0.01,