def main(argv):
    """Read worlds from the input file, simulate each on its own thread,
    then print the accumulated results per case.

    Returns 1 when called without exactly one input-file argument,
    otherwise None.
    """
    if len(argv) != 2:
        return 1

    reader = FileReader(argv[1])
    workers = []
    while True:
        try:
            raw_world = reader.get_world()
        except StopIteration:
            # No more worlds in the file.
            break
        except FormatError:
            # Malformed record: skip it and try the next one.
            continue
        case_number = len(workers) + 1
        worker = World(case_number, raw_world).run()
        if worker:
            workers.append(worker)
            worker.start()
        else:
            # Nothing to run for this case; release its cached state.
            Country.clean_cash(case_number)

    for worker in workers:
        worker.join()

    # Results are keyed by case number; print them in order.
    for key in sorted(World.result):
        print("Case Number ", key)
        for country in sorted(World.result[key]):
            print(country[1], country[0])
async def main() -> None:
    """Benchmark query times with and without an index over a growing table.

    For each mode ("noindex" then "index") the table is rebuilt from
    scratch and filled in 80 batches; after each batch the average query
    time is measured and collected for later plotting.
    """
    conf = Config()
    logging.basicConfig(level=logging.DEBUG)
    logging.config.dictConfig(conf.DEFAULT_LOGGING)
    logger = logging.getLogger(__name__)

    db = ExtendedDBManager(init_db(conf))
    db.database.create_tables([Article], safe=True)

    # Run blocking DB work on a thread pool behind the event loop.
    # NOTE(review): `loop` is not defined in this function — presumably a
    # module-level event loop; confirm it exists at import time.
    executor = ThreadPoolExecutor(max_workers=10)
    loop.set_default_executor(executor)

    # Accumulates {batch_index: {"noindex": t, "index": t}} for the plot.
    DATA_FOR_MATPLOTLIB = {}

    # Start from an empty, index-free table.
    await truncate(db=db)
    await vacuum(db=db)
    await drop_index(db=db)

    for mode in ["noindex", 'index']:
        # Rebuild the table for each mode so timings are comparable.
        await truncate(db=db)
        await vacuum(db=db)
        if mode == 'index':
            await create_index(db=db)
        else:
            await drop_index(db=db)

        for i in range(1, 81):
            # Add another batch of rows, then time lookups against both
            # the oldest and the newest row; keep the worse of the two.
            await buck_create_new(db=db, epoch_count=i, count=10**6, mode=mode)
            row1 = await db.get(Article.select().limit(1))
            row2 = await db.get(Article.select().order_by(
                Article.created_date.desc()).limit(1))
            if mode == 'noindex':
                arv_time__noindex1 = await call_avr_time(db=db, text=row1.name)
                arv_time__noindex2 = await call_avr_time(db=db, text=row2.name)
                arv_time__noindex = max(arv_time__noindex1, arv_time__noindex2)
                logger.info(f"Time NoIndex={arv_time__noindex}")
                DATA_FOR_MATPLOTLIB[str(i)] = {"noindex": arv_time__noindex}
            else:
                # The "noindex" pass ran first, so the key already exists.
                arv_time__index1 = await call_avr_time(db=db, text=row1.name)
                arv_time__index2 = await call_avr_time(db=db, text=row2.name)
                arv_time__index = max(arv_time__index1, arv_time__index2)
                logger.info(f"Time Index={arv_time__index}")
                DATA_FOR_MATPLOTLIB[str(i)].update({"index": arv_time__index})
            logger.info(f"")
            now_count = await db.count(Article.select())
            logger.info(f"Row in db count = {now_count}")
            logger.info(f"== == " * 15)
            logger.info(f"== == " * 15)

    # Persist the collected timings for the plotting script.
    FileReader.write_data(DATA_FOR_MATPLOTLIB)
    logger.info(f"Exit")
def main():
    """Parse user agents from a robots file given on the command line."""
    if len(sys.argv) != 2:
        print("wrong parameters")
        return

    robots = Robots()
    for line in FileReader.readFile(sys.argv[1]):
        robots.addUserAgent(FileReader.parseUserAgent(line))
    print(robots.getUserAgentsNames())
def main():
    """Run every day/task solver that is not skipped and print its answer.

    Missing input files or solution modules are reported and skipped.
    """
    reader = FileReader()
    for day in range(1, DAYS + 1):
        for task in range(1, TASKS + 1):
            if (day, task) in SKIP_LIST:
                continue
            try:
                puzzle_input = reader.read_file(f"day_{day}/input.txt")
                solution_module = importlib.import_module(
                    f"day_{day}.task_{task}.solution")
                answer = getattr(solution_module, "Solver")(puzzle_input).solve()
                print(f"t{task}d{day}:", answer)
            except (FileNotFoundError, ModuleNotFoundError) as err:
                # Day/task not implemented yet — report and move on.
                print(err)
def __init__(self):
    """Load navigation locators from navigation.properties and set up
    the basic-operation helpers used by this page object."""
    properties = FileReader().read_file(
        os.path.abspath('navigation.properties'))

    # Helpers for generic page interaction and element location.
    self._basic_operations = BasicOperations()
    self._location_manager = LocationManager()

    # Locators for the navigation elements, taken from the properties file.
    self._locNav = LocNav()
    self._locNav.set_loc_link_menu(properties['loc_link_menu_mobile'])
    self._locNav.set_loc_link_products_services(
        properties['loc_link_productsServices'])
    self._locNav.set_loc_link_mediaportal(properties['loc_link_mediaPortal'])
def read_file(log_file=None, file_format="txt", spool_manager=None):
    """Stream an nginx access log into Redshift.

    Each line is parsed as JSON and its extracted columns are written
    through a buffered RedshitWriter; a "<log_file>.spool" file tracks
    read progress. Unparseable lines are logged and skipped.

    :param log_file: path to the log file to ingest.
    :param file_format: unused here — the reader is opened as "txt";
        kept for interface compatibility.
    :param spool_manager: unused here; kept for interface compatibility.
    """
    query = """ INSERT INTO nginx (request_id, visitor_id, user_id, process_name, ip, request_type, http_status, url, redirection_url, event_at, adposition, device_type, device_model, network, network_category, utm_campaign, utm_medium, utm_source, utm_term, keyword, gclid, creative, source, utm_content, created_at) """
    writer = RedshitWriter(
        query,
        buffer_size=1000,
        timeout=1000,
    )
    with FileReader(log_file, "%s.spool" % log_file, "txt") as file_reader:
        for log, spool_data in file_reader.get_next_line():
            try:
                row = parser(log, format="json")
            except Exception:
                # BUG FIX: the original fell through after logging and
                # called writer.write() with an undefined (first line) or
                # stale `row`. Skip the bad line instead. Also narrowed
                # the bare `except:` so KeyboardInterrupt/SystemExit pass.
                logger.error("Exception occurred while parsing log (%s)" % log,
                             exc_info=True)
                continue
            writer.write(
                (row["request_id"], row["visitor_id"], row["user_id"],
                 row["process_name"], row["ip"], row["request_type"],
                 row["http_status"], row["url"], row["redirection_url"],
                 row["event_at"], row["adposition"], row["device_type"],
                 row["device_model"], row["network"], row["network_category"],
                 row["utm_campaign"], row["utm_medium"], row["utm_source"],
                 row["utm_term"], row["keyword"], row["gclid"],
                 row["creative"], row["source"], row["utm_content"],
                 row["created_at"]), spool_data)
def __init__(self):
    """Load media-portal locators from mediaportal.properties and set up
    the basic-operation helpers used by this page object."""
    properties = FileReader().read_file(
        os.path.abspath('mediaportal.properties'))

    # Helpers for generic page interaction and element location.
    self._basic_operations = BasicOperations()
    self._location_manager = LocationManager()

    # Locators for the media-portal modules, taken from the properties file.
    self._locMediaPortal = LocMediaPortal()
    self._locMediaPortal.set_loc_textArea_module_connect(
        properties['loc_textarea_moduleConnect'])
    self._locMediaPortal.set_loc_textArea_module_newsAnalytics(
        properties['loc_textarea_moduleNewsAnalytics'])
    self._locMediaPortal.set_loc_textArea_module_social(
        properties['loc_textarea_moduleSocial'])
    self._locMediaPortal.set_loc_modules(properties['num_of_modules'])
def run(mini_batch):
    """Parse a mini-batch of paper files and join them with metadata.

    For each entry, read the paper with FileReader, look up its metadata
    row in the module-level ``meta_df`` by SHA, and collect paper id,
    doi, abstract, body text, authors, title, journal, and a shortened
    abstract summary for plotting. Entries that fail to parse or have no
    metadata are skipped.

    :param mini_batch: iterable of paper file paths/handles FileReader accepts.
    :return: pandas.DataFrame with one row per successfully processed paper.
    """
    print(f'dataprep start: {__file__}, run({mini_batch})')
    dict_ = {
        'paper_id': [],
        'doi': [],
        'abstract': [],
        'body_text': [],
        'authors': [],
        'title': [],
        'journal': [],
        'abstract_summary': []
    }
    for entry in mini_batch:
        try:
            content = FileReader(entry)
        except Exception:
            continue  # invalid paper format, skip

        # Get metadata information for this paper.
        meta_data = meta_df.loc[meta_df['sha'] == content.paper_id]
        print('found meta_data', meta_data)
        # No metadata, skip this paper.
        if len(meta_data) == 0:
            continue

        dict_['abstract'].append(content.abstract)
        dict_['paper_id'].append(content.paper_id)
        dict_['body_text'].append(content.body_text)

        # Also create a column for the summary of abstract to be used in a plot.
        if len(content.abstract) == 0:
            # No abstract provided.
            dict_['abstract_summary'].append("Not provided.")
        elif len(content.abstract.split(' ')) > 100:
            # Abstract too long for the plot: take the first 100 words.
            info = content.abstract.split(' ')[:100]
            summary = get_breaks(' '.join(info), 40)
            dict_['abstract_summary'].append(summary + "...")
        else:
            # Abstract is short enough to show whole.
            summary = get_breaks(content.abstract, 40)
            dict_['abstract_summary'].append(summary)

        # NOTE: the original re-queried meta_df here; `meta_data` from
        # above is identical, so the duplicate lookup was removed.
        try:
            # If more than one author.
            authors = meta_data['authors'].values[0].split(';')
            if len(authors) > 2:
                # More than 2 authors: join with html tag breaks in between.
                dict_['authors'].append(get_breaks('. '.join(authors), 40))
            else:
                # Authors will fit in plot.
                # BUG FIX: this literal was broken across a physical
                # newline in the original (a syntax error); rejoined as
                # a plain ". " separator.
                dict_['authors'].append(". ".join(authors))
        except Exception:
            # Only one author — or a null value.
            dict_['authors'].append(meta_data['authors'].values[0])

        # Add the title information, with breaks when needed.
        try:
            title = get_breaks(meta_data['title'].values[0], 40)
            dict_['title'].append(title)
        except Exception:
            # Title was not provided / not break-able.
            dict_['title'].append(meta_data['title'].values[0])

        dict_['journal'].append(meta_data['journal'].values[0])
        dict_['doi'].append(meta_data['doi'].values[0])

    df_covid = pd.DataFrame(dict_, columns=[
        'paper_id', 'doi', 'abstract', 'body_text', 'authors', 'title',
        'journal', 'abstract_summary'
    ])
    print('processed: ', df_covid)
    return df_covid
from utils import FileReader

if __name__ == '__main__':
    import matplotlib.pyplot as plt

    # Timing samples collected by the benchmark run.
    samples = FileReader.read_data(fie_name='app/avr_time_80.csv')

    # Shared x axis (row counts) and the two measured series.
    counts = [int(sample["count"]) for sample in samples]
    indexed_times = [float(sample["index"]) for sample in samples]
    plain_times = [float(sample["noindex"]) for sample in samples]

    # Two stacked subplots: indexed timings on top, non-indexed below.
    fig, (ax1, ax2) = plt.subplots(2)
    fig.suptitle('Index vs NoIndex')
    ax1.set_title('Index plot')
    ax1.plot(counts, indexed_times)
    ax2.set_title('NoIndex plot')
    ax2.plot(counts, plain_times)
    ax1.set(xlabel='Count of rows (M)', ylabel='time, ms')
    ax2.set(xlabel='Count of rows (M)', ylabel='time, ms')

    plt.legend()
    plt.show()
def initializeaza_date(self):
    """Load questions from every domain file (plus 'baza') into the store."""
    for domain in list(DOMENII) + ['baza']:
        entries = FileReader(domain + '.txt').citire_date()
        for entry in entries:
            # entry[0] = question text, entry[1] = answer (per citire_date).
            self.store.adauga_intrebare(entry[0], entry[1], domain)
#!/usr/bin/env python import pandas as pd import lightgbm as lgb from sklearn.model_selection import train_test_split from utils import FileReader reader = FileReader() files = ['./data/train.csv'] # './data/merchants.csv', # './data/historical_transactions.csv', # './data/new_merchant_transactions.csv'] data = reader.load_file(files, is_batch=True) train = data['train'] train['first_active_month'] = pd.to_datetime(train['first_active_month']) train['year'] = train['first_active_month'].dt.year train['month'] = train['first_active_month'].dt.month train_x = train[['year', 'month', 'feature_1', 'feature_2', 'feature_3']] train_y = train['target'] train_x, valid_x, train_y, valid_y = \ train_test_split(train_x, train_y, test_size=0.25, random_state=1234) train_data = lgb.Dataset(train_x, label=train_y) valid_data = lgb.Dataset(valid_x, label=valid_y, reference=train_data) param = { 'num_leaves': 63, 'num_iterations': 1000, 'learning_rate': 0.01,