# The functions below assume, among others, `import time` and a
# colorlabels-style console module imported as `cl`.

def demo2():
    cl.section('Demo 2')

    username = ''
    while not username:
        username = cl.input('Username: ').strip()

    password = ''
    while not password:
        password = cl.password('Password: ')

    cl.success('Successfully logged in.')

    with cl.progress('Checking for update...', mode=cl.PROGRESS_SPIN):
        time.sleep(3)

    choice = ''
    while choice.lower() not in {'y', 'n'}:
        choice = cl.question(
            'A new version is present, would you like to update? (Y/N)').strip()

    if choice.lower() == 'y':
        with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
            time.sleep(1)
            p.update(0.2, ' 20% (1MB/5MB) ETA 4s')
            time.sleep(2)
            p.update(0.4, ' 40% (2MB/5MB) ETA 3s')
        cl.error('Failed to download package. SSL handshake error.')
    else:
        cl.warning('Update delayed!')

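# demo2 repeats the same "ask until the input is acceptable" loop for the
# username, the password, and the update choice. A helper along the lines of
# the sketch below could factor that pattern out (illustrative only, not part
# of the original demo; `validate` is any predicate on the entered string,
# and `reader` defaults to cl.input but can be cl.password for hidden input):
def prompt_until_valid(prompt, validate, reader=None):
    reader = reader or cl.input
    while True:
        value = reader(prompt).strip()
        if validate(value):
            return value

# e.g. username = prompt_until_valid('Username: ', bool)
#      choice = prompt_until_valid('Update? (Y/N) ',
#                                  lambda s: s.lower() in {'y', 'n'})
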
def demo1():
    cl.section('Demo 1')
    cl.info('Test program started.')

    with cl.progress('Running test case 1...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 1: Passed')

    with cl.progress('Running test case 2...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 2: Passed')

    with cl.progress('Running test case 3...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.success('Test case 3: Passed')

    with cl.progress('Running test case 4...', cl.PROGRESS_SPIN, erase=True):
        time.sleep(3)
    cl.error('Test case 4: Failed')
    cl.info('Input: 1111')
    cl.info('Expected output: 2222')
    cl.info('Got: 3333')

    cl.section('Test Result')
    cl.info('3 out of 4 test cases passed.')
    cl.info('Pass rate: 75%')

def animations():
    cl.section('Progress Animations')

    cl.item('Static')
    cl.progress('Downloading...')
    time.sleep(3)

    cl.item('Spin')
    with cl.progress('Downloading...', mode=cl.PROGRESS_SPIN):
        time.sleep(3)

    cl.item('Expand')
    with cl.progress('Downloading', mode=cl.PROGRESS_EXPAND):
        time.sleep(6)

    cl.item('Move')
    with cl.progress('Downloading ', mode=cl.PROGRESS_MOVE):
        time.sleep(4)

    cl.item('Determinate')
    with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
        time.sleep(1)
        p.update(0.2, ' 20% (1MB/5MB) ETA 4s')
        time.sleep(1)
        p.update(0.4, ' 40% (2MB/5MB) ETA 3s')
        time.sleep(1)
        p.update(0.6, ' 60% (3MB/5MB) ETA 2s')
        time.sleep(1)
        p.update(0.8, ' 80% (4MB/5MB) ETA 1s')
        time.sleep(1)
        p.update(1, ' 100% (5MB/5MB)')

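# A sketch of driving PROGRESS_DETERMINATE from actual byte counts rather
# than the scripted steps above (illustrative only; `chunks` is any iterable
# of byte chunks and `total_bytes` their known total size):
def download_with_progress(chunks, total_bytes):
    done = 0
    with cl.progress('Downloading ', mode=cl.PROGRESS_DETERMINATE) as p:
        for chunk in chunks:
            done += len(chunk)
            percent = 100 * done // total_bytes
            p.update(done / total_bytes,
                     ' %d%% (%dB/%dB)' % (percent, done, total_bytes))
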
def text_preprocessor(input_filename, *, preprocessor_cls='TextPreprocessor',
                      custom_stop_words=None, lem_ignore_patterns=None,
                      remove_duplicates=False):
    cl.section('Text Preprocessor')
    input_filename = data_source_file(input_filename)
    preprocessor_cls = globals()[preprocessor_cls]

    with TimeMeasure('preprocess_text'):
        result = preprocess_csv(input_filename,
                                preprocessor_cls=preprocessor_cls,
                                custom_stop_words=custom_stop_words,
                                lem_ignore_patterns=lem_ignore_patterns)
        if remove_duplicates:
            result = remove_duplicate_text(result)
        result = tuple(result)
        cl.info('Effective data size: %d' % len(result))

    with TimeMeasure('save_preprocessed'):
        save_preprocessed(result, input_filename)

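# Example invocation (hypothetical file name and stop words; note that
# preprocessor_cls is resolved by name through globals(), so the class must
# be defined or imported in this module):
#     text_preprocessor('tweets.csv', custom_stop_words={'rt', 'amp'},
#                       remove_duplicates=True)
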
def retweets_recover(csvfilename):
    cl.section('Retweets Recover')
    cl.info('Recovering file: %s' % csvfilename)
    csvfilename = data_source_file(csvfilename)
    result = recover_from_csv(csvfilename)
    exportfilename = name_with_title_suffix(csvfilename, '-recovered')
    export_csv(result, exportfilename)
    return os.path.basename(exportfilename)

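# Example (hypothetical file name). The basename of the export is returned
# so a caller can feed it to the next pipeline stage:
#     recovered = retweets_recover('tweets.csv')  # e.g. 'tweets-recovered.csv'
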
def welcome():
    cl.section('ColorLabels Demo')
    cl.newline()
    cl.item('1. Overview of Labels')
    cl.item('2. Progress Animations')
    cl.item('3. Show Demo 1')
    cl.item('4. Show Demo 2')
    cl.item('5. Exit')
    cl.newline()

def overview():
    cl.section('Overview of Labels')
    cl.success('Good job! All test cases passed!')
    cl.warning('Warning! Security update delayed!')
    cl.error('Error! Failed to write file!')
    cl.info('Server listening on port 8888.')
    cl.progress('Downloading package, please wait...')
    cl.plain('Nothing interesting.')
    cl.question('A new version is present, would you like to update? (Y/N)')

def model_analyzer(modeldesc, sourcedesc, *, num_top_words=30,
                   num_top_docs=30, debug=False):
    cl.section('LDA Model Analyzer')
    cl.info('Model description: %s' % modeldesc)
    cl.info('Source description: %s' % sourcedesc)

    with TimeMeasure('load_all'):
        ldamodel, corpus, prep_items, source_texts = load_all(modeldesc,
                                                              sourcedesc)

    with TimeMeasure('analyzing'):
        prep_ids = tuple(item[0] for item in prep_items)
        dictionary = ldamodel.id2word
        num_topics = ldamodel.num_topics
        topics = [{
            'topic_id': i,
            'words': get_topic_words(ldamodel, i, num_top_words),
            'popularity': 0.0,
            'documents': collections.defaultdict(float)
        } for i in range(num_topics)]

        if debug:
            debugfilename = model_file('ldadoctopics-%s.txt' % modeldesc)
            with open(debugfilename, 'w', encoding='utf-8') as debugfile:
                for index, doc in enumerate(corpus):
                    text_id = prep_ids[index]
                    doc_topics = ldamodel.get_document_topics(doc)
                    text = source_texts[text_id].strip()
                    debugfile.write('%s -> %r, %s\n'
                                    % (text_id, doc_topics, text))

        # Cache per-term topic distributions so the document loop below
        # does not recompute them for every occurrence of a word.
        term_topics_cache = {}
        for word in dictionary:
            term_topics_cache[word] = ldamodel.get_term_topics(word)

        for index, doc in enumerate(corpus):
            for topic_id, prob in ldamodel.get_document_topics(doc):
                topics[topic_id]['popularity'] += prob
            for word, freq in doc:
                if word not in dictionary:
                    continue
                for topic_id, prob in term_topics_cache[word]:
                    topics[topic_id]['documents'][index] += prob * freq

        for topic in topics:
            topic['documents'] = get_topic_top_docs(topic['documents'],
                                                    num_top_docs, prep_ids,
                                                    source_texts)

        topics = sorted(topics, key=lambda x: x['popularity'], reverse=True)

    with TimeMeasure('export_markdown'):
        export_markdown(modeldesc, sourcedesc, topics)

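# Example (hypothetical descriptions): modeldesc should match a model saved
# by lda_topic_model below, and sourcedesc the corresponding source data:
#     model_analyzer('tweets-1000-10-50x1-20200101000000', 'tweets',
#                    num_top_words=20, debug=True)
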
def visualization_twlda(keyword, desc, desc_show, userinfofile, topusers=20,
                        encoding='utf-8', portable=True, open_browser=True):
    cl.section('Twitter-LDA Visualization')
    user_topic = parse_user_topic(desc, encoding=encoding)
    topic_words = parse_topic_words(desc, encoding=encoding)
    user_info = load_user_info(data_source_file(userinfofile))
    result = organize_data('test', user_topic, topic_words, user_info,
                           topusers)
    return export_html(keyword, desc_show, result, portable, open_browser)

def twlda_multiple_run(num_topics_range, iteration, desc_prefix,
                       show_console_output=True):
    cl.section('Twitter-LDA Multiple Run')
    for topics in num_topics_range:
        cl.info('Running with %d topics' % topics)
        twitter_lda(output_desc='%s-%d' % (desc_prefix, topics),
                    topics=topics, iteration=iteration,
                    show_console_output=show_console_output)

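# Example (hypothetical values): train one model per topic count from 5 to
# 15, producing output descriptions 'tweets-5' through 'tweets-15':
#     twlda_multiple_run(range(5, 16), iteration=1000, desc_prefix='tweets')
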
def data_retriever(data_source, query, save_filename, *, lang='', proxy=None,
                   remove_duplicates=False, twapi_max=None, twapi_sleep_time=0,
                   twscrape_poolsize=20, twscrape_begindate=None,
                   ghapi_org=None, ghapi_since=None, soapi_begindate=None):
    cl.section('Data Retriever')
    cl.info('Starting to retrieve query: %s, or org: %s' % (query, ghapi_org))
    cl.info('From data source: %s' % data_source)
    cl.info('Using proxy: %s' % proxy)
    cl.info('Remove duplicates: %s' % remove_duplicates)

    if proxy:
        os.environ['HTTP_PROXY'] = proxy
        os.environ['HTTPS_PROXY'] = proxy

    if data_source == 'twitter_standard_api':
        data = twapi_search(query, twapi_max, sleep_time=twapi_sleep_time,
                            lang=lang)
    elif data_source == 'twitterscraper':
        data = twscrape_search(query, lang=lang, poolsize=twscrape_poolsize,
                               begindate=twscrape_begindate)
    elif data_source == 'github_api':
        data = github_issue_org_fetch(ghapi_org, ghapi_since)
    elif data_source == 'stackoverflow_api':
        data = soapi_search(query, begindate=soapi_begindate)
    else:
        cl.error('Data source %r is not implemented' % data_source)
        sys.exit(-1)

    if remove_duplicates:
        data = iterator_aggregate_list(data)
        data_no_duplicate_text = remove_duplicate_text(data)
        cl.info('Exporting data without duplicate text')
        export_csv(data_no_duplicate_text, data_source_file(save_filename))
        save_filename_full = name_with_title_suffix(save_filename, '-full')
        cl.info('Exporting full data')
        export_csv(data, data_source_file(save_filename_full))
    else:
        export_csv(data, data_source_file(save_filename))

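# Example (hypothetical query and file name; twscrape_begindate is handed
# straight through to the scraper backend, presumably as a datetime.date):
#     data_retriever('twitterscraper', 'python', 'python-tweets.csv',
#                    lang='en', remove_duplicates=True,
#                    twscrape_begindate=datetime.date(2020, 1, 1))
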
def random_sampler(csvfilename, amount):
    cl.section('Data Random Sampler')
    cl.info('Random sampling file: %s' % csvfilename)
    cl.info('Amount: %d' % amount)
    csvfilename = data_source_file(csvfilename)
    data = list(csv_reader(csvfilename))
    random.shuffle(data)
    data = data[:amount]
    exportfilename = name_with_title_suffix(csvfilename,
                                            '-sample-%d' % amount)
    export_csv(data, exportfilename)

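# Example (hypothetical file name): keep 200 rows chosen uniformly at
# random, exported as 'python-tweets-sample-200.csv':
#     random_sampler('python-tweets.csv', 200)
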
def text_preprocessor_user(sourcedesc):
    cl.section('Text Preprocessor Grouping By User')
    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', sourcedesc)
    csvfilename = data_source_file('%s.csv' % sourcedesc)

    with TimeMeasure('preprocess_text'):
        result = list(preprocess_csv(csvfilename))

    with TimeMeasure('save_preprocessed'):
        savefilename = name_with_title_suffix(csvfilename, '-user')
        export_csv(result, savefilename)

def show_level_stats(level_status, items_per_row=5):
    cl.newline()
    cl.section('Level stats:')
    field_width = len(str(NUM_LEVELS))
    rows = math.ceil(NUM_LEVELS / items_per_row)
    for row in range(rows):
        cl.info(' '.join(
            show_level_block(x + 1, field_width, level_status[x])
            for x in range(row * items_per_row,
                           min((row + 1) * items_per_row, NUM_LEVELS))))
    cl.progress(f'Your progress: {sum(level_status)}/{NUM_LEVELS}')
    check_milestones(level_status)
    cl.newline()

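# Layout example (hypothetical numbers): with NUM_LEVELS = 12 and the
# default items_per_row = 5, rows = ceil(12 / 5) = 3, so the grid prints
# levels 1-5, 6-10 and 11-12 on successive lines.
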
def main():
    cl.section('Welcome to Python Challenges')
    cl.info(f'Python version: {PYTHON_VERSION}')
    level_status = [False] * NUM_LEVELS

    while True:
        show_level_stats(level_status)
        cl.info(f'Enter a level number (1-{NUM_LEVELS}) to solve a level, '
                'or enter 0 to view source code')
        level_number = get_level_number()

        if level_number == 0:
            print(SOURCE, end='')
            continue
        if level_status[level_number - 1]:
            cl.success('You already solved this level')
            continue

        level_func = globals()[f'level_{level_number}']
        answer = get_input(f'Your answer for level {level_number}: ')
        timer = threading.Timer(CHALLENGE_TIMEOUT, die, args=('Timeout!',))
        timer.start()

        try:
            global_check(answer)
            answer = ast.literal_eval(answer.strip())
        except Exception:  # pylint: disable=broad-except
            timer.cancel()
            cl.error('Wrong answer')
            if DEBUG_MODE:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            level_func(answer)
        except Exception:  # pylint: disable=broad-except
            timer.cancel()
            cl.error('Wrong answer')
            if DEBUG_MODE:
                traceback.print_exc(file=sys.stdout)
            continue

        timer.cancel()
        cl.success('Correct answer')
        level_status[level_number - 1] = True

def twitter_lda(*, output_desc, topics, iteration, alpha_g=None,
                beta_word=0.01, beta_b=0.01, gamma=20,
                show_console_output=True):
    cl.section('Twitter-LDA Runner')
    cl.info('Output description: %s' % output_desc)
    assert re.fullmatch(r'[-_0-9a-zA-Z]+', output_desc)

    if alpha_g is None:
        alpha_g = 50 / topics

    set_parameters(topics, alpha_g, beta_word, beta_b, gamma, iteration)
    with TimeMeasure('Twitter-LDA training'):
        run_twlda(show_console_output=show_console_output)
    move_result(output_desc)

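# Example (hypothetical description): with topics=10 and alpha_g left unset,
# the function falls back to the common 50/K heuristic, here 50 / 10 = 5.0:
#     twitter_lda(output_desc='tweets-10', topics=10, iteration=1000)
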
def text_preprocessor_twlda(sourcedesc, *, tweet_min_length=3,
                            user_min_tweets=1, remove_duplicates=False):
    cl.section('Text Preprocessor For Twitter-LDA')
    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', sourcedesc)
    input_filename = data_source_file('%s.csv' % sourcedesc)

    with TimeMeasure('preprocess_text'):
        prepdata, sourcedata = preprocess_csv(input_filename,
                                              tweet_min_length,
                                              user_min_tweets,
                                              remove_duplicates)

    with TimeMeasure('save_preprocessed'):
        save_preprocessed(prepdata, sourcedata)

def lda_topic_model(input_filename, keyword, size, *, num_topics,
                    iterations=50, passes=1, chunksize=2000, eval_every=10,
                    verbose=False, gamma_threshold=0.001, filter_no_below=5,
                    filter_no_above=0.5, filter_keep_n=100000,
                    open_browser=True):
    cl.section('LDA Topic Model Training')
    cl.info('Keyword: %s' % keyword)
    cl.info('Data size: %d' % size)
    cl.info('Number of topics: %d' % num_topics)
    cl.info('Iterations: %d' % iterations)
    cl.info('Passes: %d' % passes)
    cl.info('Chunk size: %d' % chunksize)
    cl.info('Eval every: %s' % eval_every)
    cl.info('Verbose: %s' % verbose)
    cl.info('Gamma Threshold: %f' % gamma_threshold)
    cl.info('Filter no below: %d' % filter_no_below)
    cl.info('Filter no above: %f' % filter_no_above)
    cl.info('Filter keep n: %d' % filter_keep_n)

    assert re.fullmatch(r'[-_0-9a-zA-Z+]+', keyword)
    input_filename = data_source_file(input_filename)
    description = '%s-%d-%d-%dx%d-%s' % (keyword, size, num_topics,
                                         iterations, passes,
                                         time.strftime('%Y%m%d%H%M%S'))

    if verbose:
        log_filename = log_file('ldalog-%s.log' % description)
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.DEBUG, filename=log_filename)
        cl.info('Writing logs into file: %s' % log_filename)

    with TimeMeasure('load_preprocessed_text'):
        preprocessed_texts = file_read_json(input_filename)
        preprocessed_texts = [item[1] for item in preprocessed_texts]

    with TimeMeasure('gen_dict_corpus'):
        cl.progress('Generating dictionary and corpus...')
        dictionary = Dictionary(preprocessed_texts, prune_at=None)
        dictionary.filter_extremes(no_below=filter_no_below,
                                   no_above=filter_no_above,
                                   keep_n=filter_keep_n)
        dictionary.compactify()
        corpus = [dictionary.doc2bow(text) for text in preprocessed_texts]
        corpusfilename = model_file('ldacorpus-%s.json' % description)
        file_write_json(corpusfilename, corpus)
        cl.success('Corpus saved as: %s' % corpusfilename)

    with TimeMeasure('training'):
        cl.progress('Performing training...')
        with NoConsoleOutput():
            ldamodel = LdaMulticore(corpus, workers=N_WORKERS,
                                    id2word=dictionary,
                                    num_topics=num_topics,
                                    iterations=iterations, passes=passes,
                                    chunksize=chunksize,
                                    eval_every=eval_every,
                                    gamma_threshold=gamma_threshold,
                                    alpha='symmetric', eta='auto')
        cl.success('Training finished.')

    with TimeMeasure('save_model'):
        modelfilename = 'ldamodel-%s' % description
        ldamodel.save(model_file(modelfilename))
        cl.success('Model saved as: %s' % modelfilename)

    with TimeMeasure('measure_coherence'):
        cl.progress('Measuring topic coherence...')
        measure_coherence(ldamodel, preprocessed_texts, corpus, dictionary)

    with TimeMeasure('vis_save'):
        cl.progress('Preparing visualization...')
        vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
        htmlfilename = 'ldavis-%s.html' % description
        htmlfilename = report_file(htmlfilename)
        pyLDAvis.save_html(vis, htmlfilename)
        cl.success('Visualized result saved in file: %s' % htmlfilename)

    if open_browser:
        open_html_in_browser(htmlfilename)

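# Example (hypothetical arguments; the input JSON is expected to hold
# (id, token_list) pairs, matching the `item[1]` access above):
#     lda_topic_model('tweets-preprocessed.json', 'tweets', 1000,
#                     num_topics=10, iterations=100, passes=2, verbose=True)
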
def user_info_retriever(usernames, csvfilename):
    cl.section('Twitter User Info Retriever')
    csvfilename = data_source_file(csvfilename)
    result = retrieve_user_info(usernames)
    export_csv(result, csvfilename)