def preprocess_research(input_str):
    r"""
    Prepare LaTeX-flavored research text for speech synthesis.

    Unwraps ``\emph{}``, drops ``\cite{}``, turns em dashes (``---`` and
    U+2014) into SSML pauses, expands abbreviations so a TTS engine
    pronounces them sensibly, and splits the text into one sentence per
    line.

    Args:
        input_str (str): raw (possibly LaTeX) document text.

    Returns:
        str: cleaned text, one sentence per line.

    test of an em --- dash
    test of an em — dash
    """
    import utool as ut
    # Unwrap \emph{...} to its contents
    inside = ut.named_field('ref', '.*?')
    input_str = re.sub(r'\\emph{' + inside + '}', ut.bref_field('ref'), input_str)
    # input_str = input_str.decode('utf-8')
    input_str = ut.ensure_unicode(input_str)
    pause = re.escape(' <break time="300ms"/> ')
    # pause = ', '
    emdash = u'\u2014'
    # FIX: raw strings — '\s' in a plain literal is an invalid escape in
    # modern Python (DeprecationWarning, SyntaxError eventually)
    input_str = re.sub(r'\s?' + re.escape('---') + r'\s?', pause, input_str)
    input_str = re.sub(r'\s?' + emdash + r'\s?', pause, input_str)
    # Drop citations entirely
    input_str = re.sub(r'\\cite{[^}]*}', '', input_str)
    # FIX: escape the literal dots; the old unescaped '.' matched any
    # character (e.g. 'et alia' -> 'et alla')
    input_str = re.sub(r'et al\.', 'et all', input_str)  # Let rob say et al.
    input_str = re.sub(r' i\.e\.', ' i e ' + pause, input_str)  # Let rob say i.e.
    input_str = re.sub(r'\\r', '', input_str)
    # input_str = re.sub(r'\\n', '', input_str)
    # input_str = re.sub('\\\\', '', input_str)
    # Split the document at periods.
    # NOTE(review): [^a-zA-Z0-1] looks like a typo for [^a-zA-Z0-9]
    # (it only covers digits 0 and 1) — preserved to avoid a behavior
    # change; confirm intent before widening.
    input_str = re.sub(r'\.[^a-zA-Z0-1]+', '.\n', input_str)
    input_str = re.sub('\r\n', '\n', input_str)
    # FIX: blank-line removal needs MULTILINE so ^/$ anchor per line,
    # not only at the very start of the string
    input_str = re.sub('^ *$\n', '', input_str, flags=re.MULTILINE)
    input_str = re.sub('\n\n*', '\n', input_str)
    return input_str
def make_standard_csv(column_list, column_lbls=None):
    """
    Serialize columns of data into excel-dialect CSV text.

    Args:
        column_list (list): list of columns, each a list of cell values.
        column_lbls (list): optional header labels, one per column.

    Returns:
        str: the CSV document as a single string.
    """
    from six.moves import cStringIO as StringIO
    import utool as ut
    import csv
    buf = StringIO()
    # Transpose columns into writer-friendly rows
    rows = ut.listT(column_list)
    if six.PY2:
        # The py2 csv module only accepts encoded bytestrings
        def _encode(cell):
            return ut.ensure_unicode(cell).encode('utf-8')
        rows = [[_encode(cell) for cell in row] for row in rows]
        if column_lbls is not None:
            column_lbls = [_encode(lbl) for lbl in column_lbls]
    writer = csv.writer(buf, dialect=csv.excel)
    if column_lbls is not None:
        writer.writerow(column_lbls)
    writer.writerows(rows)
    return buf.getvalue()
def make_standard_csv(column_list, column_lbls=None):
    """
    Render columns of cell data as excel-dialect CSV.

    Args:
        column_list (list): columns, each an equal-length list of cells.
        column_lbls (list): optional per-column header labels.

    Returns:
        str: CSV text.
    """
    from six.moves import cStringIO as StringIO
    import utool as ut
    import csv
    stream = StringIO()
    row_list = ut.listT(column_list)
    if six.PY2:
        # Encode every cell; py2 csv cannot handle unicode directly
        row_list = [
            [ut.ensure_unicode(cell).encode('utf-8') for cell in row]
            for row in row_list
        ]
        if column_lbls is not None:
            column_lbls = [
                ut.ensure_unicode(lbl).encode('utf-8') for lbl in column_lbls
            ]
    csv_writer = csv.writer(stream, dialect=csv.excel)
    if column_lbls is not None:
        csv_writer.writerow(column_lbls)
    csv_writer.writerows(row_list)
    return stream.getvalue()
def __init__(drive, root_dpath=None, state_fpath=None):
    """
    Register a drive rooted at ``root_dpath`` and open its shelf cache.

    Args:
        root_dpath (str): directory this path registry will index.
        state_fpath (None): external state location; not yet supported.
    """
    drive.root_dpath = ut.truepath(ut.ensure_unicode(root_dpath))
    print('Initializing drive %s' % (drive.root_dpath,))
    ut.assert_exists(drive.root_dpath)
    # External analysis state is not implemented yet
    assert state_fpath is None, 'not yet supported for external analysis'
    drive.cache_fname = join(drive.root_dpath, 'ut_pathreg_cache.shelf')
    # Lazily-populated indexes (mapping from dpath strings to fpath indexes,
    # per-file sizes, and hash lookups); filled on demand
    for lazy_attr in ('fpath_bytes_list_', 'dpath_to_fidx_',
                      'fpath_hashX_list_', 'hash_to_fidxs_'):
        setattr(drive, lazy_attr, None)
    drive.cache = ut.ShelfCacher(drive.cache_fname)
def __init__(drive, root_dpath=None, state_fpath=None):
    """
    Initialize a drive object over ``root_dpath`` with a shelf-backed cache.

    Args:
        root_dpath (str): root directory to index.
        state_fpath (None): must be None; external state is unsupported.
    """
    root = ut.truepath(ut.ensure_unicode(root_dpath))
    drive.root_dpath = root
    print('Initializing drive %s' % (root,))
    ut.assert_exists(root)
    # Mapping from dpath strings to fpath indexes
    assert state_fpath is None, 'not yet supported for external analysis'
    drive.cache_fname = join(root, 'ut_pathreg_cache.shelf')
    # All indexes start unset and are built lazily
    drive.fpath_bytes_list_ = None
    drive.dpath_to_fidx_ = None
    drive.fpath_hashX_list_ = None
    drive.hash_to_fidxs_ = None
    drive.cache = ut.ShelfCacher(drive.cache_fname)
def is_paragraph_end(line_):
    """
    Return True if ``line_`` terminates a paragraph.

    A paragraph ends at a blank line or at a line beginning with one of
    the known LaTeX paragraph markers.
    """
    # Hack, par_marker_list should be an argument
    import utool as ut
    stripped = ut.ensure_unicode(line_.strip())
    if not stripped:
        return True
    par_marker_list = (
        #'\\noindent',
        '\\begin{equation}',
        '\\end{equation}',
        '% ---',
    )
    # str.startswith accepts a tuple of candidate prefixes
    return stripped.startswith(par_marker_list)
def speak(r, to_speak, rate=-5):
    """
    Speak ``to_speak`` aloud through the espeak synthesizer.

    Args:
        r: robot/receiver placeholder (unused; kept for interface compat).
        to_speak (str): text to vocalize; normalized down to ascii first.
        rate (int or str): speed adjustment. ``3``/``2`` (int or str)
            select fixed fast presets; anything else offsets the 200 wpm
            baseline by ``int(rate)``.

    Returns:
        tuple: (stdout, stderr) captured from the espeak subprocess.
    """
    import unicodedata
    import utool as ut
    ut.assert_installed_debian('espeak')
    #if not ut.check_installed_debian('espeak'):
    #    raise AssertionError('espeak must be installed. run sudo apt-get install espeak')
    # Fold unicode (accents, ligatures) down to plain ascii for espeak
    ts1 = ut.ensure_unicode(to_speak)
    ts2 = unicodedata.normalize('NFKD', ts1)
    ts3 = ts2.encode('ascii', 'ignore')
    # ts4 = str(robi.preprocess_research(repr(ts3)))
    ts4 = ts3
    print('-----------')
    print('[robos.speak()] Speaking at rate ' + str(rate) + ':\n\n ')
    print(ts4)
    print('-----------')
    cmd_parts = ['espeak']
    # Interpret SSML markup
    cmd_parts += ['-m']
    # Speed in words per minute.
    # FIX: rate defaults to the int -5, so the old comparisons against
    # the strings '3'/'2' could never fire for int callers; normalize
    # through str() so both spellings select the presets.
    if str(rate) == '3':
        cmd_parts += ['-s', '240']
    elif str(rate) == '2':
        cmd_parts += ['-s', '220']
    else:
        cmd_parts += ['-s', str(200 + int(rate))]
    # Amplitude
    cmd_parts += ['-a', '10']
    # Pitch adjustment
    cmd_parts += ['-p', '80']
    cmd_parts += [ts4]
    #pause = ['-g', '1']  # pause between words (10ms) units
    #speed = ['-s', '175']  # 80 to 450 wpm #def 175
    #pitch = ['-p', '50']
    #stdout = ['--stdout']
    proc = subprocess.Popen(cmd_parts, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    output = proc.communicate()
    return output
def make_csv_table(column_list=None, column_lbls=None, header='',
                   column_type=None, row_lbls=None, transpose=False,
                   precision=2, use_lbl_width=True, comma_repl='<com>',
                   raw=False, new=False, standardize=False):
    """
    Creates a csv table with aligned columns

    make_csv_table

    Args:
        column_list (list): columns of cell data (default: empty table).
        column_lbls (None): one label per column.
        header (str): comment line(s) placed at the top of the output.
        column_type (None): per-column python types used to pick formatting.
        row_lbls (None): labels prepended as a ROWLBL column.
        transpose (bool): swap rows and columns before formatting.
        precision (int): float formatting precision (default 2).
        use_lbl_width (bool): labels count toward their column's width.
        comma_repl (str): replacement for commas inside string cells.
        raw (bool): omit the comment header lines.
        new (bool): alternate header layout (header line only).
        standardize (bool): emit RFC-style quoted cells instead of aligned.

    Returns:
        str: csv_text

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_csv import *  # NOQA
        >>> column_list = [[1, 2, 3], ['A', 'B', 'C']]
        >>> column_lbls = ['num', 'alpha']
        >>> header = '# Test CSV'
        >>> column_type = (int, str)
        >>> row_lbls = None
        >>> transpose = False
        >>> csv_text = make_csv_table(column_list, column_lbls, header, column_type, row_lbls, transpose)
        >>> result = csv_text
        >>> print(result)
        # Test CSV
        # num_rows=3
        #   num,  alpha
              1,      A
              2,      B
              3,      C
    """
    import utool as ut
    # FIX: mutable default argument replaced with a None sentinel
    if column_list is None:
        column_list = []
    assert comma_repl.find(',') == -1, 'comma_repl cannot contain a comma!'
    if transpose:
        column_lbls, row_lbls = row_lbls, column_lbls
        column_list = list(map(list, zip(*column_list)))
    if row_lbls is not None:
        if isinstance(column_list, np.ndarray):
            column_list = column_list.tolist()
        if isinstance(row_lbls, np.ndarray):
            row_lbls = row_lbls.tolist()
        # Row labels become an extra leading column
        column_list = [row_lbls] + column_list
        column_lbls = ['ROWLBL'] + list(map(six.text_type, column_lbls))
        if column_type is not None:
            column_type = [six.text_type] + column_type
    if len(column_list) == 0:
        print('[csv] No columns')
        return header
    column_len = [len(col) for col in column_list]
    num_data = column_len[0]
    if num_data == 0:
        #print('[csv.make_csv_table()] No data. (header=%r)' % (header,))
        return header
    if any([num_data != clen for clen in column_len]):
        print('[csv] column_lbls = %r ' % (column_lbls,))
        print('[csv] column_len = %r ' % (column_len,))
        print('[csv] inconsistent column lengths')
        return header
    if column_type is None:
        # Infer each column's type from its first cell
        column_type = list(map(type, ut.get_list_column(column_list, 0)))
    csv_rows = []
    if new:
        csv_rows.append(header)
    elif not raw:
        csv_rows.append(header)
        if not standardize:
            csv_rows.append('# num_rows=%r' % num_data)
    column_maxlen = []
    column_str_list = []
    if column_lbls is None:
        column_lbls = [''] * len(column_list)

    def _toint(c):
        # Render one integral cell; None and NaN get readable placeholders
        if c is None:
            return 'None'
        try:
            if np.isnan(c):
                return 'nan'
        except TypeError as ex:
            print('------')
            print('[csv] TypeError %r ' % ex)
            print('[csv] _toint(c) failed')
            print('[csv] c = %r ' % c)
            print('[csv] type(c) = %r ' % type(c))
            print('------')
            raise
        return ('%d') % int(c)

    import uuid
    textable_types = [uuid.UUID, six.text_type]
    try:
        if standardize:
            def csv_format(r):
                # RFC-4180 style escaping: double the quotes, then quote
                text = ut.repr2(r, precision=precision)
                escape_chars = ['"', ' ', ',']
                if any([c in text for c in escape_chars]):
                    text = text.replace('"', '""')
                    text = '"' + text + '"'
                return text
            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                col_str = [csv_format(r) for r in col]
                column_str_list.append(col_str)
        else:
            # Loop over every column
            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                # Stringify every row in the column by declared type
                if coltype is list or util_type.is_list(coltype):
                    col_str = [six.text_type(c).replace(',', ' ').replace('.', '<dot>')
                               for c in col]
                elif (coltype is float or util_type.is_float(coltype) or
                      coltype == np.float32 or
                      util_type.is_valid_floattype(coltype)):
                    precision_fmtstr = '%.' + six.text_type(precision) + 'f'
                    col_str = ['None' if r is None else precision_fmtstr % float(r)
                               for r in col]
                elif coltype is int or util_type.is_int(coltype) or coltype == np.int64:
                    col_str = [_toint(c) for c in col]
                elif coltype in textable_types or util_type.is_str(coltype):
                    col_str = [six.text_type(c).replace(',', comma_repl) for c in col]
                else:
                    print('[csv] is_unknown coltype=%r' % (coltype,))
                    try:
                        col_str = [six.text_type(c) for c in col]
                    except UnicodeDecodeError:
                        try:
                            col_str = [ut.ensure_unicode(c) for c in col]
                        except Exception:
                            col_str = [repr(c) for c in col]
                column_str_list.append(col_str)
        for col_str, lbl in zip(column_str_list, column_lbls):
            col_lens = [len(s) for s in col_str]
            max_len = max(col_lens)
            if use_lbl_width:
                # The column label counts towards the column width
                max_len = max(len(lbl), max_len)
            column_maxlen.append(max_len)
    except Exception as ex:
        ut.printex(ex, keys=['col', 'lbl', 'coltype'])
        raise

    def _fmtfn(maxlen):
        # Right-align with two columns of padding
        return ''.join(['%', six.text_type(maxlen + 2), 's'])

    fmtstr = ','.join([_fmtfn(maxlen) for maxlen in column_maxlen])
    try:
        if new:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
        elif not raw:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
    except Exception as ex:
        ut.printex(ex, keys=['fmtstr', 'column_lbls'])
        raise
    for row in zip(*column_str_list):
        csv_rows.append('  ' + fmtstr % row)
    csv_text = '\n'.join(csv_rows)
    return csv_text
# Download latest cmake release by scraping the cmake.org download page,
# then unpack it into the user's home prefix.
import utool as ut
from os.path import join
# FIX: urllib2 only exists on Python 2; fall back to urllib.request,
# which exposes the same Request/urlopen API, on Python 3.
try:
    import urllib2
except ImportError:
    from urllib import request as urllib2
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(r'https://cmake.org/download/', None, headers)
page = urllib2.urlopen(req)
page_str = ut.ensure_unicode(page.read())
# The href we want is on the line following the 'Linux x86_64' anchor.
# FIX: renamed the flag from `next`, which shadowed the builtin.
take_next_line = False
url = None
for line in page_str.split('\n'):
    if take_next_line:
        print(line)
        import parse
        url_suffix = parse.parse('{foo}href="{href}"{other}', line)['href']
        url = r'https://cmake.org' + url_suffix
        break
    if 'Linux x86_64' in line:
        take_next_line = True
# FIX: fail with a clear message instead of a NameError when the page
# layout changes and the anchor is never found.
if url is None:
    raise RuntimeError('could not locate Linux x86_64 download link')
# Prefer the tarball over the self-extracting shell installer
url = url.replace('.sh', '.tar.gz')
cmake_unzipped_fpath = ut.grab_zipped_url(url)
install_prefix = ut.unixpath('~')
for dname in ['bin', 'doc', 'man', 'share']:
    install_dst = join(install_prefix, dname)
    install_src = join(cmake_unzipped_fpath, dname)
    # FIXME: this broke
    #ut.util_path.copy(install_src, install_dst)
    # HACK AROUND IT
def make_csv_table(column_list=None, column_lbls=None, header='',
                   column_type=None, row_lbls=None, transpose=False,
                   precision=2, use_lbl_width=True, comma_repl='<com>',
                   raw=False, new=False, standardize=False):
    """
    Creates a csv table with aligned columns

    make_csv_table

    Args:
        column_list (list): columns of cell data (default: empty table).
        column_lbls (None): one label per column.
        header (str): comment line(s) placed at the top of the output.
        column_type (None): per-column python types used to pick formatting.
        row_lbls (None): labels prepended as a ROWLBL column.
        transpose (bool): swap rows and columns before formatting.
        precision (int): float formatting precision (default 2).
        use_lbl_width (bool): labels count toward their column's width.
        comma_repl (str): replacement for commas inside string cells.
        raw (bool): omit the comment header lines.
        new (bool): alternate header layout (header line only).
        standardize (bool): emit RFC-style quoted cells instead of aligned.

    Returns:
        str: csv_text

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_csv import *  # NOQA
        >>> column_list = [[1, 2, 3], ['A', 'B', 'C']]
        >>> column_lbls = ['num', 'alpha']
        >>> header = '# Test CSV'
        >>> column_type = (int, str)
        >>> row_lbls = None
        >>> transpose = False
        >>> csv_text = make_csv_table(column_list, column_lbls, header, column_type, row_lbls, transpose)
        >>> result = csv_text
        >>> print(result)
        # Test CSV
        # num_rows=3
        #   num,  alpha
              1,      A
              2,      B
              3,      C
    """
    import utool as ut
    # FIX: mutable default argument replaced with a None sentinel
    if column_list is None:
        column_list = []
    assert comma_repl.find(',') == -1, 'comma_repl cannot contain a comma!'
    if transpose:
        column_lbls, row_lbls = row_lbls, column_lbls
        column_list = list(map(list, zip(*column_list)))
    if row_lbls is not None:
        if isinstance(column_list, np.ndarray):
            column_list = column_list.tolist()
        if isinstance(row_lbls, np.ndarray):
            row_lbls = row_lbls.tolist()
        # Row labels become an extra leading column
        column_list = [row_lbls] + column_list
        column_lbls = ['ROWLBL'] + list(map(six.text_type, column_lbls))
        if column_type is not None:
            column_type = [six.text_type] + column_type
    if len(column_list) == 0:
        print('[csv] No columns')
        return header
    column_len = [len(col) for col in column_list]
    num_data = column_len[0]
    if num_data == 0:
        #print('[csv.make_csv_table()] No data. (header=%r)' % (header,))
        return header
    if any([num_data != clen for clen in column_len]):
        print('[csv] column_lbls = %r ' % (column_lbls,))
        print('[csv] column_len = %r ' % (column_len,))
        print('[csv] inconsistent column lengths')
        return header
    if column_type is None:
        # Infer each column's type from its first cell
        column_type = list(map(type, ut.get_list_column(column_list, 0)))
    csv_rows = []
    if new:
        csv_rows.append(header)
    elif not raw:
        csv_rows.append(header)
        if not standardize:
            csv_rows.append('# num_rows=%r' % num_data)
    column_maxlen = []
    column_str_list = []
    if column_lbls is None:
        column_lbls = [''] * len(column_list)

    def _toint(c):
        # Render one integral cell; None and NaN get readable placeholders
        if c is None:
            return 'None'
        try:
            if np.isnan(c):
                return 'nan'
        except TypeError as ex:
            print('------')
            print('[csv] TypeError %r ' % ex)
            print('[csv] _toint(c) failed')
            print('[csv] c = %r ' % c)
            print('[csv] type(c) = %r ' % type(c))
            print('------')
            raise
        return ('%d') % int(c)

    import uuid
    textable_types = [uuid.UUID, six.text_type]
    try:
        if standardize:
            def csv_format(r):
                # RFC-4180 style escaping: double the quotes, then quote
                text = ut.repr2(r, precision=precision)
                escape_chars = ['"', ' ', ',']
                if any([c in text for c in escape_chars]):
                    text = text.replace('"', '""')
                    text = '"' + text + '"'
                return text
            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                col_str = [csv_format(r) for r in col]
                column_str_list.append(col_str)
        else:
            # Loop over every column
            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                # Stringify every row in the column by declared type
                if coltype is list or util_type.is_list(coltype):
                    col_str = [six.text_type(c).replace(',', ' ').replace('.', '<dot>')
                               for c in col]
                elif (coltype is float or util_type.is_float(coltype) or
                      coltype == np.float32 or
                      util_type.is_valid_floattype(coltype)):
                    precision_fmtstr = '%.' + six.text_type(precision) + 'f'
                    col_str = ['None' if r is None else precision_fmtstr % float(r)
                               for r in col]
                elif coltype is int or util_type.is_int(coltype) or coltype == np.int64:
                    col_str = [_toint(c) for c in col]
                elif coltype in textable_types or util_type.is_str(coltype):
                    col_str = [six.text_type(c).replace(',', comma_repl) for c in col]
                else:
                    print('[csv] is_unknown coltype=%r' % (coltype,))
                    try:
                        col_str = [six.text_type(c) for c in col]
                    except UnicodeDecodeError:
                        try:
                            col_str = [ut.ensure_unicode(c) for c in col]
                        except Exception:
                            col_str = [repr(c) for c in col]
                column_str_list.append(col_str)
        for col_str, lbl in zip(column_str_list, column_lbls):
            col_lens = [len(s) for s in col_str]
            max_len = max(col_lens)
            if use_lbl_width:
                # The column label counts towards the column width
                max_len = max(len(lbl), max_len)
            column_maxlen.append(max_len)
    except Exception as ex:
        ut.printex(ex, keys=['col', 'lbl', 'coltype'])
        raise

    def _fmtfn(maxlen):
        # Right-align with two columns of padding
        return ''.join(['%', six.text_type(maxlen + 2), 's'])

    fmtstr = ','.join([_fmtfn(maxlen) for maxlen in column_maxlen])
    try:
        if new:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
        elif not raw:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
    except Exception as ex:
        ut.printex(ex, keys=['fmtstr', 'column_lbls'])
        raise
    for row in zip(*column_str_list):
        csv_rows.append('  ' + fmtstr % row)
    csv_text = '\n'.join(csv_rows)
    return csv_text
def find_pyfunc_above_row(line_list, row, orclass=False):
    """
    Find the name of the function (or class) defined at or above ``row``.

    originally part of the vim plugin

    Args:
        line_list (list): source file lines.
        row (int): line index to search upward from (at most 200 lines).
        orclass (bool): if True a ``class`` statement also counts as a hit.

    Returns:
        tuple: (funcname, searchlines, func_pos, foundline) — funcname is
            None when nothing was found; searchlines is the stripped lines
            inspected (for debugging); func_pos is the index of the last
            line inspected; foundline is that line's text.

    CommandLine:
        python -m utool.util_inspect --test-find_pyfunc_above_row

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_inspect import *  # NOQA
        >>> import utool as ut
        >>> func = find_pyfunc_above_row
        >>> fpath = meta_util_six.get_funcglobals(func)['__file__'].replace('.pyc', '.py')
        >>> line_list = ut.read_from(fpath, aslines=True)
        >>> row = meta_util_six.get_funccode(func).co_firstlineno + 1
        >>> funcname, searchlines, func_pos, foundline = find_pyfunc_above_row(line_list, row)
        >>> print(funcname)
        find_pyfunc_above_row

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_inspect import *  # NOQA
        >>> import utool as ut
        >>> fpath = ut.util_inspect.__file__.replace('.pyc', '.py')
        >>> line_list = ut.read_from(fpath, aslines=True)
        >>> row = 1608
        >>> funcname, searchlines, func_pos, foundline = find_pyfunc_above_row(line_list, row, orclass=True)
        >>> print(funcname)
        find_pyfunc_above_row
    """
    import utool as ut
    searchlines = []  # for debugging
    funcname = None
    searchline = None
    func_pos = row
    # Janky way to find function name
    func_sentinal = 'def '
    method_sentinal = '    def '
    class_sentinal = 'class '
    for ix in range(200):
        func_pos = row - ix
        if func_pos < 0:
            # FIX: a negative index would silently wrap around and search
            # from the END of the file; stop at the top instead.
            break
        searchline = ut.ensure_unicode(line_list[func_pos])
        cleanline = searchline.strip(' ')
        searchlines.append(cleanline)
        if searchline.startswith(func_sentinal):  # and cleanline.endswith(':'):
            # Found a valid function name
            funcname = parse_callname(searchline, func_sentinal)
            if funcname is not None:
                break
        if orclass and searchline.startswith(class_sentinal):
            # Found a valid class name (as funcname)
            funcname = parse_callname(searchline, class_sentinal)
            if funcname is not None:
                break
        if searchline.startswith(method_sentinal):  # and cleanline.endswith(':'):
            # Found a valid method name; qualify it with its enclosing class
            funcname = parse_callname(searchline, method_sentinal)
            if funcname is not None:
                classline, classpos = find_pyclass_above_row(line_list, func_pos)
                classname = parse_callname(classline, class_sentinal)
                if classname is not None:
                    funcname = '.'.join([classname, funcname])
                break
    else:
        funcname = None
    foundline = searchline
    return funcname, searchlines, func_pos, foundline
def set_figtitle(
    figtitle,
    subtitle='',
    forcefignum=True,
    incanvas=True,
    size=None,
    fontfamily=None,
    fontweight=None,
    fig=None,
    font=None,
):
    r"""
    Set the suptitle on a figure and mirror it in the window title bar.

    Args:
        figtitle (str): main title (None is treated as '').
        subtitle (str): (default = '') rendered on a second line.
        forcefignum (bool): (default = True)
        incanvas (bool): draw the title inside the canvas (default = True).
        size (None): font size (default = None).
        fontfamily (None): (default = None)
        fontweight (None): (default = None)
        fig (None): target figure; defaults to the current figure.
        font (None): DEPRECATED; ignored apart from a warning.

    CommandLine:
        python -m wbia.plottool.custom_figure set_figtitle --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.plottool.custom_figure import *  # NOQA
        >>> import wbia.plottool as pt
        >>> fig = pt.figure(fnum=1, doclf=True)
        >>> result = pt.set_figtitle(figtitle='figtitle', fig=fig)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> pt.show_if_requested()
    """
    # if size is None:
    #     size = FIGTITLE_SIZE
    if font is not None:
        print('WARNING set_figtitle font kwarg is DEPRICATED')
    if figtitle is None:
        figtitle = ''
    if fig is None:
        fig = gcf()
    figtitle = ut.ensure_unicode(figtitle)
    subtitle = ut.ensure_unicode(subtitle)
    if incanvas:
        if subtitle != '':
            subtitle = '\n' + subtitle
        # Only forward font properties the caller actually supplied
        prop = {
            'family': fontfamily,
            'weight': fontweight,
            'size': size,
        }
        prop = {k: v for k, v in prop.items() if v is not None}
        sup = fig.suptitle(figtitle + subtitle)
        if prop:
            fontproperties = sup.get_fontproperties().copy()
            for key, val in prop.items():
                getattr(fontproperties, 'set_' + key)(val)
            sup.set_fontproperties(fontproperties)
            # fontproperties = mpl.font_manager.FontProperties(**prop)
    else:
        fig.suptitle('')
    # Set title in the window
    window_figtitle = ('fig(%d) ' % fig.number) + figtitle
    window_figtitle = window_figtitle.replace('\n', ' ')
    # FIX: FigureCanvasBase.set_window_title was deprecated in matplotlib
    # 3.4 and removed in 3.6; go through the canvas manager when present.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(window_figtitle)
    else:
        fig.canvas.set_window_title(window_figtitle)