def parse_exponents(ltx):
    """
    Rewrite every exponent (``base^{exp}``) in a latex string as unicode.

    Has to run before ``to_unicode``. The string is processed one caret at a
    time until no "^" is left, so each conversion is expected to remove the
    caret it handled.
    """
    while "^" in ltx:
        # position of the next exponent marker
        caret = ltx.index("^")
        before_caret = caret - 1

        # base: either a braced group ending right before the caret, or the
        # single character preceding it
        if ltx[before_caret] == "}":
            # presumably the index of the brace opening that group -- confirm
            # against get_opened_parenthesis
            base_start = get_opened_parenthesis(ltx[:caret])
            base = ltx[base_start:before_caret]
        else:
            base_start = before_caret
            base = ltx[before_caret]

        # exponent: the text after the caret up to the offset reported by
        # get_closed_parenthesis
        exp_end = get_closed_parenthesis(ltx[caret + 1:])
        exp = ltx[caret + 2:caret + 1 + exp_end]

        # multi-character bases are wrapped in braces before conversion
        if len(base) > 1:
            snippet = f"{{{base}}}^{{{exp}}}"
        else:
            snippet = f"{base}^{{{exp}}}"
        uni = replace(snippet)

        ltx = ltx[:base_start] + uni + ltx[caret + exp_end + 2:]
    return ltx
def sanitize_text(text):
    """
    Normalise *text* and convert any latex it contains.

    Line feeds and carriage returns become spaces, the result is lower-cased
    and stripped. If it still contains a backslash, ``_{`` or ``^{`` it is
    passed through ``replace`` (as a one-element list) and the converted
    element is returned.
    """
    flattened = text.replace('\n', ' ').replace('\r', ' ')
    cleaned = flattened.lower().strip()

    latex_markers = ('\\', '_{', '^{')
    if not any(marker in cleaned for marker in latex_markers):
        return cleaned
    return replace([cleaned])[0]
def explain(self, unicode: bool = False):
    """
    Print a summary of this object: class name, docstring and symbol table.

    The class docstring is printed with blank lines dropped and every
    remaining line stripped ("None" when there is no docstring). If the
    instance has a ``description`` mapping, each entry is listed either as
    ``symbol: text`` (string values) or as ``symbol: [type] text`` (values
    indexed as ``value[0]`` = text, ``value[1]`` = type), with symbols and
    type names padded to a common width. An empty ``description`` yields an
    empty symbol list instead of raising.

    :param unicode: when true, each symbol is passed through
        ``unicodeit.replace`` before it is printed
    :return: None; the summary is written to stdout
    """
    name = self.__class__.__name__
    doc = self.__doc__
    if doc is None:
        cleaned_doc = "None"
    else:
        cleaned_doc = '\n'.join(
            line.strip() for line in doc.split('\n') if line.strip()
        )

    if hasattr(self, 'description'):
        description = self.description

        # default=0 keeps an empty description from raising ValueError
        max_symbol_length = max((len(key) for key in description), default=0)

        # render each non-string entry's type once; reused for width and output
        type_names = {
            key: format_type(description[key][1])
            for key in description
            if not isinstance(description[key], str)
        }
        max_type_length = max(
            (len(type_name) for type_name in type_names.values()), default=0
        )

        symbols = []
        for key in description:
            data = description[key]
            symbol = unicodeit.replace(key) if unicode else key
            # the width is taken from the raw keys, so converted symbols that
            # are longer than every key are simply left unpadded
            message = f"{symbol.ljust(max_symbol_length)}: "
            if isinstance(data, str):
                # skip the "[type] " column so the texts line up
                message += ' ' * (max_type_length + 3) + data
            else:
                padded_type = type_names[key].ljust(max_type_length)
                message += f"[{padded_type}] {data[0]}"
            symbols.append(message)
        symbols = '\n'.join(symbols)
    else:
        symbols = 'None'

    # NOTE(review): the template is reproduced exactly as it appears in this
    # view (sections separated by single spaces) -- confirm the intended
    # layout against the file on disk.
    result = f"""{'-' * 20} description for '{name}': {cleaned_doc} symbols: {symbols} {'-' * 20}"""
    print(result)
def to_unicode(ltx):
    """
    Convert a latex string to unicode characters
    """
    # Hyphens are masked with a placeholder before the conversion and
    # restored as " -" afterwards, so replace() never sees them.
    masked = ltx.replace("-", "MINUS")

    # (needle, substitute) pairs, applied in this order: pad operators,
    # drop braces, tighten the spacing inside parentheses.
    substitutions = (
        ("=", " = "),
        ("+", " +"),
        ("*", " *"),
        ("-", " -"),  # never matches here: hyphens are already masked
        ("}", ""),
        ("{", ""),
        ("( ", "("),
        (" )", ")"),
    )
    for needle, substitute in substitutions:
        masked = masked.replace(needle, substitute)

    # to unicode
    converted = replace(masked)
    return converted.replace("MINUS", " -")
def UnicodeIt():
    """
    Run ``unicodeit.replace`` over the last visual selection of the current
    vim buffer and write the result back in place.

    Text before the selection on its first line and after it on its last
    line is kept and re-attached around the converted text.
    """
    # derived from https://stackoverflow.com/a/27936782
    import unicodeit

    buf = vim.current.buffer
    # line numbers count from 1, column numbers count from 0
    first_lnum, first_col = buf.mark('<')
    last_lnum, last_col = buf.mark('>')
    selection = vim.eval('getline({}, {})'.format(first_lnum, last_lnum))

    # untouched text surrounding the selection
    prefix = selection[0][0:first_col]
    suffix = selection[-1][last_col + 1:]

    # trim the first/last line down to the selected columns
    if len(selection) == 1:
        selection[0] = selection[0][first_col:last_col + 1]
    else:
        selection[0] = selection[0][first_col:]
        selection[-1] = selection[-1][:last_col + 1]

    # selection is a list of strings, potentially of length one.
    # unicodeit.replace takes such a list and returns a list of strings.
    converted = unicodeit.replace(selection)

    prefix = bytes(prefix, encoding='utf-8')
    suffix = bytes(suffix, encoding='utf-8')

    # `vim.current.buffer[2:2] = ...` inserts after line 2 (counting from 1)
    # `vim.current.buffer[2:3] = ...` replaces line 3 (counting from 1)
    line_count = len(converted)
    if line_count == 1:
        merged = bytes(prefix) + converted[0] + bytes(suffix)
        vim.current.buffer[first_lnum-1:first_lnum] = [merged]
    elif line_count == 2:
        head = prefix + converted[0]
        tail = converted[1] + suffix
        vim.current.buffer[first_lnum - 1:first_lnum + 1] = [head, tail]
    else:
        head = [prefix + converted[0]]
        middle = converted[1:-1]
        tail = [converted[-1] + suffix]
        vim.current.buffer[first_lnum-1:last_lnum] = head + middle + tail
# Select the GTK 2.0 bindings; this has to run before `gtk` is imported.
pygtk.require('2.0')
import gtk
from unicodeit import replace
import subprocess


def ShowPopup(txt):
    """Show *txt* as a selectable label in a top-level window titled "UnicodeIt".

    Destroying the window calls ``gtk.main_quit()``. The function only builds
    and shows the window; it does not start the GTK main loop itself.
    """
    label = gtk.Label(txt)
    label.set_selectable(True)
    window = gtk.Window(gtk.WINDOW_TOPLEVEL)
    window.connect("destroy", lambda w: gtk.main_quit())
    window.set_title("UnicodeIt")
    window.add(label)
    window.resize(100,100)
    window.show_all()


# Script body: read the text of the PRIMARY selection, convert it with
# unicodeit and put the result on a second clipboard.
clipboard = gtk.clipboard_get("PRIMARY")
# presumably the default CLIPBOARD selection, i.e. what ctrl+v pastes -- confirm
cb2 = gtk.clipboard_get()
# NOTE(review): no check that any text was returned before it is converted
text = clipboard.wait_for_text()
# replace() is given a one-element list; the converted text is element 0
utxt = replace([text])
cb2.set_text(utxt[0])
cb2.store()

# use xdotool to get the active window and type into it:
process = subprocess.Popen(['xdotool','getactivewindow'], shell=False, stdout=subprocess.PIPE)
winid = process.communicate()[0].strip()
# send ctrl+v to that window so the converted text is pasted
subprocess.Popen([r'xdotool','key','--window',winid,'ctrl+v']).wait()
#ShowPopup(winid)
#gtk.main()
def test_superscript_minus1():
    """A braced negative exponent is rendered with superscript characters."""
    converted = unicodeit.replace('cm^{-1}')
    assert converted == 'cm⁻¹'
def test_superscript_12():
    """Every digit of a multi-digit braced exponent is superscripted."""
    converted = unicodeit.replace('a^{12}')
    assert converted == 'a¹²'
def test_subscript_minus1():
    """A braced negative index is rendered with subscript characters."""
    converted = unicodeit.replace('cm_{-1}')
    assert converted == 'cm₋₁'
def test_subscript_12():
    """Every digit of a multi-digit braced index is subscripted."""
    converted = unicodeit.replace('a_{12}')
    assert converted == 'a₁₂'
def get(self, latex):
    """
    Handle a GET request: URL-decode *latex*, convert it with unicodeit and
    write the converted text to the response.
    """
    decoded = urllib.unquote_plus(latex)
    # replace() is given a one-element list; the answer is its first element
    converted = unicodeit.replace([decoded])
    self.response.out.write(converted[0])
def _is_tweet_too_long(error):
    """Return True if *error* is an HTTP 403 whose first API error code is 186."""
    http_error = getattr(error, 'e', None)
    if getattr(http_error, 'code', None) != 403:
        return False
    try:
        payload = json.loads(error.response_data.decode('utf8'))
        return payload["errors"][0]["code"] == 186
    except (AttributeError, LookupError, TypeError, ValueError):
        # no usable error payload: treat as a failure that cannot be retried
        return False


def tweet(title, collaborations, url, version=1):
    """
    Announce addition or revision of a HEPData record on Twitter.

    Does nothing unless ``USE_TWITTER`` is set. The title is cleaned
    (binary characters, LaTeX) and posted in full; when Twitter rejects the
    status as too long (HTTP 403, API error code 186) the title is shortened
    by one word and posted again, until it is accepted or no words remain.
    Any other error stops the attempts. Failures are reported with ``print``
    and never raised to the caller.

    :param title: title of the paper the record belongs to
    :param collaborations: passed to ``get_collaboration_string`` to build
        the collaboration part of the status
    :param url: URL of the record, included in the status
    :param version: record version; 1 announces an addition, any other
        value announces a revision and appends ``?version=<version>``
    :return: None
    """
    if not USE_TWITTER:
        return

    config = current_app.config
    oauth_token = config['OAUTH_TOKEN']
    oauth_secret = config['OAUTH_SECRET']
    consumer_key = config['CONSUMER_KEY']
    consumer_secret = config['CONSUMER_SECRET']

    if not (oauth_token and oauth_secret and consumer_key and consumer_secret):
        # log this error
        print("Twitter credentials must be supplied!")
        return

    twitter = Twitter(
        auth=OAuth(oauth_token, oauth_secret, consumer_key, consumer_secret))

    # in case of binary characters in title
    cleaned_title = decode_string(encode_string(title))
    # use UnicodeIt to replace LaTeX expressions
    cleaned_title = replace(cleaned_title)
    # remove some remaining LaTeX encodings
    cleaned_title = cleanup_latex(cleaned_title)

    words = len(cleaned_title.split())

    # Try to tweet with complete paper title.
    # If tweet exceeds 280 characters, keep trying with one less word each time.
    tweeted = False
    # The name bound by `except ... as` is unbound when the clause ends, so
    # the last failure is kept here for the report after the loop.
    last_error = None
    while words and not tweeted:
        try:
            if version == 1:
                status = "Added{0} data on \"{1}\" to {2}".format(
                    get_collaboration_string(collaborations),
                    truncate_string(cleaned_title, words), url)
            else:
                status = "Revised{0} data on \"{1}\" at {2}?version={3}".format(
                    get_collaboration_string(collaborations),
                    truncate_string(cleaned_title, words), url, version)
            twitter.statuses.update(status=status)
            tweeted = True
            print("Tweeted: {}".format(status))
        except Exception as e:
            # It would be nice to get a stack trace here
            last_error = e
            if _is_tweet_too_long(e):
                words = words - 1  # Try again with one less word.
            else:
                break

    if not tweeted:
        # last_error is None when the cleaned title had no words to post
        if last_error is not None:
            print(str(last_error))
        print("(P) Failed to post tweet for record {0}".format(url))
def test_incomplete_combiningmark():
    """A combining-mark command with an unclosed brace is left unchanged."""
    source = '\\breve{'
    assert unicodeit.replace(source) == '\\breve{'