# setup.py for the myPyApps package.
from setuptools import setup, find_packages

try:
    # Build long_description by downloading the project article (HTML) and
    # converting it to reStructuredText.  Python 2 only (StringIO / urllib
    # modules); any import or network failure falls back to README.txt.
    from html2rest import html2rest
    from StringIO import StringIO
    import urllib

    # read content of article (plain html template) and convert to
    # restructuredText for long_description
    buf = StringIO()
    html_content = urllib.urlopen(
        "http://fabrice.douchant.com/mypyapps-framework-for-python-developments?lang=en"
    ).read()
    html2rest(html_content, writer=buf)
    long_description = buf.getvalue()
except (ImportError, IOError):
    print("Can't use spip article as description, use README.txt instead")
    # BUG FIX: use a context manager so the README handle is closed
    # (the original `open('README.txt').read()` leaked the file object).
    with open('README.txt') as readme:
        long_description = readme.read()

import myPyApps

setup(
    name='myPyApps',
    version='.'.join(map(str, myPyApps.__version__)),
    packages=find_packages(),
    package_data={
        'myPyApps': ['config/*.default', 'logs/.empty'],
    },
    author='Fabrice Douchant',
    author_email='*****@*****.**',
    description='Allow quick Python programs development',
    long_description=long_description,
)
def grab(name, subdir = "" ): print "grabWiki: getting",name,"...", h1 = HTTPConnection(WIKIHOST) fullname = WIKIROOT + name print "gW19: getting ... ",fullname h1.request("GET", fullname) r1 = h1.getresponse() d1 = r1.read() # kludgy way to remove edit links d1= re.sub(r"\[.*?action=edit.*?\]", "", d1) #print repr(d1) try: dom1 = parseString(d1) except: i = 1 for line in d1.split("\n"): print i,":",line i +=1 raise divs = dom1.getElementsByTagName("div") # print "gW29:", dom1.toxml() div = None for divcand in divs: # print "gW32:", divcand.getAttribute("class") if divcand.getAttribute("class") == "WIKIDOC": div = divcand break try: tocdivs= div.getElementsByTagName("table") for divcand in tocdivs: if divcand.getAttribute("class") == "toc": div.removeChild(divcand) break except AttributeError: pass # page has no TOC if div == None: print ('ERROR: Article %s has no div with class="WIKIDOC"' % name + "\nUse {{START-WIKIDOC}} and {{END-WIKIDOC}} templates for text " "to be included in document.") return def killLink(node): child = node.firstChild while child is not None: if child: killLink(child) nextchild = child.nextSibling if child.nodeType == xml.dom.minidom.Node.ELEMENT_NODE: if child.tagName == "a": node.removeChild(child) child = nextchild # print "before",html # not needed now killLink(div) html = div.toxml() outfname = os.path.join("source",subdir, "gen."+name+".rst") outf = file(outfname, "w+") html2rest(html, outf) outf.close() print "to", outfname
def main(group_id, location, time_boundary, event_status, pandoc):
    """Import meetup.com events for a group into per-event RST files.

    Prompts for (and caches in ~/.meetup.com-key) the meetup.com API key,
    validates the group, then writes one
    content/<location>/<date>-<slug>/index.rst per event, converting the
    HTML description with pandoc (if requested) or html2rest.
    """
    key = None  # BUG FIX: `key` was unbound when the key file did not exist
    key_path = os.path.normpath(os.path.expanduser('~/.meetup.com-key'))
    if os.path.exists(key_path):
        with open(key_path) as fh:
            key = fh.read().strip()

    # Cached HTTP session: responses expire after one day.
    cache = FileCache('.web_cache', forever=True)
    requests = CacheControl(
        Session(), cache, cache_etags=False, heuristic=ExpiresAfter(days=1)
    )

    # Loop until the API accepts our key.
    while True:
        resp = requests.get('https://api.meetup.com/status', params=dict(key=key))
        if resp.status_code == 200:
            break
        elif resp.status_code == 401:
            click.echo('Your meetup.com key is required. You can get it from https://secure.meetup.com/meetup_api/key/\n')
            if click.confirm('Open https://secure.meetup.com/meetup_api/key/ in your web browser?'):
                click.launch('https://secure.meetup.com/meetup_api/key/')
                click.echo('')
            key = click.prompt('Key', hide_input=True)
        else:
            # BUG FIX: click.fail() does not exist; raise ClickException.
            raise click.ClickException('Failed to get meetup.com status. Response was {!r}'.format(resp.text))

    click.secho('For convenience your key is saved in `{}`.\n'.format(key_path), fg='magenta')
    with open(key_path, 'w') as fh:
        fh.write(key)

    while not location:
        location = location or get_input('Location: ', completer=WordCompleter(['cluj', 'iasi', 'timisoara'], ignore_case=True))

    # Loop until the group id yields at least one event.
    while True:
        group_id = group_id or get_input('Group ID: ', completer=WordCompleter(['Cluj-py', 'RoPython-Timisoara'], ignore_case=True))
        resp = requests.get('https://api.meetup.com/2/events', params=dict(
            key=key,
            group_urlname=group_id,
            time=time_boundary,
            status=event_status,
        ))
        if resp.status_code == 200:
            # Renamed from `json` to avoid shadowing the stdlib module name.
            payload = resp.json()
            if payload['results']:
                break
            click.secho('Invalid group `{}`. It has no events!'.format(group_id), fg='red')
            group_id = None
        # BUG FIX: status_code is an int — the original compared it to the
        # string '400' (never true), and the non-elif chain printed two
        # messages for an empty 200 response.
        elif resp.status_code == 400:
            raise click.ClickException('Failed to get make correct request. Response was {!r}'.format(resp.text))
        else:
            click.secho('Invalid group `{}`. Response was [{}] {!r}'.format(group_id, resp.status_code, resp.text), fg='red')
            group_id = None  # FIX: re-prompt instead of retrying the same bad id forever

    # click.echo(pformat(dict(resp.headers)))

    for event in payload['results']:
        # Meetup timestamps are in milliseconds.
        dt = datetime.fromtimestamp(event['time'] / 1000)
        click.echo("{}: {}".format(
            dt.strftime('%Y-%m-%d %H:%M:%S'),
            event['name']
        ))
        existing_path = glob(os.path.join('content', '*', dt.strftime('%Y-%m-%d*'), 'index.rst'))
        if existing_path:
            if len(existing_path) > 1:
                click.secho('\tERROR: multiple paths matched: {}'.format(existing_path))
            else:
                click.secho('\t`{}` already exists. Not importing.'.format(*existing_path), fg='yellow')
        else:
            target_dir = os.path.join('content', location, '{}-{}'.format(dt.strftime('%Y-%m-%d'), slugify(event['name'])))
            target_path = os.path.join(target_dir, 'index.rst')
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if pandoc:
                # pandoc reads from disk, so round-trip through a temp file.
                with tempfile.NamedTemporaryFile(delete=False) as fh:
                    fh.write(event['description'].encode('utf-8'))
                try:
                    rst = subprocess.check_output(['pandoc', '--from=html', '--to=rst', fh.name]).decode('utf-8')
                finally:
                    # BUG FIX: the temp file was leaked (a leftover py2
                    # `print fh.name` debug line replaced the cleanup).
                    os.unlink(fh.name)
            else:
                stream = StringIO()
                html2rest(event['description'].encode('utf-8'), writer=stream)
                rst = stream.getvalue().decode('utf-8')

            with io.open(target_path, 'w', encoding='utf-8') as fh:
                fh.write('''{name}
###############################################################

:tags: unknown
:registration: meetup.com: {event_url}

{rst}'''.format(rst=rst, **event))
            click.secho('\tWrote `{}`.'.format(target_path), fg='green')
for i in range(len(param_array[1][5]) + 4): header_sep = header_sep + "=" #f.write(header_sep + chr(13)) f[1].write(param_array[1][5] + chr(13)) f[1].write(header_sep + chr(13)) f[1].write(" " + chr(13)) stream = StringIO() #soup = bs(param_array[4], "html.parser") #images = soup.findAll('img') #print(param_array[2]) #for image in images: #print(image["src"]) #print("+++++++++++++++++++++++++++++++++++++++++++++") #print("===============================================") html2rest(param_array[1][4], writer=stream) f[1].write(stream.getvalue().decode("utf8") + chr(13)) f[1].write(" " + chr(13)) for q in range(2, 7, 1): isParent = False for row_child in rowsArray: param_array[q] = parse_csv(row_child) maxdepth = 6 - (q - 2) if param_array[q][3] == param_array[q - 1][2]: isParent = True if isParent == True: f[q - 1].write(".. toctree::" + chr(13)) f[q - 1].write(" :maxdepth: " + str(maxdepth) + chr(13))
def html2rest(txt):
    """Convert an HTML string to reStructuredText.

    Thin wrapper around crappylibrary.html2rest that captures the
    converter's writer output in memory and returns it as a string.
    """
    buf = StringIO()
    crappylibrary.html2rest(txt, buf)
    return buf.getvalue()
def _restify(text):
    """Convert an HTML snippet into a reST bullet list.

    A leading '*' is prepended when the text does not already start with
    one, so the converter treats the content as a list; every '*' in the
    converted output then becomes a newline-prefixed bullet ('\\n* ').
    """
    if text and not text.startswith('*'):
        text = '*' + text
    out = StringIO()
    html2rest(text.encode('utf-8'), writer=out)
    converted = out.getvalue()
    return converted.replace('*', '\n* ')
# NOTE(review): this fragment depends on names defined elsewhere in the file
# (img_id, img_dir, img_start_pos, img_end_pos, param_array, stream, f,
# rowsArray, parse_csv, html2rest).  The statement nesting below was
# reconstructed from a whitespace-mangled source; confirm against the original.
print("IMAGE ID: " + str(img_id))
# Probe for the image file, trying each extension in turn; if none exists,
# the last candidate (.GIF) is kept anyway.
img_file = img_dir + img_id + ".png"
if os.path.isfile(img_file) == False:
    img_file = img_dir + img_id + ".jpg"
if os.path.isfile(img_file) == False:
    img_file = img_dir + img_id + ".gif"
if os.path.isfile(img_file) == False:
    img_file = img_dir + img_id + ".PNG"
if os.path.isfile(img_file) == False:
    img_file = img_dir + img_id + ".JPG"
if os.path.isfile(img_file) == False:
    img_file = img_dir + img_id + ".GIF"
# Append an image substitution definition to the HTML body and replace the
# original <img> span with a substitution reference.
param_array[4] = param_array[4] + "<br> .. " + "|" + img_file + "| image:: " + img_file + " <br /> :scale: 100%<br />"
param_array[4] = param_array[4].replace(param_array[4][ img_start_pos : img_end_pos+1 ], " |" + img_file + "| ", 1)
#param_array[4] = param_array[4].replace(param_array[4][ img_start_pos : img_end_pos+1 ], ".. image:: " + img_file + " <br /> :scale: 100% <br />", 1)
html2rest(param_array[4], writer = stream)
rst_code = stream.getvalue().decode("utf8")
# Re-indent image directives so docutils parses them; chr(13) is a carriage
# return used as the line separator throughout.
rst_code = rst_code.replace(".. image:: " , chr(13) +"" + chr(13) +" .. image:: ")
rst_code = rst_code.replace(":scale: 100%" , " :scale: 100%")
f.write(rst_code + chr(13))
f.write(" " + chr(13))
isParent = False
# A row is a parent when some row's parent field (index 3) matches this
# row's id field (index 2).
for row_child in rowsArray:
    param_array2 = parse_csv(row_child)
    maxdepth = 6
    if param_array2[3] == param_array[2]:
        isParent = True
# NOTE(review): placement after the row loop is inferred — verify.
if isParent == True:
    f.write(".. toctree::" + chr(13))
Requires html2rest==0.2.2 1. Recurses through all directories looking for `htm` files, and writes a `rst` version of the file to the same directory. 2. Prints path to created rst file. Pipe this out to create a list of rst files. """ import os from html2rest import html2rest for dirpath, dirnames, filenames in os.walk("."): path=dirpath.split(os.sep) for file in filenames: current_file = file.split(os.extsep) if current_file[1] == "htm": # Prints out path for use with content.rst print (dirpath+"/"+current_file[0]+".rst") # Opens destination file, writes to it, closes destination file. write_file = open(os.path.join(dirpath,current_file[0]+'.rst'),"w") html2rest(open(os.path.join(dirpath,file)).read(),writer=write_file) # TODO: # - strip useless information from rst files # e.g. vestigial navigation from templates write_file.close() # Remove original htm file os.remove(os.path.join(dirpath,current_file[0]+"."+current_file[1]))
# Convert the HTML in test.txt to reStructuredText and print it.
from html2rest import html2rest
from io import BytesIO as StringIO

stream = StringIO()
# BUG FIX: close the input file deterministically — the original
# `f = open(...)` leaked the handle.
with open("test.txt", "r") as f:
    html = f.read()
#stream = ""
html2rest(html, writer=stream)
# html2rest writes bytes, hence the BytesIO writer and the final decode.
rst = stream.getvalue().decode("utf8")
print(rst)