def outputData(diter, schema, connection, *args, **formatArgs):
    # Export the rows of `diter` (column metadata in `schema`) to a file or an
    # SQLite database.  The destination is the first positional argument or the
    # 'file' named argument.  Mode/format options are consumed (deleted) from
    # `formatArgs` as they are handled, so that whatever remains can be passed
    # through as csv dialect options.
    ### Parameter handling ###
    where = None
    if len(args) > 0:
        where = args[0]
    elif 'file' in formatArgs:
        where = formatArgs['file']
    else:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No destination provided")

    if 'file' in formatArgs:
        del formatArgs['file']

    # Guess the output mode from the destination's extension when not given.
    if 'mode' not in formatArgs:
        formatArgs['mode'] = autotype(where, {'csv': 'csv', 'tsv': 'tsv', 'xls': 'tsv', 'db': 'db', 'json': 'json'})

    if 'header' not in formatArgs:
        header = False
    else:
        header = formatArgs['header']
        del formatArgs['header']

    if 'compression' not in formatArgs:
        formatArgs['compression'] = False
    if 'compressiontype' not in formatArgs:
        formatArgs['compressiontype'] = 'gz'

    # 'orderbydesc' overrides 'orderby'; only db mode uses it (via closure).
    orderby = None
    if 'orderby' in formatArgs:
        orderby = formatArgs['orderby']
        del formatArgs['orderby']
    if 'orderbydesc' in formatArgs:
        orderby = formatArgs['orderbydesc'] + ' desc'
        del formatArgs['orderbydesc']

    append = False
    if 'append' in formatArgs:
        append = formatArgs['append']
        del formatArgs['append']

    # NOTE: tsv output deliberately gets an .xls extension here.
    type2ext = {'csv': 'csv', 'tsv': 'xls', 'plain': 'txt', 'db': 'db', 'json': 'json'}
    where = autoext(where, formatArgs['mode'], type2ext)
    filename, ext = os.path.splitext(os.path.basename(where))
    fullpath = os.path.split(where)[0]

    # NOTE(review): when `where` has no directory part, fullpath == '' and
    # os.makedirs('') raises OSError — confirm callers always pass a path
    # that includes a directory component.
    if not os.path.exists(fullpath):
        os.makedirs(fullpath)

    # db mode and split-json mode open their own per-part outputs; every other
    # mode writes to a single (possibly compressed) stream.
    if not (formatArgs['mode'] == 'db' or (formatArgs['mode'] == 'json' and 'split' in formatArgs)):
        fileIter = getoutput(where, append, formatArgs['compression'], formatArgs['compressiontype'])

    del formatArgs['compressiontype']
    del formatArgs['compression']

    try:
        if formatArgs['mode'] == 'json':
            del formatArgs['mode']
            import json
            # Compact, ascii-escaped encoder for one JSON document per line.
            je = json.JSONEncoder(separators=(',', ':'), ensure_ascii=True, check_circular=False).encode
            if 'split' in formatArgs:
                # Split mode: the first column of each row selects the output
                # part.  `cjs` is a defaultdict factory; defaultdict calls it
                # with no arguments, so it reads the current `key` from the
                # enclosing scope and lazily opens one file per distinct key.
                def cjs():
                    unikey = unicode(key)
                    t = open(os.path.join(fullpath, filename + '.' + unikey + ext), 'w')
                    print >> t, je({'schema': schema[1:]})
                    splitkeys[unikey] = t
                    jsfiles[key] = t
                    # Case for number as key
                    if unikey != key:
                        splitkeys[key] = splitkeys[unikey]
                    return splitkeys[key]

                jsfiles = {}
                splitkeys = defaultdict(cjs)
                # GC disabled during the bulk loop for speed.
                gc.disable()
                for row in diter:
                    key = row[0]
                    print >> splitkeys[key], je(row[1:])
                gc.enable()
                # Create other parts
                maxparts = 1
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 1
                if maxparts > 1:
                    # Touching a missing key triggers cjs(), so every part
                    # index gets at least a schema-only file.
                    for i in xrange(0, maxparts):
                        if i not in splitkeys:
                            key = i
                            tmp = splitkeys[key]
                for f in jsfiles.values():
                    if f is not None:
                        f.close()
            else:
                fileIter.write(je({'schema': schema}) + '\n')
                for row in diter:
                    print >> fileIter, je(row)
        elif formatArgs['mode'] == 'csv':
            del formatArgs['mode']
            # Remaining formatArgs are forwarded as csv dialect options.
            csvprinter = writer(fileIter, 'excel', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])
            for row in diter:
                csvprinter.writerow(row)
        elif formatArgs['mode'] == 'tsv':
            del formatArgs['mode']
            csvprinter = writer(fileIter, 'excel-tab', **formatArgs)
            if header:
                csvprinter.writerow([h[0] for h in schema])
            for row in diter:
                # Tabs inside string values would break the format: flatten
                # them to spaces.
                csvprinter.writerow(
                    [x.replace('\t', ' ') if type(x) is str or type(x) is unicode else x for x in row])
        elif formatArgs['mode'] == 'gtable':
            vtoutpugtformat(fileIter, diter, simplejson=False)
        elif formatArgs['mode'] == 'gjson':
            vtoutpugtformat(fileIter, diter, simplejson=True)
        elif formatArgs['mode'] == 'html':
            raise functions.OperatorError(__name__.rsplit('.')[-1], "HTML format not available yet")
        elif formatArgs['mode'] == 'plain':
            for row in diter:
                fileIter.write(((''.join([unicode(x) for x in row])) + '\n').encode('utf-8'))
        elif formatArgs['mode'] == 'db':
            # Create (or, when appending, reuse) an SQLite database table and
            # return (connection, cursor, parameterised insert statement).
            def createdb(where, tname, schema, page_size=16384):
                c = apsw.Connection(where)
                cursor = c.cursor()
                if not append:
                    cursor.execute('DROP TABLE IF EXISTS ' + tname)
                # Speed-over-safety pragmas (no journal, async writes,
                # exclusive lock) — a crash mid-export loses the file anyway.
                list(cursor.execute('pragma page_size=' + str(
                    page_size) + ';pragma cache_size=-1000;pragma legacy_file_format=false;pragma synchronous=0;pragma journal_mode=OFF;PRAGMA locking_mode = EXCLUSIVE'))
                if orderby:
                    # Load into a temp table first; the ordered final table is
                    # materialised afterwards with CREATE TABLE ... ORDER BY.
                    tname = '_' + tname
                    create_schema = 'create temp table ' + tname + '('
                else:
                    create_schema = 'create table ' + tname + '('
                create_schema += '`' + unicode(schema[0][0]) + '`' + (
                    ' ' + unicode(schema[0][1]) if schema[0][1] != None else '')
                for colname, coltype in schema[1:]:
                    create_schema += ',`' + unicode(colname) + '`' + (' ' + unicode(coltype) if coltype != None else '')
                create_schema += '); begin exclusive;'
                list(cursor.execute(create_schema))
                insertquery = "insert into " + tname + ' values(' + ','.join(['?'] * len(schema)) + ')'
                return c, cursor, insertquery

            if 'pagesize' in formatArgs:
                page_size = int(formatArgs['pagesize'])
            else:
                # Inherit the page size of the current connection's db.
                page_size = list(connection.cursor().execute('pragma page_size'))[0][0]

            tablename = filename
            if 'tablename' in formatArgs:
                tablename = formatArgs['tablename']

            if 'split' in formatArgs:
                maxparts = 0
                try:
                    maxparts = int(formatArgs['split'])
                except ValueError:
                    maxparts = 0

                # If not split parts is defined
                if maxparts == 0:
                    # `ns` is only used as a mutable attribute bag to share
                    # the insert query between cdb() and the loop below.
                    ns = lambda x: x

                    # defaultdict factory: reads `key` from the enclosing
                    # scope, lazily creating one db per distinct key.
                    def cdb():
                        unikey = unicode(key)
                        t = createdb(os.path.join(fullpath, filename + '.' + unikey + ext), tablename, schema[1:], page_size)
                        splitkeys[unikey] = t[1].execute
                        ns.insertqueryw = t[2]
                        dbcon[key] = t[0], t[1]
                        # Case for number as key
                        if unikey != key:
                            splitkeys[key] = splitkeys[unikey]
                        return splitkeys[key]

                    dbcon = {}
                    splitkeys = defaultdict(cdb)
                    gc.disable()
                    for row in diter:
                        key = row[0]
                        splitkeys[key](ns.insertqueryw, row[1:])
                    gc.enable()
                    for c, cursor in dbcon.values():
                        if c != None:
                            cursor.execute('commit')
                            c.close()
                else:
                    # Splitparts defined
                    cursors = []
                    dbcon = []
                    if "MSPW" in functions.apsw_version:
                        # madIS-patched apsw: executesplit yields a coroutine
                        # per part; rows are routed by hash of first column.
                        iters = []
                        senders = []
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:], page_size)
                            it = t[1].executesplit(t[2])
                            iters.append(it)
                            senders.append(it.send)
                            it.send(None)
                            dbcon.append((t[0], t[1]))
                        senders = tuple(senders)
                        for row in diter:
                            senders[hash(row[0]) % maxparts](row)
                        for it in iters:
                            it.close()
                    else:
                        for i in xrange(0, maxparts):
                            t = createdb(os.path.join(fullpath, filename + '.' + str(i) + ext), tablename, schema[1:], page_size)
                            cursors.append(t[1].execute)
                            dbcon.append((t[0], t[1]))
                            insertqueryw = t[2]
                        cursors = tuple(cursors)
                        for row in diter:
                            cursors[hash(row[0]) % maxparts](insertqueryw, row[1:])
                    for c, cursor in dbcon:
                        if c != None:
                            if orderby:
                                cursor.execute('pragma cache_size=-' + str(
                                    100000) + ';create table ' + tablename + ' as select * from _' + tablename + ' order by ' + orderby)
                            cursor.execute('commit')
                            c.close()
            else:
                # Write to db without split
                c, cursor, insertquery = createdb(where, tablename, schema, page_size)
                gc.disable()
                cursor.executemany(insertquery, diter)
                gc.enable()
                list(cursor.execute('commit'))
                c.close()
        else:
            raise functions.OperatorError(__name__.rsplit('.')[-1], "Unknown mode value")
    except StopIteration, e:
        pass
def buildrawprinter(separator):
    # Build a csv writer on stdout that uses the mterm output dialect with
    # the given separator (coerced to str) as its field delimiter.
    delim = str(separator)
    out_dialect = mtermoutput()
    return writer(sys.stdout, dialect=out_dialect, delimiter=delim)
def outputData(diter, schema, connection, *args, **formatArgs): ### Parameter handling ### where=None if len(args)>0: where=args[0] elif 'file' in formatArgs: where=formatArgs['file'] else: raise functions.OperatorError(__name__.rsplit('.')[-1],"No destination provided") if 'file' in formatArgs: del formatArgs['file'] if 'mode' not in formatArgs: formatArgs['mode']=autotype(where, {'csv':'csv', 'tsv':'tsv', 'xls':'tsv', 'db':'db', 'json':'json'}) if 'header' not in formatArgs: header=False else: header=formatArgs['header'] del formatArgs['header'] if 'compression' not in formatArgs: formatArgs['compression']=False if 'compressiontype' not in formatArgs: formatArgs['compressiontype']='gz' orderby = None if 'orderby' in formatArgs: orderby = formatArgs['orderby'] del formatArgs['orderby'] if 'orderbydesc' in formatArgs: orderby = formatArgs['orderbydesc'] + ' desc' del formatArgs['orderbydesc'] append=False if 'append' in formatArgs: append=formatArgs['append'] del formatArgs['append'] type2ext={'csv':'csv', 'tsv':'xls', 'plain':'txt', 'db':'db', 'json':'json'} where=autoext(where, formatArgs['mode'], type2ext) filename, ext=os.path.splitext(os.path.basename(where)) fullpath=os.path.split(where)[0] if not (formatArgs['mode'] == 'db' or (formatArgs['mode']=='json' and 'split' in formatArgs)): fileIter=getoutput(where,append,formatArgs['compression'],formatArgs['compressiontype']) del formatArgs['compressiontype'] del formatArgs['compression'] try: if formatArgs['mode']=='json': del formatArgs['mode'] import json je = json.JSONEncoder(separators = (',',':'), ensure_ascii = True, check_circular = False).encode if 'split' in formatArgs: def cjs(): unikey = unicode(key) t=open(os.path.join(fullpath, filename+'.'+unikey+ext), 'w') print >> t, je( {'schema':schema[1:]} ) splitkeys[unikey]=t jsfiles[key]=t # Case for number as key if unikey != key: splitkeys[key] = splitkeys[unikey] return splitkeys[key] jsfiles = {} splitkeys=defaultdict(cjs) gc.disable() for row in diter: 
key=row[0] print >> splitkeys[key], je(row[1:]) gc.enable() # Create other parts maxparts = 1 try: maxparts = int(formatArgs['split']) except ValueError: maxparts = 1 if maxparts > 1: for i in xrange(0, maxparts): if i not in splitkeys: key = i tmp = splitkeys[key] for f in jsfiles.values(): if f is not None: f.close() else: fileIter.write(je({'schema':schema}) + '\n') for row in diter: print >> fileIter, je(row) elif formatArgs['mode'] == 'csv': del formatArgs['mode'] csvprinter = writer(fileIter, 'excel', **formatArgs) if header: csvprinter.writerow([h[0] for h in schema]) for row in diter: csvprinter.writerow(row) elif formatArgs['mode'] == 'tsv': del formatArgs['mode'] csvprinter = writer(fileIter, 'excel-tab', **formatArgs) if header: csvprinter.writerow([h[0] for h in schema]) for row in diter: csvprinter.writerow([x.replace('\t', ' ') if type(x) is str or type(x) is unicode else x for x in row]) elif formatArgs['mode']=='gtable': vtoutpugtformat(fileIter,diter,simplejson=False) elif formatArgs['mode']=='gjson': vtoutpugtformat(fileIter,diter,simplejson=True) elif formatArgs['mode']=='html': raise functions.OperatorError(__name__.rsplit('.')[-1],"HTML format not available yet") elif formatArgs['mode']=='plain': for row in diter: fileIter.write(((''.join([unicode(x) for x in row]))+'\n').encode('utf-8')) elif formatArgs['mode']=='db': def createdb(where, tname, schema, page_size=16384): c = apsw.Connection(where) cursor = c.cursor() list(cursor.execute('pragma page_size='+str(page_size)+';pragma cache_size=-1000;pragma legacy_file_format=false;pragma synchronous=0;pragma journal_mode=OFF;PRAGMA locking_mode = EXCLUSIVE')) if orderby: tname = '_' + tname create_schema='create temp table '+tname+'(' else: create_schema='create table '+tname+'(' create_schema+='`'+unicode(schema[0][0])+'`'+ (' '+unicode(schema[0][1]) if schema[0][1]!=None else '') for colname, coltype in schema[1:]: create_schema+=',`'+unicode(colname)+'`'+ (' '+unicode(coltype) if coltype!=None 
else '') create_schema+='); begin exclusive;' list(cursor.execute(create_schema)) insertquery="insert into "+tname+' values('+','.join(['?']*len(schema))+')' return c, cursor, insertquery if 'pagesize' in formatArgs: page_size=int(formatArgs['pagesize']) else: page_size=list(connection.cursor().execute('pragma page_size'))[0][0] tablename=filename if 'tablename' in formatArgs: tablename=formatArgs['tablename'] if 'split' in formatArgs: maxparts = 0 try: maxparts = int(formatArgs['split']) except ValueError: maxparts = 0 # If not split parts is defined if maxparts == 0: ns = lambda x:x def cdb(): unikey = unicode(key) t=createdb(os.path.join(fullpath, filename+'.'+unikey+ext), tablename, schema[1:], page_size) splitkeys[unikey]=t[1].execute ns.insertqueryw = t[2] dbcon[key]=t[0], t[1] # Case for number as key if unikey != key: splitkeys[key] = splitkeys[unikey] return splitkeys[key] dbcon = {} splitkeys=defaultdict(cdb) gc.disable() for row in diter: key=row[0] splitkeys[key](ns.insertqueryw, row[1:]) gc.enable() for c, cursor in dbcon.values(): if c != None: cursor.execute('commit') c.close() else: # Splitparts defined cursors = [] dbcon = [] if "MSPW" in functions.apsw_version: iters = [] senders = [] for i in xrange(0, maxparts): t = createdb(os.path.join(fullpath, filename+'.'+str(i)+ext), tablename, schema[1:], page_size) it = t[1].executesplit(t[2]) iters.append(it) senders.append(it.send) it.send(None) dbcon.append((t[0], t[1])) senders = tuple(senders) for row in diter: senders[hash(row[0]) % maxparts](row) for it in iters: it.close() else: for i in xrange(0, maxparts): t = createdb(os.path.join(fullpath, filename+'.'+str(i)+ext), tablename, schema[1:], page_size) cursors.append(t[1].execute) dbcon.append((t[0], t[1])) insertqueryw = t[2] cursors = tuple(cursors) for row in diter: cursors[hash(row[0]) % maxparts](insertqueryw, row[1:]) for c, cursor in dbcon: if c != None: if orderby: cursor.execute('pragma cache_size=-'+str(100000)+';create table 
'+tablename+' as select * from _'+tablename+' order by '+orderby) cursor.execute('commit') c.close() else: # Write to db without split c, cursor, insertquery=createdb(where, tablename, schema, page_size) gc.disable() cursor.executemany(insertquery, diter) gc.enable() list(cursor.execute('commit')) c.close() else: raise functions.OperatorError(__name__.rsplit('.')[-1],"Unknown mode value") except StopIteration,e: pass
sargs.append(args[i]) try: if params: largs, kargs = argsparse.parse(args,csvargs.boolargs,csvargs.nonstringargs,csvargs.needsescape) else: largs=[] kargs=dict() except Exception,e: raise functions.MadisError(e) if len(largs)>0: raise functions.OperatorError("strjoin","Unknown argument %s" %(''.join(largs))) if 'dialect' not in kargs: kargs['dialect']=csvargs.defaultcsv() f=StringIO.StringIO() try: csvprinter=writer(f,**kargs) except Exception,e: raise functions.MadisError(e) csvprinter.writerow(sargs) f.seek(0) s=f.read() return s strjoin.registered=True def dateformat(*args): """ .. function:: dateformat(date[,inpformat,outformat])