# Module-level imports assumed by the views below (old-Django-era APIs such
# as HttpResponse(mimetype=...) and request.REQUEST are used throughout).
# DataStore, getscraperorresponse, getscraperor404, stream_sqlite and logger
# are provided elsewhere in this project.
import json
import re

from django.core.urlresolvers import reverse
from django.http import (Http404, HttpResponse, HttpResponseNotFound,
                         HttpResponseRedirect)


def scraper_delete_data(request, short_name):
    from frontend.utilities.messages import send_message

    scraper, resp = getscraperorresponse(request, "scraper", short_name,
                                         None, "delete_data")
    if resp:
        return resp

    try:
        dataproxy = DataStore(scraper.short_name)
        dataproxy.request({"maincommand": "clear_datastore"})
        scraper.scraper.update_meta()
        scraper.save()
        dataproxy.close()
    except:
        # Swallow datastore errors so the user is still redirected below.
        pass

    send_message(request, {
        "message": "Your data has been deleted",
        "level": "warning",
        "actions": [
            ("Undo?", reverse('scraper_undo_delete_data', args=[short_name]), False),
        ]
    })
    return HttpResponseRedirect(
        reverse('code_overview', args=[scraper.wiki_type, short_name]))
def code_info(request, wiki_type, short_name):
    from codewiki.models import ScraperRunEvent, DomainScrape, Code
    from tagging.models import Tag

    try:
        scraper = Code.objects.get(short_name=short_name)
    except Code.DoesNotExist:
        result = json.dumps({
            'error': 'Sorry, this scraper does not exist',
            'short_name': short_name
        })
        return HttpResponseNotFound(result)

    info = {}
    info['short_name'] = scraper.short_name
    info['language'] = scraper.language
    info['created'] = scraper.created_at.isoformat()
    info['title'] = scraper.title
    info['description'] = scraper.description_safepart()
    info['tags'] = [tag.name for tag in Tag.objects.get_for_object(scraper)]
    info['wiki_type'] = scraper.wiki_type
    info['privacy_status'] = scraper.privacy_status

    status = scraper.get_vcs_status(-1)
    info['code'] = status["code"]
    if 'filemodifieddate' in status:
        info['filemodifieddate'] = status['filemodifieddate'].isoformat()

    info['userroles'] = {}
    for ucrole in scraper.usercoderole_set.all():
        if ucrole.role not in info['userroles']:
            info['userroles'][ucrole.role] = []
        info['userroles'][ucrole.role].append(ucrole.user.username)

    if scraper.wiki_type == 'scraper':
        info['last_run'] = (scraper.scraper.last_run and
                            scraper.scraper.last_run.isoformat() or '')
        info['run_interval'] = scraper.scraper.run_interval
        info['records'] = scraper.scraper.record_count

        dataproxy = DataStore(scraper.short_name)
        sqlitedata = dataproxy.request({
            "maincommand": "sqlitecommand",
            "command": "datasummary",
            "val1": 0,
            "val2": None
        })
        # datasummary gives back a dict on success, an error string otherwise
        if sqlitedata and type(sqlitedata) not in [str, unicode]:
            info['datasummary'] = sqlitedata

    info_json = json.dumps([info], indent=4)
    response = HttpResponse(info_json,
                            mimetype='application/json; charset=utf-8')
    return response
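# For orientation, a sketch of the payload shape code_info produces; every
# value below is an invented placeholder, and note that the real endpoint
# wraps the info dict in a one-element list before serialising it.
#
# [{
#     "short_name": "example_scraper",
#     "language": "python",
#     "created": "2011-01-01T00:00:00",
#     "title": "Example",
#     "tags": [],
#     "wiki_type": "scraper",
#     "privacy_status": "public",
#     "code": "...",
#     "userroles": {"owner": ["someuser"]},
#     "last_run": "",
#     "run_interval": 86400,
#     "records": 0
# }]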
def export_sqlite(request, short_name):
    scraper = getscraperor404(request, short_name, "exportsqlite")

    memblock = 100000
    dataproxy = DataStore(scraper.short_name)
    initsqlitedata = dataproxy.request({
        "maincommand": "sqlitecommand",
        "command": "downloadsqlitefile",
        "seek": 0,
        "length": 0
    })
    if "filesize" not in initsqlitedata:
        return HttpResponse(str(initsqlitedata), mimetype="text/plain")

    response = HttpResponse(
        stream_sqlite(dataproxy, initsqlitedata["filesize"], memblock),
        mimetype='application/octet-stream')
    response['Content-Disposition'] = 'attachment; filename=%s.sqlite' % short_name
    response["Content-Length"] = initsqlitedata["filesize"]
    return response
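# stream_sqlite is defined elsewhere in the project. As a minimal sketch of
# what such a generator could look like, assuming the downloadsqlitefile
# command honours "seek"/"length" and returns base64-encoded chunks in a
# "data" field (the exact wire protocol is an assumption here):
def stream_sqlite_sketch(dataproxy, filesize, memblock):
    offset = 0
    while offset < filesize:
        chunk = dataproxy.request({
            "maincommand": "sqlitecommand",
            "command": "downloadsqlitefile",
            "seek": offset,
            "length": min(memblock, filesize - offset)
        })
        yield chunk["data"].decode("base64")  # Python 2 codec
        offset += memblock
    dataproxy.close()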
def get_scraper_data(scraper):
    scraper_data = []
    dataproxy = None
    try:
        dataproxy = DataStore(scraper.short_name)
        sqlite_data = dataproxy.request({
            "maincommand": "sqlitecommand",
            "command": "datasummary",
            "limit": 1
        })
        if 'tables' in sqlite_data:
            for table_name, table_info in sqlite_data['tables'].iteritems():
                table_meta = {
                    "table_name": table_name,
                    # Collapse runs of non-alphanumerics to "_", then trim
                    # leading/trailing "_" for an identifier-safe name.
                    "table_name_safe": re.sub(r'(^_|_$)', '',
                                              re.sub(r'[^a-zA-Z0-9]+', '_', table_name)),
                    "total_rows": table_info['count'],
                    "column_names": table_info['keys'],
                    "rows": []
                }
                # Double any embedded quotes so the table name is safe inside
                # the double-quoted identifier.
                query = 'select * from "%s" limit 100' % re.sub(r'"', '""', table_name)
                sqlite_data = dataproxy.request({
                    "maincommand": "sqliteexecute",
                    "sqlquery": query,
                    "attachlist": "",
                    "streamchunking": False,
                    "data": ""
                })
                if 'error' in sqlite_data:
                    logger.error("Error in get_scraper_data: %s" % sqlite_data)
                else:
                    table_meta['rows'] = sqlite_data['data']
                scraper_data.append(table_meta)
    except Exception, e:
        print e
    return scraper_data
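# A quick illustration of the table_name_safe transformation above, using an
# invented table name: runs of non-alphanumeric characters collapse to single
# underscores, then any leading/trailing underscore is trimmed.
#
#   >>> import re
#   >>> name = '  my table! (2011) '
#   >>> re.sub(r'(^_|_$)', '', re.sub(r'[^a-zA-Z0-9]+', '_', name))
#   'my_table_2011'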
def scraper_data_view(request, wiki_type, short_name, table_name):
    """
    DataTable ( http://www.datatables.net/usage/server-side ) implementation
    for the new scraper page
    """
    from django.utils.html import escape

    mime = 'application/json'

    def local_escape(s):
        if s is None:
            return ""
        return escape(s)

    #if not wiki_type == 'scraper':
        # 415 - Unsupported Media Type
        # The entity of the request is in a format not supported by the requested resource
        #return HttpResponse( status=415 )

    scraper, resp = getscraperorresponse(request, wiki_type, short_name,
                                         "code_overview", "overview")
    if resp:
        return resp

    # We have *mostly* validated the request now. So we need to load up the
    # parameters we have been sent and the table_name we have been given and
    # work out a query that satisfies it. We also need to get the columns and
    # put them in a list so that we can use them to sort on.
    offset = int(request.REQUEST.get('iDisplayStart', '0'))
    limit = int(request.REQUEST.get('iDisplayLength', '10'))
    total_rows = 0
    total_after_filter = 0
    sortbyidx = int(request.REQUEST.get('iSortCol_0', '0'))
    sortdir = request.REQUEST.get('sSortDir_0', 'asc')
    columns = []
    data = []

    # Interact with the database
    dataproxy = None
    try:
        dataproxy = DataStore(scraper.short_name)
        # We will ask for a datasummary (pending new metadata call)
        sqlite_data = dataproxy.request({
            "maincommand": "sqlitecommand",
            "command": "datasummary",
            "limit": 1
        })
        if 'tables' in sqlite_data and table_name in sqlite_data['tables']:
            table = sqlite_data['tables'][table_name]
            total_rows = table['count']
            total_after_filter = total_rows
            sql = table['sql']
            columns = table['keys']
        else:
            raise Http404()

        sorting_columns = ["`%s`" % c for c in columns]
        selecting_columns = [
            "CASE WHEN length(`%s`)<1000 THEN `%s` ELSE substr(`%s`, 1, 1000)"
            "||'... {{MOAR||%s||'||rowid||'||NUFF}}' END AS `%s`"
            % (c, c, c, c, c) for c in columns
        ]
        # jQuery can now use a regexp like...
        #     {{MOAR\|\|([^\|]+)\|\|([^\|]+)\|\|NUFF}}$
        # ...to fish out the cell's column name and rowid
        # and show its full content if the user wants.

        # Build query and do the count for the same query
        sortby = "%s %s" % (sorting_columns[sortbyidx], sortdir)
        query = 'select %s from `%s` order by %s limit %d offset %d' % (
            ','.join(selecting_columns), table_name, sortby, limit, offset)
        sqlite_data = dataproxy.request({
            "maincommand": "sqliteexecute",
            "sqlquery": query,
            "attachlist": "",
            "streamchunking": False,
            "data": ""
        })

        # We need to now convert this to the aaData list of lists
        if 'error' in sqlite_data:
            # Log the error
            data = []
            logger.error("Error in scraper_data_view: " + str(sqlite_data))
        else:
            # For each row map each item in that row against escape
            data = map(lambda b: map(local_escape, b), sqlite_data['data'])
    except Exception, e:
        print e
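# The client side of the MOAR/NUFF truncation marker is jQuery, but the same
# marker can be picked apart in Python; a sketch with an invented cell value:
#
#   >>> import re
#   >>> cell = "Lorem ipsum... {{MOAR||body||42||NUFF}}"
#   >>> m = re.search(r'\{\{MOAR\|\|([^|]+)\|\|([^|]+)\|\|NUFF\}\}$', cell)
#   >>> m.groups()   # (column name, rowid) for fetching the full value
#   ('body', '42')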