def create_subset(username=None, folder_id=None, ex_id=None):
    """Create a new folder holding a subset of an existing folder's documents.

    Reads from the submitted form:
      * ``docIdxs[]`` -- document indices; each is converted with ``int`` and
        stored as a key of the new folder's docIDs dict with value 0.
      * ``name`` -- name of the new folder.

    The new folder is attached to the same dataset as the folder identified
    by ``folder_id``. ``username`` and ``ex_id`` are accepted but not used in
    this function.

    Returns a ``Response`` with status ``'200'``.
    """
    # Map every selected document index to 0.
    selected = dict.fromkeys(
        (int(raw) for raw in request.form.getlist('docIdxs[]')), 0)

    # first make the new folder
    source_folder = Folder.query.get(folder_id)
    folder_name = request.form['name']
    # Not used below; the lookup is retained so behaviour is unchanged.
    owner_id = g.user.id

    subset = Folder(source_folder.dataset_id, folder_name, selected)
    db.session.add(subset)
    db.session.commit()
    subset.initialize()

    return Response(status='200')
def upload_drop(username=None):
    """Create a new dataset from a single uploaded file or archive.

    A placeholder ``Dataset`` and its "Main Folder" are created for the
    current user (``g.user``). The first file of the ``file`` form field is
    then imported, document by document, through ``add_txt``:

      * ``.zip``                  -- every ``.txt`` / ``.pdf`` member.
      * ``.txt`` / ``.pdf``       -- the upload itself.
      * ``.tar``/``.gz``/``.bz2`` -- opened with ``tarfile``; every
        ``.txt`` / ``.pdf`` member.
      * anything else             -- reported on stdout, nothing imported.

    The extension is taken from the ``filename`` form field. While an
    archive is processed, ``uprog,<percent>`` messages are published on the
    ``<username>Xmenus`` channel; ``ucomplete,<filename>`` is published at
    the end in every case.

    Args:
        username: used to build the per-user documents path and the
            message channel name.

    Returns:
        A ``Response`` with status ``"200"``.
    """
    dset = Dataset(g.user.id, "My New Dataset", "A summary of the dataset")
    db.session.add(dset)
    db.session.commit()
    did = dset.id

    main_folder = Folder(dset.id, "Main Folder", dict())
    db.session.add(main_folder)
    db.session.commit()
    main_folder.initialize()
    db.session.commit()

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement this file uses.
    print("DROP")

    dset = Dataset.query.get(did)
    ufilename = request.form['filename']
    fid = request.files.getlist('file')[0]  # grab only a single file
    ext = os.path.splitext(ufilename)[1]
    userpath = "refinery/static/users/" + username + "/documents"
    channel = username + "Xmenus"
    doc_exts = (".txt", ".pdf")

    def report_progress(done, total, previous):
        """Publish ``uprog,<percent>`` if the percentage changed; return it."""
        percent = str(int(done / float(total) * 100))
        if percent != previous:
            msgServer.publish(channel, 'uprog,' + percent)
        return percent

    if ext == ".zip":
        zip_file = zipfile.ZipFile(fid)
        members = zip_file.namelist()
        progress = None
        for done, member in enumerate(members, 1):
            filename = os.path.basename(member)
            # Entries whose basename is empty (e.g. directories) are skipped.
            if filename and os.path.splitext(filename)[1] in doc_exts:
                add_txt(os.path.join(userpath, filename),
                        zip_file.open(member), filename, dset)
            progress = report_progress(done, len(members), progress)

    elif ext in doc_exts:
        # The original referenced an unbound ``filename`` here (NameError).
        # Use the uploaded name, reduced to its basename like archive
        # members are, since it is supplied by the client.
        filename = os.path.basename(ufilename)
        add_txt(os.path.join(userpath, filename), fid, filename, dset)

    elif ext in (".tar", ".gz", ".bz2"):
        import tarfile
        tar_file = tarfile.open(fileobj=fid)
        valid_names = [name for name in tar_file.getnames()
                       if os.path.splitext(name)[1] in doc_exts]
        print(len(valid_names))
        progress = None
        for done, member in enumerate(valid_names, 1):
            filename = os.path.basename(member)
            if filename:
                add_txt(os.path.join(userpath, filename),
                        tar_file.extractfile(member), filename, dset)
            progress = report_progress(done, len(valid_names), progress)

    else:
        # ``filename`` was unbound here as well; report the uploaded name.
        print("unknown file format %s %s" % (ext, ufilename))

    dset.dirty = "dirty"
    db.session.commit()
    print("GOT  %d Documents" % len(dset.folders[0].docIDs))
    msgServer.publish(channel, "ucomplete," + ufilename)
    return Response(status="200")
def upload_drop(username=None):
    """Create a new dataset from a single uploaded file or archive.

    A placeholder ``Dataset`` and its "Main Folder" are created for the
    current user (``g.user``). The first file of the ``file`` form field is
    then imported, document by document, through ``add_txt``:

      * ``.zip``                  -- every ``.txt`` / ``.pdf`` member.
      * ``.txt`` / ``.pdf``       -- the upload itself.
      * ``.tar``/``.gz``/``.bz2`` -- opened with ``tarfile``; every
        ``.txt`` / ``.pdf`` member.
      * anything else             -- reported on stdout, nothing imported.

    The extension is taken from the ``filename`` form field. While an
    archive is processed, ``uprog,<percent>`` messages are published on the
    ``<username>Xmenus`` channel; ``ucomplete,<filename>`` is published at
    the end in every case.

    Args:
        username: used to build the per-user documents path and the
            message channel name.

    Returns:
        A ``Response`` with status ``"200"``.
    """
    dset = Dataset(g.user.id, "My New Dataset", "A summary of the dataset")
    db.session.add(dset)
    db.session.commit()
    did = dset.id

    main_folder = Folder(dset.id, "Main Folder", dict())
    db.session.add(main_folder)
    db.session.commit()
    main_folder.initialize()
    db.session.commit()

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement this file uses.
    print("DROP")

    dset = Dataset.query.get(did)
    ufilename = request.form['filename']
    fid = request.files.getlist('file')[0]  # grab only a single file
    ext = os.path.splitext(ufilename)[1]
    userpath = "refinery/static/users/" + username + "/documents"
    channel = username + "Xmenus"
    doc_exts = (".txt", ".pdf")

    def report_progress(done, total, previous):
        """Publish ``uprog,<percent>`` if the percentage changed; return it."""
        percent = str(int(done / float(total) * 100))
        if percent != previous:
            msgServer.publish(channel, 'uprog,' + percent)
        return percent

    if ext == ".zip":
        zip_file = zipfile.ZipFile(fid)
        members = zip_file.namelist()
        progress = None
        for done, member in enumerate(members, 1):
            filename = os.path.basename(member)
            # Entries whose basename is empty (e.g. directories) are skipped.
            if filename and os.path.splitext(filename)[1] in doc_exts:
                add_txt(os.path.join(userpath, filename),
                        zip_file.open(member), filename, dset)
            progress = report_progress(done, len(members), progress)

    elif ext in doc_exts:
        # The original referenced an unbound ``filename`` here (NameError).
        # Use the uploaded name, reduced to its basename like archive
        # members are, since it is supplied by the client.
        filename = os.path.basename(ufilename)
        add_txt(os.path.join(userpath, filename), fid, filename, dset)

    elif ext in (".tar", ".gz", ".bz2"):
        import tarfile
        tar_file = tarfile.open(fileobj=fid)
        valid_names = [name for name in tar_file.getnames()
                       if os.path.splitext(name)[1] in doc_exts]
        print(len(valid_names))
        progress = None
        for done, member in enumerate(valid_names, 1):
            filename = os.path.basename(member)
            if filename:
                add_txt(os.path.join(userpath, filename),
                        tar_file.extractfile(member), filename, dset)
            progress = report_progress(done, len(valid_names), progress)

    else:
        # ``filename`` was unbound here as well; report the uploaded name.
        print("unknown file format %s %s" % (ext, ufilename))

    dset.dirty = "dirty"
    db.session.commit()
    print("GOT  %d Documents" % len(dset.folders[0].docIDs))
    msgServer.publish(channel, "ucomplete," + ufilename)
    return Response(status="200")