示例#1
0
def create_subset(username=None, folder_id=None, ex_id=None):
    """Create a new folder containing a subset of documents.

    The document indices are read from the ``docIdxs[]`` form field and the
    new folder's name from the ``name`` form field.  The new ``Folder`` is
    created in the same dataset as the folder identified by ``folder_id``,
    committed, and then initialized.

    Args:
        username: Not used by this function.
        folder_id: Identifier of the existing folder whose dataset the new
            folder is attached to.
        ex_id: Not used by this function.

    Returns:
        A ``Response`` with status 200 on success, or status 404 when no
        folder matches ``folder_id``.

    Raises:
        ValueError: If a ``docIdxs[]`` entry is not an integer string.
    """
    # Query.get() yields None for an unknown key; bail out here instead of
    # failing later with AttributeError on ``fOld.dataset_id``.
    fOld = Folder.query.get(folder_id)
    if fOld is None:
        return Response(status='404')

    # Folder receives a dict keyed by document index, every value set to 0.
    docIDs = dict.fromkeys(
        (int(x) for x in request.form.getlist('docIdxs[]')), 0)

    name = request.form['name']
    # NOTE(review): user_id is never used below.  The attribute read is kept
    # because it makes a request without ``g.user`` fail as before --
    # confirm whether that is intended before removing it.
    user_id = g.user.id

    # first make the new folder
    fNew = Folder(fOld.dataset_id, name, docIDs)
    db.session.add(fNew)
    db.session.commit()

    fNew.initialize()

    return Response(status='200')
示例#2
0
def upload_drop(username=None):

    name = "My New Dataset"
    summary = "A summary of the dataset"
    user_id = g.user.id
    
    dset = Dataset(user_id,name,summary)
    db.session.add(dset)
    db.session.commit()
    did = dset.id

    main_folder = Folder(dset.id,"Main Folder",dict())
    db.session.add(main_folder)
    db.session.commit()

    main_folder.initialize()
    db.session.commit()
    
    print "DROP"

    dset = Dataset.query.get(did)

    ufilename = request.form['filename']
    fid = request.files.getlist('file')[0]  #grab only a single file

    fn,ext = os.path.splitext(ufilename)

    userpath = "refinery/static/users/" + username + "/documents"
    channel = username + "Xmenus"

    if ext == ".zip":
        zip_file = zipfile.ZipFile(fid)
        files = zip_file.namelist()
        nFiles = len(files)
        lastProg = 0
        count = 0.0
        for member in files:
            filename = os.path.basename(member)
            if filename:
                fn,ext = os.path.splitext(filename)
                if ext == ".txt" or ext == ".pdf":
                    add_txt(os.path.join(userpath,filename),zip_file.open(member),filename,dset)
            count += 1.0
            update = str(int(count / float(nFiles) * 100))
            if update != lastProg:
                lastProg = update
                s = 'uprog,' + update
                msgServer.publish(channel, "%s" % s)
            
    elif ext == ".txt" or ext == ".pdf":
        add_txt(os.path.join(userpath,filename),fid,filename,dset)

    elif ext == ".tar" or ext == ".gz" or ext == ".bz2":
        import tarfile
        tar_file = tarfile.open(fileobj=fid)
        tar_filename = os.path.join(userpath,ufilename)
        valid_names = [x for x in tar_file.getnames() if (os.path.splitext(x)[1] == '.txt') or (os.path.splitext(x)[1] == '.pdf')]
        nFiles = len(valid_names)
        print nFiles
        lastProg = 0
        count = 0.0
        for member in valid_names:
            filename = os.path.basename(member)
            if filename:
                add_txt(os.path.join(userpath,filename), tar_file.extractfile(member),filename,dset)
            count += 1.0
            update = str(int(count / float(nFiles) * 100))
            if update != lastProg:
                lastProg = update
                s = 'uprog,' + update
                msgServer.publish(channel, "%s" % s)        

    else:
        print "unknown file format",ext,filename

    dset.dirty = "dirty"

    db.session.commit()

    print "GOT ",len(dset.folders[0].docIDs), "Documents"

    msgServer.publish(channel, "ucomplete," + ufilename) 

    return Response(status="200")
示例#3
0
def upload_drop(username=None):

    name = "My New Dataset"
    summary = "A summary of the dataset"
    user_id = g.user.id

    dset = Dataset(user_id, name, summary)
    db.session.add(dset)
    db.session.commit()
    did = dset.id

    main_folder = Folder(dset.id, "Main Folder", dict())
    db.session.add(main_folder)
    db.session.commit()

    main_folder.initialize()
    db.session.commit()

    print "DROP"

    dset = Dataset.query.get(did)

    ufilename = request.form['filename']
    fid = request.files.getlist('file')[0]  #grab only a single file

    fn, ext = os.path.splitext(ufilename)

    userpath = "refinery/static/users/" + username + "/documents"
    channel = username + "Xmenus"

    if ext == ".zip":
        zip_file = zipfile.ZipFile(fid)
        files = zip_file.namelist()
        nFiles = len(files)
        lastProg = 0
        count = 0.0
        for member in files:
            filename = os.path.basename(member)
            if filename:
                fn, ext = os.path.splitext(filename)
                if ext == ".txt" or ext == ".pdf":
                    add_txt(os.path.join(userpath, filename),
                            zip_file.open(member), filename, dset)
            count += 1.0
            update = str(int(count / float(nFiles) * 100))
            if update != lastProg:
                lastProg = update
                s = 'uprog,' + update
                msgServer.publish(channel, "%s" % s)

    elif ext == ".txt" or ext == ".pdf":
        add_txt(os.path.join(userpath, filename), fid, filename, dset)

    elif ext == ".tar" or ext == ".gz" or ext == ".bz2":
        import tarfile
        tar_file = tarfile.open(fileobj=fid)
        tar_filename = os.path.join(userpath, ufilename)
        valid_names = [
            x for x in tar_file.getnames()
            if (os.path.splitext(x)[1] == '.txt') or (
                os.path.splitext(x)[1] == '.pdf')
        ]
        nFiles = len(valid_names)
        print nFiles
        lastProg = 0
        count = 0.0
        for member in valid_names:
            filename = os.path.basename(member)
            if filename:
                add_txt(os.path.join(userpath, filename),
                        tar_file.extractfile(member), filename, dset)
            count += 1.0
            update = str(int(count / float(nFiles) * 100))
            if update != lastProg:
                lastProg = update
                s = 'uprog,' + update
                msgServer.publish(channel, "%s" % s)

    else:
        print "unknown file format", ext, filename

    dset.dirty = "dirty"

    db.session.commit()

    print "GOT ", len(dset.folders[0].docIDs), "Documents"

    msgServer.publish(channel, "ucomplete," + ufilename)

    return Response(status="200")