def getstuffOfra(directory_name, border1, border2):
    """Build a ``Page`` history from the revision files under ``papers``.

    Walks the ``papers`` directory (resolved against the current working
    directory), including its subdirectories, and adds one revision per
    file. Each file name is split on underscores; the fields are, in order,
    title, revision number, timestamp and author. Only the timestamp and
    the author are used, and the file content becomes the revision text.

    Args:
        directory_name: Unused; the directory is hard-coded to ``papers``.
        border1: Unused.
        border2: Unused.

    Returns:
        The ``Page`` titled ``'colWriting'`` after ``create_paras()`` ran.

    Raises:
        IndexError: If a file name has fewer than four underscore-separated
            fields.
    """
    page_history = Page('colWriting')
    papers_root = os.path.join(os.getcwd(), "papers")
    for dirpath, _dirnames, filenames in os.walk(papers_root):
        for name in filenames:
            fields = name.split('_')
            timestamp = fields[2]
            # NOTE(review): the fourth field keeps whatever follows the
            # third underscore up to the next one, so a file extension
            # stays part of the author string - confirm this is intended.
            author = fields[3]
            # Join with the directory the walk is currently in, so files
            # found in subdirectories are opened from where they live.
            with open(os.path.join(dirpath, name), "r") as revision_file:
                text = revision_file.read()
            # No edit comment is available for these files, so an empty
            # string is passed as the second argument.
            page_history.add_revision(text, '', timestamp, author)
    # NOTE: reduce_revisions() is not applied here; the call was disabled
    # (commented out) in the original code.
    page_history.create_paras()
    return page_history
def getstuff(xml_name, border1, border2):
    """Build a ``Page`` history from an XML revision dump.

    Parses the file with BeautifulSoup (``lxml`` parser), collects the text
    of every ``<text>`` tag and, for every ``<revision>`` tag, its
    timestamp, comment and contributor username. From each of these lists
    only ``items[:border1]`` followed by ``items[border2:]`` is kept. The
    remaining entries are added to a ``Page`` named after the document's
    first ``<title>`` tag.

    Args:
        xml_name: Path of the XML file, joined onto the current working
            directory.
        border1: End index (exclusive) of the leading slice to keep.
        border2: Start index of the trailing slice to keep.

    Returns:
        The ``Page`` after ``reduce_revisions()`` and ``create_paras()``
        have been called on it.

    Raises:
        IndexError: If fewer comments, timestamps or users than cleaned
            texts are available.
    """
    xml_path = os.path.join(os.getcwd(), xml_name)
    # BeautifulSoup reads the whole handle while constructing the soup, so
    # the file can be closed as soon as parsing is done.
    with open(xml_path) as xml_file:
        soup = BS.BeautifulSoup(xml_file, "lxml")

    def keep_outside_borders(items):
        # Strip out the non-working part between the two borders.
        return items[:border1] + items[border2:]

    # Text extraction; each kept slice is cleaned separately.
    texts = [tag.text for tag in soup.find_all('text')]
    texts1 = cleanset(texts[:border1])
    texts1.extend(cleanset(texts[border2:]))

    # Additional per-revision information.
    comments = []
    timestamps = []
    users = []
    for rev in soup.find_all('revision'):
        timestamps.append(rev.timestamp.text)
        # A missing comment or username is recorded as an empty string so
        # the three lists stay aligned by revision index.
        comments.append(rev.comment.text if rev.comment is not None else '')
        username = rev.contributor.username
        users.append(username.text if username is not None else '')

    comments1 = keep_outside_borders(comments)
    timestamps1 = keep_outside_borders(timestamps)
    users1 = keep_outside_borders(users)

    # Title of the page.
    title = soup.find('title').text

    # Create the history object. Indexing (rather than zip) is deliberate:
    # a length mismatch between the lists raises instead of being
    # silently truncated.
    page_history = Page(title)
    for i, text in enumerate(texts1):
        page_history.add_revision(text, comments1[i], timestamps1[i], users1[i])
    page_history.reduce_revisions()
    page_history.create_paras()
    return page_history