Пример #1
0
def downloadMyCourses():
    """Download the content of every course listed on the hl2 portal page.

    Fetches the portal categories page, locates the element with id
    ``TotalRecordsTR02`` and treats the children of its first child as table
    rows.  The first and last rows are skipped (header and footer).  For each
    remaining row a directory named after the course is created in the current
    working directory (if missing), the process changes into it, and
    ``getCoursePage`` is called with the path extracted from the row's
    ``onclick`` attribute.

    The working directory is always restored after each course, even when
    processing that course raises.
    """
    import myHttplib
    import logging
    import os
    FORMAT = '%(asctime)-15s : %(levelname)s : %(module)s : %(lineno)d : %(message)s'
    logging.basicConfig(filename='hl2_logger.log', level=logging.INFO,
                        format=FORMAT)
    logging.info(u"""
------------------------------Downloadin Courses--------------------------------
    """)
    cookie_file = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp"
    url_portal = 'http://hl2.bgu.ac.il/PortalCategories.asp?&test=&Opened02=yesyes'
    content_portal, header_portal, _url_retrieved = myHttplib.connect(
        url_portal, COOKIEFILE=cookie_file)
    root_portal, _encoding = myHttplib.getRoot(content_portal, header_portal)
    coursesList_raw = root_portal.xpath("id('TotalRecordsTR02')")[0]
    # Drop the header and the last row of the table.
    course_rows = coursesList_raw[0].getchildren()[1:-1]
    # Report the number of rows that will actually be processed.
    logging.info(u'found %d courses', len(course_rows))

    base_dir = os.getcwd()
    for courseLine in course_rows:
        course_link = courseLine[0][0]
        courseName = course_link.text
        if not os.path.isdir(courseName):
            os.mkdir(courseName)
        os.chdir(courseName)
        try:
            # The onclick handler carries the course page path as its first
            # single-quoted argument.
            coursePage_raw = course_link.get('onclick')
            coursePage_clean = coursePage_raw.split("'")[1]
            getCoursePage(coursePage_clean)
        finally:
            # Always return to the starting directory so a failure in one
            # course does not nest the following courses inside it.
            os.chdir(base_dir)
Пример #2
0
def getNews():
    """Fetch and parse the hl2 portal categories page.

    Logs a banner, requests ``PortalCategories.asp`` with the stored cookie
    file and hands the response to ``myHttplib.getRoot``.  The parsed result
    is not used any further here and nothing is returned.
    """
    import logging
    logging.info(u"""
---------------------------------Getting news-----------------------------------
    """)
    import myHttplib

    cookie_path = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp"
    portal_url = 'http://hl2.bgu.ac.il/PortalCategories.asp'
    page_body, page_header, fetched_url = myHttplib.connect(
        portal_url, COOKIEFILE=cookie_path)
    tree, charset = myHttplib.getRoot(page_body, page_header)
Пример #3
0
def login():
    """Log in to hl2.bgu.ac.il and return the parsed response of the login POST.

    Steps:
      1. Fetch the site root and read the ``src`` attribute of the element at
         ``root[1][2][2]``.
      2. Fetch that page and collect its input fields via
         ``myHttplib.getInputFields``.
      3. Fill in ``userID`` / ``password`` and POST the fields to
         ``/sso/login2.asp``.

    All three requests use the same cookie file so the session cookies written
    by one request are visible to the next regardless of the current working
    directory.

    Returns:
        list: ``[root_login3, header_login3]`` - the parsed document and the
        response header of the final POST.  Whether the login actually
        succeeded is not checked.
    """
    import logging
    FORMAT = '%(asctime)-15s : %(levelname)s : %(module)s : %(lineno)d : %(message)s'
    logging.basicConfig(filename='hl2_logger.log', level=logging.INFO,
                        format=FORMAT)

    logging.info(u"""
--------------------------------hl2_bgu.login-----------------------------------
    """)
    import myHttplib

    cookie_file = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp"
    url_login_base = "http://hl2.bgu.ac.il/"
    domain_hl = "hl2.bgu.ac.il"

    url_login = url_login_base
    content_login, header_login, url_retrieved_login = myHttplib.connect(
        url_login, COOKIEFILE=cookie_file)
    root_login, _encoding_login = myHttplib.getRoot(content_login, header_login)

    # The login form lives in a sub-page whose relative address is stored in
    # the "src" attribute of this element.
    url_login2_raw = root_login[1][2][2].get("src")
    url_login2 = url_login_base + url_login2_raw
    content_login2, header_login2, url_retrieved_login2 = myHttplib.connect(
        url_login2, ref_url=url_retrieved_login, domain=domain_hl,
        COOKIEFILE=cookie_file)
    root_login2, _encoding_login2 = myHttplib.getRoot(content_login2, header_login2)

    logging.info(u"build request for login")
    inputs = myHttplib.getInputFields(root_login2)
    # NOTE(review): credentials are hard-coded; consider reading them from
    # configuration or the environment instead.
    inputs["userID"] = "hagayd"
    inputs["password"] = "******"

    url_login3 = "http://hl2.bgu.ac.il/sso/login2.asp"
    content_login3, header_login3, _url_retrieved_login3 = myHttplib.connect(
        url_login3, post=inputs, ref_url=url_retrieved_login2,
        domain=domain_hl, COOKIEFILE=cookie_file)
    root_login3, _encoding_login3 = myHttplib.getRoot(content_login3, header_login3)

    logging.info(u"Not implemented - did the login Succeeded")

    return [root_login3, header_login3]
Пример #4
0
def download(root_result):
    """Download every file listed in the search-result grid of *root_result*.

    The grid is the element with id ``ctl00_ContentPlaceHolder1_GridView2``.
    Its first child is skipped (treated as the header row); for every other
    row the relative file path is taken from the first single-quoted argument
    of the ``onclick`` attribute at ``row[0][0]``.  Each file is fetched from
    ``http://132.72.23.44/ac_files`` + that path and written to the current
    working directory.  The local name is the relative path without its first
    three characters and with ``%2f`` replaced by ``_``.

    Args:
        root_result: parsed result page exposing ``xpath``.

    Returns:
        None.  If the grid is not present, logs "no files..." and returns.
    """
    import myHttplib
    import socket
    import logging
    url_download_base = "http://132.72.23.44/ac_files"
    cookie_file = "maagar_cookies.lwp"

    # Check the match list before indexing: indexing an empty result would
    # raise IndexError and the "no files" branch would never be reached.
    matches = root_result.xpath("id('ctl00_ContentPlaceHolder1_GridView2')")
    if not matches:
        logging.info(u"no files...")
        return
    result_elm = matches[0]

    for row in result_elm[1:]:
        relativePath_raw = row[0][0].get('onclick')
        relativePath_clean = relativePath_raw.split("'")[1]
        url_download = url_download_base + relativePath_clean
        try:
            response = myHttplib.connect(url_download, COOKIEFILE=cookie_file)
        except socket.error:
            # Single retry on a socket failure; a second failure propagates.
            response = myHttplib.connect(url_download, COOKIEFILE=cookie_file)
        # Other callers in this file unpack three values from connect
        # (content, header, retrieved url); only the content is needed here,
        # so index instead of unpacking a fixed number of items.
        content_result = response[0]

        localFileName = relativePath_clean[3:].replace("%2f", "_")
        with open(localFileName, "wb") as localFile:
            localFile.write(content_result)
Пример #5
0
def main(courseNum):
    """Search the maagar site for *courseNum* and download the listed files.

    Configures logging to ``logger.log``, builds the search URL for
    department 204 with all four material-type flags set to "true", fetches
    and parses the first result page and passes the parsed page to
    ``download``.

    Args:
        courseNum: value sent as the ``Course`` query parameter.

    Returns:
        None.
    """
    import logging
    log_format = '%(asctime)-15s : %(levelname)s : %(module)s : %(lineno)d : %(message)s'
    logging.basicConfig(filename='logger.log', level=logging.INFO, format=log_format)

    logging.info(u"""
-----------------------------------new_entry------------------------------------
    """)
    import myHttplib
    import urllib

    search_page = "http://iriya.bgu4u.co.il/maagar/searchResults.aspx"
    query = {"q": "", "dep": 204, "Course": courseNum, "PageResult": 1,
             "chkSikomHartzaot": "true", "chkSikomTigulim": "true",
             "chkTargilim": "true", "chFormulas": "true"}

    logging.info(u"get result page")
    search_url = search_page + "?" + urllib.urlencode(query)
    page_body, page_header, fetched_url = myHttplib.connect(
        search_url, COOKIEFILE="maagar_cookies.lwp")
    result_tree, result_charset = myHttplib.getRoot(page_body, page_header)

    logging.info(u"start downloading")
    download(result_tree)

    logging.info(u"ended gracefully")
Пример #6
0
def downloadPage(sid):
    """Download every item listed in the hl2 directory identified by *sid*.

    Steps:
      1. Request ``/bareket/SubjectSkin.asp`` for the subject (the query
         string is also sent as the POST body) and read the ``vcCourseGUID``
         value from the response's ``set-cookie`` header.
      2. Request ``/BareketNet/ItemMenu.aspx`` with that GUID and locate the
         element with id ``dgItemsHolder``.
      3. For every item row after the first, parse the arguments of its
         ``onclick`` handler and pass them to ``cmdItemOpen``.

    Args:
        sid: subject (directory) id; converted with ``str`` for the URL.

    Returns:
        None.  Returns early (after logging) when the cookie does not carry
        ``vcCourseGUID`` or when the item list is missing from the page.
    """
    import myHttplib
    import logging
    import random

    cookie_file = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp"

    logging.info(u'downloading directory with sid: %s', sid)
    url_base = "http://hl2.bgu.ac.il"
    url_subjectSkin_path = "/bareket/SubjectSkin.asp"
    rand = random.random()
    url_subjectSkin_param = "?sid=" + str(sid) + "&rand=" + str(rand)
    url_subjectSkin = url_base + url_subjectSkin_path + url_subjectSkin_param
    # The same parameters, without the leading '?', are sent as the POST body.
    url_subjectSkin_param_post = url_subjectSkin_param[1:]

    content_subjectSkin, header_subjectSkin, _url_retrieved = myHttplib.connect(
        url_subjectSkin, post=url_subjectSkin_param_post,
        COOKIEFILE=cookie_file)
    # Result unused; call kept as in the original flow.
    myHttplib.getRoot(content_subjectSkin, header_subjectSkin)

    url_ItemMenu_path = "/BareketNet/ItemMenu.aspx"
    # Integer in [0, 22222) rendered as a decimal string.
    myrnd = str(int(random.random() * (32222 - 10000)))

    # A response without any cookie is handled like a missing GUID.
    cookie_subjectSkin = header_subjectSkin.dict.get("set-cookie", "")
    logging.info(u'check for vcCourseGUID if cant find return')
    marker = 'vcCourseGUID'
    vcCourseGUID_inx = cookie_subjectSkin.find(marker)
    if vcCourseGUID_inx == -1:
        logging.info(u'cant find vcCourseGUID ')
        return  # can be due to multiple cookie set

    # The value starts after "vcCourseGUID=" and ends at the next '&', or at
    # the next ';' when there is no '&', or at the end of the header.
    value_start = vcCourseGUID_inx + len(marker) + 1
    value_end = cookie_subjectSkin.find('&', value_start)
    if value_end == -1:
        value_end = cookie_subjectSkin.find(';', value_start)
    if value_end == -1:
        value_end = len(cookie_subjectSkin)
    vcCourseGUID = cookie_subjectSkin[value_start:value_end]

    url_ItemMenu_param = ("?TreePressed=yes&sid=" + str(sid)
                          + "&rsid=0&random=" + myrnd
                          + "&handler=EditHandler&vcCourseGuid=" + vcCourseGUID
                          + "&enableSkin=0&LinkBoardSID=")
    url_ItemMenu = url_base + url_ItemMenu_path + url_ItemMenu_param
    content_ItemMenu, header_ItemMenu, _url_retrieved = myHttplib.connect(
        url_ItemMenu, COOKIEFILE=cookie_file)
    root_ItemMenu, _encoding = myHttplib.getRoot(content_ItemMenu, header_ItemMenu)

    logging.info(u'getting item list')
    items = root_ItemMenu.xpath("id('dgItemsHolder')")
    if not items:
        logging.info(u'cant find dgItemsHolder')
        return

    # The first row of the table is skipped.
    for item in items[0][0][1:]:
        item_type = item[2][0][0][0][0][0][0].text
        item_date = item[3][0].text
        item_info = item[0][0].get('alt')
        item_link = item[1][0][0][0][0][0]
        item_page = item_link.get('href')
        item_openLink = item_link.get('onclick')
        # Strip the 19-character call prefix and the 2-character suffix of
        # the onclick handler, leaving its nine comma-separated arguments.
        [subjectID, itemID, sentfrom, courseGUID, courseIDorig, courseID, d1,
         UserGUID, headertype] = item_openLink[19:-2].split(',')
        logging.info(u'item_type:%s, item_date:%s, item_info:%s,item_page:%s ',
                     item_type, item_date, item_info, item_page)
        logging.info(u'subjectID:%s, itemID:%s, sentfrom:%s, courseGUID:%s, courseIDorig:%s, courseID:%s,'
                     u'         UserGUID:%s, headertype:%s ',
                     subjectID, itemID, sentfrom, courseGUID, courseIDorig,
                     courseID, UserGUID, headertype)
        # Remove the quote characters surrounding each argument (the first
        # argument already lost its leading character to the slice above).
        subjectID = subjectID[:-1]
        itemID = itemID[1:-1]
        sentfrom = sentfrom[1:-1]
        courseGUID = courseGUID[1:-1]
        courseIDorig = courseIDorig[1:-1]
        headertype = headertype[1:-1]
        cmdItemOpen(myrnd, subjectID, itemID, sentfrom, courseGUID,
                    courseIDorig, headertype)
0
def cmdItemOpen(myrnd, subjectID, itemID, sentfrom, courseGUID, courseIDorig, headertype):
    """Download a single hl2 item into the current working directory.

    Issues the same sequence of requests against ``/bareket/`` with one shared
    query string: ``ShowItemByType.asp``, ``CleanPermission.asp`` and
    ``ShowItemByType2.asp``.  The script text of the last page contains the
    address assigned to ``frames['ItemBody'].window.location.href``; that page
    is fetched and the final file URL is taken from the first single-quoted
    string in the text of the element at ``root[1][0][3]``.

    The file is saved under the unquoted last path segment of its URL.  An
    existing file with that name is left untouched.

    Args:
        myrnd, subjectID, itemID, sentfrom, courseGUID, courseIDorig,
        headertype: strings placed verbatim into the query string as
        ``random``, ``sid``, ``iid``, ``sentfrom``, ``vcCourseGuid``,
        ``vcCourseID`` and ``headertype``.

    Returns:
        None.  Returns early (after logging) when the file URL could not be
        retrieved or the local file cannot be opened for writing.
    """
    import logging
    logging.info(u"""
---------------------------------download_item----------------------------------
    """)
    logging.info(u"(myrnd:%s, subjectID:%s, itemID:%s, sentfrom:%s, courseGUID:%s, courseIDorig:%s, headertype:%s",
                 myrnd, subjectID, itemID, sentfrom, courseGUID, courseIDorig, headertype)
    import myHttplib
    import os
    import urllib

    cookie_file = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp"
    url_base = "http://hl2.bgu.ac.il"

    # Query string shared by the three /bareket/ requests below.  It ends
    # with the headertype value; window-feature options such as
    # "resizable=no,..." are not part of the URL.
    url_param = ('?random=' + myrnd + '&sid=' + subjectID + '&iid=' + itemID
                 + '&sentfrom=' + sentfrom + '&vcCourseGuid=' + courseGUID
                 + '&vcCourseID=' + courseIDorig
                 + '&headertype=' + headertype)

    url_ShowItemByType = url_base + "/bareket/ShowItemByType.asp" + url_param
    content_ShowItemByType, header_ShowItemByType, _retrieved = myHttplib.connect(
        url_ShowItemByType, COOKIEFILE=cookie_file)
    # Result unused; call kept as in the original flow.
    myHttplib.getRoot(content_ShowItemByType, header_ShowItemByType)

    logging.info(u"CleanPermission")
    url_CleanPermission = url_base + "/bareket/CleanPermission.asp" + url_param
    myHttplib.connect(url_CleanPermission, COOKIEFILE=cookie_file)

    url_ShowItemByType2 = url_base + "/bareket/ShowItemByType2.asp" + url_param
    content_ShowItemByType2, header_ShowItemByType2, _retrieved = myHttplib.connect(
        url_ShowItemByType2, COOKIEFILE=cookie_file)
    root_ShowItemByType2, _encoding = myHttplib.getRoot(
        content_ShowItemByType2, header_ShowItemByType2)

    # Extract the double-quoted address that the page script assigns to the
    # ItemBody frame.
    script_text = root_ShowItemByType2[1].text
    url_Pos = script_text.find("frames['ItemBody'].window.location.href=")
    url_startPos = script_text.find('"', url_Pos)
    url_endPos = script_text.find('"', url_startPos + 1)

    url_item_path = "http://hl2.bgu.ac.il/bareket/" + script_text[url_startPos + 1:url_endPos]
    content_item_path, header_item_path, _retrieved = myHttplib.connect(
        url_item_path, COOKIEFILE=cookie_file)
    root_item_path, _encoding = myHttplib.getRoot(content_item_path, header_item_path)

    # The file URL is the first single-quoted string in this element's text.
    url_item_raw = root_item_path[1][0][3].text
    url_item = url_item_raw.split("'")[1]

    logging.info(u"downloading: |%s|", url_item)
    content_item, header_item, url_item_retrieved = myHttplib.connect(
        url_item, COOKIEFILE=cookie_file)
    if url_item_retrieved is None:
        logging.warning(u"can't get url %s", url_item)
        return

    fileName = urllib.unquote(url_item[url_item.rfind("/") + 1:])
    if os.path.isfile(fileName):
        # Never overwrite a file that is already present.
        return

    try:
        logging.info(u"trying to save to: %s", fileName)
        file_handler = open(fileName, 'wb')
    except IOError as e:
        logging.error(u"faild open file reason: %s", e)
        print(e)
        return
    # Close the handle even if the write fails.
    with file_handler:
        file_handler.write(content_item)
Пример #8
0
def getCoursePage(url_coursePage):
    """Walk from a course page down to the course's root folder tree.

    ``url_coursePage`` is a site-relative path; it is appended to
    ``http://hl2.bgu.ac.il``.  Starting there, a chain of pages is fetched,
    each one located through a fixed position in the previously parsed page:

      1. course page      -> ``src`` of the element at ``[1][0]``
                             (requested under ``/Eclass/``)
      2. top page         -> ``href`` of the element at ``[2][2][0][2][0]``
      3. knowledge page   -> text of the element at ``[1][0][0]``; the piece
                             at index 19 after splitting on ``"``
                             (requested under ``/bareket/``)
      4. knowledge page 2 -> ``src`` of the element at ``[2][1][1]``
      5. root folder page -> element with id ``ulRoot``, handed to
                             ``directoryHandler``

    Nothing is returned within the lines shown here.

    Reverse-engineering notes (sample URLs and DOM dumps from one course)
    ---------------------------------------------------------------------
    http://hl2.bgu.ac.il/Eclass/CourseFrames.asp?vcCourseGUID=&vcCourseID=54767&language=972
    /eClass/Announcements.asp?vcCourseID=54767&vcCourseName=%E4%F7%F9%F8%20%E4%EB%E9%EE%E9

    root_knowledge:
    http://hl2.bgu.ac.il/bareket/fromArik.asp?language=100&goto=ExerciseFrames.asp?user=student&vcCourseID=54767&vcCourseName=%E4%F7%F9%F8%20%E4%EB%E9%EE%E9

    rootFolder - http://hl2.bgu.ac.il/bareketnet/toc.aspx?width=200&handler=EditHandler&DIR=RTL&RootDir=0xAF5700BEDA912846AB4F7B537A61E744&SID=&KBItemID=&KBItemDesc=&KBItemMetaData=&LinkPosition=&TestID=&LinkBUID=&ComeFrom=&EnableSubjectSkin=0
        ?subFolder

        folderList - http://hl2.bgu.ac.il/BareketNet/ItemMenu.aspx?TreePressed=yes&sid=152493&rsid=0&random=2531&handler=EditHandler&vcCourseGuid=0xAF5700BEDA912846AB4F7B537A61E744&enableSkin=0&LinkBoardSID=#IAHREF4635769499
            itemPage - http://hl2.bgu.ac.il/bareket/ShowItemByType.asp?random=4585&sid=152493&iid=463576&sentfrom=ExerciseMenu.asp&vcCourseGuid=AF5700BEDA912846AB4F7B537A61E744&vcCourseID=54767&headertype=2

    parent.frames["vcCourseMain"].document.location.href="/bareket/fromArik.asp?language=100&vcCourseID=54767&goto=../eClass/administration.asp?Send=1&vcCourseGUID=AF5700BEDA912846AB4F7B537A61E744&vcParentGuid=&vcCourseName=%E4%F7%F9%F8+%E4%EB%E9%EE%E9

    Folder labels found in the tree (translated from Hebrew):
        [1][1][0][1][10][2]     Homework submission
        [1][1][0][1][8][2] a:   Homework solutions
        [1][1][0][1][6][2]      Homework exercises
        [1][1][0][1][0][2]      Syllabus

    [1][1][0][1][0][1]img:
    {'src': 'foldericon.gif', 'imgleid': '54767', 'imgdef': 'foldericon.gif', 'imgid': '2304698', 'onclick': "setMainStatus(event);setActiveNode('2304698','0','BE0057AF-91DA-4628-AB4F-7B537A61E744','54767','foldericon.gif',event);", 'imglnk': '0', 'imgleguid': 'BE0057AF-91DA-4628-AB4F-7B537A61E744'}
    [1][1][0][1][0][2]a:
    {'id': '2304698'      (dump truncated in the original notes)
    [1][1][0][1][0][3]hidden:
    {'value': '2304698'}
    [1][1][0][1][0][7]hidden:
    {'value': 'BE0057AF-91DA-4628-AB4F-7B537A61E744'}
    [1][1][0][1][0][8]input:
    {'type': 'hidden', 'name': 'hidden', 'value': '2304698'}
    [1][1][0][1][0][12]input:
    {'type': 'hidden', 'name': 'hidden', 'value': 'BE0057AF-91DA-4628-AB4F-7B537A61E744'}

    [1][1][0][1][1]ul:
    {'class': 'hdn', 'id': 'ul2304698'}
    None
    None
    [1][1][0][1][2]li:
    {'class': 'kid', 'id': 'li152493'}

    [1][1][0][1][2][0]img:
    {'src': '/upload/misc/CssFiles/Images/plus.gif'}
    [1][1][0][1][2][1]img:
    {'src': 'foldericon.gif', 'imgleid': '54767', 'imgdef': 'foldericon.gif', 'imgid': '152493', 'onclick': "setMainStatus(event);setActiveNode('152493','0','BE0057AF-91DA-4628-AB4F-7B537A61E744','54767','foldericon.gif',event);", 'imglnk': '0', 'imgleguid': 'BE0057AF-91DA-4628-AB4F-7B537A61E744'}
    [1][1][0][1][2][2]a:
    {'id': '152493', 'href': 'javascript:void(0)', 'target': 'Main', 'onclick': 'javascript:NoOp()', 'title': u'\u05d4\u05e8\u05e6\u05d0\u05d5\u05ea'}
    Lectures
    [1][1][0][1][2][3]hidden:
    {'value': '152493'}
    [1][1][0][1][2][7]hidden:
    {'value': 'BE0057AF-91DA-4628-AB4F-7B537A61E744'}
    [1][1][0][1][2][8]input:
    {'type': 'hidden', 'name': 'hidden', 'value': '152493'}

    rand:0.882085807621479
    """    
    import logging
    logging.info(u"""
---------------------------------getCoursePage----------------------------------
    """)
    import myHttplib    
    
    url_base = "http://hl2.bgu.ac.il"
    # The argument is a site-relative path; turn it into an absolute URL.
    url_coursePage = url_base + url_coursePage
    [content_coursePage, header_coursePage , url_coursePage_retrieved] =  myHttplib.connect(url_coursePage, COOKIEFILE = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp")
    [root_coursePage, encoding_coursePage] = myHttplib.getRoot(content_coursePage, header_coursePage)
      
    # Step 1: follow the "src" of the element at [1][0]; it is relative to /Eclass/.
    url_top_path =  root_coursePage[1][0].get('src')       
    url_top = url_base + "/Eclass/" + url_top_path    
    [content_top, header_top , url_top_retrieved] =  myHttplib.connect(url_top, COOKIEFILE = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp")
    [root_top, encoding_top] = myHttplib.getRoot(content_top, header_top)
    
    # Step 2: follow the "href" found at a fixed position in the top page.
    # NOTE(review): positional indexing presumably breaks if the page layout changes - confirm.
    url_knowledge_path =  root_top[2][2][0][2][0].get('href')
    url_knowledge = url_base + url_knowledge_path    
    [content_knowledge, header_knowledge , url_knowledge_retrieved] =  myHttplib.connect(url_knowledge, COOKIEFILE = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp")
    [root_knowledge, encoding_knowledge] = myHttplib.getRoot(content_knowledge, header_knowledge)
    
    # Step 3: the next address is embedded in element text; take the piece at
    # index 19 after splitting on double quotes. It is relative to /bareket/.
    scriptText_knowledge = root_knowledge[1][0][0].text
    url_knowledge2_path = scriptText_knowledge.split('"')[19]
    url_knowledge2 = url_base + '/bareket/' + url_knowledge2_path    
    [content_knowledge2, header_knowledge2 , url_knowledge2_retrieved] =  myHttplib.connect(url_knowledge2, COOKIEFILE = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp")
    [root_knowledge2, encoding_knowledge2] = myHttplib.getRoot(content_knowledge2, header_knowledge2)
    
    # Step 4: follow the "src" of the element at [2][1][1] to the root folder page.
    url_rootFolder_path = root_knowledge2[2][1][1].get('src')
    url_rootFolder = url_base + url_rootFolder_path
    [content_rootFolder, header_rootFolder , url_rootFolder_retrieved] =  myHttplib.connect(url_rootFolder, COOKIEFILE = "D:\\main\\Dropbox\\projects\\python\\downloader\\hl2_cookies.lwp")
    [root_rootFolder, encoding_rootFolder] = myHttplib.getRoot(content_rootFolder, header_rootFolder)    
    # Step 5: the element with id "ulRoot" is what gets passed to directoryHandler.
    ul_root = root_rootFolder.xpath("id('ulRoot')")[0]
    
    logging.info(u"getting course main dir")
    directoryHandler(ul_root)
    
    
    # Only the log line is visible here; no address-book code follows within view.
    logging.info(u"getting course addresbook")