def __init__(self, libraryPath=None):
    '''
    Prepare an empty book record and a database handle.

    @param libraryPath: library location; stored on the instance and
        forwarded to CreateDatabase.
    '''
    newBook = Book()
    newBook.uuid = str(uuid.uuid4())
    newBook.tag = None
    newBook.authors = []
    self.book = newBook

    self.libraryPath = libraryPath
    self.createDatabase = CreateDatabase(libraryPath=libraryPath)
def __init__(self, baseUrl=None):
    '''
    Remember the base URL, the workspace library path and a database handle.

    @param baseUrl: stored unchanged as self.baseUrl.
    '''
    self.baseUrl = baseUrl
    workspace = Workspace()
    self.directory_name = workspace.libraryPath
    self.createDatabase = CreateDatabase()
def __init__(self, baseUrl=None):
    '''
    Remember the base URL, the workspace library path, a database handle and
    the HTTP header dictionary; image and book URLs start out unset.

    @param baseUrl: stored unchanged as self.baseUrl.
    '''
    self.baseUrl = baseUrl
    workspace = Workspace()
    self.directory_name = workspace.libraryPath
    self.createDatabase = CreateDatabase()
    userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'
    self.header_info = {'User-Agent': userAgent}
    # book image url
    self.imageUrl = None
    self.bookUrl = None
def testFindBook(self): print 'testFindBook' createDatabase = CreateDatabase() book = Book() book.name = 'java' # book.isbn='java' dbBookObj = Book() dbBookObj.bookName = book.name dbBookObj.isbn_13 = book.isbn books = createDatabase.findBook(dbBookObj) for book in books: print book pass
def __init__(self, parent):
    '''
    Build the "Opal" main frame: window icon, database handle, AUI manager
    state, then panes, menu bar, event bindings and status bar.

    @param parent: parent window handed to wx.Frame.__init__.
    '''
    title = "Opal"
    # `size` is only referenced by the commented-out call below.
    size = wx.DefaultSize
    style = wx.DEFAULT_FRAME_STYLE | wx.MAXIMIZE | wx.SUNKEN_BORDER
    # wx.Frame.__init__(self, parent, wx.ID_ANY, title, pos, size, style)
    wx.Frame.__init__(self, parent, wx.ID_ANY, title=title, style=style)
    print '1----------------------->'
    # The frame icon is loaded from <appPath>/images/Library-icon.png.
    image = wx.Image(os.path.join(Workspace().appPath, "images", "Library-icon.png"), wx.BITMAP_TYPE_PNG).ConvertToBitmap()
    icon = wx.EmptyIcon()
    icon.CopyFromBitmap(image)
    # set frame icon
    self.SetIcon(icon)
    # No library directory yet: run the wizard before touching the database.
    if not os.path.exists(Workspace().libraryPath):
        self.createWizard()
    # NOTE(review): nesting was reconstructed from collapsed text; this
    # assignment is assumed to run unconditionally - confirm.
    self.createDatabase = CreateDatabase()
    # self.creatingDatabase()
    self.books = list()
    self.thumbnail = None
    self.fileDropTarget = FileDropTarget(self)
    # self.grid = wx.grid.Grid(self, -1, wx.Point(0, 0), wx.Size(150, 250),wx.NO_BORDER | wx.WANTS_CHARS)
    self._mgr = aui.AuiManager()
    # tell AuiManager to manage this frame
    self._mgr.SetManagedWindow(self)
    # set up default notebook style
    self._notebook_style = aui.AUI_NB_DEFAULT_STYLE | aui.AUI_NB_TAB_EXTERNAL_MOVE | wx.NO_BORDER
    self._notebook_theme = 0
    # Attributes
    self._textCount = 1
    self._transparency = 255
    self._snapped = False
    self._custom_pane_buttons = False
    self._custom_tab_buttons = False
    self._pane_icons = False
    self._veto_tree = self._veto_text = False
    print '1----------------------->', os.getcwd()
    # Switch the process working directory to this module's directory
    # before the panes are built.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    self.BuildPanes()
    self.CreateMenuBar()
    self.BindEvents()
    self.buildStatusBar()
def resetWorkspace(self): os.chdir(Workspace().path) print '---resetWorkspace---->', os.getcwd() listOfDir = os.listdir(Workspace().path) if len(listOfDir) > 0: # print len(listOfDir) isDatabase = False for sName in listOfDir: if ".sqlite" in str(sName): print sName isDatabase = True if not isDatabase: createDatabase = CreateDatabase() session = createDatabase.creatingDatabase() createDatabase.addingData()
def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
    '''
    Initialise the parent class and record where downloaded books are stored.

    group, target, name and verbose are forwarded to the parent initialiser;
    args and kwargs are only stored on the instance.
    '''
    super(DownloadItEbook, self).__init__(group=group, target=target, name=name, verbose=verbose)
    self.args, self.kwargs = args, kwargs
    workspace = Workspace()
    self.directory_name = workspace.libraryPath
    self.createDatabase = CreateDatabase()
class CreateDatabaseTest(unittest.TestCase): def setUp(self): print 'setUp' self.createDatabase = CreateDatabase() # self.createDatabase.creatingDatabase() def tearDown(self): print 'tearDown' @unittest.skip("demonstrating skipping") def testFindBook(self): print 'testFindBook' createDatabase = CreateDatabase() book = Book() book.name = 'java' # book.isbn='java' dbBookObj = Book() dbBookObj.bookName = book.name dbBookObj.isbn_13 = book.isbn books = createDatabase.findBook(dbBookObj) for book in books: print book pass @unittest.skip("demonstrating skipping") def testAddingData(self): print 'testAddingData' self.createDatabase.creatingDatabase() self.createDatabase.addingData() def testRemoveBook(self): print 'testRemoveBook' book = Book() book.id = 1 isSuccessfulDelete = self.createDatabase.removeBook(book) print isSuccessfulDelete
def __init__(self, parent):
    """
    Build the notebook with a "Gallery" page (thumbnail view of all books)
    and a "Books" page.

    @param parent: parent window handed to aui.AuiNotebook.__init__.
    """
    aui.AuiNotebook.__init__(self, parent=parent)
    self.default_style = aui.AUI_NB_DEFAULT_STYLE | aui.AUI_NB_TAB_EXTERNAL_MOVE | wx.NO_BORDER
    self.SetWindowStyleFlag(self.default_style)
    # Create the first tab and add it to the notebook
    self.gallery = TabPanel(self)
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.thumbnail = ThumbnailCtrl(self.gallery, imagehandler=NativeImageHandler)
    self.thumbnail._scrolled.EnableToolTips(enable=True)
    # # Todo
    # This empty list is overwritten by findAllBook() below.
    books = list()
    print '1.---->', os.getcwd()
    # os.chdir('/home/vijay/Documents/Aptana_Workspace/util/src/dao')
    print '2.---->', os.getcwd()
    # session = CreateDatabase().creatingDatabase()
    # CreateDatabase().addingData()
    # books = CreateDatabase().findByBookName("python")
    books = CreateDatabase().findAllBook()
    # The thumbnail control is only populated when the query returned something.
    if books != None:
        self.thumbnail.ShowDir(books)
    self.sizer.Add(self.thumbnail, 1, wx.EXPAND | wx.ALL, 10)
    self.gallery.SetSizer(self.sizer)
    self.tabOne = TabPanel(self)
    self.tabOne.addItems()
    # tabOne.SetBackgroundColour("Gray")
    bookImage = wx.ArtProvider.GetBitmap(wx.ART_HELP_BOOK, wx.ART_OTHER, wx.Size(16, 16))
    galleryImage = wx.ArtProvider.GetBitmap(wx.ART_INFORMATION, wx.ART_OTHER, wx.Size(16, 16))
    self.AddPage(self.gallery, "Gallery", False, galleryImage)
    self.AddPage(self.tabOne, "Books", False, bookImage)
    # NOTE(review): reads self.DEFAULT_STYLE, not the self.default_style set
    # above; DEFAULT_STYLE is not defined in this method - confirm it exists
    # on the class.
    style = self.DEFAULT_STYLE
    self.SetWindowStyleFlag(style)
    self.SetArtProvider(aui.AuiDefaultTabArt())
def initUI(self):
    '''
    Lay out the thumbnail view inside self.frmPanel, create the status bar,
    show every book returned by the database and display the window.
    '''
    layout = wx.BoxSizer(wx.VERTICAL)

    self.thumbnail = ThumbnailCtrl(self.frmPanel, imagehandler=NativeImageHandler)
    self.thumbnail._scrolled.EnableToolTips(enable=True)

    self.statusbar = self.CreateStatusBar()
    self.statusbar.SetStatusText('Ready')

    allBooks = CreateDatabase().findAllBook()
    if allBooks is not None:
        self.thumbnail.ShowDir(allBooks)

    layout.Add(self.thumbnail, 1, wx.EXPAND | wx.ALL, 10)
    panel = self.frmPanel
    panel.SetSizer(layout)
    panel.Layout()
    self.Show(True)
def main(): # global books, frame # session = CreateDatabase().creatingDatabase() # # CreateDatabase().addingData(session) # books = CreateDatabase().findAllBook(session) # bookName = 'head' # books = CreateDatabase().findByBookName(session, bookName) if Workspace().libraryPath + os.sep + '_opal.sqlite': if os.stat(Workspace().libraryPath + os.sep + '_opal.sqlite').st_size == 0: c = CreateDatabase() c.creatingDatabase() c.addingData() print 'data loaded' app = wx.App(0) frame = MainWindow(None, "My Calibre") app.MainLoop()
def __init__(self):
    '''
    Create the database handle used by this object.
    '''
    database = CreateDatabase()
    self.createDatabase = database
def __init__(self):
    '''
    Prepare an empty book record (fresh uuid, no tag, no authors) and a
    database handle.
    '''
    newBook = Book()
    newBook.uuid = str(uuid.uuid4())
    newBook.tag = None
    newBook.authors = []
    self.book = newBook

    self.createDatabase = CreateDatabase()
class ItEbook(object):
    '''
    Scrapes the page at `baseUrl` for book links, collects each book's
    details, downloads cover and book file through Firefox/Selenium and
    refreshes the database afterwards.
    '''
    # NOTE(review): block nesting in this class was reconstructed from text
    # whose indentation had been lost. Places where more than one nesting is
    # plausible are marked below - confirm against the original file.

    def __init__(self, baseUrl=None):
        '''
        Constructor.

        @param baseUrl: stored unchanged as self.baseUrl.
        '''
        self.baseUrl = baseUrl
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()
        pass

    def getUrl(self, baseUrl):
        '''
        Return self.baseUrl. The `baseUrl` argument is not used.
        '''
        return self.baseUrl

    def findAllBookUrl(self):
        '''
        Walk every <a> element of the base page; for each link whose text is
        neither blank nor in the skip list and is not yet in the database,
        fetch the book detail, run the download job and update the database.
        Failures of the download/update step are printed and skipped.
        '''
        content = urllib2.urlopen(self.baseUrl).read()
        soup = BeautifulSoup(content, "lxml")
        # Link texts that belong to site navigation rather than to books.
        skipList = ('IT eBooks', 'IT eBooks Group', u'IT-eBooks.Info', u'IT-eBooks API', u'IT-eBooks Search', 'Tweet')
        # Collected but not read again inside this method.
        listOfBookName = list()
        for link in soup.find_all('a'):
            if link.text.strip() != '' and link.text not in skipList:
                listOfBookName.append(link.text)
                isBookAvailable = self.isBookNameAvailableInDatabase(link.text)
                if not isBookAvailable:
                    print link.text, '\t', link.get('href'), isBookAvailable
                    book = self.findBookDetail(link.get('href'))
                    # print book
                    try:
                        self.firefoxDownloadJob(book, link.get('href'))
                        self.updateDatabase()
                    except:
                        # Bare except: any error is printed and the loop
                        # continues with the next link.
                        print link.get('href')
                        traceback.print_exc()

    def updateDatabase(self):
        '''
        Call creatingDatabase() followed by addingData() on the database handle.
        '''
        self.createDatabase.creatingDatabase()
        self.createDatabase.addingData()

    def isBookNameAvailableInDatabase(self, bookName=None):
        '''
        Return True when findByBookName(bookName) yields a truthy result.
        '''
        isBookPresent = False
        book = self.createDatabase.findByBookName(bookName)
        if book:
            isBookPresent = True
        return isBookPresent

    def findBookDetail(self, number):
        '''
        Fetch the page at baseUrl + number and build a Book from its
        itemprop-tagged elements.

        @param number: URL suffix appended to self.baseUrl.
        @return: the populated Book object.
        '''
        url = self.baseUrl + number
        content = urllib2.urlopen(url).read()
        soup = BeautifulSoup(content, "lxml")
        book = Book()
        # Each lookup takes the first matching element; a missing element
        # raises IndexError.
        book.authors.append(Author(soup.find_all(itemprop="author")[0].text))
        book.isbn_13 = soup.find_all(itemprop="isbn")[0].text
        book.bookName = soup.find_all(itemprop="name")[0].text
        book.publisher = soup.find_all(itemprop="publisher")[0].text
        try:
            date = datetime.strptime(
                str(soup.find_all(itemprop="datePublished")[0].text), '%Y')
        except:
            # Fall back to the current time when the year cannot be parsed.
            date = datetime.now()
        book.publishedOn = date
        book.numberOfPages = soup.find_all(itemprop="numberOfPages")[0].text
        book.inLanguage = soup.find_all(itemprop="inLanguage")[0].text
        book.bookFormat = soup.find_all(itemprop="bookFormat")[0].text
        book.bookDescription = soup.find_all(itemprop="description")[0].text
        book.bookImgName = (soup.find_all(itemprop="image")[0]).get('src')
        try:
            book.subTitle = soup.h3.text
        except:
            traceback.print_exc()
        # Position-based lookup: 4th table, 8th row, 2nd cell, first <b>.
        book.fileSize = soup.find_all('table')[3].find_all('tr')[7].find_all(
            'td')[1].find_all('b')[0].text
        # book.fileSize=
        # .top > div:nth-child(2) > h3:nth-child(2)
        # The text of the first link pointing at filepi.com becomes book.name.
        # NOTE(review): link.get('href') is None for anchors without href,
        # which would make .startswith raise here.
        for link in soup.find_all('a'):
            if link.get('href').startswith('http://filepi.com'):
                book.name = link.text
                break
        return book

    def firefoxDownloadJob(self, book, refUrl):
        '''
        Download the cover image, write book.json and let Firefox download
        the book file into a fresh directory, waiting until no file with a
        "part" extension remains.

        @param book: Book returned by findBookDetail.
        @param refUrl: URL suffix of the book page, appended to self.baseUrl.
        '''
        # Creating directory
        directory_name = self.downloadDir()
        # Creating Actual URL
        url = self.baseUrl + refUrl
        # Filled below but not read again inside this method.
        lsFiles = []
        # Exactly three entries in the directory: record the regular files.
        if 3 == len(os.listdir(directory_name)):
            for sName in os.listdir(directory_name):
                if os.path.isfile(os.path.join(directory_name, sName)):
                    lsFiles.append(sName)
        # Any other number of entries: remove everything in the directory.
        elif 3 != len(os.listdir(directory_name)):
            for sName in os.listdir(directory_name):
                os.remove(directory_name + '/' + sName)
        # NOTE(review): everything from here on is assumed to run for both
        # branches above; it may originally have been nested in the elif.
        imageUrl = self.baseUrl + book.bookImgName
        subUrl = book.bookImgName
        imageFileName = subUrl.split('/')[-1:][0]
        # Downloading book cover
        bookImagePath = os.path.join(directory_name, subUrl.split('/')[-1:][0])
        urllib.urlretrieve(imageUrl, bookImagePath)
        # From here on bookImgName holds only the file name, not the URL path.
        book.bookImgName = imageFileName
        # writing json file
        self.writeJsonToDir(directory_name, book)
        # Hard-coded location of the Firefox binary.
        binary = FirefoxBinary('/docs/python_projects/firefox/firefox')
        fp = webdriver.FirefoxProfile()
        fp.set_preference("webdriver.log.file", "/tmp/firefox_console")
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.manager.focusWhenStarting', False)
        fp.set_preference("browser.download.dir", directory_name)
        fp.set_preference("browser.download.manager.scanWhenDone", False)
        fp.set_preference("browser.download.manager.useWindow", False)
        fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream")
        fp.update_preferences()
        driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=binary)
        # driver.find_element_by_xpath("html/body/table/tbody/tr[2]/td/div/table/tbody/tr/td[1]/img")
        driver.get(url)
        efd_link = driver.find_element_by_link_text(book.name)
        # File size and format are re-read from the rendered page.
        book.fileSize = driver.find_element_by_xpath(
            "html/body/table/tbody/tr[2]/td/div/table/tbody/tr/td[2]/table/tbody/tr[8]/td[2]/b"
        ).text
        book.bookFormat = driver.find_element_by_xpath(
            "html/body/table/tbody/tr[2]/td/div/table/tbody/tr/td[2]/table/tbody/tr[9]/td[2]/b"
        ).text
        efd_link.click()
        flag = True
        # Poll every 10 seconds until no file with extension "part" is left.
        # There is no timeout: a stalled download keeps this loop running.
        while (flag):
            # # checking part file
            time.sleep(10)
            lst = []
            files = []
            for sName in os.listdir(directory_name):
                if os.path.isfile(os.path.join(directory_name, sName)):
                    lst.append(sName.split('.')[-1:][0])
                    files.append(os.path.join(directory_name, sName))
            print lst
            if 'part' not in lst:
                flag = False
                time.sleep(10)
                driver.close()
            else:
                #print files
                # if not self.isBookDownloading(files):
                #     driver.close()
                pass

    def writeJsonToDir(self, bookPath=None, book=None):
        '''
        Write the book's attributes to <bookPath>/book.json.

        The dictionary being serialised is book.__dict__ itself, so the
        conversions below (publishedOn to str, authors to a list of dicts)
        also change the passed-in book. Any error is printed and swallowed.

        @param bookPath: directory that receives book.json.
        @param book: object whose __dict__ is serialised.
        '''
        try:
            # NOTE(review): the file is not closed when an exception occurs
            # before f.close().
            f = open(os.path.join(bookPath, 'book.json'), 'w')
            row2dict = book.__dict__
            authors = []
            if type(row2dict['publishedOn']) == datetime:
                row2dict['publishedOn'] = str(row2dict['publishedOn'])
            for a in row2dict['authors']:
                author = {}
                if type(a) == str:
                    author['authorName'] = a
                else:
                    author = a.__dict__
                authors.append(author)
            row2dict['authors'] = authors
            f.write(json.dumps(row2dict, sort_keys=False, indent=4))
            f.close()
        except:
            traceback.print_exc()

    def isBookDownloading(self, files):
        '''
        Intended to report whether a download is still in progress.

        The sizes are read once; every entry stored in time_dic_files is the
        same dic_files object, so the snapshots compared below never differ
        from each other.

        @param files: paths whose sizes are recorded.
        @return: isDownloading flag (True unless more than one distinct size
            was collected for paths containing 'part').
        '''
        #time.sleep(2)
        dic_files = {}
        time_dic_files = {}
        i = 1
        checkFlagForSize = True
        isDownloading = True
        for fl in files:
            dic_files[fl] = str(os.stat(fl).st_size)
        while (checkFlagForSize):
            time_dic_files[i] = dic_files
            i = i + 1
            if i > 4:
                size = set()
                for k in time_dic_files[i - 1]:
                    if 'part' in k:
                        size.add(time_dic_files[i - 1][k])
                for k in time_dic_files[i - 2]:
                    if 'part' in k:
                        size.add(time_dic_files[i - 2][k])
                for k in time_dic_files[i - 3]:
                    if 'part' in k:
                        size.add(time_dic_files[i - 3][k])
                # print len(list(size))
                if len(list(size)) > 1:
                    isDownloading = False
                # NOTE(review): placement assumed; this is the nesting under
                # which the loop ends once four snapshots exist - confirm.
                checkFlagForSize = False
        logging.info('isDownloading:')
        return isDownloading

    def startDownload(self):
        '''
        Create a new ItEbook for http://it-ebooks.info and process its links.
        '''
        baseUrl = 'http://it-ebooks.info'
        itebook = ItEbook(baseUrl)
        # TODO need to be updated
        itebook.findAllBookUrl()

    def getMaxBookID(self):
        '''
        Return the database's maximum book id, or 0 when that value is falsy.
        '''
        maxBookId = self.createDatabase.getMaxBookID()
        if not maxBookId:
            maxBookId = 0
        return maxBookId

    def downloadDir(self):
        '''
        Create (if needed) and return the directory for the next book, named
        after the maximum book id plus one, inside self.directory_name.

        @return: path of that directory.
        '''
        directory_name = os.path.join(self.directory_name, str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            os.makedirs(directory_name)
        # NOTE(review): assumed to run whether or not the directory was just
        # created; changes the process working directory.
        os.chdir(directory_name)
        return directory_name
def reloadingDatabase(self, event):
    '''
    Recreate the database handle for the current library, rebuild and
    repopulate the database, then reload books for the current search text.

    @param event: not used.
    '''
    logger.debug('reloadingDatabase')
    database = CreateDatabase(libraryPath=self.libraryPath)
    self.createDatabase = database
    database.creatingDatabase()
    database.addingData()
    currentText = self.search.GetValue()
    self.loadingBook(searchText=currentText)
class ThumbnailCtrlPaginationPanel(wx.Panel, WorkspaceHelper):
    '''
    Panel with a search box, a thumbnail view of the library's books and a
    pagination toolbar (page size, page number, first/previous/next/last).
    '''

    def __init__(self, parent):
        '''
        Build the controls, load the first page of books and lay everything out.

        @param parent: parent window handed to wx.Panel.__init__.
        '''
        wx.Panel.__init__(self, parent)
        WorkspaceHelper.__init__(self)
        pub.subscribe(self.reloadingDatabase, 'reloadingDatabase')
        vBox = wx.BoxSizer(wx.VERTICAL)
        ####################################################################
        self.libraryPath = self.getLibraryPath()
        self.fileDropTarget = FileDropTarget(self, libraryPath=self.libraryPath)
        self.fileOperations = FileOperations()
        self.search = wx.SearchCtrl(self, size=(200, -1), style=wx.TE_PROCESS_ENTER)
        self.search.ShowSearchButton(1)
        self.search.ShowCancelButton(1)
        self.search.SetMenu(None)
        self.search.Bind(wx.EVT_TEXT_ENTER, self.OnSearch)
        self.search.Bind(wx.EVT_TEXT, self.OnSearch)

        self.thumbnailCtrl = ThumbnailCtrl(self, -1, imagehandler=NativeImageHandler)
        self.thumbnailCtrl.EnableToolTips(enable=True)
        self.thumbnailCtrl.SetDropTarget(self.fileDropTarget)

        # The page state must exist before the first load; the toolbar reads
        # page.pages, so it is built only after loadingBook().
        self.page = Page()
        self.loadingBook()
        self.paginationBar = self.constructTopToolBar()
        self.setPaginationBarStatus()
        ####################################################################
        vBox.Add(self.search, 0, wx.EXPAND | wx.ALL)
        vBox.Add(self.thumbnailCtrl, 1, wx.EXPAND | wx.ALL)
        vBox.Add(self.paginationBar, 0, wx.EXPAND | wx.ALL, 0)
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(vBox, 1, wx.EXPAND, 0)
        self.SetSizer(sizer)

    @staticmethod
    def _pageCount(total, pageSize):
        '''
        Return how many pages are needed for `total` items, never less than 1.

        Uses ceiling division: the former `total // pageSize + 1` produced an
        extra, empty page whenever total was an exact multiple of pageSize.
        '''
        return max(1, (total + pageSize - 1) // pageSize)

    @debounce(1)
    def OnSearch(self, event):
        '''
        Search-box handler: store the typed text, reload and refresh paging.
        '''
        logger.debug('onSearch')
        self.page.searchText = self.search.GetValue()
        self.loadingBook(searchText=self.search.GetValue())
        self.updatePangnation()

    def loadingBook(self, searchText=None):
        '''
        Load the current page of books into the thumbnail control and update
        page totals and the status bar.

        @param searchText: new search text; when None the search text already
            stored on self.page is kept, so paging through search results
            does not silently drop the search.
        '''
        # Apply an explicit search text *before* querying. Previously the
        # argument was stored after the query, which reset the stored text to
        # None on every paging call.
        if searchText is not None:
            self.page.searchText = searchText

        books = None
        totalBooks = 0
        count = 0
        if self.libraryPath and os.path.exists(self.libraryPath):
            findingBook = FindingBook(libraryPath=self.libraryPath)
            offset = self.page.pageSize * self.page.currentPage
            if self.page.searchText:
                books, count = findingBook.searchingBook(searchText=self.page.searchText,
                                                         pageSize=self.page.pageSize,
                                                         offset=offset)
            else:
                books, count = findingBook.findAllBooks(pageSize=self.page.pageSize,
                                                        offset=offset)
            totalBooks = findingBook.countAllBooks()

        self.page.pageData = books
        self.page.total = count
        self.page.pages = self._pageCount(self.page.total, self.page.pageSize)
        self.thumbnailCtrl.ShowBook(books)
        self.updateStatusBar(text=f'found : {count} of {totalBooks}')

    def updateStatusBar(self, text=None):
        '''
        Show `text` in field 0 of the top-level frame's status bar, but only
        when that frame is the EclipseMainFrame class named below.
        '''
        if text and str(type(self.GetTopLevelParent())) == "<class 'src.view.TheEclipseView.EclipseMainFrame'>":
            self.GetTopLevelParent().SetStatusText(text, 0)

    def setPaginationBarStatus(self):
        '''
        Enable or disable the navigation tools from the page count: all four
        are disabled for a single page; with several pages only Next and Last
        are enabled.
        '''
        # NOTE(review): the current page is not consulted, so First/Previous
        # end up disabled again after every refresh - confirm this is wanted.
        logger.debug(f'setPaginationBarStatus:{self.page.pages}')
        if self.page.pages == 1:
            self.paginationBar.EnableTool(ID_FIRST_RESULT, False)
            self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, False)
            self.paginationBar.EnableTool(ID_NEXT_RESULT, False)
            self.paginationBar.EnableTool(ID_LAST_RESULT, False)
        if self.page.pages > 1:
            self.paginationBar.EnableTool(ID_FIRST_RESULT, False)
            self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, False)
            self.paginationBar.EnableTool(ID_NEXT_RESULT, True)
            self.paginationBar.EnableTool(ID_LAST_RESULT, True)
        self.paginationBar.Realize()

    def updatePangnation(self):
        '''
        Refresh the "/<pages>" label and the navigation tool states.
        '''
        pageNumbers = [f'{1+pageNum}' for pageNum in range(self.page.pages)]
        if hasattr(self, 'pageNumbersCountText'):
            self.pageNumbersCountText.SetLabel(f"/{len(pageNumbers)}")
        self.setPaginationBarStatus()

    def constructTopToolBar(self):
        '''
        Build the pagination toolbar and bind its events.

        @return: the realized aui.AuiToolBar.
        '''
        tb1 = aui.AuiToolBar(self, -1, wx.DefaultPosition, (10, 10), agwStyle=aui.AUI_TB_DEFAULT_STYLE | aui.AUI_TB_OVERFLOW)

        pageSizeText = TransparentText(tb1, -1, "Page Size")
        # Selectable page sizes: 5, 25, 45, 65, 85.
        pageNumber = [f'{pageNum}' for pageNum in range(5, 101, 20)]
        self.pageSizeCtrl = wx.Choice(tb1, 10, (-1, -1), (50, 25), pageNumber, style=0)
        # Raises ValueError when the current page size is not one of the choices.
        index = pageNumber.index(f'{self.page.pageSize}')
        self.pageSizeCtrl.SetSelection(index)

        pageNumbers = [f'{1+pageNum}' for pageNum in range(self.page.pages)]
        self.pageNumberCtrl = wx.Choice(tb1, 11, (-1, -1), (50, 25), pageNumbers, style=0)
        self.pageNumberCtrl.SetSelection(0)
        self.pageNumbersCountText = TransparentText(tb1, -1, f"/{len(pageNumbers)}")

        # Two-element entries are controls; six-element entries are
        # (id, label, image, help, handler, kind) tool definitions.
        # NOTE(review): "Last" is the only tool declared wx.ITEM_CHECK - confirm.
        tools = [
            ('control', pageSizeText),
            ('control', self.pageSizeCtrl),
            (ID_FIRST_RESULT, "First", "resultset_first.png", 'First', lambda e:self.onToolButtonClick(e), wx.ITEM_NORMAL),
            (ID_PREVIOUS_RESULT, "Previous", "resultset_previous.png", 'Previous', lambda e:self.onToolButtonClick(e), wx.ITEM_NORMAL),
            ('control', self.pageNumberCtrl),
            ('control', self.pageNumbersCountText),
            (ID_NEXT_RESULT, "Next", "resultset_next.png", 'Next', lambda e:self.onToolButtonClick(e), wx.ITEM_NORMAL),
            (ID_LAST_RESULT, "Last", "resultset_last.png", 'Last', lambda e:self.onToolButtonClick(e), wx.ITEM_CHECK),
        ]
        for tool in tools:
            if len(tool) == 0:
                tb1.AddSeparator()
            elif len(tool) == 2:
                tb1.AddControl(tool[1])
            else:
                logger.debug(tool)
                tb1.AddSimpleTool(tool[0], tool[1], self.fileOperations.getImageBitmap(imageName=tool[2]), kind=tool[5], short_help_string=tool[3])
                if tool[4]:
                    self.Bind(wx.EVT_MENU, tool[4], id=tool[0])

        self.Bind(wx.EVT_CHOICE, self.onPageNumberCtrl, self.pageNumberCtrl)
        self.Bind(wx.EVT_CHOICE, self.onPageSizeCtrl, self.pageSizeCtrl)
        tb1.Realize()
        return tb1

    def onPageNumberCtrl(self, event):
        '''
        Page-number choice handler: jump to the selected (1-based) page.
        '''
        logger.debug('onPageNumberCtrl')
        self.page.currentPage = int(event.GetString()) - 1
        self.loadingBook()

    def onPageSizeCtrl(self, event):
        '''
        Page-size choice handler: apply the new size, return to the first
        page, reload and rebuild the page-number choices.
        '''
        logger.debug('onPageSizeCtrl')
        self.page.pageSize = int(event.GetString())
        # The selection is reset to the first page below, so load that page;
        # keeping the old index could point past the last page.
        self.page.currentPage = 0
        self.loadingBook()
        # Computed unconditionally so both hasattr branches can use it.
        pageNumbers = [f'{1+pageNum}' for pageNum in range(self.page.pages)]
        if hasattr(self, 'pageNumberCtrl'):
            self.pageNumberCtrl.Set(pageNumbers)
            self.pageNumberCtrl.SetSelection(0)
        if hasattr(self, 'pageNumbersCountText'):
            self.pageNumbersCountText.SetLabel(f"/{len(pageNumbers)}")
        self.setPaginationBarStatus()

    def onToolButtonClick(self, e):
        '''
        Handle First/Previous/Next/Last: move the page-number selection,
        adjust tool states, then reload and refresh the pagination display.
        '''
        if e.Id == ID_FIRST_RESULT:
            logger.debug('ID_FIRST_RESULT')
            self.pageNumberCtrl.SetSelection(self.page.getFirstPageNumber())
            self.paginationBar.EnableTool(ID_FIRST_RESULT, False)
            self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, False)
            self.paginationBar.EnableTool(ID_NEXT_RESULT, True)
        if e.Id == ID_PREVIOUS_RESULT:
            logger.debug('ID_PREVIOUS_RESULT')
            if self.page.hasPrevious():
                self.paginationBar.EnableTool(ID_NEXT_RESULT, True)
                self.paginationBar.EnableTool(ID_LAST_RESULT, True)
                self.pageNumberCtrl.SetSelection(self.page.getPreviousPageNumber())
            else:
                self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, False)
                self.paginationBar.EnableTool(ID_FIRST_RESULT, False)
        if e.Id == ID_NEXT_RESULT:
            logger.debug('ID_NEXT_RESULT')
            if self.page.hasNext():
                self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, True)
                self.paginationBar.EnableTool(ID_FIRST_RESULT, True)
                self.paginationBar.EnableTool(ID_LAST_RESULT, True)
                nextPageNumber = self.page.getNextPageNumber()
                logger.debug(nextPageNumber)
                self.pageNumberCtrl.SetSelection(nextPageNumber)
            else:
                self.paginationBar.EnableTool(ID_NEXT_RESULT, False)
                self.paginationBar.EnableTool(ID_LAST_RESULT, False)
        if e.Id == ID_LAST_RESULT:
            logger.debug('ID_LAST_RESULT')
            self.pageNumberCtrl.SetSelection(self.page.getLastPageNumber())
            self.paginationBar.EnableTool(ID_LAST_RESULT, False)
            self.paginationBar.EnableTool(ID_NEXT_RESULT, False)
            self.paginationBar.EnableTool(ID_FIRST_RESULT, True)
            self.paginationBar.EnableTool(ID_PREVIOUS_RESULT, True)
        self.paginationBar.Realize()
        self.loadingBook()
        self.updatePangnation()

    def reloadingDatabase(self, event):
        '''
        'reloadingDatabase' subscriber: rebuild and repopulate the database
        for the current library, then reload using the search box text.

        @param event: not used.
        '''
        logger.debug('reloadingDatabase')
        self.createDatabase = CreateDatabase(libraryPath=self.libraryPath)
        self.createDatabase.creatingDatabase()
        self.createDatabase.addingData()
        self.loadingBook(searchText=self.search.GetValue())
class FindingBook():
    '''
    This class searches book details in the Opal database, which lives in the
    workspace (Opal library) given by libraryPath.
    '''

    def __init__(self, libraryPath=None):
        '''
        @param libraryPath: library directory; stored and forwarded to
            CreateDatabase.
        '''
        self.libraryPath = libraryPath
        self.createDatabase = CreateDatabase(libraryPath=libraryPath)

    def searchingBook(self, searchText=None, exactSearchFlag=False, pageSize=10, offset=0):
        '''
        Return the books matching the search text together with a count.

        @param searchText: may be a book name; None or '' lists all books.
        @param exactSearchFlag: when True use the exact-name lookup.
        @param pageSize: limit handed to the similar-name lookup.
        @param offset: offset handed to the similar-name lookup.
        @return: (books, count)
        '''
        if searchText is None or searchText == '':
            # No search text: unchanged behaviour, the un-paged full listing.
            books, count = self.findAllBooks()
            return books, count

        os.chdir(self.libraryPath)
        if exactSearchFlag:
            books, count = self.createDatabase.findByBookName(searchText)
        else:
            # Pass the caller's offset through; it used to be hard-coded to 0,
            # so every page of a search showed the first page of results.
            books, count = self.createDatabase.findBySimlarBookName(bookName=searchText, limit=pageSize, offset=offset)
        return books, count

    def countAllBooks(self):
        '''Return the database's count of all books.'''
        return self.createDatabase.countAllBooks()

    def findBookByNextMaxId(self, bookId=None):
        '''Delegate to CreateDatabase.findBookByNextMaxId.'''
        return self.createDatabase.findBookByNextMaxId(bookId)

    def findBookByPreviousMaxId(self, bookId=None):
        '''Delegate to CreateDatabase.findBookByPreviousMaxId.'''
        return self.createDatabase.findBookByPreviousMaxId(bookId)

    def findAllBooks(self, pageSize=None, offset=0):
        '''
        Return one page of the books in the library together with a count.

        @return: (books, count)
        '''
        os.chdir(self.libraryPath)
        books, count = self.createDatabase.findAllBook(pageSize=pageSize, offset=offset)
        return books, count

    def findBookByIsbn(self, isbn_13):
        '''Delegate to CreateDatabase.findBookByIsbn.'''
        return self.createDatabase.findBookByIsbn(isbn_13)

    def getMaxBookId(self):
        '''
        Only changes the working directory to the library path; returns None.
        '''
        # NOTE(review): despite its name this never looked up an id - confirm
        # whether callers expect a value.
        os.chdir(self.libraryPath)

    def deleteBook(self, book):
        '''
        Remove the book from the database and, if that succeeded, its files.

        @param book: book object
        '''
        bookPath = book.bookPath
        isSuccessfulDatabaseDelete = self.createDatabase.removeBook(book)
        if isSuccessfulDatabaseDelete:
            BookTerminal().removeBook(bookPath=bookPath)

    def findFolderWithoutBook(self):
        '''
        Find the numbered book folders that hold fewer than three files.

        Folders whose name is not made of digits are skipped (previously
        they made the numeric sort raise ValueError).

        @return: list of folder paths, ordered by folder number. The list
            used to be built but never returned.
        '''
        directory_name = self.libraryPath
        os.chdir(directory_name)
        bookFolders = [
            name for name in os.listdir(directory_name)
            if name.isdigit() and os.path.isdir(os.path.join(directory_name, name))
        ]
        bookFolders.sort(key=int)

        defaulterList = list()
        for folderName in bookFolders:
            levelOne = os.path.join(directory_name, folderName)
            fileCount = sum(
                1 for sName in os.listdir(levelOne)
                if os.path.isfile(os.path.join(levelOne, sName))
            )
            if fileCount < 3:
                defaulterList.append(levelOne)
        return defaulterList
class AddBook():
    '''
    This class adds a book to the Opal workspace library.
    '''

    def __init__(self, libraryPath=None):
        '''
        @param libraryPath: library directory; stored and forwarded to
            CreateDatabase.
        '''
        self.book = Book()
        self.book.uuid = str(uuid.uuid4())
        self.book.tag = None
        self.book.authors = list()
        self.libraryPath = libraryPath
        self.createDatabase = CreateDatabase(libraryPath=libraryPath)

    def getMaxBookID(self):
        '''Return the database's maximum book id, or 0 when there is none.'''
        maxBookId = self.createDatabase.getMaxBookID()
        if maxBookId is None:
            maxBookId = 0
        return maxBookId

    def addingBookToWorkspace(self, sourcePath=None, maxBookId=None):
        '''
        Copy a book into a new library folder and register it.

        Steps:
        0. Skip everything when a book with the same file name already exists.
        1. Create a folder named max_book_id + 1.
        2. Copy the book file into that folder.
        3. Read metadata (pdf / epub) and write book.json.
        4. Make an entry in the database.

        @param sourcePath: path of the selected book; nothing happens when
            it is empty.
        @param maxBookId: highest existing book id; looked up from the
            database when None (it used to fail on `None + 1`).
        '''
        if not sourcePath:
            return
        if maxBookId is None:
            maxBookId = self.getMaxBookID()

        self.book.bookPath = os.path.join(self.libraryPath, str(maxBookId + 1))
        tail = os.path.split(sourcePath)[1]
        self.book.bookFileName = tail
        self.book.inLanguage = 'English'
        self.book.hasCover = 'Y'
        nameParts = tail.split(".")
        self.book.bookFormat = nameParts[-1]
        # Drop only the LAST part. list.remove() dropped the first part equal
        # to the extension, so "pdf.guide.pdf" became "guide.pdf".
        book_file_name = '.'.join(nameParts[:-1])
        self.book.bookName = book_file_name
        self.book.wishListed = 'No'

        if self.findingSameBook():
            return

        if not os.path.exists(self.book.bookPath):
            os.makedirs(self.book.bookPath)
        dest = os.path.join(self.book.bookPath, tail)
        if sourcePath != dest:
            shutil.copy(sourcePath, dest)

        if 'pdf' == self.book.bookFormat:
            self.getPdfMetadata(sourcePath)
        if 'epub' == self.book.bookFormat:
            self.getEpubMetadata(sourcePath)

        os.chdir(self.book.bookPath)
        self.book.bookImgName = book_file_name + '.jpg'
        BookImage().getBookImage(self.book.bookPath, book_file_name, self.book.bookFormat)
        # writeBookJson receives a deep copy; the original goes to the database.
        book_copy1 = copy.deepcopy(self.book)
        self.writeBookJson(self.book.bookPath, book_copy1)
        self.addingBookInfoInDatabase(self.book)

    def getImageFileName(self):
        '''
        Return the cover image file name for self.book.

        When <bookPath>/<bookImgName> exists that name is returned. Otherwise
        the book folder is scanned for a file ending in "-<digits>.jpg" and
        the image name is returned with ".jpg" replaced by that suffix. If
        nothing matches, the plain image name is returned.

        The previous version referenced an undefined self.currentBook and
        returned an unbound local, so it raised on every path.
        '''
        bookImgName = self.book.bookImgName
        bookPath = self.book.bookPath
        if os.path.exists(os.path.join(bookPath, bookImgName)):
            return bookImgName
        if not os.path.isdir(bookPath):
            return bookImgName

        pattern = re.compile(r"\-(\d*)\.jpg$")
        # Sorted so the chosen file does not depend on directory order.
        for fileName in sorted(os.listdir(bookPath)):
            m = pattern.search(fileName)
            if m:
                return bookImgName.replace('.jpg', m.group())
        return bookImgName

    def findingSameBook(self):
        '''
        Report whether a book with the same file name is already stored.

        @return: True when findBookByFileName returns at least one book.
        '''
        logger.debug('findingSameBook')
        books = self.createDatabase.findBookByFileName(self.book.bookFileName)
        logger.debug('len(books): %s', len(books))
        return len(books) > 0

    def addingBookInfoInDatabase(self, book):
        '''
        Save the new book in the database.
        '''
        logger.debug('addingBookInfoInDatabase')
        self.createDatabase.saveBook(book)

    def writeBookJson(self, newDirPath=None, book=None):
        '''
        Write book.json (metadata) for the newly added book.

        @param newDirPath: folder that receives book.json.
        @param book: object whose attributes are serialised. Neither it nor
            its author objects are modified.
        '''
        logger.debug('writeBookJson newDirPath: %s', newDirPath)
        row2dict = dict(book.__dict__)
        authors = []
        try:
            for a in row2dict['authors']:
                if isinstance(a, str):
                    author = {'authorName': a}
                else:
                    # Work on a copy: deleting keys from a.__dict__ itself
                    # stripped those attributes off the author object.
                    author = dict(a.__dict__)
                    author.pop('_sa_instance_state', None)
                    author.pop('book_assoc', None)
                authors.append(author)
            row2dict.pop('_sa_instance_state', None)
            row2dict.pop('book_assoc', None)
            row2dict['authors'] = authors
            # NOTE(review): both dates are written as "now", not the book's
            # own values - kept as found; confirm intent.
            row2dict['publishedOn'] = str(datetime.now())
            row2dict['createdOn'] = str(datetime.now())
        except Exception as e:
            logger.error(e)

        # Serialise first so a failure does not leave an empty book.json,
        # and let `with` close the file in every case.
        content = json.dumps(row2dict, sort_keys=True, indent=4)
        with open(os.path.join(newDirPath, 'book.json'), 'w') as f:
            f.write(content)

    def getEpubMetadata(self, path=None):
        '''
        Read authors and subject tag from <bookName>.epub in the book folder
        and extract its cover image there.

        @param path: not used; the file is located through self.book.
        '''
        logger.debug('getEpubMetadata')
        os.chdir(self.book.bookPath)
        file_name = self.book.bookName + '.epub'
        epubBook = EpubBook()
        epubBook.open(file_name)
        epubBook.parse_contents()
        authorList = list()
        for authorName in epubBook.get_authors():
            author = Author()
            author.authorName = authorName
            author.aboutAuthor = 'aboutAuthor'
            authorList.append(author)
        self.book.authors = authorList
        self.book.tag = epubBook.subjectTag
        epubBook.extract_cover_image(outdir='.')
        self.book.createdOn = datetime.now()

    def getPdfMetadata(self, path=None):
        '''
        Fill self.book from the PDF's document information: page count, tag
        (subject), name (title), publisher (creator), publication date
        (creation date), file size and author. Each field is read in its own
        try block so one bad field does not prevent the others.

        @param path: PDF file to read; nothing happens when it is empty.
        '''
        logger.debug('getPdfMetadata path: %s', path)
        if not path:
            return

        pdf_info = None
        pdf_toread = None
        stream = None
        try:
            # The file is opened once and stays open until every field has
            # been read; it used to be opened twice and never closed.
            try:
                stream = open(path, "rb")
                pdf_toread = PdfFileReader(stream)
                logger.debug('getIsEncrypted : %s ', pdf_toread.getIsEncrypted())
                if pdf_toread.isEncrypted:
                    try:
                        pdf_toread.decrypt('')
                    except Exception as e:
                        logger.error(e, exc_info=True)
            except Exception as e:
                logger.error(e, exc_info=True)

            try:
                pdf_info = pdf_toread.getDocumentInfo()
                logger.debug('NumPages:%s', pdf_toread.getNumPages())
                self.book.numberOfPages = pdf_toread.getNumPages()
                subject = None
                # isinstance also accepts str subclasses, which the former
                # `type(...) == str` comparison rejected.
                if pdf_info.subject and isinstance(pdf_info.subject, str):
                    subject = pdf_info.subject
                if not self.book.tag and subject:
                    self.book.tag = subject
                elif self.book.tag and subject:
                    self.book.tag = self.book.tag + '' + subject
            except Exception as e:
                logger.error(e, exc_info=True)

            try:
                if pdf_info.title is not None and pdf_info.title.strip() != '':
                    self.book.bookName = str(pdf_info.title)
            except Exception as e:
                logger.error(e, exc_info=True)

            try:
                if pdf_info.creator:
                    # NOTE(review): on Python 3 str(bytes) yields "b'...'" -
                    # kept as found; confirm the intended interpreter.
                    self.book.publisher = str(pdf_info.creator.encode('utf-8'))
            except Exception as e:
                logger.error(e, exc_info=True)

            self.book.createdOn = datetime.now()
            try:
                # Characters 2-9 of the raw value are parsed as YYYYMMDD.
                date = datetime.strptime(
                    str(pdf_info['/CreationDate'])[2:10], '%Y%m%d')
                self.book.publishedOn = date
            except Exception as e:
                logger.error(e, exc_info=True)
                logger.error('CreationDate not found')

            logger.debug(Util().convert_bytes(os.path.getsize(path)))
            self.book.fileSize = Util().convert_bytes(os.path.getsize(path))

            author = Author()
            val = 'Unknown'
            try:
                if pdf_info.author is not None and pdf_info.author.strip() != '':
                    val = pdf_info.author
            except Exception as e:
                logger.error(e, exc_info=True)
            author.authorName = val
            self.book.authors = [author]
        finally:
            if stream is not None:
                stream.close()
class MainFrame(wx.Frame):
    '''
    Main window of the Opal book library.

    The frame is managed by an AuiManager and hosts a toolbar, a search
    panel, a text pane, the book grid, the thumbnail view and a book
    information (HTML) pane. Menu and toolbar commands are routed to the
    ``On*`` / ``on*`` handlers defined below.
    '''

    def __init__(self, parent):
        '''
        Build the frame, run the welcome wizard when the library path does
        not exist yet, then create panes, menu bar, event bindings and the
        status bar.

        :param parent: parent window handed to wx.Frame.
        '''
        title = "Opal"
        style = wx.DEFAULT_FRAME_STYLE | wx.MAXIMIZE | wx.SUNKEN_BORDER
        wx.Frame.__init__(self, parent, wx.ID_ANY, title=title, style=style)
        print('1----------------------->')

        # set frame icon
        image = wx.Image(
            os.path.join(Workspace().appPath, "images", "Library-icon.png"),
            wx.BITMAP_TYPE_PNG).ConvertToBitmap()
        icon = wx.EmptyIcon()
        icon.CopyFromBitmap(image)
        self.SetIcon(icon)

        if not os.path.exists(Workspace().libraryPath):
            self.createWizard()
        self.createDatabase = CreateDatabase()
        self.books = list()
        self.thumbnail = None
        # NOTE(review): this single drop target is later handed to several
        # windows (thumbnail, browser, grid); confirm that sharing one
        # instance is safe with the wx version in use.
        self.fileDropTarget = FileDropTarget(self)

        # tell AuiManager to manage this frame
        self._mgr = aui.AuiManager()
        self._mgr.SetManagedWindow(self)

        # set up default notebook style
        self._notebook_style = (aui.AUI_NB_DEFAULT_STYLE
                                | aui.AUI_NB_TAB_EXTERNAL_MOVE | wx.NO_BORDER)
        self._notebook_theme = 0

        # Attributes
        self._textCount = 1
        self._transparency = 255
        self._snapped = False
        self._custom_pane_buttons = False
        self._custom_tab_buttons = False
        self._pane_icons = False
        self._veto_tree = self._veto_text = False

        print('1-----------------------> %s' % os.getcwd())
        # Relative resources are resolved against this module's directory.
        os.chdir(os.path.dirname(os.path.abspath(__file__)))

        self.BuildPanes()
        self.CreateMenuBar()
        self.BindEvents()
        self.buildStatusBar()

    def BuildPanes(self):
        '''
        Create the toolbar and every pane, register them with the
        AuiManager and record the default perspectives.
        '''
        # min size for the frame itself isn't completely done, see the end
        # of AuiManager.Update() for the test code. For now, just hard code
        # a frame minimum size.
        self.SetMinSize(wx.Size(400, 300))

        def artBitmap(artId):
            return wx.ArtProvider_GetBitmap(artId)

        def appBitmap(fileName):
            return wx.Bitmap(
                os.path.join(Workspace().appPath, "images", fileName))

        # add the toolbars to the manager
        tb1 = wx.ToolBar(self, id=-1, pos=wx.DefaultPosition,
                         size=wx.DefaultSize,
                         style=wx.TB_FLAT | wx.TB_NODIVIDER | wx.TB_TEXT)
        tb1.SetToolBitmapSize(wx.Size(24, 24))

        # (id, label, short help, bitmap); None inserts a separator.
        tools = [
            (ID_otherWorkspace, "Workspace Home", "Home",
             artBitmap(wx.ART_GO_HOME)),
            None,
            (ID_search, "Search", "Search", artBitmap(wx.ART_FIND)),
            (ID_editMetadata, "Edit metadata", "Edit metadata",
             artBitmap(wx.ART_WARNING)),
            (ID_addBook, "Add book", "Add book", appBitmap("add_book.png")),
            (ID_deleteBook, "Delete book", "Delete book",
             artBitmap(wx.ART_DELETE)),
            (ID_reLoadDatabase, "Reload database", "Reload database",
             appBitmap("database_refresh.png")),
            (ID_Rest_view, "Reset View", "Reset View",
             artBitmap(wx.ART_LIST_VIEW)),
            (ID_cover_flow, "Cover Flow", "Cover Flow",
             artBitmap(wx.ART_HELP_BOOK)),
            (ID_FullCircle, "Full Circle Magazine",
             "download Full Circle Magazine", appBitmap("fullcircle.png")),
            (ID_Preferences, "Preferences", "Preferences",
             artBitmap(wx.ART_EXECUTABLE_FILE)),
        ]
        for tool in tools:
            if tool is None:
                tb1.AddSeparator()
                continue
            toolId, label, shortHelp, bitmap = tool
            tb1.AddLabelTool(id=toolId, label=label, shortHelp=shortHelp,
                             bitmap=bitmap)
        tb1.Realize()

        self._mgr.AddPane(
            tb1,
            aui.AuiPaneInfo().Name("tb1").Caption("Big Toolbar").ToolbarPane()
            .Top().LeftDockable(True).RightDockable(False))

        # add a bunch of panes
        bookInfoPan = aui.AuiPaneInfo().Name("bookInfo").Caption(
            "Text Pane").Right().Layer(1).Position(1).CloseButton(
                True).MaximizeButton(True)
        self._mgr.AddPane(self.CreateTextCtrl(), bookInfoPan)

        self._mgr.AddPane(
            self.searchCtrl(),
            aui.AuiPaneInfo().Name("searchCtrl").Top().CaptionVisible(
                False).CloseButton(False).Show())

        self._mgr.AddPane(
            self.CreateGrid(),
            aui.AuiPaneInfo().Name("grid_content").Caption(
                "Grid").Center().CloseButton(True).MaximizeButton(
                    True).LeftDockable(True).MinimizeButton(True))

        thumbInfo = aui.AuiPaneInfo().Name("test1").Caption(
            "Thumb book").Center().Dockable(True).Movable(True).MaximizeButton(
                True).MinimizeButton(True).PinButton(True).CloseButton(
                    True).Position(0)
        self._mgr.AddPane(self.CreateThumbCtrl(), thumbInfo)

        html_content = aui.AuiPaneInfo().Caption("Book Information").Name(
            "html_content").Right().Dockable(True).Layer(1).Position(
                1).CloseButton(True).MaximizeButton(True)
        self._mgr.AddPane(self.CreateHTMLCtrl(), html_content)

        # Perspective restored by OnRestView.
        self.perspective_default = self._mgr.SavePerspective()
        perspective_all = self._mgr.SavePerspective()

        # make some default perspectives
        for pane in self._mgr.GetAllPanes():
            if not pane.IsToolbar():
                pane.Show()
        perspective_default = self._mgr.SavePerspective()

        self._perspectives = [perspective_default, perspective_all]
        self._nb_perspectives = []

        # "commit" all changes made to FrameManager
        self._mgr.Update()

    def CreateMenuBar(self):
        '''Create the File / View / Window / Help menus and attach them.'''
        mb = wx.MenuBar()

        file_menu = wx.Menu()
        switchWorkspaceMenu = wx.Menu()
        switchWorkspaceMenu.Append(ID_otherWorkspace, 'Other...')
        file_menu.AppendMenu(ID_switchWorkspace, 'Switch Workspace',
                             switchWorkspaceMenu)
        file_menu.Append(wx.ID_EXIT, '&Quit\tCtrl+Q')

        view_menu = wx.Menu()
        view_menu.Append(ID_Rest_view, "Reset view to default")

        windowMenu = wx.Menu()
        windowMenu.Append(ID_Preferences, "Preference")

        help_menu = wx.Menu()
        help_menu.Append(ID_About, "&About...")

        mb.Append(file_menu, "File")
        mb.Append(view_menu, "View")
        mb.Append(windowMenu, "Window")
        mb.Append(help_menu, "Help")
        self.SetMenuBar(mb)

    def BindEvents(self):
        '''Bind frame-level events and every menu/toolbar command once.'''
        # Show How To Use The Closing Panes Event
        self.Bind(aui.EVT_AUI_PANE_CLOSE, self.OnPaneClose)
        self.Bind(wx.EVT_ERASE_BACKGROUND, self.OnEraseBackground)
        self.Bind(wx.EVT_SIZE, self.OnSize)
        self.Bind(wx.EVT_CLOSE, self.OnClose)

        menuHandlers = (
            (wx.ID_EXIT, self.OnExit),
            (ID_About, self.OnAbout),
            (ID_Rest_view, self.OnRestView),
            (ID_cover_flow, self.OnCoverFlow),
            (ID_FullCircle, self.OnFullCircle),
            (ID_otherWorkspace, self.onOtherWorkspace),
            (ID_addBook, self.onAddBookToWorkspace),
            (ID_deleteBook, self.onDeleteBookToWorkspace),
            (ID_reLoadDatabase, self.onReLoadDatabaseToWorkspace),
            (ID_search, self.onSearch),
            (ID_editMetadata, self.onEditMetadata),
            (ID_Preferences, self.OnPreferences),
        )
        for menuId, handler in menuHandlers:
            self.Bind(wx.EVT_MENU, handler, id=menuId)

    def buildStatusBar(self):
        '''Create a two-field status bar: version text and book counts.'''
        self.statusbar = self.CreateStatusBar(2, wx.ST_SIZEGRIP)
        self.statusbar.SetStatusWidths([-2, -3])
        self.statusbar.SetStatusText("Opal version 0.1", 0)
        findingBook = FindingBook()
        totalBookCount = findingBook.countAllBooks()
        self.statusbar.SetStatusText(
            "selected : " + str(len(self.books)) + " of " +
            str(totalBookCount), 1)

    def creatingDatabase(self):
        '''
        Make sure the library directory exists, change into it and load
        the data when no non-empty ``_opal.sqlite`` file is present.
        '''
        libraryPath = Workspace().libraryPath
        if not os.path.exists(libraryPath):
            os.mkdir(libraryPath)
        os.chdir(libraryPath)

        # Check the database file itself instead of scanning for look-alike
        # names: a stray "_opal.sqlite-journal" used to trigger an os.stat()
        # on a file that might not exist.
        databasePath = os.path.join(libraryPath, '_opal.sqlite')
        isDatabase = (os.path.isfile(databasePath)
                      and os.path.getsize(databasePath) != 0)
        if isDatabase:
            print('_opal.sqlite')
        else:
            self.createDatabase.addingData()

    def onEditMetadata(self, event):
        '''Open the property frame for the selected thumbnail, if any.'''
        print('onEditMetadata')
        scrolled = self.thumbnail._scrolled
        if scrolled._selected is not None:
            book = scrolled._items[scrolled._selected].book
            BookPropertyFrame(None, book)

    def onSearch(self, event):
        '''Open the search frame.'''
        print('onSearch')
        SearchFrame(parent=None)

    def OnClose(self, event):
        '''Release the AuiManager and destroy the frame.'''
        logger.info('win OnClose')
        print('OnClose')
        self._mgr.UnInit()
        del self._mgr
        self.Destroy()

    def OnExit(self, event):
        '''Handle File > Quit by closing the frame.'''
        logger.info('win OnExit')
        print('OnExit')
        self.Close()

    def OnAbout(self, event):
        '''Show the modal "About Opal" dialog.'''
        msg = "Opal\n" + \
              "An advanced book management library \n" + \
              "(c) Copyright 2005-2006,All rights reserved. \n original \"BSD License \" \n" + \
              "version : 0.1\n" + \
              "build : 0.1\n"
        dlg = wx.MessageDialog(self, msg, "About Opal",
                               wx.OK | wx.ICON_INFORMATION)
        dlg.ShowModal()
        dlg.Destroy()

    def OnPreferences(self, event):
        '''Open the preference window.'''
        print('OnPreferences')
        OpalPreference(None, "Opal preferences")

    def OnRestView(self, event):
        '''Restore the perspective saved by BuildPanes.'''
        print('OnResetView')
        self._mgr.LoadPerspective(self.perspective_default)

    def OnFullCircle(self, event):
        '''Start the Full Circle Magazine download.'''
        print('OnFullCircle')
        fullCircleMagazine = FullCircleMagazine()
        fullCircleMagazine.startDownload()

    def OnCoverFlow(self, event):
        '''Run the cover-flow application (startShell) on a new thread.'''
        print('OnCoverFlow')
        try:
            thread.start_new_thread(self.startShell, (1, ))
        except Exception:
            print("Error: unable to start thread")
            traceback.print_exc()

    def startShell(self, a):
        '''
        Thread entry point: run PicturesApp with the currently loaded books.

        :param a: unused; only present because the thread passes one argument.
        '''
        self.picture = PicturesApp()
        self.picture.setValue(books=self.books)
        self.picture.run()

    def searchCtrl(self):
        '''Create and return the search panel (kept on self.searchCtrlPanel).'''
        self.searchCtrlPanel = SearchPanel(self)
        return self.searchCtrlPanel

    def GetDockArt(self):
        '''Return the art provider of the AuiManager.'''
        return self._mgr.GetArtProvider()

    def DoUpdate(self):
        '''Commit pending pane changes.'''
        self._mgr.Update()

    def OnEraseBackground(self, event):
        '''Let default processing handle the event.'''
        event.Skip()

    def OnSize(self, event):
        '''Let default processing handle the event.'''
        event.Skip()

    def OnPaneClose(self, event):
        '''Ask for confirmation before closing one of the protected panes.'''
        caption = event.GetPane().caption
        print(caption)
        if caption in ["Tree Pane", "Dock Manager Settings", "Fixed Pane"]:
            msg = "Are You Sure You Want To Close This Pane?"
            dlg = wx.MessageDialog(
                self, msg, "AUI Question",
                wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION)
            if dlg.ShowModal() in [wx.ID_NO, wx.ID_CANCEL]:
                event.Veto()
            dlg.Destroy()

    def CreateThumbCtrl(self):
        '''
        Create the thumbnail control on first use and show self.books in it.

        :return: the ThumbnailCtrl stored on self.thumbnail.
        '''
        if not self.thumbnail:
            self.thumbnail = ThumbnailCtrl(self,
                                           imagehandler=NativeImageHandler)
            self.thumbnail._scrolled.EnableToolTips(enable=True)
            self.thumbnail.SetDropTarget(self.fileDropTarget)
        try:
            self.thumbnail.ShowDir(self.books)
        except Exception:
            # Best effort: an unreadable cover must not prevent start-up.
            traceback.print_exc()
        return self.thumbnail

    def CreateTextCtrl(self):
        '''Return a multi-line text control with placeholder text.'''
        text = ("This is text box %d") % (1)
        return wx.TextCtrl(self, -1, text, wx.Point(0, 0), wx.Size(600, 400),
                           wx.NO_BORDER | wx.TE_MULTILINE)

    def CreateHTMLCtrl(self):
        '''
        Create the book information view: a WebView on win32, an HtmlWindow
        elsewhere.

        :return: the control stored on self.browser.
        '''
        if sys.platform == 'win32':
            self.browser = wx.html2.WebView.New(self)
            # NOTE(review): developer-machine path; confirm what should be
            # loaded on other installations.
            self.browser.LoadURL(
                "C:\\Users\\vijay\\workspace\\3d_cover_flow\\WebContent\\3D-Cover-Flip-Animations-with-jQuery-CSS3-Transforms-Cover3D\\indexSimpleDemo.html"
            )
        else:
            self.browser = wx.html.HtmlWindow(self, -1, wx.DefaultPosition,
                                              wx.Size(600, 400))
            if "gtk2" in wx.PlatformInfo or "gtk3" in wx.PlatformInfo:
                self.browser.SetStandardFonts()
        self.browser.SetDropTarget(self.fileDropTarget)
        return self.browser

    def CreateGrid(self):
        '''
        Load the first page of books and build the grid showing them.

        The page size is read from the workspace preferences. When loading
        fails the error is reported and an empty grid is created instead, so
        a grid is always returned.

        :return: the MegaGrid stored on self.grid.
        '''
        colnames = [
            'id', 'bookName', 'bookFormat', 'authors', 'bookPath', 'isbn_13',
            'isbn_10', 'inLanguage', 'series', 'rating', 'subTitle', 'uuid',
            'publishedOn', 'editionNo', 'numberOfPages', 'hasCover',
            'fileSize', 'publisher', 'hasCode', 'createdOn', 'dimension',
            'bookDescription', 'customerReview'
        ]
        grid = None
        try:
            opalStart = OpalStart()
            jsonFileStr = opalStart.readWorkspace()
            startObject = opalStart.jsonToObject(jsonFileStr)
            recordPerPage = startObject.workspace[0]['Preference'][
                'recordPerPage']
            print(recordPerPage)
            self.books = FindingBook().findAllBooks(pageSize=recordPerPage)

            data = []
            bookId_rowNo_dict = {}
            print('CreateGrid: noOfBooks: %s' % len(self.books))
            for rowNo, book in enumerate(self.books):
                data.append((str(rowNo), self.dicForGrid(book)))
                bookId_rowNo_dict[book.id] = rowNo

            grid = MegaGrid(self, data, colnames)
            self.grid = grid
            grid.bookId_rowNo_dict = bookId_rowNo_dict
            grid.Reset()
        except Exception:
            print('error in grid')
            traceback.print_exc()
            if grid is None:
                # Loading failed before a grid existed: fall back to an
                # empty one instead of failing on the attribute access below.
                grid = MegaGrid(self, [], colnames)
                self.grid = grid
                grid.bookId_rowNo_dict = {}
                grid.Reset()
        self.grid.SetDropTarget(self.fileDropTarget)
        return self.grid

    def dicForGrid(self, book):
        '''
        Build the row dictionary used by the grid for one book.

        NOTE: the returned dictionary is ``book.__dict__`` itself, so the
        added 'authorName' entry also becomes an attribute of the book.

        :param book: object with an ``authors`` list of objects exposing
            ``authorName``.
        :return: the book's attribute dictionary including 'authorName'.
        '''
        dicForBook = book.__dict__
        authorsName = [author.authorName for author in book.authors]
        dicForBook['authorName'] = (" \n").join(authorsName)
        return dicForBook

    def GetIntroText(self):
        '''Return the module-level ``overview`` text.'''
        return overview

    def onReLoadDatabaseToWorkspace(self, event):
        '''Handle the "Reload database" command.'''
        print('onReLoadDatabaseToWorkspace')
        self.reloadingDatabase()

    def reloadingDatabase(self):
        '''Recreate and refill the database, then repeat the current search.'''
        self.createDatabase.creatingDatabase()
        self.createDatabase.addingData()
        text = self.searchCtrlPanel.searchCtrl.GetValue()
        self.searchCtrlPanel.doSearch(text)

    def onDeleteBookToWorkspace(self, event):
        '''Handle the "Delete book" command (not implemented yet).'''
        print('onDeleteBookToWorkspace')

    def onAddBookToWorkspace(self, event):
        '''
        Let the user pick one or more files, add each to the workspace and
        repeat the current search so the new books show up.
        '''
        print('onAddBookToWorkspace')
        print("CWD: %s\n" % os.getcwd())

        # 'Open' dialog starting in the current directory, allowing multiple
        # selections; wx.CHANGE_DIR makes the chosen directory the new
        # working directory.
        dlg = wx.FileDialog(self, message="Select a book",
                            defaultDir=os.getcwd(), defaultFile="",
                            wildcard=wildcard,
                            style=wx.OPEN | wx.MULTIPLE | wx.CHANGE_DIR)
        try:
            if dlg.ShowModal() == wx.ID_OK:
                paths = dlg.GetPaths()
                print('You selected %d files:' % len(paths))
                for path in paths:
                    self.selectedFilePath = path
                    print(' %s\n' % path)
                    AddBook().addingBookToWorkspace(path)
                text = self.searchCtrlPanel.searchCtrl.GetValue()
                self.searchCtrlPanel.doSearch(text)

            # Compare this with the debug above; did we change working dirs?
            print("CWD: %s\n" % os.getcwd())
        finally:
            # Destroy the dialog only once we are done with it, but always.
            dlg.Destroy()

    def onOtherWorkspace(self, event):
        '''
        Open the workspace launcher.

        Intended for the following scenarios:
        1. there is no opal_start.json;
        2. the file is present but holds no valid path.
        '''
        print('onOtherWorkspace')
        win = WorkspaceFrame(self, -1, "Workspace Launcher", size=(470, 290),
                             style=wx.DEFAULT_FRAME_STYLE)
        win.Show(True)

    def LoadingBooks(self):
        '''Fill the database through the CreateDatabase helper.'''
        self.createDatabase.addingData()

    def createWizard(self):
        '''
        Run the welcome wizard that asks for the language and the workspace
        location. The chosen directory is stored through dbbCallback.
        '''
        # Create the wizard and the pages
        wizard = Wizard(self, -1, "Opal welcome wizard",
                        wx.EmptyBitmap(200, 200))
        page1 = TitledPage(wizard, "Welcome to Opal")
        page2 = TitledPage(wizard, "Page 2")
        # Pages 3 and 4 are still constructed (as before) but are not
        # chained into the wizard.
        page3 = TitledPage(wizard, "Page 3")
        page4 = TitledPage(wizard, "Page 4")
        self.page1 = page1

        vbox = wx.BoxSizer(wx.HORIZONTAL)
        lable = wx.StaticText(page1, -1, "Choose your language:")
        choice = wx.Choice(page1, -1, (0, 0), choices=['English'])
        choice.SetSelection(0)
        vbox.Add(lable, 0, wx.ALIGN_CENTRE | wx.ALL, 5)
        vbox.Add(choice, 0, wx.ALIGN_CENTRE | wx.ALL, 5)
        page1.sizer.Add(vbox)

        workspaceHint = (
            " Choose a location of your workspace. "
            "When you add books to Opal, they will be copied here. \n"
            "Use an empty folder for a new Opal workspace.")
        page1.sizer.Add(wx.StaticText(page1, -1, workspaceHint), 0,
                        wx.ALIGN_LEFT | wx.ALL, 1)

        dbb = DirBrowseButton(page1, -1, size=(450, -1),
                              changeCallback=self.dbbCallback)
        dbb.SetFocus()
        dbb.SetLabel("Book Library Location")
        dbb.SetHelpText('Please set your default workspace location.')
        dbb.textControl.SetValue(Workspace().path)
        page1.sizer.Add(dbb, 0, wx.ALIGN_CENTRE | wx.ALL, 5)
        wizard.FitToPage(page1)

        # Use the convenience Chain function to connect the pages
        WizardPageSimple.Chain(page1, page2)

        wizard.GetPageAreaSizer().Add(page1)
        wizard.RunWizard(page1)

    def dbbCallback(self, evt):
        '''Store a non-empty directory chosen in the wizard on Workspace.'''
        chosenPath = evt.GetString()
        print('DirBrowseButton: %s\n' % chosenPath)
        if chosenPath:
            Workspace().path = chosenPath
def __init__(self, libraryPath=None):
    '''
    Keep the library location and create the database helper for it.

    :param libraryPath: library directory; handed unchanged (including
        None) to CreateDatabase.
    '''
    self.createDatabase = CreateDatabase(libraryPath=libraryPath)
    self.libraryPath = libraryPath
class PacktpubCrawl:
    '''
    Selenium driven crawler for https://www.packtpub.com/ that lists the
    e-books of an account and claims the daily free e-book when it is not
    owned yet.
    '''

    def __init__(self):
        self.baseUrl = "https://www.packtpub.com/"
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()

    def findBookUrl(self, email='*****@*****.**', password='default',
                    firefoxPath='/docs/python_projects/firefox/firefox'):
        '''
        Log in, collect the account's e-books and claim the free e-book of
        the day unless a book with that title is already owned.

        Errors after the login popup has been opened are printed and
        swallowed (best effort); the browser is always shut down.

        :param email: account e-mail typed into the login form.
        :param password: account password typed into the login form.
        :param firefoxPath: path of the Firefox binary to launch.
        '''
        directory_name = '.'
        binary = FirefoxBinary(firefoxPath)
        fp = webdriver.FirefoxProfile()
        fp.set_preference("webdriver.log.file", "/tmp/firefox_console")
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.manager.focusWhenStarting', False)
        fp.set_preference("browser.download.dir", directory_name)
        fp.set_preference("browser.download.manager.scanWhenDone", False)
        fp.set_preference("browser.download.manager.useWindow", False)
        fp.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "application/octet-stream,application/xml,application/pdf,text/plain,text/xml,image/jpeg,text/csv,application/zip,application/x-rar-compressed")
        fp.set_preference("browser.helperApps.alwaysAsk.force", False)
        fp.set_preference("browser.popups.showPopupBlocker", False)
        fp.update_preferences()

        driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=binary)
        try:
            driver.get(self.baseUrl)
            efd_link = driver.find_element_by_css_selector(
                ".login-popup > div:nth-child(1)")
            efd_link.click()
            try:
                self._login(driver, email, password)
                bookList = self._collectOwnedBooks(driver)
                self._claimFreeBook(driver, bookList)
            except Exception as e:
                print(e)
            finally:
                print('completed')
            print('hi')
        finally:
            # The browser used to stay open after every run.
            driver.quit()

    def _login(self, driver, email, password):
        '''Fill in and submit the login form of the opened login popup.'''
        formRow = ('#packt-user-login-form > div:nth-child(1) > '
                   'div:nth-child(1) > div:nth-child(%d) > ')
        emailEl = driver.find_element_by_css_selector(
            formRow % 1 + 'div:nth-child(1) > input:nth-child(1)')
        emailEl.send_keys(email)
        passwordEl = driver.find_element_by_css_selector(
            formRow % 2 + 'div:nth-child(1) > input:nth-child(1)')
        passwordEl.send_keys(password)
        loginEl = driver.find_element_by_css_selector(
            formRow % 3 + 'input:nth-child(1)')
        loginEl.click()

    def _collectOwnedBooks(self, driver):
        '''Return Book objects for the products listed under "My ebooks".'''
        # clicking on My Account
        myAccountEl = driver.find_element_by_css_selector(
            '#account-bar-logged-in > a:nth-child(1) > div:nth-child(1) > strong:nth-child(1)')
        myAccountEl.click()

        # opening My ebooks
        driver.get(self.baseUrl + 'account/my-ebooks')
        productListEls = driver.find_elements_by_css_selector(
            'div.product-line')
        print(len(productListEls))

        bookList = list()
        for productEl in productListEls:
            print(productEl)
            try:
                bookName = productEl.find_element_by_css_selector(
                    '.title').text
                book = self.createBookDetail(bookName)
                productEl.click()
                # Result unused, but the lookup raises when the product has
                # no read button, which skips the product as before.
                productEl.find_element_by_css_selector('.fake-button-text')
                print('new page')
                isbnEl = productEl.find_elements_by_css_selector(
                    'div > div:nth-child(2) > div:nth-child(1)> a:nth-child(1) > div:nth-child(1)')
                book.isbn_13 = isbnEl[0].get_attribute('isbn')
                bookList.append(book)
            except Exception as e:
                print(e)
        return bookList

    def _claimFreeBook(self, driver, bookList):
        '''Claim the free e-book unless its title matches an owned book.'''
        driver.get('https://www.packtpub.com/packt/offers/free-learning')
        try:
            bookNameEl_1 = driver.find_element_by_css_selector(
                '.dotd-title > h2:nth-child(1)')
            bookName_1 = bookNameEl_1.text
            isBookAlreadyAvailable = any(
                bookName_1 in book.bookName for book in bookList)
            if not isBookAlreadyAvailable:
                # clicking on Claim your free ebook
                claimFreeEbookEl = driver.find_element_by_css_selector(
                    '.book-claim-token-inner > input:nth-child(3)')
                claimFreeEbookEl.click()
        except Exception as e:
            print(e)

    def createBookDetail(self, bookName=None):
        '''
        Create a Book pre-filled with the fixed Packt metadata.

        :param bookName: title of the book; when None no cover image name
            is derived.
        :return: the new Book.
        '''
        book = Book()
        book.bookName = bookName
        book.bookFormat = 'pdf'
        book.tag = 'Technology'
        book.inLanguage = 'English'
        book.subTitle = None
        book.publisher = "Packt Publishing Limited"
        # The default bookName=None used to raise TypeError here.
        book.bookImgName = bookName + '.jpg' if bookName is not None else None
        book.hasCover = 'Yes'
        book.hasCode = None
        return book

    def getMaxBookID(self):
        '''
        Return the highest book id reported by the database helper, or 0
        when it reports none.
        '''
        maxBookId = self.createDatabase.getMaxBookID()
        if not maxBookId:
            maxBookId = 0
        return maxBookId

    def downloadDir(self):
        '''
        Create (if needed) the directory named max book id + 1 inside the
        library, make it the working directory and return its path.
        '''
        directory_name = os.path.join(self.directory_name,
                                      str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            os.makedirs(directory_name)
        os.chdir(directory_name)
        return directory_name
class ItEbook(object):
    '''
    Crawler for the itebooks listing pages: finds books that are not in the
    database yet, stores their cover and metadata and triggers the download.
    '''

    def __init__(self, baseUrl=None):
        '''
        :param baseUrl: site root (without trailing slash) that page and
            book URLs are appended to.
        '''
        self.baseUrl = baseUrl
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()
        self.header_info = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'
        }

    def getUrl(self, baseUrl):
        '''
        Return the base url of this crawler.

        :param baseUrl: ignored; kept for interface compatibility.
        '''
        return self.baseUrl

    def findAllBookUrl(self, subUrl=None):
        '''
        Process every book link of one listing page, e.g.
        http://itebooks.website/page-2.html.

        Books whose name is not in the database are looked up, downloaded
        and added to the database. A failure for one link is printed and
        the remaining links are still processed.

        :param subUrl: page name appended to the base url.
        :return: True when the page was served with status 200, else False.
        '''
        url = self.baseUrl + '/' + (subUrl or '')
        print(url)
        r = requests.get(url, headers=self.header_info, timeout=30)
        if r.status_code != 200:
            return False

        soup = BeautifulSoup(r.content, "lxml")
        skipList = (u'\nCategories', u'\nContact', u'\nUpload', u'\nDonate',
                    u'IT eBooks', u'Prev', u'Next')
        for link in soup.find_all('a'):
            if link.text.strip() == '' or link.text in skipList:
                continue
            if self.isBookNameAvailableInDatabase(link.text):
                continue
            href = link.get('href')
            print('%s \t %s %s' % (link.text, href, False))
            try:
                book = self.findBookDetail(href)
                if book is None:
                    # Detail page not available; nothing to download.
                    continue
                print('uploading database')
                self.firefoxDownloadJob(book, href)
                self.updateDatabase()
            except Exception:
                print(href)
                traceback.print_exc()
        return True

    def updateDatabase(self):
        '''Recreate the database and load the data again.'''
        self.createDatabase.creatingDatabase()
        self.createDatabase.addingData()

    def isBookNameAvailableInDatabase(self, bookName=None):
        '''Return True when the database helper finds a book by that name.'''
        return bool(self.createDatabase.findByBookName(bookName))

    def findBookDetail(self, subUrl):
        '''
        Read the metadata of one book page.

        :param subUrl: book page path appended to the base url.
        :return: a filled Book, or None when the page is not served with
            status 200.
        '''
        url = self.baseUrl + '/' + subUrl
        r = requests.get(url, headers=self.header_info, timeout=30)
        if r.status_code != 200:
            return None

        soup = BeautifulSoup(r.content, "lxml")
        book = Book()
        book.authors.append(Author(soup.find_all(itemprop="author")[0].text))
        isbns = soup.find_all(itemprop="isbn")
        book.isbn_10 = isbns[0].text
        book.isbn_13 = isbns[1].text
        book.bookName = soup.find_all(itemprop="name")[0].text
        book.publisher = soup.find_all(itemprop="publisher")[0].text
        try:
            date = datetime.strptime(
                str(soup.find_all(itemprop="datePublished")[0].text), '%Y')
        except Exception:
            # Missing or unparsable year: fall back to the current time.
            date = datetime.now()
        book.publishedOn = date
        book.numberOfPages = soup.find_all(itemprop="numberOfPages")[0].text
        book.inLanguage = soup.find_all(itemprop="inLanguage")[0].text
        book.bookFormat = soup.find_all(itemprop="bookFormat")[0].text
        spans = soup.find_all("div", {"class": "span12"})
        book.bookDescription = spans[3].text
        images = soup.find_all(itemprop="image")
        print(images)
        book.bookImgName = images[0].get('src')
        try:
            book.subTitle = spans[1].text
        except Exception:
            traceback.print_exc()
        book.fileSize = soup.find_all(
            'table', {"class": "table table-bordered"
                      })[1].find_all('tr')[5].find_all('td')[1].text
        return book

    def firefoxDownloadJob(self, book, refUrl):
        '''
        Store cover and book.json for the book in a fresh download
        directory, then open the book page in a browser and trigger its
        download link.

        :param book: Book whose bookImgName is the cover path on the site;
            it is replaced by the bare file name.
        :param refUrl: book page path appended to the base url.
        '''
        directory_name = self.downloadDir()
        url = self.baseUrl + refUrl

        # Unless the directory holds exactly three entries, empty it.
        if 3 != len(os.listdir(directory_name)):
            for sName in os.listdir(directory_name):
                os.remove(os.path.join(directory_name, sName))

        # Downloading book cover
        imageUrl = self.baseUrl + book.bookImgName
        imageFileName = book.bookImgName.split('/')[-1]
        bookImagePath = os.path.join(directory_name, imageFileName)
        r = requests.get(imageUrl, headers=self.header_info, timeout=30)
        print('---------------> %s' % r.url)
        with open(bookImagePath, 'wb') as imageFile:
            imageFile.write(r.content)
        book.bookImgName = imageFileName

        # writing json file
        self.writeJsonToDir(directory_name, book)

        # NOTE(review): a Firefox profile (download dir = directory_name)
        # used to be built here but was never passed to any driver, so it
        # has been dropped. Chrome is started without options; confirm
        # where its downloads end up and whether the '.part' check below
        # can ever match.
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            efd_link = driver.find_element_by_id(id_='download')
            efd_link.send_keys(Keys.RETURN)

            # Poll until no '*.part' file is left in the directory.
            while True:
                time.sleep(10)
                extensions = [
                    sName.split('.')[-1]
                    for sName in os.listdir(directory_name)
                    if os.path.isfile(os.path.join(directory_name, sName))
                ]
                print(extensions)
                if 'part' not in extensions:
                    time.sleep(10)
                    break
        finally:
            # Shut the browser down on errors as well.
            driver.quit()

    def writeJsonToDir(self, bookPath=None, book=None):
        '''
        Write the book's attributes as ``book.json`` into bookPath.

        The book itself is left untouched: a datetime ``publishedOn`` is
        written as string and every author as a dictionary (plain values
        become ``{'authorName': value}``). Errors are printed, not raised.

        :param bookPath: directory receiving book.json.
        :param book: object whose ``__dict__`` is serialised.
        '''
        try:
            row2dict = dict(book.__dict__)
            publishedOn = row2dict.get('publishedOn')
            if isinstance(publishedOn, datetime):
                row2dict['publishedOn'] = str(publishedOn)
            authors = []
            for a in row2dict.get('authors') or []:
                if hasattr(a, '__dict__'):
                    authors.append(dict(a.__dict__))
                else:
                    authors.append({'authorName': a})
            row2dict['authors'] = authors
            content = json.dumps(row2dict, sort_keys=False, indent=4)
            with open(os.path.join(bookPath, 'book.json'), 'w') as f:
                f.write(content)
        except Exception:
            traceback.print_exc()

    def isBookDownloading(self, files):
        '''
        Report whether the book still counts as downloading.

        The sizes of all paths containing 'part' are read once; the result
        is False only when more than one distinct size is found.

        NOTE(review): a single snapshot cannot detect a growing file; this
        keeps the outcome of the previous loop - confirm the intended rule.

        :param files: paths of the files in the download directory.
        :return: True or False as described above.
        '''
        partSizes = set(
            str(os.stat(fl).st_size) for fl in files if 'part' in fl)
        isDownloading = len(partSizes) <= 1
        logging.info('isDownloading:')
        return isDownloading

    def startDownload(self):
        '''
        Crawl the listing pages starting at page-2.html until a page is no
        longer served.
        '''
        baseUrl = 'http://itebooks.website'
        itebook = ItEbook(baseUrl)
        i = 2
        while itebook.findAllBookUrl('page-' + str(i) + '.html'):
            i = i + 1
            print('startDownload----------> %s' % str(i))

    def getMaxBookID(self):
        '''
        Return the highest book id reported by the database helper, or 0
        when it reports none.
        '''
        maxBookId = self.createDatabase.getMaxBookID()
        if not maxBookId:
            maxBookId = 0
        return maxBookId

    def downloadDir(self):
        '''
        Create (if needed) the directory named max book id + 1 inside the
        library, make it the working directory and return its path.
        '''
        directory_name = os.path.join(self.directory_name,
                                      str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            os.makedirs(directory_name)
        os.chdir(directory_name)
        return directory_name
def __init__(self):
    '''
    Point the instance at the packtpub.com site root and attach the library
    folder reported by Workspace plus a CreateDatabase helper.
    '''
    self.baseUrl = "https://www.packtpub.com/"
    workspace = Workspace()
    self.directory_name = workspace.libraryPath
    self.createDatabase = CreateDatabase()
class DownloadItEbook(threading.Thread):
    '''
    This class will download books from itebook.info

    For a numeric book id it scrapes the book's detail page, stores the cover
    image and a ``book.json`` metadata file in a new library directory and
    drives Firefox (through selenium) to download the book file itself.
    '''

    def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None):
        '''
        Constructor, setting location of downloaded book.

        ``group``, ``target``, ``name`` and ``verbose`` are forwarded to
        threading.Thread; ``args`` and ``kwargs`` are only stored on the
        instance (they are what run() prints).
        '''
        super(DownloadItEbook, self).__init__(group=group, target=target, name=name, verbose=verbose)
        self.args = args
        self.kwargs = kwargs
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()

    def run(self):
        '''Thread body: report the arguments this thread was created with.'''
        # The arguments must be applied with %; passing them to print() as
        # extra positional values printed a raw tuple under Python 2.
        print('running with %s and %s' % (self.args, self.kwargs))

    def getUrl(self, baseUrl, number):
        '''Return the detail page URL of book ``number`` below ``baseUrl``.'''
        return baseUrl + '/book/' + str(number)

    def findBookDetail(self, baseUrl, number):
        '''
        Scrape the detail page of book ``number`` and return a populated Book.

        Raises IndexError when one of the mandatory itemprop fields is missing
        from the page; a missing sub title is only reported on stderr.
        '''
        url = self.getUrl(baseUrl, number)
        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(content)

        def firstItem(prop):
            # First element carrying the given schema.org itemprop.
            return soup.find_all(itemprop=prop)[0]

        book = Book()
        book.authors.append(Author(firstItem("author").text))
        book.isbn_13 = firstItem("isbn").text
        book.bookName = firstItem("name").text
        book.publisher = firstItem("publisher").text
        try:
            date = datetime.strptime(str(firstItem("datePublished").text), '%Y')
        except Exception:
            # Missing or unparsable year: fall back to the current time.
            date = datetime.now()
        book.publishedOn = date
        book.numberOfPages = firstItem("numberOfPages").text
        book.inLanguage = firstItem("inLanguage").text
        book.bookFormat = firstItem("bookFormat").text
        book.bookDescription = firstItem("description").text
        book.bookImgName = firstItem("image").get('src')
        try:
            book.subTitle = soup.h3.text
        except Exception:
            traceback.print_exc()
        book.fileSize = soup.find_all('table')[3].find_all('tr')[7].find_all(
            'td')[1].find_all('b')[0].text
        for link in soup.find_all('a'):
            # Anchors without an href return None and must be skipped.
            href = link.get('href')
            if href and href.startswith('http://filepi.com'):
                book.name = link.text
                break
        return book

    def getMaxBookID(self):
        '''Return the highest book id known to the database, or 0 if none.'''
        return self.createDatabase.getMaxBookID() or 0

    def downloadDir(self):
        '''
        This function will create directory to download book.
        The directory is named after the database max book id + 1.

        @return: path of the (possibly newly created) directory.
        '''
        directory_name = os.path.join(self.directory_name, str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            os.makedirs(directory_name)
        return directory_name

    def firefoxDownloadJob(self, book, baseUrl, number):
        '''
        Download cover, metadata and book file of ``book`` into a new
        library directory.

        @param book: Book returned by findBookDetail(); its bookImgName is
                     reduced to the bare file name, and fileSize / bookFormat
                     are refreshed from the live page.
        @param baseUrl: site root, e.g. 'http://it-ebooks.info'.
        @param number: numeric book id on the site.
        '''
        directory_name = self.downloadDir()
        # Creating Actual URL
        url = self.getUrl(baseUrl, number)

        # A directory holding exactly three entries is left alone; anything
        # else is emptied before the download starts.
        entries = os.listdir(directory_name)
        if len(entries) != 3:
            for sName in entries:
                os.remove(os.path.join(directory_name, sName))

        # Downloading book cover
        subUrl = book.bookImgName
        imageFileName = subUrl.split('/')[-1]
        imageUrl = baseUrl + subUrl
        logging.info(imageUrl)
        urllib.urlretrieve(imageUrl, os.path.join(directory_name, imageFileName))
        book.bookImgName = imageFileName

        # Errors while writing the metadata abort the job (no try/except here).
        self._writeBookJson(directory_name, book)

        fp = webdriver.FirefoxProfile()
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.manager.focusWhenStarting', False)
        fp.set_preference("browser.download.dir", directory_name)
        fp.set_preference("browser.download.manager.scanWhenDone", False)
        fp.set_preference("browser.download.manager.useWindow", False)
        fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream")
        fp.update_preferences()
        driver = webdriver.Firefox(firefox_profile=fp)
        try:
            driver.get(url)
            efd_link = driver.find_element_by_link_text(book.name)
            # NOTE: book.json has already been written, so these two values
            # only update the in-memory book object.
            book.fileSize = driver.find_element_by_xpath(
                "html/body/table/tbody/tr[2]/td/div/table/tbody/tr/td[2]/table/tbody/tr[8]/td[2]/b"
            ).text
            book.bookFormat = driver.find_element_by_xpath(
                "html/body/table/tbody/tr[2]/td/div/table/tbody/tr/td[2]/table/tbody/tr[9]/td[2]/b"
            ).text
            efd_link.click()
            flag = True
            while flag:
                # Firefox keeps a '.part' file while the download is running.
                time.sleep(10)
                lst = []
                for sName in os.listdir(directory_name):
                    if os.path.isfile(os.path.join(directory_name, sName)):
                        extension = sName.split('.')[-1]
                        logging.info(extension)
                        lst.append(extension)
                print(lst)
                if 'part' not in lst:
                    logging.info("flag :" + str(flag))
                    flag = False
                    time.sleep(10)
        finally:
            # Close the browser even when the page layout changed or the
            # download failed; otherwise every failure leaks a Firefox.
            driver.close()

    @staticmethod
    def _bookToJsonDict(book):
        '''
        Return a JSON serialisable copy of the attributes of ``book``.

        Works on a copy so the book itself keeps its datetime and its author
        objects.
        '''
        row2dict = dict(book.__dict__)
        publishedOn = row2dict.get('publishedOn')
        if isinstance(publishedOn, datetime):
            row2dict['publishedOn'] = str(publishedOn)
        authors = []
        for a in row2dict.get('authors') or []:
            if isinstance(a, str):
                authors.append({'authorName': a})
            else:
                authors.append(a.__dict__)
        row2dict['authors'] = authors
        return row2dict

    def _writeBookJson(self, bookPath, book):
        '''Write ``book`` as book.json into ``bookPath``; errors propagate.'''
        content = json.dumps(self._bookToJsonDict(book), sort_keys=False, indent=4)
        with open(os.path.join(bookPath, 'book.json'), 'w') as f:
            f.write(content)

    def writeJsonToDir(self, bookPath=None, book=None):
        '''
        this function will write json file to given dir.
        Failures are reported with a traceback and otherwise ignored.
        '''
        try:
            self._writeBookJson(bookPath, book)
        except Exception:
            traceback.print_exc()

    def isBookDownloading(self, files, samples=3, interval=1.0):
        '''
        This method will inform that book is getting downloading or not.

        @param files: paths inside the download directory.
        @param samples: how many times the '.part' file sizes are sampled.
        @param interval: seconds to wait between two samples.
        @return: True when a '.part' file changed size (or disappeared)
                 between two samples, False otherwise.
        '''
        partFiles = [fl for fl in files if fl.endswith('.part')]

        def snapshot():
            sizes = {}
            for fl in partFiles:
                try:
                    sizes[fl] = os.stat(fl).st_size
                except OSError:
                    # The browser renames the file once the download is done.
                    sizes[fl] = None
            return sizes

        isDownloading = False
        previous = snapshot()
        for _ in range(max(samples - 1, 0)):
            time.sleep(interval)
            current = snapshot()
            if current != previous:
                isDownloading = True
                break
            previous = current
        logging.info('isDownloading:')
        return isDownloading

    def startDownload(self):
        '''
        Download every book of the hard coded id list that is not yet stored
        in the database and refresh the database after each download.
        '''
        baseUrl = 'http://it-ebooks.info'
        lst = [1464348534, 7102]
        for number in lst:
            print(number)
            url = self.getUrl(baseUrl, number)
            response = urllib2.urlopen(url)
            try:
                strig = response.geturl()
            finally:
                response.close()
            # Unknown ids end up on a URL whose tail is '404' plus one
            # more character; those are skipped.
            if '404' == strig[-4:-1]:
                continue
            book = self.findBookDetail(baseUrl, number)
            # check book whethere it is existing in database.
            bs = FindingBook().findBookByIsbn(isbn_13=book.isbn_13)
            if bs:
                print('this books is already present. %s %s' % (book.isbn_13, book.bookName))
                continue
            try:
                self.firefoxDownloadJob(book, baseUrl, number)
                self.updateDatabase()
            except Exception:
                print('%s %s' % (number, baseUrl))
                traceback.print_exc()

    def updateDatabase(self):
        '''Recreate the database and load the library data into it.'''
        self.createDatabase.creatingDatabase()
        self.createDatabase.addingData()

    def updateBooksMetadata(self):
        '''
        Re-scrape the metadata of already downloaded books and rewrite the
        book.json of each one. Failures of a single book are reported with a
        traceback and do not stop the run.
        '''
        miss = Missing()
        # Resume point: the first 1391 entries are skipped.
        listOfDir = miss.availableNumbers()[1391:]
        baseUrl = 'http://it-ebooks.info'
        for number in listOfDir:
            print('-------------------> %s' % (number,))
            try:
                book = self.findBookDetail(baseUrl, number)
                book.itEbookUrlNumber = number
                book.bookImgName = book.bookImgName.split('/')[-1]
                # str() keeps os.path.join working for numeric ids as well.
                bookPath = os.path.join(Workspace().libraryPath, str(number))
                self.writeJsonToDir(bookPath, book)
            except Exception:
                traceback.print_exc()
def setUp(self):
    '''Announce the fixture and give the test case a CreateDatabase helper.'''
    print('setUp')
    database = CreateDatabase()
    self.createDatabase = database
class ItEbook(object):
    '''
    This class downloads first page of itebookinfo

    It walks the listing pages of the configured site, scrapes the detail
    page of every book that is not yet in the database, downloads cover,
    metadata and book archive and registers the result in the database.
    '''

    # Link texts of the listing page that are navigation entries, not books.
    _SKIP_TITLES = frozenset([
        'HOME', 'Category', 'Animals', 'Architecture', 'Art', 'Astronomy',
        'Biography', 'Biology', 'Business', 'Chemistry', 'Cinema',
        'Cookbooks', 'Cryptography', 'Culture', 'Design', 'Drawing',
        'Economics', 'Encyclopedia and Dictionary',
        'Engineering and Technology', 'Family and Friendship', 'Fitness',
        'Gambling', 'Games', 'Hardware', 'Healthcare', 'History', 'Hobbies',
        'Information Technologies', 'IT ebooks', 'Languages', 'Martial Arts',
        'Mathematics', 'Medicine', 'Military', 'Music', 'Novels', 'Other',
        'Personality', 'Philosophy', 'Photo', 'Physics', 'Poetry',
        'Politics and Sociology', 'Programming', 'Psychology',
        'Relationships', 'Religion', 'Science', 'Security', 'Sexuality',
        'Software', 'Sport', 'Travel', 'Web Development'
    ])

    # Label of a two column detail row -> Book attribute receiving its value.
    _DETAIL_FIELDS = {
        'File size': 'fileSize',
        'Pages': 'numberOfPages',
        'Language': 'inLanguage',
        'File format': 'bookFormat',
        'Category': 'tag',
        'Isbn': 'isbn_13',
    }

    def __init__(self, baseUrl=None):
        '''
        Constructor

        @param baseUrl: root URL of the site to crawl.
        '''
        self.baseUrl = baseUrl
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()
        self.header_info = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'
        }
        # book image url
        self.imageUrl = None
        self.bookUrl = None

    def getUrl(self, baseUrl):
        '''Return the base URL of this instance (the argument is ignored).'''
        return self.baseUrl

    def findAllBookUrl(self, subUrl=None):
        '''
        This method retrive all the book url avaialbe in the page.
        http://itebooks.website/page-2.html

        Every book of the page that is unknown to the database (by name and
        by ISBN) is downloaded and added to the database.

        @return: True when the listing page answered with HTTP 200, False
                 otherwise (callers use this to stop paging).
        '''
        url = self.baseUrl + '/' + subUrl
        r = requests.get(url, headers=self.header_info, timeout=30)
        if r.status_code != 200:
            return False
        soup = BeautifulSoup(r.content, "lxml")
        for link in soup.find_all('a', 'title'):
            title = link.text
            if title.strip() == '' or title in self._SKIP_TITLES:
                continue
            if self.isBookNameAvailableInDatabase(title):
                continue
            href = link.get('href')
            book, bookUrl = self.findBookDetail(href)
            if book is None:
                # The detail page could not be fetched; nothing to download.
                continue
            if self.isIsbnAvailableInDatabase(book.isbn_13):
                continue
            try:
                print('uploading database')
                directory_name = self.downloadEbook(book, href, bookUrl)
                self.updateDatabase(directory_name)
            except Exception:
                print(href)
                traceback.print_exc()
        return True

    def updateDatabase(self, directory_name):
        '''Register the book stored in ``directory_name`` in the database.'''
        self.createDatabase.addSingleBookData(directory_name)

    def isIsbnAvailableInDatabase(self, isbn_13=None):
        '''Return True when a book with this ISBN-13 is already stored.'''
        return bool(self.createDatabase.findByIsbn_13Name(isbn_13))

    def isBookNameAvailableInDatabase(self, bookName=None):
        '''Return True when a book with this name is already stored.'''
        return bool(self.createDatabase.findByBookName(bookName))

    def findBookDetail(self, subUrl):
        '''
        Scrape one book detail page, e.g.
        http://www.ebook777.com/shut-youre-welcome/

        The cover URL found on the page is remembered in self.imageUrl.

        @param subUrl: complete URL of the detail page.
        @return: tuple (book, bookUrl); both are None when the page did not
                 answer with HTTP 200.
        '''
        book = None
        # Must be bound up front: the return below is reached even when the
        # request fails.
        bookUrl = None
        r = requests.get(subUrl, headers=self.header_info, timeout=30)
        if r.status_code == 200:
            soup = BeautifulSoup(r.content, "lxml")
            content = soup.find(id="main-content-inner")
            details = content.find(class_='article-details')
            book = Book()
            book.bookDescription = content.p.text
            book.bookName = details.find(class_='title').text
            book.subTitle = details.find(class_='subtitle').text
            bookUrl = content.find(class_='download-links').find('a')['href']
            for row in soup.find('table').find_all('tr'):
                cols = row.find_all('td')
                if len(cols) == 3:
                    # Three column rows carry the cover in the first cell.
                    book.bookImgName = cols[0].img.attrs['alt']
                    self.imageUrl = cols[0].img.attrs['src']
                    if cols[1].text == 'Author':
                        author = Author()
                        author.authorName = cols[2].text
                        book.authors.append(author)
                elif len(cols) == 2:
                    label = cols[0].text
                    value = cols[1].text
                    if label == 'Year':
                        try:
                            book.publishedOn = datetime.strptime(value, '%Y')
                        except Exception:
                            book.publishedOn = datetime.now()
                    elif label in self._DETAIL_FIELDS:
                        setattr(book, self._DETAIL_FIELDS[label], value)
        return book, bookUrl

    def downloadEbook(self, book, refUrl, bookUrl):
        '''
        Download cover, metadata and book file with plain HTTP requests.

        @param book: Book returned by findBookDetail().
        @param refUrl: URL of the detail page (not used for the download).
        @param bookUrl: direct URL of the book file.
        @return: directory the files were stored in.
        '''
        directory_name = self.downloadDir()
        bookImagePath = os.path.join(directory_name, book.bookImgName)
        self.downloadBookImage(bookImagePath, self.imageUrl)
        self.writeJsonToDir(directory_name, book)
        r = requests.get(bookUrl, headers=self.header_info, timeout=30)
        print('---------------> %s' % r.url)
        bookPath = os.path.join(directory_name, bookUrl.split('/')[-1])
        with open(bookPath, 'wb') as bookFile:
            bookFile.write(r.content)
        try:
            self.extractRar(directory_name)
        except Exception:
            # A broken archive must not undo the download itself.
            traceback.print_exc()
        return directory_name

    def firefoxDownloadJob(self, book, refUrl):
        '''
        The function of this method is to download link of given URL
        through a selenium driven Firefox.

        @param book: Book returned by findBookDetail().
        @param refUrl: URL of the detail page holding the download link.
        '''
        # Creating directory
        directory_name = self.downloadDir()
        url = refUrl

        # A directory holding exactly three entries is left alone; anything
        # else is emptied before the download starts.
        entries = os.listdir(directory_name)
        if len(entries) != 3:
            for sName in entries:
                os.remove(os.path.join(directory_name, sName))

        # Downloading book cover
        bookImagePath = os.path.join(directory_name, book.bookImgName)
        self.downloadBookImage(bookImagePath, self.imageUrl)
        # writing json file
        self.writeJsonToDir(directory_name, book)

        binary = FirefoxBinary('/docs/python_projects/firefox/firefox')
        fp = webdriver.FirefoxProfile()
        fp.set_preference("webdriver.log.file", "/tmp/firefox_console")
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.manager.focusWhenStarting', False)
        fp.set_preference("browser.download.dir", directory_name)
        fp.set_preference("browser.download.manager.scanWhenDone", False)
        fp.set_preference("browser.download.manager.useWindow", False)
        fp.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "application/octet-stream,application/xml,application/pdf,text/plain,text/xml,image/jpeg,text/csv,application/zip,application/x-rar-compressed"
        )
        fp.set_preference("browser.helperApps.alwaysAsk.force", False)
        fp.set_preference("browser.popups.showPopupBlocker", False)
        fp.update_preferences()
        driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=binary)
        try:
            driver.get(url)
            efd_link = driver.find_element_by_css_selector(
                ".download-links > a:nth-child(1)")
            efd_link.click()
            flag = True
            while flag:
                # Firefox keeps a '.part' file while the download is running.
                time.sleep(10)
                lst = []
                for sName in os.listdir(directory_name):
                    if os.path.isfile(os.path.join(directory_name, sName)):
                        lst.append(sName.split('.')[-1])
                if 'part' not in lst:
                    flag = False
                    time.sleep(10)
        finally:
            # Close the browser on failures as well, not only on success.
            driver.close()
        self.extractRar(directory_name)

    def downloadBookImage(self, bookImagePath=None, imageUrl=None):
        '''
        this method will download image from imageUrl location and keep it at bookImagePath
        '''
        r = requests.get(imageUrl, headers=self.header_info, timeout=30)
        print('---------------> %s' % r.url)
        with open(bookImagePath, 'wb') as imageFile:
            imageFile.write(r.content)

    @staticmethod
    def _bookToJsonDict(book):
        '''
        Return a JSON serialisable copy of the attributes of ``book``.

        Works on a copy so the book itself keeps its datetime and its author
        objects. A blank ISBN-13 is stored as null.
        '''
        row2dict = dict(book.__dict__)
        publishedOn = row2dict.get('publishedOn')
        if isinstance(publishedOn, datetime):
            row2dict['publishedOn'] = str(publishedOn)
        authors = []
        for a in row2dict.get('authors') or []:
            if isinstance(a, str):
                authors.append({'authorName': a})
            else:
                authors.append(a.__dict__)
        row2dict['authors'] = authors
        isbn = row2dict.get('isbn_13')
        if isbn is not None and str(isbn).strip() == '':
            row2dict['isbn_13'] = None
        return row2dict

    def writeJsonToDir(self, bookPath=None, book=None):
        '''
        this function will write json file to given dir.
        Failures are reported with a traceback and otherwise ignored.
        '''
        try:
            content = json.dumps(self._bookToJsonDict(book), sort_keys=False, indent=4)
            with open(os.path.join(bookPath, 'book.json'), 'w') as f:
                f.write(content)
        except Exception:
            traceback.print_exc()

    def isBookDownloading(self, files, samples=3, interval=1.0):
        '''
        This method will inform that book is getting downloading or not.

        @param files: paths inside the download directory.
        @param samples: how many times the '.part' file sizes are sampled.
        @param interval: seconds to wait between two samples.
        @return: True when a '.part' file changed size (or disappeared)
                 between two samples, False otherwise.
        '''
        partFiles = [fl for fl in files if fl.endswith('.part')]

        def snapshot():
            sizes = {}
            for fl in partFiles:
                try:
                    sizes[fl] = os.stat(fl).st_size
                except OSError:
                    # The browser renames the file once the download is done.
                    sizes[fl] = None
            return sizes

        isDownloading = False
        previous = snapshot()
        for _ in range(max(samples - 1, 0)):
            time.sleep(interval)
            current = snapshot()
            if current != previous:
                isDownloading = True
                break
            previous = current
        logging.info('isDownloading:')
        return isDownloading

    def startDownload(self):
        '''
        Crawl the listing pages of ebook777.com starting at page 1100 and
        stop at the first page that does not answer with HTTP 200.
        '''
        baseUrl = 'http://www.ebook777.com'
        itebook = ItEbook(baseUrl)
        i = 1100
        logicTrue = True
        while logicTrue:
            subUrl = 'page/' + str(i) + '/'
            # findAllBookUrl() reports whether the page exists; without this
            # check the loop would request non-existing pages forever.
            logicTrue = bool(itebook.findAllBookUrl(subUrl))
            i = i + 1
            print('startDownload----------> %s' % str(i))

    def getMaxBookID(self):
        '''Return the highest book id known to the database, or 0 if none.'''
        return self.createDatabase.getMaxBookID() or 0

    def downloadDir(self):
        '''
        This function will create directory to download book.
        The directory is named after the database max book id + 1 and
        becomes the current working directory.

        @return: path of the directory.
        '''
        directory_name = os.path.join(self.directory_name, str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            os.makedirs(directory_name)
        os.chdir(directory_name)
        return directory_name

    def extractRar(self, directory_name):
        '''
        extracting rar file

        Every '.rar' file directly inside ``directory_name`` is unpacked
        into that directory; '.html', '.htm' and '.txt' members are skipped.
        '''
        os.chdir(directory_name)
        skippedSuffixes = ('.html', '.htm', '.txt')
        for fileName in os.listdir(directory_name):
            archivePath = os.path.join(directory_name, fileName)
            if os.path.isdir(archivePath) or not fileName.endswith(".rar"):
                continue
            rar = rarfile.RarFile(archivePath)
            for name in rar.namelist():
                if not name.endswith(skippedSuffixes):
                    rar.extract(name, directory_name)
class FullCircleMagazine():
    '''
    Downloads the English issues of the Full Circle magazine: PDF, cover
    image and a book.json metadata file per issue, each stored in its own
    library directory and registered in the database.
    '''

    def __init__(self, baseUrl=None):
        '''
        @param baseUrl: stored on the instance; the download URLs themselves
                        are built in startDownload().
        '''
        self.baseUrl = baseUrl
        self.directory_name = Workspace().libraryPath
        self.createDatabase = CreateDatabase()
        self.header_info = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'}
        # book image url
        self.imageUrl = None
        self.bookUrl = None

    def downloadFullCircleMagazine(self, url, book=None, bookUrl=None):
        '''
        Download one issue, e.g. http://dl.fullcirclemagazine.org/issue1_en.pdf

        @param url: URL of the PDF.
        @param book: Book describing the issue (see createBookDetail()); its
                     fileSize is filled in from the response.
        @param bookUrl: unused, kept for compatibility.
        @return: tuple (HTTP status code, directory created for the issue).
                 Files are only written when the status code is 200.
        '''
        directory_name = self.createDownloadDir()
        bookImagePath = os.path.join(directory_name, book.bookImgName)
        os.chdir(directory_name)
        r = requests.get(url, headers=self.header_info, timeout=30)
        if r.status_code == 200:
            print('%s %s' % (r.status_code, url))
            # Fall back to the body length when the server sends no
            # content-length header.
            sizeInBytes = int(r.headers.get("content-length", len(r.content)))
            # Float division: integer division dropped the fraction before
            # round() could keep two decimals.
            sizeInMb = sizeInBytes / 1000000.0
            print('-------> %s' % sizeInMb)
            book.fileSize = str(round(sizeInMb, 2)) + ' MB'
            self.writeJsonToDir(directory_name, book)
            self.downloadBookImage(bookImagePath, self.imageUrl)
            print('---------------> %s' % r.url)
            bookPath = os.path.join(directory_name, url.split('/')[-1])
            print(bookPath)
            with open(bookPath, 'wb') as bookFile:
                bookFile.write(r.content)
            self.updateDatabase(directory_name)
        return r.status_code, directory_name

    def createBookDetail(self, bookName=None):
        '''
        Build the Book describing one issue.

        @param bookName: issue label such as 'Issue 12'.
        '''
        book = Book()
        book.bookName = "Full Circle "+ bookName
        book.bookFormat = 'pdf'
        book.tag = 'Technology'
        book.inLanguage = 'English'
        book.subTitle = 'Magazine'
        book.publisher = "Full Circle"
        book.bookImgName = bookName + '.jpg'
        book.hasCover = 'Yes'
        book.hasCode = 'No'
        return book

    @staticmethod
    def _bookToJsonDict(book):
        '''
        Return a JSON serialisable copy of the attributes of ``book``.

        Works on a copy so the book itself keeps its datetime and its author
        objects. A blank ISBN-13 is stored as null.
        '''
        row2dict = dict(book.__dict__)
        publishedOn = row2dict.get('publishedOn')
        if isinstance(publishedOn, datetime):
            row2dict['publishedOn'] = str(publishedOn)
        authors = []
        for a in row2dict.get('authors') or []:
            if isinstance(a, str):
                authors.append({'authorName': a})
            else:
                authors.append(a.__dict__)
        row2dict['authors'] = authors
        isbn = row2dict.get('isbn_13')
        if isbn is not None and str(isbn).strip() == '':
            row2dict['isbn_13'] = None
        return row2dict

    def writeJsonToDir(self, bookPath=None, book=None):
        '''
        Write ``book`` as book.json into ``bookPath``. Failures are reported
        with a traceback and otherwise ignored.
        '''
        try:
            content = json.dumps(self._bookToJsonDict(book), sort_keys=False, indent=4)
            with open(os.path.join(bookPath, 'book.json'), 'w') as f:
                f.write(content)
        except Exception:
            traceback.print_exc()

    def downloadBookImage(self, bookImagePath=None, imageUrl=None):
        '''
        this method will download image from imageUrl location and keep it at bookImagePath

        @param imageUrl: a 'data:' URI whose payload is base64 encoded, or
                         an ordinary URL that is fetched over HTTP. Nothing
                         is written when it is empty or None.
        '''
        import base64

        print(imageUrl)
        if not imageUrl:
            # getImageUrl() found no cover for this issue.
            return
        if imageUrl.startswith('data:'):
            # Layout: data:<mime>[;charset=..][;base64],<payload>
            _head, data = imageUrl.split(',', 1)
            plaindata = base64.b64decode(data)
        else:
            r = requests.get(imageUrl, headers=self.header_info, timeout=30)
            plaindata = r.content
        with open(bookImagePath, 'wb') as f:
            f.write(plaindata)
        print('write image complete')

    def updateDatabase(self, directory_name):
        '''Register the book stored in ``directory_name`` in the database.'''
        self.createDatabase.addSingleBookData(directory_name)

    def isIsbnAvailableInDatabase(self, isbn_13=None):
        '''Return True when a book with this ISBN-13 is already stored.'''
        return bool(self.createDatabase.findByIsbn_13Name(isbn_13))

    def isBookNameAvailableInDatabase(self, bookName=None):
        '''Return True when a book with this name is already stored.'''
        return bool(self.createDatabase.findByBookName(bookName))

    def createDownloadDir(self):
        '''
        This function will create directory to download book.
        The directory is named after the database max book id + 1 and
        becomes the current working directory.

        @return: path of the directory.
        '''
        directory_name = os.path.join(self.directory_name, str(self.getMaxBookID() + 1))
        if not os.path.exists(directory_name):
            # The mode must be octal; decimal 755 is octal 1363.
            os.makedirs(directory_name, 0o755)
        os.chdir(directory_name)
        return directory_name

    def getMaxBookID(self):
        '''Return the highest book id known to the database, or 0 if none.'''
        return self.createDatabase.getMaxBookID() or 0

    def getImageUrl(self, completeUrl, issueCount):
        '''
        Look up the cover of an issue on its web page.

        @param completeUrl: URL of the issue page.
        @param issueCount: issue number as string.
        @return: the 'src' of the cover image, or None when the page could
                 not be loaded or shows no matching cover.
        '''
        print(completeUrl)
        imageUrl = None
        r = requests.get(completeUrl, headers=self.header_info, timeout=30)
        if r.status_code == 200:
            soup = BeautifulSoup(r.content, "lxml")
            table = soup.find(class_='issuetable')
            img = table.find('img') if table is not None else None
            expectedAlt = 'Cover for Issue '+issueCount+' in English'
            if img is not None and img.get('alt') == expectedAlt:
                imageUrl = img['src']
            print(imageUrl)
        return imageUrl

    def startDownload(self):
        '''
        Download all issues that are not yet in the database, starting with
        issue 1, until the first PDF request does not answer with HTTP 200.
        '''
        logic = True
        i = 1
        while logic:
            issue = str(i)
            pdfUrl = 'http://dl.fullcirclemagazine.org/issue' + issue + '_en.pdf'
            completeUrl = 'http://fullcirclemagazine.org/issue-' + issue + '/'
            if not self.isIssuePresent(issue):
                self.imageUrl = self.getImageUrl(completeUrl, issue)
                book = self.createBookDetail('Issue ' + issue)
                status_code, directory_name = self.downloadFullCircleMagazine(book=book, url=pdfUrl)
                # status_code only exists after a download attempt, so the
                # check stays inside this branch.
                print('%s %s' % (completeUrl, status_code))
                if status_code != 200:
                    logic = False
            i = i + 1

    def isIssuePresent(self, issue=None):
        '''Return True when "Full Circle Issue <issue>" is already stored.'''
        bookName = "Full Circle Issue " + issue
        return bool(self.createDatabase.findByBookName(bookName))

    def getIssueDetail(self):
        '''Print every table of the Ubuntu wiki full issue index page.'''
        url = 'https://wiki.ubuntu.com/UbuntuMagazine/FullIssueIndex'
        r = requests.get(url, headers=self.header_info, timeout=30)
        if r.status_code == 200:
            soup = BeautifulSoup(r.content, "lxml")
            for table in soup.findAll('table'):
                print(table)