def makeNote(noteStore, noteTitle, noteBody, sourceUrl='', resources=None, parentNotebook=None):
    """Create a note in the Evernote account via *noteStore*.

    Parameters:
        noteStore: authenticated Evernote NoteStore client; its ``createNote``
            is called with the assembled note.
        noteTitle: title of the new note.
        noteBody: inner HTML placed inside the ``<en-note>`` ENML envelope.
        sourceUrl: optional URL recorded as the note's ``sourceURL`` attribute.
        resources: optional list of Evernote resource objects to attach.
            (Default was a mutable ``[]`` — replaced with ``None``; the
            ``if resources:`` truthiness check behaves identically.)
        parentNotebook: optional notebook GUID string; if omitted the
            account's default notebook is used.

    Returns:
        The note object returned by ``noteStore.createNote``.

    Raises:
        EDAM errors from the Evernote API when the note data is invalid or the
        notebook GUID does not correspond to an actual notebook; see
        http://dev.evernote.com/documentation/reference/Errors.html#Enum_EDAMErrorCode
    """
    # Build the ENML document the Evernote API requires: XML prolog,
    # en-note DTD, then the caller-supplied body wrapped in <en-note>.
    nBody = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    nBody += "<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">"
    nBody += "<en-note>" + noteBody + "</en-note>"
    logger.info(nBody)

    ## Create note object
    ourNote = Types.Note()
    ourNote.title = noteTitle
    ourNote.content = nBody
    if resources:
        ourNote.resources = resources
    if sourceUrl:
        note_attrs = Types.NoteAttributes()
        note_attrs.sourceURL = sourceUrl
        ourNote.attributes = note_attrs

    ## parentNotebook is optional; if omitted, the default notebook is used.
    # NOTE(review): callers pass a GUID string here, not a Notebook object
    # (see the commented-out hasattr variant in history) — confirm at call sites.
    if parentNotebook:
        ourNote.notebookGuid = parentNotebook

    ## Attempt to create the note in the Evernote account; API errors propagate.
    note = noteStore.createNote(ourNote)

    ## Return created note object
    return note
def get_content(self):
    """Fetch a Zhihu column article (``self.url``), clean its HTML, and save it
    to Evernote via ``EvernoteMethod.makeNote``.

    Reads instance state: ``self.url``, ``self.headers``, ``self.remove_attrs``,
    ``self.change_img``, ``self.noteStore``, ``self.parent_note``.
    """
    logger.info("get_content url %s" % self.url)
    r = requests.get(self.url, headers=self.headers)
    logger.info("get_content res %s " % r.text)
    res_json = r.json()
    content = res_json.get('content', '')

    soup = BeautifulSoup(content, "html5lib")
    # NOTE(review): find_all() returns matches without modifying the tree and
    # the result is discarded — presumably attribute-stripping was intended;
    # verify what self.remove_attrs is supposed to do.
    soup.find_all(self.remove_attrs)
    # html5lib wraps fragments in html/head/body; unwrap to keep only the
    # article markup.
    soup.html.unwrap()
    soup.head.unwrap()
    soup.body.unwrap()

    title = res_json.get('title', '')
    # question_id = res_json.get('question', {}).get('id', '')
    article_id = res_json.get('id', '')  # renamed from `id` to avoid shadowing the builtin
    note_url = 'https://zhuanlan.zhihu.com/p/%s' % (article_id)

    # change_img rewrites image tags and returns the resources to attach.
    res = self.change_img(soup)

    # Strip embedded newlines from the title (Evernote titles are single-line).
    title = ''.join(title.split('\n'))
    logger.info("note_url %s" % note_url)
    logger.info("title %s" % title)

    html_content = str(soup)
    res = EvernoteMethod.makeNote(self.noteStore, title.encode('utf8'), html_content, note_url, res, self.parent_note)
def get_list(self, url):
    """Walk the Zhihu favourites listing starting at *url*, pushing each
    not-yet-seen article to the work queue via ``self.push_fav``.

    Pagination follows ``paging.next`` only while ``self.force_check`` is set
    and the current page returned data. Rewritten from tail recursion to a
    loop so a long listing cannot exhaust the recursion limit; per-page
    behaviour (ordering, logging, dedup checks) is unchanged.

    Uses module globals: ``s`` (requests session), ``redis_obj``, ``logger``.
    """
    while url:
        r = s.get(url, headers=self.headers)
        res_json = r.json()
        data_info = res_json.get('data', [])

        # Only continue paging when this page had data and force_check is on.
        next_url = None
        if data_info and self.force_check:
            paging_dict = res_json.get('paging', {})
            next_url = paging_dict.get('next', None)

        for data in data_info:
            type_info = data.get('type', '')
            if type_info != 'article':
                continue
            data_url = data.get('url')
            data_id = data.get('id')
            data_title = data.get('title')
            # Skip malformed entries rather than aborting the whole page.
            if not data_url or not data_id or not data_title:
                logger.error("%s error" % data)
                continue
            # Dedup: already-processed article ids live in this Redis set.
            if redis_obj.sismember('zhihu_zhuanlan_id', data_id):
                logger.warning("%s %s %s exits" % (data_url, data_id, data_title))
                continue
            logger.info("+++++++++++++++++++++++++++++++++++++++++++")
            logger.info(data_url)
            logger.info(data_id)
            logger.info(data_title)
            self.push_fav(data)
            logger.info("+++++++++++++++++++++++++++++++++++++++++++")
            logger.info("\n")
            # return

        if next_url:
            logger.info("next url %s" % next_url)
        url = next_url