def __fetch_commits_from_repo(self, repo, from_date, to_date, branches, no_update):
    """Fetch and parse the commits of a git repository.

    The request is logged, the date range is normalized, the repository
    is optionally updated and its log is parsed.

    :param repo: repository object providing `update()` and `log()`
    :param from_date: lower bound of the range; `DEFAULT_DATETIME`
        is replaced by `None`
    :param to_date: upper bound of the range; `DEFAULT_LAST_DATETIME`
        is replaced by `None`
    :param branches: `None` (logged as "all"), an empty sequence
        (logged as "no") or a sequence of branch names
    :param no_update: when set, `repo.update()` is not called

    :returns: the result of `parse_git_log_from_iter` over the log
    """
    if branches is None:
        branches_text = "all"
    elif len(branches) > 0:
        branches_text = ", ".join(branches)
    else:
        branches_text = "no"

    logger.info("Fetching commits: '%s' git repository from %s to %s; %s branches",
                self.uri, str(from_date), str(to_date), branches_text)

    # Default datetimes are dropped to avoid problems with git;
    # any other value is converted to UTC
    to_date = None if to_date == DEFAULT_LAST_DATETIME else datetime_to_utc(to_date)
    from_date = None if from_date == DEFAULT_DATETIME else datetime_to_utc(from_date)

    if not no_update:
        repo.update()

    return self.parse_git_log_from_iter(repo.log(from_date, to_date, branches))
def fetch(self, category=CATEGORY_PULL_REQUEST, from_date=DEFAULT_DATETIME,
          to_date=DEFAULT_LAST_DATETIME):
    """Fetch the issues/pull requests from the repository.

    Retrieves, from a BitBucket repository, the issues/pull requests
    updated since the given date. Empty dates fall back to the default
    range limits and both limits are converted to UTC before they are
    handed to the parent class.

    :param category: the category of items to fetch
    :param from_date: obtain issues/pull requests updated since this date
    :param to_date: obtain issues/pull requests until a specific date (included)

    :returns: a generator of issues
    """
    since = datetime_to_utc(from_date or DEFAULT_DATETIME)
    until = datetime_to_utc(to_date or DEFAULT_LAST_DATETIME)

    return super().fetch(category, from_date=since, to_date=until)
def test_conversion(self):
    """Check if it converts some timestamps to timestamps with UTC+0."""

    utc_tz = dateutil.tz.tzutc()

    cases = [
        # Aware datetime six hours behind UTC is shifted forward
        (datetime.datetime(2001, 12, 1, 23, 15, 32,
                           tzinfo=dateutil.tz.tzoffset(None, -21600)),
         datetime.datetime(2001, 12, 2, 5, 15, 32, tzinfo=utc_tz)),
        # Datetime already in UTC stays the same
        (datetime.datetime(2001, 12, 1, 23, 15, 32, tzinfo=dateutil.tz.tzutc()),
         datetime.datetime(2001, 12, 1, 23, 15, 32, tzinfo=utc_tz)),
        # Naive datetime gets the UTC timezone
        (datetime.datetime(2001, 12, 1, 23, 15, 32),
         datetime.datetime(2001, 12, 1, 23, 15, 32, tzinfo=utc_tz)),
    ]

    for date, expected in cases:
        utc = datetime_to_utc(date)
        self.assertIsInstance(utc, datetime.datetime)
        self.assertEqual(utc, expected)
def fetch(self, category=CATEGORY_EVENT, from_date=DEFAULT_DATETIME,
          to_date=DEFAULT_LAST_DATETIME):
    """Fetch the issue events from the repository.

    Retrieves, from a GitHub repository, the issue events since/until
    a given date. Empty dates fall back to the default range limits and
    both limits are converted to UTC before they are handed to the
    parent class.

    :param category: the category of items to fetch
    :param from_date: obtain issue events since this date
    :param to_date: obtain issue events until this date (included)

    :returns: a generator of events
    """
    since = datetime_to_utc(from_date or DEFAULT_DATETIME)
    until = datetime_to_utc(to_date or DEFAULT_LAST_DATETIME)

    return super().fetch(category, from_date=since, to_date=until)
def test_invalid_timezone(self):
    """Check whether an invalid timezone is converted to UTC+0"""

    # Python 3.6 lifted the restriction on the offset range and later
    # versions keep accepting such offsets, so the timezone below is
    # only invalid on interpreters older than 3.6. The guard covers
    # every version from 3.6 on, not only 3.6 itself.
    if sys.version_info >= (3, 6):
        self.skipTest("offset is only invalid on Python versions prior to 3.6")

    date = datetime.datetime(2001, 12, 1, 23, 15, 32,
                             tzinfo=dateutil.tz.tzoffset(None, -3407))
    expected = datetime.datetime(2001, 12, 1, 23, 15, 32,
                                 tzinfo=dateutil.tz.tzutc())

    utc = datetime_to_utc(date)

    self.assertIsInstance(utc, datetime.datetime)
    self.assertEqual(utc, expected)
def issues(self, from_date=DEFAULT_DATETIME, offset=None, max_issues=MAX_ISSUES):
    """Get the information of a list of issues.

    :param from_date: retrieve issues that were updated from that date;
        dates are converted to UTC
    :param offset: starting position for the search; it is only sent
        when it is not `None`
    :param max_issues: maximum number of issues to return per query

    :returns: the response obtained from `_call`
    """
    resource = self.RISSUES + self.CJSON

    updated_since = datetime_to_utc(from_date).strftime("%Y-%m-%dT%H:%M:%SZ")

    # By default, Redmine returns open issues only.
    # Parameter 'status_id' is set to get all the statuses.
    params = {
        self.PSTATUS_ID: '*',
        self.PSORT: self.PUPDATED_ON,
        self.PUPDATED_ON: '>=' + updated_since,
        self.PLIMIT: max_issues
    }

    if offset is not None:
        params[self.POFFSET] = offset

    return self._call(resource, params)
def fetch(self, category=CATEGORY_EVENT, from_date=DEFAULT_DATETIME, to_date=None,
          filter_classified=False):
    """Fetch the events from the server.

    Fetches those events of a group stored on the server that were
    updated since the given date. Data comments and rsvps are included
    within each event. An empty `from_date` falls back to
    `DEFAULT_DATETIME`; `to_date` is passed on untouched.

    :param category: the category of items to fetch
    :param from_date: obtain events updated since this date
    :param to_date: obtain events updated before this date
    :param filter_classified: remove classified fields from the resulting items

    :returns: a generator of events
    """
    since = datetime_to_utc(from_date or DEFAULT_DATETIME)

    return super().fetch(category,
                         filter_classified=filter_classified,
                         from_date=since,
                         to_date=to_date)
def run(self, backend_args, archive_args=None, resume=False):
    """Run the backend with the given parameters.

    The method runs the backend assigned to this job, storing the
    fetched items in a Redis queue. The ongoing status of the job
    can be accessed through the property `result`.

    When `resume` is set, the job starts from the last execution,
    overwriting the 'from_date' and 'offset' parameters, if needed.

    When `archive_args` is given, the archive manager is initialized
    with its 'archive_path' entry before the backend is executed.

    Any exception during the execution of the process will be raised.

    :param backend_args: parameters used to run the backend
    :param archive_args: archive arguments
    :param resume: fetch items starting where the last execution
        stopped
    """
    args = backend_args.copy()

    if archive_args:
        self.initialize_archive_manager(archive_args['archive_path'])

    if resume:
        # Continue from the point recorded by the previous execution
        if self.result.max_date:
            args['from_date'] = unixtime_to_datetime(self.result.max_date)
        if self.result.offset:
            args['offset'] = self.result.offset
        self._result.nresumed += 1
    else:
        # Fresh execution: start a new result seeded with the given range
        max_date = backend_args.get('from_date')
        offset = backend_args.get('offset')

        if max_date:
            max_date = datetime_to_utc(max_date).timestamp()

        self._result = JobResult(self.job_id, self.task_id,
                                 self.backend, self.category,
                                 None, max_date, 0,
                                 offset=offset, nresumed=0)

    for item in self._execute(args, archive_args):
        self.conn.rpush(self.qitems, pickle.dumps(item))

        self._result.nitems += 1
        self._result.last_uuid = item['uuid']

        updated_on = item['updated_on']
        current_max = self.result.max_date

        # Keep track of the most recent update seen so far
        if not current_max or current_max < updated_on:
            self._result.max_date = updated_on

        if 'offset' in item:
            self._result.offset = item['offset']
def bugs(self, from_date=DEFAULT_DATETIME, offset=None, max_bugs=MAX_BUGS):
    """Get the information of a list of bugs.

    :param from_date: retrieve bugs that were updated from that date;
        dates are converted to UTC
    :param offset: starting position for the search; i.e. to return the
        11th element, set this value to 10. Falsy values are not sent.
    :param max_bugs: maximum number of bugs to return per query

    :returns: the response obtained from `call`
    """
    last_change = datetime_to_utc(from_date).strftime("%Y-%m-%dT%H:%M:%SZ")

    params = {
        self.PLAST_CHANGE_TIME: last_change,
        self.PLIMIT: max_bugs,
        self.PORDER: self.VCHANGE_DATE_ORDER,
        self.PINCLUDE_FIELDS: self.VINCLUDE_ALL
    }

    if offset:
        params[self.POFFSET] = offset

    return self.call(self.RBUG, params)
def __fetch_crates(self, from_date):
    """Fetch crates.

    Walks every group of crates returned by the client and yields,
    for each crate not updated before `from_date`, its data enriched
    with owner teams, owner users, version downloads and versions.

    :param from_date: crates updated before this date are skipped;
        the date is converted to UTC

    :returns: a generator of crates
    """
    since = datetime_to_utc(from_date)

    for raw_group in self.client.crates():
        for summary in json.loads(raw_group)['crates']:
            updated_at = str_to_datetime(summary['updated_at'])

            if updated_at < since:
                continue

            crate_id = summary['id']

            crate = self.__fetch_crate_data(crate_id)
            crate['owner_team_data'] = self.__fetch_crate_owner_team(crate_id)
            crate['owner_user_data'] = self.__fetch_crate_owner_user(crate_id)
            crate['version_downloads_data'] = self.__fetch_crate_version_downloads(crate_id)
            crate['versions_data'] = self.__fetch_crate_versions(crate_id)

            yield crate
def fetch(self, category=CATEGORY_HISTORICAL_CONTENT, from_date=DEFAULT_DATETIME):
    """Fetch the contents by version from the server.

    Fetches the different historical versions (or snapshots) of the
    contents stored in the server that were updated since the given
    date. Only those snapshots created or updated after `from_date`
    will be returned.

    Take into account that the seconds of the `from_date` parameter
    will be ignored because the Confluence REST API only accepts the
    date, hours and minutes for timestamp values.

    :param category: the category of items to fetch
    :param from_date: obtain historical versions of contents updated
        since this date; empty values fall back to `DEFAULT_DATETIME`

    :returns: a generator of historical versions
    """
    since = datetime_to_utc(from_date or DEFAULT_DATETIME)

    return super().fetch(category, from_date=since)
def tasks(self, from_date=DEFAULT_DATETIME):
    """Retrieve tasks.

    Yields every raw page of results, following the 'after' cursor
    found in each response until it is empty.

    :param from_date: retrieve tasks that were updated from that date;
        dates are converted to epoch time

    :returns: a generator of raw responses
    """
    # Convert 'from_date' to epoch timestamp.
    # Zero value (1970-01-01 00:00:00) is not allowed for
    # 'modifiedStart' so it will be set to 1, by default.
    modified_start = int(datetime_to_utc(from_date).timestamp()) or 1

    params = {
        self.PCONSTRAINTS: {self.PMODIFIED_START: modified_start},
        self.PATTACHMENTS: {self.PPROJECTS: True},
        self.PORDER: self.VOUTDATED,
    }

    while True:
        raw_page = self._call(self.MANIPHEST_TASKS, params)
        yield raw_page

        cursor_after = json.loads(raw_page)['result']['cursor']['after']

        if not cursor_after:
            return

        params[self.PAFTER] = cursor_after
def _fetch_gerrit(self, from_date=DEFAULT_DATETIME):
    """Fetch reviews, group by group, until one is not newer than `from_date`.

    Reviews are consumed in the order they are returned by
    `_get_reviews`. When a group is exhausted and it was full (its
    size reached `max_reviews`), the next group is requested.

    :param from_date: stop as soon as a review whose 'lastUpdated'
        value is not greater than this date (as Unix time) is found

    :returns: a generator of reviews
    """
    last_item = self.client.next_retrieve_group_item()
    reviews = self._get_reviews(last_item)
    group_size = len(reviews)

    # Convert date to Unix time
    from_ut = datetime_to_utc(from_date).timestamp()

    while reviews:
        review = reviews.pop(0)

        try:
            last_item += 1
        except Exception:
            pass  # last_item is a string in old gerrits

        if review['lastUpdated'] <= from_ut:
            logger.debug("No more updates for %s" % (self.hostname))
            break

        yield review

        group_was_full = group_size >= self.max_reviews

        if not reviews and group_was_full:
            logger.debug("GETTING MORE REVIEWS %i >= %i " % (group_size, self.max_reviews))
            last_item = self.client.next_retrieve_group_item(last_item, review)
            reviews = self._get_reviews(last_item)
            group_size = len(reviews)
def fetch_items(self, category, **kwargs):
    """Fetch the questions.

    Questions whose last activity is not later than 'from_date' are
    skipped, as well as those whose HTML version cannot be fetched.
    Each yielded question is updated with the data built from its
    HTML version and its comments.

    :param category: the category of items to fetch
    :param kwargs: backend arguments; 'from_date' is required

    :returns: a generator of items
    """
    since = datetime_to_utc(kwargs['from_date']).timestamp()

    groups = self.client.get_api_questions(AskbotClient.API_QUESTIONS)

    for group in groups:
        for question in group['questions']:
            last_activity = int(question['last_activity_at'])

            if last_activity <= since:
                continue

            html_question = self.__fetch_question(question)

            if not html_question:
                continue

            logger.debug("Fetching HTML question %s", question['id'])

            comments = self.__fetch_comments(question)
            question.update(self.__build_question(html_question, question, comments))

            yield question
def events(self, group, from_date=DEFAULT_DATETIME):
    """Fetch the events pages of a given group.

    :param group: group whose events are fetched
    :param from_date: date used to build the scroll parameter of the
        query; it is converted to UTC

    :returns: a generator of pages

    :raises RepositoryError: when the server answers with a 410
        status code
    """
    scroll = datetime_to_utc(from_date).strftime("since:%Y-%m-%dT%H:%M:%S.000Z")

    # Hack required because the Meetup API does not support list
    # values with the format `?param=value1&param=value2`.
    # It only works with `?param=value1,value2`.
    # Moreover, urllib3 encodes comma characters when values
    # are given using the params dict, which does not work
    # with Meetup either.
    fields_param = self.PFIELDS + '=' + ','.join(self.VEVENT_FIELDS)
    status_param = self.PSTATUS + '=' + ','.join(self.VSTATUS)

    resource = urijoin(group, self.REVENTS) + '?' + fields_param + '&' + status_param

    params = {
        self.PORDER: self.VUPDATED,
        self.PSCROLL: scroll,
        self.PPAGE: self.max_items
    }

    try:
        yield from self._fetch(resource, params)
    except requests.exceptions.HTTPError as error:
        if error.response.status_code != 410:
            raise error

        msg = "Group is no longer accessible: {}".format(error)
        raise RepositoryError(cause=msg)
def fetch(self, from_date=None):
    """Fetch the mbox files from the remote archiver.

    Stores the archive in the path given during the initialization
    of this object, creating the directory when it does not exist.

    Groups.io archives are returned as a .zip file, which contains
    one file in mbox format.

    :param from_date: fetch messages after a given date (included);
        it is sent to the server in ISO format, converted to UTC

    :returns: the value returned by `_download_archive`
    """
    logger.info("Downloading mboxes from '%s'", self.uri)
    logger.debug("Storing mboxes in '%s'", self.dirpath)

    target_dir_exists = os.path.exists(self.dirpath)
    if not target_dir_exists:
        os.makedirs(self.dirpath)

    payload = {'group_id': self.__find_group_id()}
    url = urijoin(GROUPSIO_API_URL, self.DOWNLOAD_ARCHIVES)

    if from_date:
        payload['start_time'] = datetime_to_utc(from_date).isoformat()

    filepath = os.path.join(self.dirpath, MBOX_FILE)

    return self._download_archive(url, payload, filepath)
def test_get_pages_from_allrevisions_from_date(self):
    # Checks the request sent when a starting date is given and that
    # a datetime without timezone is rejected
    HTTPServer.routes()

    expected_body = read_file('data/mediawiki/mediawiki_pages_allrevisions.json')
    client = MediaWikiClient(MEDIAWIKI_SERVER_URL)
    namespaces = ['0']

    from_date = datetime_to_utc(str_to_datetime('2016-01-01 00:00'))

    response = client.get_pages_from_allrevisions(namespaces, from_date)
    last_request = HTTPServer.requests_http[-1]

    self.assertEqual(response, expected_body)
    self.assertEqual(last_request.method, 'GET')
    self.assertRegex(last_request.path, '/api.php')

    # Check request params
    expected_params = {
        'action': ['query'],
        'list': ['allrevisions'],
        'arvnamespace': ['0'],
        'arvdir': ['newer'],
        'arvlimit': ['max'],
        'format': ['json'],
        'arvprop': ['ids'],
        'arvstart': ['2016-01-01T00:00:00Z']
    }
    self.assertDictEqual(last_request.querystring, expected_params)

    # A datetime without timezone must raise an error
    naive_date = datetime.datetime(2016, 1, 1, 0, 0, 0)

    with self.assertRaises(ValueError):
        _ = client.get_pages_from_allrevisions(namespaces, naive_date)
def fetch_items(self, category, **kwargs):
    """Fetch the messages.

    The history of the channel is requested page by page. Every
    message is yielded together with the channel information and,
    when a user can be identified, with the data of that user.

    :param category: the category of items to fetch
    :param kwargs: backend arguments; 'from_date' and 'latest'
        are required

    :returns: a generator of items
    """
    from_date = kwargs['from_date']
    latest = kwargs['latest']

    logger.info("Fetching messages of '%s' channel from %s",
                self.channel, str(from_date))

    channel_info = self.parse_channel_info(self.client.channel_info(self.channel))

    oldest = datetime_to_utc(from_date).timestamp()

    if oldest > 0.0:
        # Slack does not include on its result the lower limit
        # of the search if it has the same date of 'oldest'. To get
        # these messages too, we subtract a low value to be sure
        # the dates are not the same. To avoid precision problems
        # it is subtracted by five decimals and not by six.
        oldest -= 0.00001
    elif oldest == 0.0:
        # Minimum value supported by Slack is 0 not 0.0
        oldest = 0

    nmsgs = 0
    fetching = True

    while fetching:
        raw_history = self.client.history(self.channel,
                                          oldest=oldest, latest=latest)
        messages, fetching = self.parse_history(raw_history)

        for message in messages:
            # Fetch user data
            if 'user' in message:
                user_id = message['user']
            elif 'comment' in message:
                user_id = message['comment']['user']
            else:
                user_id = None

            if user_id:
                message['user_data'] = self.__get_or_fetch_user(user_id)

            message['channel_info'] = channel_info
            yield message

            nmsgs += 1

            # The next page ends where the current one stops
            if fetching:
                latest = float(message['ts'])

    logger.info("Fetch process completed: %s message fetched", nmsgs)
def fetch_items(self, category, **kwargs):
    """Fetch the messages.

    The history of the channel is requested page by page. Every
    message is yielded together with the channel information and,
    when a user can be identified, with the data of that user.
    The number of members is only requested for channels that are
    not archived.

    :param category: the category of items to fetch
    :param kwargs: backend arguments; 'from_date' and 'latest'
        are required

    :returns: a generator of items
    """
    from_date = kwargs['from_date']
    latest = kwargs['latest']

    logger.info("Fetching messages of '%s' channel from %s",
                self.channel, str(from_date))

    channel_info = self.parse_channel_info(self.client.channel_info(self.channel))

    if not channel_info['is_archived']:
        channel_info['num_members'] = self.client.conversation_members(self.channel)
    else:
        channel_info['num_members'] = None
        logger.warning("channel_info.num_members is None for archived channels %s",
                       self.channel)

    oldest = datetime_to_utc(from_date).timestamp()

    nmsgs = 0
    fetching = True

    while fetching:
        raw_history = self.client.history(self.channel,
                                          oldest=oldest, latest=latest)
        messages, fetching = self.parse_history(raw_history)

        for message in messages:
            # Fetch user data
            if 'user' in message:
                user_id = message['user']
            elif 'comment' in message:
                user_id = message['comment']['user']
            else:
                user_id = None

            if user_id:
                message['user_data'] = self.__get_or_fetch_user(user_id)

            message['channel_info'] = channel_info
            yield message

            nmsgs += 1

            # The next page ends where the current one stops
            if fetching:
                latest = float(message['ts'])

    logger.info("Fetch process completed: %s message fetched", nmsgs)
def fetch_items(self, category, **kwargs):
    """Fetch the entries.

    Every parsed entry is turned into a dict keyed by the names of
    the columns in `CSV_HEADER`. The date column is additionally
    parsed and stored, in UTC, under the `DATE_ISO` and `TIMESTAMP`
    keys. Entries whose date cannot be parsed are skipped.

    :param category: the category of items to fetch
    :param kwargs: backend arguments

    :returns: a generator of items
    """
    logger.info("Looking for a meeting table at feed '%s'", self.origin)

    nentries = 0
    nskipped = 0

    columns = CSV_HEADER.split(',')
    entries = self.client.get_entries()

    for item in _parse_entries(entries):
        # Need to pass which columns are IDs to metadata_id static function
        ret = {'_id_columns': ID_COLUMNS}

        for i, column in enumerate(columns):
            raw = item[i]
            value = raw.strip() if isinstance(raw, str) else raw

            # If it's the date column, parse value and add it as 'timestamp' in the item
            if column == DATE_COLUMN:
                try:
                    utc_dt = datetime_to_utc(str_to_datetime(value))
                    ret[DATE_ISO] = utc_dt.isoformat()
                    ret[TIMESTAMP] = utc_dt.timestamp()
                except InvalidDateError:
                    logger.warning("Skipping entry due to wrong date format: '%s'", value)
                    nskipped += 1
                    break

            ret[column.strip()] = value

        # NOTE(review): the literal key is checked here while the value is
        # stored under TIMESTAMP - presumably both are 'timestamp'; confirm.
        if 'timestamp' in ret:
            yield ret
            nentries += 1

    logger.info("Done. %s/%s meeting entries fetched; %s ignored",
                nentries, nentries + nskipped, nskipped)
def fetch(self, from_date=DEFAULT_DATETIME):
    """Fetch the mbox files from the remote archiver.

    This method stores the archives in the path given during the
    initialization of this object.

    HyperKitty archives are accessed month by month and stored
    following the schema year-month. Archives are fetched from the
    given month till the current month.

    :param from_date: fetch archives that store messages equal or
        after the given date; only year and month values are compared

    :returns: a list of tuples, storing the links and paths of the
        fetched archives
    """
    logger.info("Downloading mboxes from '%s' to since %s",
                self.client.base_url, str(from_date))
    logger.debug("Storing mboxes in '%s'", self.dirpath)

    self.client.fetch(self.client.base_url)

    from_date = datetime_to_utc(from_date)

    # The range is extended one month beyond the current date
    to_end = datetime_utcnow()
    to_end += dateutil.relativedelta.relativedelta(months=1)

    months = months_range(from_date, to_end)

    fetched = []

    if not os.path.exists(self.dirpath):
        os.makedirs(self.dirpath)

    tmbox = 0

    for tmbox, period in enumerate(months, start=1):
        start = period[0]
        end = period[1]

        filename = start.strftime("%Y-%m.mbox.gz")
        filepath = os.path.join(self.dirpath, filename)
        url = urijoin(self.client.base_url, 'export', filename)

        params = {
            'start': start.strftime("%Y-%m-%d"),
            'end': end.strftime("%Y-%m-%d")
        }

        if self._download_archive(url, params, filepath):
            fetched.append((url, filepath))

    logger.info("%s/%s MBoxes downloaded", len(fetched), tmbox)

    return fetched
def __get_max_date(self, reviews):
    """Get the max date in unixtime format from reviews.

    :param reviews: iterable of reviews; each one must have a
        'timestamp' entry

    :returns: the greatest timestamp found, or 0 when there is no
        timestamp greater than zero
    """
    timestamps = [
        datetime_to_utc(str_to_datetime(review['timestamp'])).timestamp()
        for review in reviews
    ]

    # The leading zero acts as the floor of the result
    return max([0] + timestamps)
def test_invalid_timezone(self):
    """ Check whether datetime converts to UTC when timezone invalid """

    # Offset of 93600 seconds (26 hours)
    invalid_tz = dateutil.tz.tzoffset(None, 93600)

    date = datetime.datetime(2001, 12, 1, 23, 15, 32, tzinfo=invalid_tz)
    expected = date.replace(tzinfo=dateutil.tz.tzutc())

    utc = datetime_to_utc(date)

    self.assertIsInstance(utc, datetime.datetime)
    self.assertEqual(utc, expected)
def _fetch_gerrit28(self, from_date=DEFAULT_DATETIME):
    """Specific fetch for gerrit 2.8 version.

    Open and closed reviews are obtained with different queries.
    On each step, the review with the greater 'lastUpdated' value
    among the heads of both lists is taken (the open one on ties).
    The process stops when a review is not newer than `from_date`.

    :param from_date: stop as soon as a review whose 'lastUpdated'
        value is not greater than this date (as Unix time) is found

    :returns: a generator of reviews
    """
    # Convert date to Unix time
    from_ut = datetime_to_utc(from_date).timestamp()

    filter_open = "status:open"
    filter_closed = "status:closed"

    last_item_open = self.client.next_retrieve_group_item()
    last_item_closed = self.client.next_retrieve_group_item()

    reviews_open = self._get_reviews(last_item_open, filter_open)
    reviews_closed = self._get_reviews(last_item_closed, filter_closed)

    last_nreviews_open = len(reviews_open)
    last_nreviews_closed = len(reviews_closed)

    while reviews_open or reviews_closed:
        # The open list is used when it is the only one with reviews
        # or when its head is at least as recent as the closed one
        take_open = bool(reviews_open) and (
            not reviews_closed
            or reviews_open[0]['lastUpdated'] >= reviews_closed[0]['lastUpdated']
        )

        if take_open:
            review_open = reviews_open.pop(0)
            review = review_open
        else:
            review_closed = reviews_closed.pop(0)
            review = review_closed

        if review['lastUpdated'] <= from_ut:
            logger.debug("No more updates for %s" % (self.hostname))
            break

        yield review

        # Ask for more reviews when a list is exhausted and its
        # last group was full
        if not reviews_open and last_nreviews_open >= self.max_reviews:
            last_item_open = self.client.next_retrieve_group_item(last_item_open,
                                                                  review_open)
            reviews_open = self._get_reviews(last_item_open, filter_open)
            last_nreviews_open = len(reviews_open)

        if not reviews_closed and last_nreviews_closed >= self.max_reviews:
            last_item_closed = self.client.next_retrieve_group_item(last_item_closed,
                                                                    review_closed)
            reviews_closed = self._get_reviews(last_item_closed, filter_closed)
            last_nreviews_closed = len(reviews_closed)
def fetch(self, from_date=DEFAULT_DATETIME):
    """Fetch the mbox files from the remote archiver.

    Stores the archives in the path given during the initialization
    of this object.

    Pipermail archives usually have on their file names the date of
    the archives stored following the schema year-month. Only the
    mboxes whose year and month are equal or after `from_date` are
    downloaded.

    :param from_date: fetch archives that store messages equal or
        after the given date; only year and month values are compared

    :returns: a list of tuples, storing the links and paths of the
        fetched archives
    """
    logger.info("Downloading mboxes from '%s' to since %s",
                self.url, str(from_date))
    logger.debug("Storing mboxes in '%s'", self.dirpath)

    from_date = datetime_to_utc(from_date)

    response = requests.get(self.url, verify=self.verify)
    response.raise_for_status()

    links = self._parse_archive_links(response.text)

    fetched = []

    if not os.path.exists(self.dirpath):
        os.makedirs(self.dirpath)

    for link in links:
        filename = os.path.basename(link)
        mbox_dt = self._parse_date_from_filepath(filename)

        same_month = (from_date.year == mbox_dt.year
                      and from_date.month == mbox_dt.month)

        if not (same_month or from_date < mbox_dt):
            continue

        filepath = os.path.join(self.dirpath, filename)

        if self._download_archive(link, filepath):
            fetched.append((link, filepath))

    logger.info("%s/%s MBoxes downloaded", len(fetched), len(links))

    return fetched
def init_metadata(self, origin, backend_name, backend_version,
                  category, backend_params):
    """Init metadata information.

    Metadata is composed by basic information needed to identify
    where archived data came from and how it can be retrieved
    and built into Perceval items. It is stored in the metadata
    table together with its creation date and then mirrored on
    the attributes of this object.

    :param origin: identifier of the repository
    :param backend_name: name of the backend
    :param backend_version: version of the backend
    :param category: category of the items fetched
    :param backend_params: dict representation of the fetch parameters

    :raises ArchiveError: when an error occurs initializing the metadata
    """
    created_on = datetime_to_utc(datetime_utcnow())

    metadata = (
        origin,
        backend_name,
        backend_version,
        category,
        pickle.dumps(backend_params, 0),
        created_on.isoformat(),
    )

    insert_stmt = "INSERT INTO " + self.METADATA_TABLE + " " + \
        "(origin, backend_name, backend_version, " + \
        "category, backend_params, created_on) " + \
        "VALUES (?, ?, ?, ?, ?, ?)"

    try:
        cursor = self._db.cursor()
        cursor.execute(insert_stmt, metadata)
        self._db.commit()
        cursor.close()
    except sqlite3.DatabaseError as e:
        msg = "metadata initialization error; cause: %s" % str(e)
        raise ArchiveError(cause=msg)

    # Attributes are only updated once the data has been stored
    self.origin = origin
    self.backend_name = backend_name
    self.backend_version = backend_version
    self.category = category
    self.backend_params = backend_params
    self.created_on = created_on

    logger.debug("Metadata of archive %s initialized to %s",
                 self.archive_path, metadata)
def fetch(self, category=CATEGORY_ISSUE, from_date=DEFAULT_DATETIME,
          to_date=DEFAULT_LAST_DATETIME, filter_classified=False):
    """Fetch the issues/pull requests from the repository.

    Retrieves, from a GitHub repository, the issues/pull requests
    updated since the given date. Empty dates fall back to the default
    range limits and both limits are converted to UTC before they are
    handed to the parent class.

    :param category: the category of items to fetch
    :param from_date: obtain issues/pull requests updated since this date
    :param to_date: obtain issues/pull requests until a specific date (included)
    :param filter_classified: remove classified fields from the resulting items

    :returns: a generator of issues
    """
    self.exclude_user_data = filter_classified

    if self.exclude_user_data:
        logger.info("Excluding user data. Personal user information won't be collected from the API.")

    since = datetime_to_utc(from_date or DEFAULT_DATETIME)
    until = datetime_to_utc(to_date or DEFAULT_LAST_DATETIME)

    # Call fetch() of the parent Backend class
    return super().fetch(category,
                         filter_classified=filter_classified,
                         from_date=since,
                         to_date=until)
def archived_after(self, value):
    """Set the date after which items were archived.

    :param value: `None`, a datetime (converted to UTC) or a string
        with a date (parsed with `str_to_datetime`)

    :raises ValueError: when the string is not a valid date or when
        the type of the value is not supported
    """
    if value is None:
        self._archived_after = None
    elif isinstance(value, datetime.datetime):
        self._archived_after = datetime_to_utc(value)
    elif isinstance(value, str):
        try:
            self._archived_after = str_to_datetime(value)
        except InvalidDateError as e:
            # Chain explicitly so the parsing error is kept as the cause
            raise ValueError("'archived_after' is invalid; %s" % str(e)) from e
    else:
        raise ValueError("'archived_after' must be either a str or a datetime; %s given"
                         % str(type(value)))
def test_init_metadata(self):
    """Test whether metadata information is properly initialized"""

    archive_path = os.path.join(self.test_path, 'myarchive')
    archive = Archive.create(archive_path)

    # The creation date must fall between these two marks
    before_dt = datetime_to_utc(datetime_utcnow())
    params = {'from_date': before_dt}
    archive.init_metadata('marvel.com', 'marvel-comics-backend', '0.1.0',
                          'issue', params)
    after_dt = datetime_to_utc(datetime_utcnow())

    archive_copy = Archive(archive_path)

    # Both copies should have the same parameters
    for arch in (archive, archive_copy):
        self.assertEqual(arch.origin, 'marvel.com')
        self.assertEqual(arch.backend_name, 'marvel-comics-backend')
        self.assertEqual(arch.backend_version, '0.1.0')
        self.assertEqual(arch.category, 'issue')
        self.assertGreaterEqual(arch.created_on, before_dt)
        self.assertLessEqual(arch.created_on, after_dt)
        self.assertDictEqual(arch.backend_params, {'from_date': before_dt})
def fetch(self, category=CATEGORY_FUNCTEST, from_date=DEFAULT_DATETIME, to_date=None):
    """Fetch tests data from the server.

    Fetches tests data from a server that were updated since the
    given date. An empty `from_date` is replaced by `DEFAULT_DATETIME`
    and an empty `to_date` by the current date; any other value is
    converted to UTC.

    :param category: the category of items to fetch
    :param from_date: obtain data updated since this date
    :param to_date: obtain data updated before this date

    :returns: a generator of items
    """
    if from_date:
        from_date = datetime_to_utc(from_date)
    else:
        from_date = DEFAULT_DATETIME

    if to_date:
        to_date = datetime_to_utc(to_date)
    else:
        to_date = datetime_utcnow()

    return super().fetch(category, from_date=from_date, to_date=to_date)