def _get_build_url(self, datestamp, regex, what):
    url = "http://ftp.mozilla.org/pub/mozilla.org/" + \
        self.build_base_repo_name + "/nightly/"
    year = str(datestamp.year)
    month = "%02d" % datestamp.month
    day = "%02d" % datestamp.day
    inbound_branch = self.get_inbound_branch(datestamp)
    url += year + "/" + month + "/"
    link_regex = '^' + year + '-' + month + '-' + day + '-' \
        + r'[\d-]+' + inbound_branch + '/$'

    cachekey = year + '-' + month
    if cachekey in self._monthlinks:
        monthlinks = self._monthlinks[cachekey]
    else:
        monthlinks = url_links(url)
        self._monthlinks[cachekey] = monthlinks

    # first parse monthly list to get correct directory
    matches = []
    for dirlink in monthlinks:
        if re.match(link_regex, dirlink):
            # now parse the page for the correct build url
            for link in url_links(url + dirlink, regex=regex):
                matches.append(url + dirlink + link)
    if not matches:
        # report the regex actually used for matching, not an unrelated one
        print "Tried to get %s from %s that match '%s' but didn't find any." % \
            (what, url, regex)
        return None
    else:
        return sorted(matches)[-1]  # the most recent build url
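
# A minimal illustration of why sorted(matches)[-1] above is the most
# recent build: the matched directory names are date-stamped with
# fixed-width fields, so lexicographic order equals chronological order.
# The names below are hypothetical, for demonstration only.
names = ["2014-01-01-03-02-02-mozilla-central/",
         "2014-01-02-03-02-04-mozilla-central/"]
assert sorted(names)[-1].startswith("2014-01-02")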
def test_url_with_links_regex(self, get):
    get.return_value = Mock(text="""
    <body>
    <a href="thing/">thing</a>
    <a href="thing2/">thing2</a>
    </body>
    """)
    self.assertEquals(utils.url_links('', regex="thing2.*"),
                      ['thing2/'])
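
# All of these snippets rely on a url_links(url, regex=None) helper that
# scrapes the href values from a directory-listing page. Below is a
# minimal sketch of such a helper, consistent with the tests here; it is
# an assumption for illustration, not the project's actual implementation,
# which may parse the HTML more robustly.
import re
import requests

def url_links(url, regex=None):
    # Fetch the listing page and pull out every href value.
    response = requests.get(url)
    hrefs = re.findall(r'href="([^"]+)"', response.text)
    if regex is not None:
        # Keep only the links matching the regex, as the tests expect.
        matcher = re.compile(regex)
        hrefs = [h for h in hrefs if matcher.match(h)]
    return hrefs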
def _get_valid_builds(self, build_url, timestamp, raw_revisions):
    builds = []
    for link in url_links(build_url, regex=r"^.+\.txt$"):
        url = "%s/%s" % (build_url, link)
        response = requests.get(url)
        remote_revision = None
        for line in response.iter_lines():
            # Filter out Keep-Alive new lines.
            if not line:
                continue
            parts = line.split("/rev/")
            if len(parts) == 2:
                remote_revision = parts[1]
                break  # for line

        if remote_revision:
            for revision in raw_revisions:
                if remote_revision in revision:
                    builds.append({"revision": revision,
                                   "timestamp": timestamp})
    return builds
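
# What the "/rev/" split above extracts: each build's .txt info file
# contains a line holding the repository URL of the changeset the build
# was made from. The line below is a hypothetical example with made-up
# values, shown only to demonstrate the parsing.
line = "20140101030202 https://hg.mozilla.org/mozilla-central/rev/abcdef123456"
parts = line.split("/rev/")
assert parts[1] == "abcdef123456"  # the remote changeset hash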
def _get_valid_build(self, build_url, timestamp, raw_revisions):
    for link in url_links(build_url, regex=r'^.+\.txt$'):
        url = "%s/%s" % (build_url, link)
        response = requests.get(url)
        remote_revision = None
        for line in response.iter_lines():
            # Filter out Keep-Alive new lines.
            if not line:
                continue
            parts = line.split('/rev/')
            if len(parts) == 2:
                remote_revision = parts[1]
                break  # for line

        if remote_revision:
            for revision in raw_revisions:
                if remote_revision in revision:
                    return {
                        'revision': revision,
                        'timestamp': timestamp,
                    }
    return False
def find_build_info(self, url, read_txt_content=False):
    """
    Retrieve information from a build folder url.

    Returns a dict with keys build_url and build_txt_url when a build
    file and a build info file, respectively, are found at the url.

    If read_txt_content is True, the dict is updated with data found
    by calling :meth:`find_build_info_txt`.
    """
    data = {}
    if not url.endswith('/'):
        url += '/'
    for link in url_links(url):
        if 'build_url' not in data and self.build_regex.match(link):
            data['build_url'] = url + link
        elif 'build_txt_url' not in data \
                and self.build_info_regex.match(link):
            data['build_txt_url'] = url + link
    if read_txt_content and 'build_txt_url' in data:
        data.update(self.find_build_info_txt(data['build_txt_url']))
    return data
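
# A hypothetical usage sketch: `info_fetcher` stands in for whatever
# object provides find_build_info, with build_regex and build_info_regex
# already configured; the folder URL is illustrative only.
data = info_fetcher.find_build_info(
    "http://ftp.mozilla.org/pub/mozilla.org/firefox/nightly/"
    "2014/01/2014-01-01-03-02-02-mozilla-central",  # '/' appended if missing
    read_txt_content=True)
# data now maps 'build_url' / 'build_txt_url' to the matched links, plus
# whatever find_build_info_txt extracted when read_txt_content was True.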
def _get_month_links(self, url):
    # Cache each month page's listing; the lock makes the cache safe to
    # share between concurrent callers.
    with self._lock:
        if url not in self._cache_months:
            self._cache_months[url] = url_links(url)
        return self._cache_months[url]
def test_url_no_links(self, get):
    get.return_value = Mock(text='')
    self.assertEquals(utils.url_links(''), [])
def _extract_paths(self):
    paths = filter(
        lambda l: l.isdigit(),
        map(lambda l: l.strip('/'),
            url_links(self.build_base_url)))
    return [(p, int(p)) for p in paths]
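
# Illustrative only: given directory links like the hypothetical ones
# below, _extract_paths keeps the all-digit names (build timestamps) and
# pairs each with its integer value so callers can order them numerically.
links = ['1420000000/', 'latest/', '1420003600/']
paths = [(p, int(p)) for p in (l.strip('/') for l in links) if p.isdigit()]
assert paths == [('1420000000', 1420000000), ('1420003600', 1420003600)]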
def _extract_paths(self):
    paths = filter(
        lambda l: l.isdigit(),
        map(lambda l: l.strip('/'),
            url_links(self.fetch_config.inbound_base_url())))
    return [(p, int(p)) for p in paths]
def get_build_url(self, timestamp):
    base_url = "%s%s/" % (self.build_finder.build_base_url, timestamp)
    matches = [base_url + url
               for url in url_links(base_url, regex=self.build_regex)]
    matches.sort()
    return matches[-1]  # the most recent build url