def _get_session_url(base_url):
    """Open a new DQM GUI session and return its absolute session URL."""
    response = cernrequests.get(base_url)
    page = response.content.decode("utf-8")
    # The landing page embeds the session path in a single-quoted string,
    # e.g. session/AbCdEf'; grab it and drop the trailing quote(s).
    session_path = re.search(r"session\/.*'", page).group()
    session_path = session_path.replace("'", "")
    return "{}{}".format(base_url, session_path)
def test_dqmgui():
    """Fetch a plot from the DQM GUI jsonfairy endpoint.

    The DQM GUI needs Grid User Certificates but no SSO cookies.
    """
    url = "https://cmsweb.cern.ch/dqm/offline/jsonfairy/archive/321012/StreamExpress/Run2018D-Express-v1/DQMIO/"
    response = cernrequests.get(url)
    expected = json.loads('{"hist": "unsupported type"}')
    assert expected == response.json()
def test_wbm():
    """Query the CMS WBM RunSummary servlet (needs the CERN Root CA)."""
    url = "https://cmswbm.cern.ch/cmsdb/servlet/RunSummary?RUN=211831&FORMAT=XML"
    sso_cookies = get_sso_cookies(url, verify=False)
    body = cernrequests.get(url, cookies=sso_cookies, verify=False).text
    assert "<nLumiSections>160</nLumiSections>" in body
def _get_oms_resource_authenticated(relative_url, cookies=None):
    """GET ``relative_url`` from the alternative OMS API with SSO auth.

    When *cookies* is None, SSO cookies are fetched first using the
    configured certificate tuple; TLS verification is disabled throughout.
    """
    resource_url = "{}{}".format(OMS_ALTERNATIVE_API_URL, relative_url)
    if cookies is None:
        print("Getting SSO Cookies for {}...".format(resource_url))
        print("Looking for certificates in {}".format(CERT_TUPLE))
        cookies = cernrequests.get_sso_cookies(
            resource_url, CERT_TUPLE, verify=False
        )
        print("The cookies are {}".format(cookies))
    return cernrequests.get(resource_url, cookies=cookies, verify=False)
def test_cernrequests():
    """Plain HTTPS GET with no client certificate against a public endpoint."""
    url = "https://jsonplaceholder.typicode.com/todos/1"
    response = cernrequests.get(url, cert=None, verify=True)
    expected = json.loads("""{ "userId": 1, "id": 1, "title": "delectus aut autem", "completed": false }""")
    assert expected == response.json()
def get_lumisections(self, run_number, reconstruction_type):
    """Return the lumisection list for a run from the DQM GUI session.

    Results are memoized in ``self.cache.lumis``.

    :param run_number: run number to look up
    :param reconstruction_type: reconstruction type used to pick the dataset
    :return: list of lumisections for the run
    """
    import ast

    if run_number in self.cache.lumis:
        return self.cache.lumis[run_number]
    dataset = self.get_dataset(run_number, reconstruction_type)
    dataset_encoded = dataset.replace("/", "%2F")
    # The GUI is stateful: the first request primes the session state,
    # the second selects the dataset/run and returns the payload.
    third_step = (
        "/state?auto=300000;latency=3685;call=852;server=778.517332;render=12"
    )
    fourth_step = "/select?type=offline_data;dataset={};runnr={};importversion=1".format(
        dataset_encoded, run_number)
    cernrequests.get("{}{}".format(self.session_url, third_step))
    response = cernrequests.get("{}{}".format(self.session_url, fourth_step))
    # Security fix: the response is a Python-literal payload, so use
    # ast.literal_eval instead of eval() — it parses the same literals
    # but cannot execute arbitrary code from the remote response.
    lumis = ast.literal_eval(response.text)[1]["lumi"]
    self.cache.lumis[run_number] = lumis
    return lumis
def get_available_datasets(self, run_number):
    """ Retrieving a list of all available datasets for a specific run
    in a very painful and slow way

    First sets the session to choose samples by doing:
    '/chooseSample?vary=run;order=dataset'
    then sets the session to click on the "any" checkbox and use the
    given run number by doing:
    "/modify?vary=any;pat=321012"

    Results are memoized in ``self.cache.datasets``.

    :param run_number: Run number
    :raises RunDoesNotExist: when the GUI response contains no datasets
    :return: list of datasets
    """
    import ast

    run_number = str(run_number)
    if run_number in self.cache.datasets:
        return self.cache.datasets[run_number]
    first_step = "/chooseSample?vary=run;order=dataset"
    second_step = "/modify?vary=any;pat={}".format(run_number)
    cernrequests.get("{}{}".format(self.session_url, first_step))
    response = cernrequests.get("{}{}".format(self.session_url, second_step))
    # Security fix: ast.literal_eval parses the Python-literal response
    # without the code-execution risk of eval() on remote data.
    json_response = ast.literal_eval(response.text)
    try:
        items = json_response[1]["items"][0]["items"]
    # Also catch KeyError so a malformed/empty response raises the
    # domain exception instead of leaking a bare KeyError to callers.
    except (IndexError, KeyError):
        raise RunDoesNotExist(
            "Unable to find datasets for run '{}'".format(run_number))
    datasets = [item["dataset"] for item in items]
    self.cache.update_datasets(run_number, datasets)
    return datasets
def _get_resource(servlet, parameters, cookies=None):
    """Fetch a WBM servlet and return the XML response parsed into a dict.

    :param servlet: servlet name appended to ``WBM_URL``
    :param parameters: dict of query parameters; ``FORMAT`` defaults to "XML"
    :param cookies: optional SSO cookies; fetched automatically when falsy
    :return: response parsed by ``xmltodict.parse``
    """
    # Bug fix: operate on a copy so the caller's dict is never mutated
    # (the original wrote FORMAT back into the passed-in parameters).
    query_parameters = dict(parameters)
    if "FORMAT" not in query_parameters:
        query_parameters["FORMAT"] = "XML"
    params = "&".join(
        "{}={}".format(key, value) for key, value in query_parameters.items()
    )
    url = "{base}{servlet}?{params}".format(base=WBM_URL,
                                            servlet=servlet,
                                            params=params)
    if not cookies:
        cookies = get_sso_cookies(url)
    response = cernrequests.get(url, cookies=cookies)
    return xmltodict.parse(response.content)
def get_json(run_number, dataset, resource, service):
    """Fetch a DQM GUI plot as JSON.

    :param run_number: run number
    :param dataset: full dataset name
    :param resource: path of the plot
    :param service: "online" or "offline"
    :raises JSONNotFound: when the GUI reports an unsupported plot type
    :return: decoded JSON response
    """
    url = construct_url(run_number, dataset, resource, service)
    payload = cernrequests.get(url).json()
    # Guard clause: a valid payload is returned immediately.
    if payload["hist"] != "unsupported type":
        return payload
    plot_name = re.search(r"\w+$", resource).group(0)
    raise JSONNotFound("Unable to find plot '{}' for run '{}'".format(
        plot_name, run_number))
def test_invalid_certificate():
    """A bogus client-certificate pair must raise an IOError."""
    bogus_certs = ("bla", "blub")
    target = "https://jsonplaceholder.typicode.com/todos/1"
    with pytest.raises(IOError):
        cernrequests.get(target, cert=bogus_certs)
def _get_oms_resource_authenticated(relative_url, cookies=None):
    """GET an OMS API resource, obtaining SSO cookies on demand."""
    target = "{}{}".format(OMS_ALTERNATIVE_API_URL, relative_url)
    if cookies is None:
        print("Getting SSO Cookie for {}...".format(target))
        cookies = cernrequests.get_sso_cookies(target)
    return cernrequests.get(target, cookies=cookies)
def getruns_afs(year="all", rdirs='False'):
    """Scrape the DQM offline ROOT file index for ZeroBias run directories.

    Walks the per-year run-directory indexes and records which directories
    contain DQM root files and which are empty.

    :param year: "all" (default) to scan 2018/2017/2016, or one of
        "2018"/"2017"/"2016" to restrict the scan.  (Bug fix: this
        argument was previously accepted but completely ignored.)
    :param rdirs: when 'True' (or a real True) also return the directory
        lists alongside the run numbers
    :return: list of run numbers with root files; or, when *rdirs* is
        enabled, the 3-tuple (runs_with_rootfiles, rundirs_with_rootfiles,
        rundirs_without_rootfiles)
    """
    baseurl = 'https://cmsweb.cern.ch/dqm/offline/data/browse/ROOT/OfflineData/'
    urls_by_year = {
        "2018": "Run2018/ZeroBias/",
        "2017": "Run2017/ZeroBias/",
        "2016": "Run2016/ZeroBias/",
        # "2015": "Run2015/ZeroBias/",  # kept disabled as in the original
    }
    if str(year) in urls_by_year:
        url_runs = [urls_by_year[str(year)]]
    else:
        # Unknown value (including the default "all") scans every year,
        # preserving the original 2018 -> 2017 -> 2016 order.
        url_runs = list(urls_by_year.values())

    def _index_entries(page_url, pattern):
        # Fetch one directory-index page and return every string in its
        # body matching `pattern` (a regex; "." matches any character).
        response = cernrequests.get(page_url)
        soup = BeautifulSoup(str(response.text), 'html.parser')
        return soup.body.find_all(string=re.compile(pattern))

    print("Fetching from: " + baseurl)
    print("list of relative urls: ", url_runs)
    global_rundir_with_rootfiles = []
    global_runs_with_rootfiles = []
    global_rundir_without_rootfiles = []
    for url in url_runs:
        # Run directories are named like "0003210xx/": match on "000.".
        run_dirs = _index_entries(baseurl + url, "000.")
        rundir_with_rootfiles = []
        runs_with_rootfiles = []
        rundir_without_rootfiles = []
        for rundir in run_dirs:
            entries = _index_entries(baseurl + url + rundir, "DQM.")
            if len(entries) == 0:
                rundir_without_rootfiles.append(str(rundir))
                global_rundir_without_rootfiles.append(str(rundir))
            else:
                # Keep only the significant digits of each run number
                # (strip the "R000" prefix) and store them as integers.
                for digits in re.findall(r"R(\d+)", str(entries)):
                    run = int(digits)
                    runs_with_rootfiles.append(run)
                    global_runs_with_rootfiles.append(run)
                rundir_with_rootfiles.append(str(rundir))
                global_rundir_with_rootfiles.append(str(rundir))
        print("===========================================================")
        print("For", url)
        print("Out of a total of", len(run_dirs), "run directories:\n",
              len(rundir_with_rootfiles), "have root files\n",
              len(rundir_without_rootfiles), "are empty")
        print("There are", len(runs_with_rootfiles), "runs with root files")
        print("\n")
    print("===========================================================")
    print("For GLOBAL")
    print("Out of all run directories:\n", len(global_rundir_with_rootfiles),
          "have root files\n", len(global_rundir_without_rootfiles),
          "are empty")
    print("There are", len(global_runs_with_rootfiles),
          "runs with root files")
    print("\n")
    # Accept the historical string flag as well as a real boolean.
    if rdirs == 'True' or rdirs is True:
        return (global_runs_with_rootfiles, global_rundir_with_rootfiles,
                global_rundir_without_rootfiles)
    return global_runs_with_rootfiles