Example #1
import time

from bs4 import BeautifulSoup


def dl(self):
    """
    Downloads the highest quality picture available.

    Returns False if something goes wrong.
    """
    if self.orig_url == "":
        # No original-size url is known: prefer the HQ url and fall
        # back to the LQ url if that is empty too.
        url = self.hq_url if self.hq_url != "" else self.lq_url
        down = Download(url, self.config.get_image_folder())
        if down.perform():
            return True
    else:
        # The original-size file sits behind an intermediate page;
        # the anchor labelled "this link" points at the real download.
        down = Download(self.orig_url, as_var=True)
        if down.perform():
            result = down.get_result()
            soup = BeautifulSoup(result.getvalue(), "html.parser")
            download_link = soup.find("a", text="this link")
            orig_url = self.dl_url_base + download_link["href"]
            time.sleep(120)  # wait two minutes before the real download
            down = Download(orig_url, self.config.get_image_folder())
            if down.perform():
                self.file_name = down.get_output_name()
                return True
    return False
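All of these examples call into the same project-specific Download helper, whose implementation is not part of this listing. The sketch below is a hypothetical reconstruction of its interface, inferred purely from the calls in the examples (a target folder or as_var=True, an optional post_dict, perform(), get_result(), get_output_name()); it is written for Python 3 and is not the project's actual code.

# A minimal, hypothetical stand-in for the Download helper, inferred
# from how the examples use it.
import io
import os
import urllib.parse
import urllib.request


class Download(object):
    def __init__(self, url, target_folder=None, as_var=False,
                 post_dict=None):
        self.url = url
        self.target_folder = target_folder
        self.as_var = as_var          # keep the body in memory
        self.post_dict = post_dict    # optional form data to POST
        self.result = None
        self.output_name = None

    def perform(self):
        """Fetch the url; True on success, False on any error."""
        try:
            data = None
            if self.post_dict:
                data = urllib.parse.urlencode(self.post_dict).encode()
            body = urllib.request.urlopen(self.url, data=data).read()
        except Exception:
            return False
        if self.as_var:
            self.result = io.BytesIO(body)
        else:
            self.output_name = os.path.join(
                self.target_folder, self.url.rsplit("/", 1)[-1])
            with open(self.output_name, "wb") as f:
                f.write(body)
        return True

    def get_result(self):
        return self.result

    def get_output_name(self):
        return self.output_name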
Example #2
import json


def file_exists(self, file_path):
    # Ask the Wikimedia Commons API whether a file with the same SHA-1
    # hash has already been uploaded.
    hash_local = self.hash_file(file_path)
    download = Download(
        ("https://commons.wikimedia.org/w/api.php?action=query&list"
         "=allimages&format=json&aisha1=") + hash_local, as_var=True)
    if download.perform():
        content = download.get_result().getvalue()
        json_data = json.loads(content)
        return len(json_data["query"]["allimages"]) > 0
    # Report a failed request as "not found".
    return False
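For context, the aisha1 parameter of the Commons list=allimages query selects files by their SHA-1 hash, which is what lets a local hash act as a duplicate check. Below is a self-contained sketch of the same check without the Download wrapper; only the endpoint and parameter come from the example, the function and its name are illustrative.

import hashlib
import json
import urllib.request


def commons_has_file(file_path):
    # SHA-1 of the local file; this is the value aisha1 matches against.
    sha1 = hashlib.sha1()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            sha1.update(chunk)
    url = ("https://commons.wikimedia.org/w/api.php?action=query&list"
           "=allimages&format=json&aisha1=") + sha1.hexdigest()
    with urllib.request.urlopen(url) as response:
        json_data = json.load(response)
    return len(json_data["query"]["allimages"]) > 0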
Example #3
from lxml import etree, html


def parse_web(self):
    down = Download(self.url, as_var=True, post_dict=self.post_dict)
    found_start = False
    can_add = False
    if down.perform():
        web_string_etree = etree.fromstring(down.get_result().getvalue())
        # The form answers with a page whose inline script holds the
        # redirect to the result page; keep the last script's text.
        for element in web_string_etree.iter("script"):
            redirect_url = element.text
        redirect_url_array = redirect_url.split("\"")
        down = Download(self.base_url + redirect_url_array[1], as_var=True)
        if down.perform():
            string_etree = html.fromstring(down.get_result().getvalue())
            table = string_etree.xpath("//table[@id='QueryResults']")
            for element in table[0].iter("tr"):
                list_of_elements = list(element.iter("td"))
                # Cells up to index [7] are read below, so a row needs
                # at least eight of them.
                if len(list_of_elements) > 7:
                    a = list(list_of_elements[0].iter("a"))
                    # Resume logic: skip rows until the image named by
                    # new_start shows up, unless no_need says to take
                    # every row.
                    if found_start or self.no_need:
                        can_add = True
                    if self.new_start:
                        if self.new_start == a[0].text and not found_start:
                            found_start = True
                    if can_add:
                        self.db.insert_image(a[0].attrib["href"],
                                             a[0].text,
                                             self.parse_date(
                                                 list_of_elements[1].text),
                                             list_of_elements[2].text,
                                             list_of_elements[3].text,
                                             list_of_elements[4].text,
                                             list_of_elements[5].text,
                                             list_of_elements[6].text,
                                             list_of_elements[7].text,
                                             self.mission_id,
                                             False, False)
                        self.db.update_mission_image_id(
                            self.mission_id, a[0].text)
            # Reset the marker once the whole table has been walked.
            self.db.update_mission_image_id(self.mission_id, str(0))
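The found_start/can_add flags implement a resume: rows are skipped until the image id remembered in new_start appears, and only the rows after it are inserted (or all rows, when no_need is set). Distilled to plain lists, the pattern looks like this (rows_to_add is an illustrative name, not project code):

def rows_to_add(rows, new_start=None, no_need=False):
    # rows: image ids in page order; returns the ones still to insert.
    found_start = False
    can_add = False
    out = []
    for row in rows:
        if found_start or no_need:
            can_add = True
        if new_start and new_start == row and not found_start:
            found_start = True
        if can_add:
            out.append(row)
    return out


# The row matching new_start itself is NOT re-added; insertion starts
# with the row after it:
print(rows_to_add(["a", "b", "c", "d"], new_start="b"))  # ['c', 'd']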
Example #4
import re

from bs4 import BeautifulSoup


def find_urls(self):
    """
    Finds the download URLs for the different qualities and saves them.
    """
    down = Download(self.url, as_var=True)
    if down.perform():
        result = down.get_result()
        soup = BeautifulSoup(result.getvalue(), "html.parser")
        download_links = soup.find_all("a", {"class": "DownloadLink"})
        if download_links:
            self.lq_url = download_links[0]["href"]
            self.hq_url = download_links[1]["href"]
        # The original-size url is hidden in an inline script after the
        # "Other options available:" text; pull it out with a regex.
        raw_link = soup.find(
            text="Other options available:").find_next("script").text
        m = re.search(r"href=..(.*\.\b[a-zA-Z0-9]+\b)", raw_link)
        if m:
            self.orig_url = self.url_base + "/" + m.group(1)
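The regular expression pulls a relative path ending in a file extension out of the inline JavaScript. A quick illustration with made-up script text (only the pattern itself comes from the example):

import re

# Hypothetical inline script of the shape find_urls() scrapes.
raw_link = 'window.location.href="/images/orig/AS11-40-5903.jpg";'
m = re.search(r"href=..(.*\.\b[a-zA-Z0-9]+\b)", raw_link)
print(m.group(1))  # images/orig/AS11-40-5903.jpg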
Example #5
from bs4 import BeautifulSoup


def parse_web(self):
    down = Download(self.url, as_var=True)
    if down.perform():
        result = down.get_result()
        soup = BeautifulSoup(result.getvalue(), "html.parser")
        # The mission table has no id; locate it via the heading text
        # that precedes it.
        mission_table = soup.find(
            text="Missions used in the Database").find_next("table")
        mission_params = mission_table.find("tbody").find_all("tr")
        for m in mission_params:
            mission_as_list = list(m.children)
            # Skip rows that do not carry a full set of mission fields.
            if len(mission_as_list) > 5:
                self.db.insert_mission(mission_as_list[0].text,
                                       mission_as_list[1].text,
                                       mission_as_list[2].text,
                                       self.parse_date(
                                           mission_as_list[3].text),
                                       self.parse_date(
                                           mission_as_list[4].text),
                                       mission_as_list[5].text)
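The find(text=...).find_next("table") idiom used here jumps from a text node to the next tag of a given type, which is handy when the table itself has no id to select on. A tiny, self-contained demonstration with made-up HTML:

from bs4 import BeautifulSoup

html_doc = """
<p>Missions used in the Database</p>
<table><tbody><tr><td>Apollo 11</td></tr></tbody></table>
"""
soup = BeautifulSoup(html_doc, "html.parser")
table = soup.find(text="Missions used in the Database").find_next("table")
print(table.find("tbody").find_all("tr")[0].td.text)  # Apollo 11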
Example #6
import urllib  # Python 2; on Python 3 use urllib.parse.quote instead


def find_online_category(self, term):
    result = None
    down = Download(self.base_api + urllib.quote(term), as_var=True)
    if down.perform():
        result = down.get_result()
    # The response buffer on success, None on failure.
    return result
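Callers have to handle both outcomes, since the method hands back either the buffer from get_result() or the initial None. A hypothetical usage (the instance name and the printed slice are assumptions, not part of the example):

result = categories.find_online_category("Apollo 11")
if result is not None:
    print(result.getvalue()[:200])  # raw response bytes
else:
    print("lookup failed")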