Пример #1
0
 def __init__(self, link, name, template_data):
     """Remember the download target and install a browser-like URL opener.

     Args:
         link: URL this handler will work with.
         name: Name associated with the download.
         template_data: Data later handed to the ``Save`` instance.
     """
     self.logger = logging.getLogger(__name__)
     self.link = link
     self.name = name
     self.template_data = template_data

     # install_opener() is process-wide, so later urllib.request calls
     # send this User-Agent header as well.
     user_agent_header = (
         'User-Agent',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
     )
     url_opener = urllib.request.build_opener()
     url_opener.addheaders = [user_agent_header]
     urllib.request.install_opener(url_opener)

     # Configuration is loaded before the Save helper is created, as before.
     self.load_config()
     self.saveDir = Save()
     # Destination path; stays empty until a download computes it.
     self.direct = ""
Пример #2
0
def main(parser):
    """Set up the shared save/database objects and feed subreddits.

    ``parser.subreddit`` is treated as a path to a list file when it contains
    ``'.txt'`` (one entry per line); otherwise it is passed to ``feeder``
    as-is. The whole pass is repeated ``parser.cycles`` times, sleeping
    ``parser.wait`` seconds after each pass when more than one cycle was
    requested.
    """
    global save, db

    target = parser.subreddit
    # A target containing '.txt' is a list file, anything else a single name.
    list_file = target if target and '.txt' in target else None
    single_target = target if target and list_file is None else None

    # output template
    save = Save(parser.base_dir, parser.template)
    logger.debug('Output template set to {}'.format(save))

    # initialise database
    db = DBInterface(parser.db_location)

    if not target:
        return

    completed_cycles = 0
    while completed_cycles < parser.cycles:
        if list_file is None:
            feeder(single_target, parser)
        else:
            with open(list_file) as handle:
                for raw_line in handle:
                    feeder("{}".format(raw_line.strip()), parser)
        if parser.cycles > 1:
            logger.info("Waiting {} seconds".format(parser.wait))
            time.sleep(parser.wait)
        completed_cycles += 1
Пример #3
0
class Common:
    """Downloader for links that point directly at a media file.

    ``valid_url`` matches URLs that end in one of the listed media
    extensions (captured in the ``ext`` group) as well as
    ``i.reddituploads.com`` links.
    """

    valid_url = r'((.)+\.(?P<ext>jpg|png|gif|jpeg|bmp|tiff|webp|mp4|mov|mpeg|3gp|mp3|flac|ogg))|(https?://i.reddituploads.com/(.)+)'

    # Browser-like User-Agent sent with every request made by this handler.
    _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

    def __init__(self, link, name, template_data):
        """Remember the download target and install the shared URL opener.

        Args:
            link: URL of the media to download.
            name: Name associated with the download.
            template_data: Mutable mapping handed to ``Save.get_dir``;
                ``save()`` stores the detected extension in it under "ext".
        """
        self.logger = logging.getLogger(__name__)
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent', self._USER_AGENT)]
        # install_opener() is process-wide: urlretrieve/urlopen calls made
        # afterwards send these headers too.
        urllib.request.install_opener(opener)
        self.link = link
        self.name = name
        self.template_data = template_data
        self.load_config()
        self.saveDir = Save()
        self.direct = ""

    def load_config(self):
        """Read the retry count and wait time from the project configuration.

        Sets ``self.retries`` and ``self.wait_time`` as integers. When either
        configured value cannot be converted to an integer, a warning is
        logged and the defaults (5 retries, 60 seconds) are used instead.
        """
        data = Parser().config

        try:
            media_config = data["media_download"]
            # Store the converted values so later comparisons and sleeps
            # always operate on integers, even if the config holds strings.
            self.retries = int(media_config["retries"])
            self.wait_time = int(media_config["wait_time"])
        except (TypeError, ValueError):
            # int() raises ValueError for non-numeric strings and TypeError
            # for unsupported types such as None.
            self.logger.warning(
                "TypeError: Media download retries or wait time is not an integer."
            )
            self.retries = 5
            self.wait_time = 60

    def save(self):
        """Work out the file extension, build the target path and download.

        Returns:
            True when the file was downloaded, False when the link has no
            recognisable extension or every download attempt failed.
        """
        if '.gifv' in self.link:
            ext = 'mp4'
            # Only rewrite the extension, not any other "gifv" in the URL.
            self.link = self.link.replace('.gifv', '.mp4')
        elif 'i.reddituploads.com' in self.link:
            ext = 'jpeg'
        else:
            match = re.search(self.valid_url, self.link)
            ext = match.group('ext') if match else None
            if ext is None:
                self.logger.error(
                    "No known media extension in {}".format(self.link))
                return False
        self.template_data["ext"] = ext
        self.direct = self.saveDir.get_dir(self.template_data)
        self.logger.debug("Saving {} with extension {}".format(self.link, ext))

        return self.save_image()

    def save_image(self, current_retry=1):
        """Download ``self.link`` to ``self.direct``, retrying on failure.

        Args:
            current_retry: 1-based number of the first attempt; attempts
                continue while ``self.retries`` is greater than the current
                attempt number.

        Returns:
            True as soon as one attempt succeeds, False once the attempts
            are exhausted.
        """
        attempt = current_retry
        while True:
            try:
                urlretrieve(self.link, self.direct)
                return True
            except (URLError, RemoteDisconnected, ConnectionResetError) as e:
                # Expected network hiccups are only worth a warning on retry.
                error, log_retry = e, self.logger.warning
            except Exception as e:
                error, log_retry = e, self.logger.error

            if self.retries <= attempt:
                self.logger.error("{}, failed {}".format(
                    str(error), self.link))
                return False

            log_retry("{}, retrying {}".format(str(error), self.link))
            time.sleep(self.wait_time)
            attempt += 1

    def get_html(self, headers_param=None):
        """Fetch ``self.link`` and parse the response with ``soup``.

        Args:
            headers_param: Optional mapping of extra request headers; entries
                override the default User-Agent header.

        Returns:
            The object returned by ``soup(..., "lxml")`` (presumably a
            BeautifulSoup document - confirm against the file's imports), or
            None when the request fails with an HTTP or URL error.
        """
        headers = {'User-Agent': self._USER_AGENT}
        if headers_param:
            headers.update(headers_param)
        req = Request(
            self.link,
            data=None,
            headers=headers,
        )
        try:
            # Close the response once its body has been read.
            with urlopen(req) as response:
                raw_html = response.read()
            page_html = soup(raw_html, "lxml")
        except (HTTPError, URLError) as e:
            page_html = None
            self.logger.error('{} - Link {}'.format(str(e), self.link))
        return page_html

    def format_name(self, title):
        """Strip characters that are unsafe in file names and cap the length.

        Removes ``? / | \\ } { : < > * "`` from ``title``. Titles that are
        still longer than 190 characters are cut to their first 120.
        """
        title = re.sub(r'[?/|\\}{:<>*"]', '', title)
        if len(title) > 190:
            title = title[:120]
        return title
Пример #4
0
def routeSubmission(submission):
    """Send a Reddit submission to the first handler that accepts its URL.

    Selftext posts are appended to a text file. Every other submission is
    matched against the handlers' ``valid_url`` patterns in a fixed order;
    crossposts are resolved through ``RedditHandler`` and routed again.

    Args:
        submission: Object exposing ``title``, ``url``, ``author``,
            ``subreddit``, ``id``, ``created_utc``, ``is_self`` and
            ``selftext`` (presumably a PRAW submission - confirm with caller).

    Returns:
        True when the submission was saved, False when the handler failed or
        no handler supports the link.
    """
    logger = logging.getLogger(__name__)
    save = Save()

    title = formatName(submission.title)
    link = submission.url
    downloaded = True
    path = {
        'author': str(submission.author),
        'subreddit': str(submission.subreddit),
        'id': str(submission.id),
        'created_utc': str(submission.created_utc),
        'title': title,
        'ext': 'txt'
    }

    # Selftext post
    if submission.is_self:
        # Write the text itself as UTF-8 rather than the repr of its encoded
        # bytes, which would store "b'...'" with escaped newlines.
        with open(save.get_dir(path), 'a+', encoding='utf-8') as f:
            f.write(submission.selftext)

    # Link to a jpg, png, gifv, gif, jpeg
    elif re.match(Common.valid_url, link):
        handler = Common(link, '{}-{}'.format(str(submission.id), title), path)
        downloaded = bool(handler.save())

    # Imgur
    elif re.match(Imgur.valid_url, link):
        downloaded = bool(Imgur(link, title, path).save())

    # Giphy
    elif re.match(Giphy.valid_url, link):
        downloaded = bool(Giphy(link, title, path).save())

    # Tenor
    elif re.match(Tenor.valid_url, link):
        downloaded = bool(Tenor(link, title, path).save())

    # Redgifs
    elif re.match(Redgifs.valid_url, link):
        downloaded = bool(Redgifs(link, title, path).save())

    # Gfycat
    elif re.match(Gfycat.valid_url, link):
        downloaded = bool(Gfycat(link, title, path).save())

    # Reddit gallery
    elif re.match(RedditGallery.valid_url, link):
        downloaded = bool(RedditGallery(link, title, path).save())

    # Flickr
    elif 'flickr.com/' in link:
        downloaded = False
        logger.info("No mathces: No Flickr support {}".format(link))

    # Reddit submission
    elif re.match(RedditHandler.valid_url, link):
        logger.debug("Fetching crosspost {}".format(link))
        new_submission = RedditHandler(link, title, path).save()
        # Only recurse when the crosspost could be resolved, and report the
        # outcome of routing the resolved submission.
        downloaded = bool(new_submission) and routeSubmission(new_submission)

    # youtube_dl supported site
    elif YouTube.yt_supported(link):
        downloaded = bool(YouTube(link, title, path).save())

    else:
        logger.info("No matches: {}".format(link))
        downloaded = False

    return downloaded
Пример #5
0
def main(args):
    """Validate the command-line options, persist config and feed forever.

    ``args.subreddit`` is treated as a path to a list file when it contains
    ``'.txt'`` (one entry per line); otherwise it is passed to ``feeder``
    as-is. Invalid ``wait``, ``posts`` or ``sort`` values are logged and
    terminate the process with exit status 1. When a subreddit is given the
    function loops indefinitely, sleeping ``wait`` seconds between passes.
    """
    global save, db

    subR = None
    filepath = None

    if args.subreddit:
        if '.txt' in args.subreddit:
            filepath = args.subreddit
        else:
            subR = args.subreddit

    # wait
    wait = 600
    if args.wait and args.subreddit:
        try:
            wait = int(args.wait)
        except ValueError:
            logger.error("Please enter an integer in seconds to wait")
            sys.exit(1)

    # posts
    posts = 50
    if args.posts and args.subreddit:
        try:
            posts = int(args.posts)
        except ValueError:
            logger.error("Please enter an integer for the number of posts")
            sys.exit(1)

    # output
    if args.output and args.subreddit:
        base_dir = os.path.abspath(args.output)
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(base_dir, exist_ok=True)
    else:
        base_dir = os.getcwd()

    # sort
    sort = 'hot'
    if args.sort:
        requested_sort = args.sort.lower()
        if requested_sort in ('hot', 'new', 'top') and args.subreddit:
            # Pass on the normalised value that was actually validated.
            sort = requested_sort
        else:
            logger.error("Please enter hot, new or top for sort")
            sys.exit(1)

    # blacklist
    blacklist = config["reddit"]["blacklist"]
    # The config is written back below, so skip entries already present to
    # avoid accumulating duplicates across runs.
    if args.blacklist and args.blacklist not in blacklist:
        blacklist.append(args.blacklist)

    # reddit api credentials
    if args.reddit_id:
        config["reddit"]["creds"]["client_id"] = args.reddit_id
    if args.reddit_secret:
        config["reddit"]["creds"]["client_secret"] = args.reddit_secret

    with open('./resources/config.json', 'w') as f:
        json.dump(config, f)

    # by_sub !!!
    save = Save(base_dir, args.by_sub)

    # initialise database
    db = DBInterface(config["general"]["database_location"])

    if args.subreddit:
        # Passes subreddits to feeder
        while True:
            if filepath is not None:
                with open(filepath) as f:
                    for line in f:
                        subR = "{}".format(line.strip())
                        feeder(subR, posts, base_dir, sort)
            else:
                feeder(subR, posts, base_dir, sort)
            logger.info("Waiting {} seconds".format(wait))
            time.sleep(wait)
Пример #6
0
from resources.handlers.imgur import Imgur
from resources.handlers.common import Common

from resources.save import Save
from resources.db_interface import DBInterface


class color:
    """ANSI escape sequences used to style terminal output."""

    # Switches the foreground to bright red.
    RED = "\033[91m"
    # Switches bold text on.
    BOLD = "\033[1m"
    # Resets all styling.
    END = "\033[0m"


# Load the JSON configuration at import time. The path is relative to the
# current working directory, so the module must be imported from a directory
# that contains ./resources/config.json.
with open('./resources/config.json') as f:
    config = json.load(f)
# Module-level Save instance built from the current working directory and True.
save = Save(os.getcwd(), True)
logger = logging.getLogger(__name__)
# Database handle; starts as None (presumably assigned later - TODO confirm).
db = None


def grabber(subR, base_dir, posts, sort):
    # Initialise Reddit
    reddit = praw.Reddit(
        client_id=config["reddit"]["creds"]["client_id"],
        client_secret=config["reddit"]["creds"]["client_secret"],
        user_agent=config["reddit"]["creds"]["user_agent"])

    if 'u/' in subR or '/u/' in subR:
        if '/u/' in subR:
            subR = subR[3:]
        elif 'u/' in subR: