Пример #1
0
 def parse_thread(r):
     """Extract the list of posts from a thread API response.

     ``r`` appears to be a requests-style response object (has
     ``.content``, ``.url``, ``.text``) -- TODO confirm against caller.
     Returns the ``posts`` list from the decoded JSON payload, or ``[]``
     when the body is not valid JSON.
     """
     # Decode permissively up front; errors='ignore' means this call
     # itself can never raise.
     raw = r.content.decode('utf-8', 'ignore')
     try:
         payload = json.loads(raw)
     except JSONDecodeError:
         logger.warning("JSONDecodeError for %s:" % (r.url,))
         logger.warning(r.text)
         return []
     return payload["posts"]
Пример #2
0
 def parse_threads_list(self, r):
     """Parse a paginated threads-list response.

     Always returns a ``(threads, next_page_url)`` tuple;
     ``next_page_url`` is ``None`` when there is no further page or the
     body is not valid JSON.  ``r`` appears to be a requests-style
     response object -- TODO confirm against caller.
     """
     try:
         j = json.loads(r.content.decode('utf-8', 'ignore'))
     except JSONDecodeError:
         logger.warning("JSONDecodeError for %s:" % (r.url,))
         logger.warning(r.text)
         return [], None
     if j["currentPage"] < j["totalPages"]:
         return j["data"], self._base_url + "boards/%d" % (j["currentPage"] + 1,)
     # BUG FIX: the last-page path previously returned a bare list while
     # every other path returns a (data, next_page) tuple, which would
     # break callers that tuple-unpack the result.
     return j["data"], None
Пример #3
0
 def parse_thread(r):
     """Yield the opening post of a thread followed by each reply.

     The opening post is a shallow copy of the payload with its
     "replies" key removed.  Yields nothing when the body is not valid
     JSON.
     """
     try:
         j = json.loads(r.content.decode('utf-8', 'ignore'))
     except JSONDecodeError:
         logger.warning("JSONDecodeError for %s:" % (r.url, ))
         logger.warning(r.text)
         # FIX: this is a generator, so ``return []`` never delivered a
         # list to consumers -- it only set StopIteration.value.  A bare
         # return states the real intent: stop yielding.
         return
     thread = j.copy()
     del thread["replies"]
     yield thread
     for post in j["replies"]:
         yield post
Пример #4
0
    def parse_thread(r):
        """Flatten a thread payload into its posts plus the thread record.

        Every post dict gets a "_parent" key set to the payload's
        "threadId"; the thread dict itself (with its "posts" key
        removed) is appended last.  Returns [] when the body is not
        valid JSON.
        """
        try:
            data = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return []

        # Detach the posts list, tag every post with its thread id,
        # then tack the (now post-less) thread record onto the end.
        items = data.pop("posts")
        for post in items:
            post["_parent"] = data["threadId"]
        items.append(data)
        return items
Пример #5
0
def _ts(text):
    time = re.sub(r"^\w{2} ", "", text.strip()) \
        .replace("января", "01") \
        .replace("февраля", "02") \
        .replace("марта", "03") \
        .replace("апреля", "04") \
        .replace("мая", "05") \
        .replace("июня", "06") \
        .replace("июля", "07") \
        .replace("августа", "08") \
        .replace("сентября", "09") \
        .replace("октября", "10") \
        .replace("ноября", "11") \
        .replace("декабря", "12")  \
        .replace("⑨", "9")
    # For some reason, some dates are fuzzed / in chinese
    try:
        return int(
            datetime.datetime.strptime(time, "%d %m %Y %H:%M:%S").timestamp())
    except Exception as e:
        logger.warning("Error during date parsing (iichan): " + str(e))
        return 0
Пример #6
0
    def parse_threads_list(r):
        """Parse a flat (non-paginated) threads-list response.

        Returns ``(threads, None)``.  An empty list is returned both
        when the body is not valid JSON and when the payload's first
        entry lacks a "post_id" key.
        """
        try:
            threads = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url, ))
            logger.warning(r.text)
            return [], None
        # A well-formed payload is a non-empty list of post objects.
        if len(threads) == 0 or "post_id" not in threads[0]:
            logger.warning("No threads in response for %s: %s" % (
                threads and r.url or r.url,
                r.text,
            ))
            return [], None

        return threads, None
Пример #7
0
    def parse_threads_list(r):
        """Collect all threads from a paginated catalog payload.

        The payload is a list of pages, each carrying a "threads" list;
        the threads are flattened into a single list.  The endpoint
        delivers every page at once, so the next-page hint is always
        ``None``.
        """
        try:
            pages = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None
        # Sanity check: non-empty list whose entries carry "threads".
        if len(pages) == 0 or "threads" not in pages[0]:
            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
            return [], None

        return [thread for page in pages for thread in page["threads"]], None
Пример #8
0
    def parse_threads_list(r):
        """Parse one page of a threads index and compute the next page URL.

        The current page number is recovered from the request URL itself
        (its "<n>.json" path tail, query string stripped); when it is
        below the payload's "pageCount", the next page URL is built
        relative to ``r.url``.  Returns ``(threads, next_page_or_None)``.
        """
        try:
            j = json.loads(r.content.decode('utf-8', 'ignore'))
        except JSONDecodeError:
            logger.warning("JSONDecodeError for %s:" % (r.url,))
            logger.warning(r.text)
            return [], None
        if len(j) == 0 or "threads" not in j:
            logger.warning("No threads in response for %s: %s" % (r.url, r.text,))
            return [], None

        # Cut at the last "?" (rsplit keeps the whole URL when there is
        # no query string), then pull the "<page>.json" tail apart.
        base = r.url.rsplit("?", 1)[0]
        page_part = base.rsplit("/", 1)[-1]
        current_page = int(page_part[:-5])  # drop the ".json" suffix

        next_page = None
        if current_page < j["pageCount"]:
            next_page = urljoin(r.url, "%d.json" % (current_page + 1))

        return j["threads"], next_page