Exemplo n.º 1
0
def download():
    """Download the XML source of every Aufmacher not yet mirrored on disk.

    Walks all Aufmacher rows, maps each unique_id to a path under xml/,
    and writes the article XML there unless the file already exists.
    """
    db.connect()
    try:
        aufmacher = Aufmacher.select()
        aufmacher_length = len(aufmacher)

        for index, auf in enumerate(aufmacher):
            # Mirror the article's unique_id into a local xml/ tree.
            path_to_file = auf.unique_id.replace("http://xml.zeit.de/", "")
            xml_file_path = (Path("xml") / path_to_file).with_suffix(".xml")
            if xml_file_path.is_file():
                continue  # already downloaded

            print("{}/{}".format(index, aufmacher_length), xml_file_path)

            # .parent is already a Path -- no need to re-wrap it in Path().
            xml_file_path.parent.mkdir(parents=True, exist_ok=True)

            article_content = download_article(auf.unique_id)
            if article_content:
                print("writing", xml_file_path)
                with open(xml_file_path, "w") as xml_file:
                    xml_file.write(article_content)
            else:
                print("error!")
    finally:
        # Close the connection even when a download or write raises.
        db.close()
Exemplo n.º 2
0
def db_init():
    """Connect to the database, create the Movie table if missing, disconnect."""
    db.connect()
    try:
        db.create_tables([Movie])
    except OperationalError:
        # Table already exists -- nothing to create.
        pass
    else:
        print('Creating tables...')
    db.close()
Exemplo n.º 3
0
def clean_up():
    """Persist a TweetJob row for every Aufmacher that lacks one."""
    db.connect()

    # LEFT OUTER join so Aufmacher rows without a TweetJob still appear,
    # carrying an empty TweetJob stub under the 'tweetjob' alias.
    join_condition = (Aufmacher.id == TweetJob.aufmacher).alias('tweetjob')
    query = (Aufmacher
             .select(Aufmacher, TweetJob)
             .join(TweetJob, JOIN.LEFT_OUTER, on=join_condition)
             .order_by(Aufmacher.created_at.desc()))

    for row in query:
        # A missing joined row leaves the stub's id unset.
        if not row.tweetjob.id:
            print(model_to_dict(row.tweetjob))
            row.tweetjob.save()

    db.close()
Exemplo n.º 4
0
def get_db(dict_cursor=True):
    """Return a cursor on the MySQL connection.

    Connects on demand when no request-bound connection exists.
    Returns a DictCursor unless dict_cursor is False.
    """
    connection = db.get_db()
    if not connection:
        # Outside a request there is no cached connection yet.
        connection = db.connect()
    if dict_cursor:
        return connection.cursor(MySQLdb.cursors.DictCursor)
    return connection.cursor()
Exemplo n.º 5
0
def scrape():
    """Fetch the homepage, extract the lead article, and store it if new."""
    r = requests.get(DOWNLOAD_URL)
    soup = BeautifulSoup(r.text, "html.parser")

    # select_one returns the first match or None -- replaces select()[0]
    # plus the manual len() check.
    teaser = soup.select_one(".main article")
    if teaser is None:
        return

    # Canonical key in the DB uses the http:// form of the id.
    unique_id = teaser["data-unique-id"].strip().replace("https", "http")

    db.connect()
    try:
        db.create_tables([Image, Author, Aufmacher, TweetJob], safe=True)

        # .exists() issues a cheap EXISTS query instead of fetching rows
        # just to take len() of them.
        duplicate = Aufmacher.select().where(Aufmacher.unique_id == unique_id)
        if not duplicate.exists():
            get_article_data(unique_id)
    finally:
        # Always release the connection, even if scraping/storing raises.
        db.close()
Exemplo n.º 6
0
def stats():
    """Print aggregate counts and details of the most recent Aufmacher."""
    db.connect()

    aufmacher_count = Aufmacher.select().count()
    author_count = Author.select().count()
    image_count = Image.select().count()

    print("Stats:")
    print("{:>5} Aufmacher".format(aufmacher_count))
    print("{:>5} authors".format(author_count))
    print("{:>5} images".format(image_count))

    print("\nLatest:")
    # .first() fetches one row once -- the old latest_aufmacher[0] both
    # re-evaluated the query per subscript and raised IndexError on an
    # empty table.
    latest = Aufmacher.select().order_by(Aufmacher.created_at.desc()).first()
    if latest:
        latest_aufmacher_string = """
since {created_at}
{supertitle}: {title}
{subtitle}
by {author_name}
    """.format(**model_to_dict(latest),
               author_name=latest.author.name)
        print(latest_aufmacher_string.strip())

    db.close()
Exemplo n.º 7
0
# Module-level setup: runs once at import time.
bot = telebot.TeleBot(token=config.token)
'''
# using proxy in Russia
apihelper.proxy = {
    # 'http': 'http://46.101.149.132:3128',
    # 'https': 'https://46.101.149.132:3128'
    # 'http': 'http://79.138.99.254:8080',
    # 'https': 'https://79.138.99.254:8080'
     'http': 'http://5.148.128.44:80',
     'https': 'https://5.148.128.44:80'
    # 'http': 'http://167.99.242.198:8080',
    # 'https': 'https://167.99.242.198:8080'
}
'''
# create tables in db
db.connect()
db.create_tables([User, Player, Challenge])

# create GOD if not exists
# NOTE(review): on first run the bot creator (config.creatorID) is created
# with the GOD role; later runs find the existing row instead.
try:
    god = User.get(User.tg_id == config.creatorID)
except DoesNotExist:
    god = User.create(tg_id=config.creatorID, username=config.creatorUsername, name='Yury', role=Role.GOD)


@MWT(timeout=5*60)
def get_privilege_ids(role):
    """Return Telegram ids of all users with at least the given role.

    Result is memoized by MWT for five minutes.
    """
    logger.info("Update list of %s", role)
    privileged = User.select().where(User.role >= role)
    return [user.tg_id for user in privileged]

Exemplo n.º 8
0
def make_stats():
    """Build yearly CSV/text statistics over locally mirrored Aufmacher XML.

    Reads the xml/ mirror written by download(), aggregates per-article data
    via get_article_data(), and writes stats/aufmacher.csv, stats/tags.csv,
    stats/tagcloud.txt plus CSVs of headlines/teasers containing words from
    shitlist.txt.
    """
    # Fixed reporting window of one year.
    start_date = arrow.get("2019-10-14", "YYYY-MM-DD").datetime
    end_date = arrow.get("2020-10-14", "YYYY-MM-DD").datetime
    number_of_days = (end_date - start_date).days

    db.connect()
    aufmacher = (Aufmacher.select().where(
        (Aufmacher.created_at >= start_date)
        & (Aufmacher.created_at <= end_date)).order_by(Aufmacher.created_at))
    csv_data = []

    print("{} Aufmacher an {} Tagen".format(len(aufmacher), number_of_days))
    print(len(aufmacher) / number_of_days, " Aufmacher pro Tag")

    for index, auf in enumerate(aufmacher):
        # Same unique_id -> xml/ path mapping used when downloading.
        path_to_file = auf.unique_id.replace("http://xml.zeit.de/", "")
        xml_file_path = (Path("xml") / path_to_file).with_suffix(".xml")

        # Successor article (chronological order), None for the last one.
        next_aufmacher = aufmacher[index +
                                   1] if index < len(aufmacher) - 1 else None

        if xml_file_path.is_file():
            with open(xml_file_path, "r") as xml_file:
                parsed_article = untangle.parse(xml_file)
                print(xml_file_path)
                try:
                    # NOTE(review): get_article_data presumably also mutates
                    # the module globals all_tags / dumb_tag_list /
                    # teaser_texts used below -- confirm against its source.
                    article_data = get_article_data(auf,
                                                    parsed_article.article,
                                                    next_aufmacher)
                    if article_data:
                        csv_data.append(article_data)
                except AttributeError:
                    # Skip articles whose XML lacks expected elements.
                    continue

    # NOTE(review): csv_data[0] / all_tags raise IndexError/KeyError when no
    # article produced data -- assumes at least one valid article in range.
    with open("stats/aufmacher.csv", "w") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=csv_data[0].keys())
        writer.writeheader()
        writer.writerows(csv_data)

    # Tag table sorted by descending occurrence count.
    with open("stats/tags.csv", "w") as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=list(all_tags.values())[0].keys())
        writer.writeheader()
        writer.writerows(
            sorted(list(all_tags.values()),
                   key=itemgetter("count"),
                   reverse=True))

    # Raw tag list, one per line, for word-cloud tooling.
    with open("stats/tagcloud.txt", "w") as out_file:
        out_file.write("\n".join(dumb_tag_list))

    headlines_with_shitlist_words = []
    teasers_with_shitlist_words = []
    with open("shitlist.txt", "r") as shitlist_file:
        # One lowercase word per line.
        shitlist = [line.rstrip("\n").lower() for line in shitlist_file]
        print(shitlist)

        # Headlines containing any blacklisted word.
        for auf in csv_data:
            if any([word in auf["teaserTitle"].lower() for word in shitlist]):
                headlines_with_shitlist_words.append({
                    "teaserTitle":
                    auf["teaserTitle"],
                    "url":
                    auf["url"]
                })

        # Teaser texts containing any blacklisted word, with the matches.
        for auf in teaser_texts:
            if any([word in auf["teaserText"].lower() for word in shitlist]):
                teasers_with_shitlist_words.append({
                    "teaserText":
                    auf["teaserText"],
                    "url":
                    auf["url"],
                    "words":
                    ", ".join([
                        word for word in shitlist
                        if word in auf["teaserText"].lower()
                    ]),
                })

    if len(headlines_with_shitlist_words):
        with open("stats/shitlisted_headlines.csv", "w") as out_file:
            writer = csv.DictWriter(
                out_file, fieldnames=headlines_with_shitlist_words[0].keys())
            writer.writeheader()
            writer.writerows(headlines_with_shitlist_words)

    if len(teasers_with_shitlist_words):
        with open("stats/shitlisted_teasers.csv", "w") as out_file:
            writer = csv.DictWriter(
                out_file, fieldnames=teasers_with_shitlist_words[0].keys())
            writer.writeheader()
            writer.writerows(teasers_with_shitlist_words)
Exemplo n.º 9
0
def go():
    """Render a newsletter of the last 24h of Aufmacher and send it via SendGrid.

    Renders MJML + plain-text Jinja2 templates, compiles the MJML to HTML
    with the local node mjml binary, and posts the mail when both parts
    rendered and the module-level `send` flag is truthy.
    """
    # Articles released within the last day.
    select_from_datetime = datetime.datetime.now() - datetime.timedelta(days=1)
    db.connect()

    new_aufmachers = Aufmacher.select()\
        .where(Aufmacher.first_released > select_from_datetime)

    # German-locale date for the subject line, e.g. "5. Januar 2021".
    date_string = arrow.now().format('D. MMMM YYYY', locale='de')
    subject = "ZON zum {}".format(date_string)

    mail_html = None
    mail_text = None

    # HTML (MJML) body from the Jinja2 template.
    with open("mail_template.jinja2") as mjml_template_file:
        mail_mjml_template = Template(mjml_template_file.read())
        mail_mjml = mail_mjml_template.render({
            "aufmacher": new_aufmachers,
            "subject": subject,
            "date_string": date_string,
        })

    # Plain-text alternative body.
    with open("text_template.jinja2") as text_template_file:
        mail_text_template = Template(text_template_file.read())
        mail_text = mail_text_template.render({
            "aufmacher": new_aufmachers,
            "subject": subject,
            "date_string": date_string,
        })

    if mail_mjml:
        # Compile MJML -> HTML via the locally installed node mjml binary.
        mjml_filename = os.path.join(current_dir, "tmp/mail.mjml")
        mjmp_exec_path = os.path.join(current_dir, "node_modules/.bin/mjml")

        # Remove a stale temp file from a previous run, if any.
        try:
            os.remove(mjml_filename)
        except OSError:
            pass
        with open(mjml_filename, "w") as mjml_output_file:
            mjml_output_file.write(mail_mjml)

        try:
            mail_html = subprocess.check_output(
                [mjmp_exec_path, mjml_filename])
            mail_html = mail_html.decode("utf-8")
        except subprocess.CalledProcessError:
            # Compilation failed -- mail will not be sent below.
            mail_html = None

        # Clean up the temp file regardless of the compile outcome.
        try:
            os.remove(mjml_filename)
        except OSError:
            pass

    # `send` is a module-level flag; both bodies must have rendered.
    if mail_html and mail_text and send:
        sg = sendgrid.SendGridAPIClient(sendgrid_key)
        data = {
            "personalizations": [{
                "to": [{
                    "email": "*****@*****.**"
                }],
                "subject": subject
            }],
            "from": {
                "email": "*****@*****.**"
            },
            "content": [{
                "type": "text/plain",
                "value": mail_text
            }, {
                "type": "text/html",
                "value": mail_html
            }]
        }
        response = sg.client.mail.send.post(request_body=data)
        print(response.status_code)
        print(response.body)
        print(response.headers)
Exemplo n.º 10
0
 def populate_db(self, path):
     """Ensure the Track table exists, then index audio files under *path*.

     Connects before any schema query so table_exists()/create_tables()
     run on an open connection (matches how the other scripts order
     db.connect() before schema operations).
     """
     db.connect()
     if not Track.table_exists():
         db.create_tables([Track])
     self.recursive_walk(path)