Example No. 1
# Scrape the featured post and the article cards from the IBM Developer Java hub.
def get_ibm():
    url = "https://developer.ibm.com/technologies/java/"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    home = soup.find("a", class_="ibm--hub__block_link")
    home_title = home.find("h3",
                           class_="ibm--hub__title").find("span").get_text()
    home_url = "https://developer.ibm.com" + home.get("href")
    model = dataModel(home_title, home_url, "", "java")
    try:
        model.updateToInfo()
    except:
        print("Duplicate!")
    print(home_title, home_url)
    article_list = soup.find_all("a", class_="ibm--card__block_link")
    for article in article_list:
        title = article.find("h3").get_text()
        article_url = "https://developer.ibm.com" + article.get("href")
        image_url = ""
        if article.find("img") is not None:
            image_url = article.find("img").get("src")
        model = dataModel(title, article_url, image_url, "java")
        model.printIt()
        try:
            model.updateToInfo()
        except:
            continue
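All of the examples rely on module-level names that are not shown here: requests, BeautifulSoup, a head dict of request headers, and a dataModel record class whose updateToInfo() raises when the article is already stored (Examples 4 and 7 import the module instead, hence dataModel.dataModel). A minimal sketch of that shared scaffolding, with field names and duplicate handling inferred from the calls above rather than taken from the original project:

# Sketch of the scaffolding the examples assume; not the original module.
import requests
from bs4 import BeautifulSoup

# Hypothetical request headers; the real `head` used below is not shown in the examples.
head = {"User-Agent": "Mozilla/5.0 (compatible; article-crawler)"}


class dataModel:
    # Minimal stand-in: one scraped article (title, link, image, category).
    def __init__(self, title, url, image_url, category):
        self.title = title
        self.url = url
        self.image_url = image_url
        self.category = category

    def printIt(self):
        print(self.title, self.url, self.image_url, self.category)

    def updateToInfo(self):
        # Assumed behaviour: persist the record and raise if it already exists,
        # which is why the examples wrap this call in try/except.
        raise NotImplementedError("persistence layer not shown in the examples")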
Example No. 2
# Scrape the JavaWorld front page: the featured story plus the article river.
def get_java_world():
    url = "https://www.javaworld.com"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    home_article = soup.find("div", class_="home-feature")
    home_title = home_article.find("div",
                                   class_="post-cont").find("a").get_text()
    home_url = url + home_article.find(
        "div", class_="post-cont").find("a").get("href")
    home_image_url = home_article.find(
        "figure",
        class_="feature-img").find("a").find("img").get("data-original")
    model = dataModel(home_title, home_url, home_image_url, "java")
    try:
        model.updateToInfo()
    except:
        print("Duplicate!")
    article_list = soup.find_all("div", class_="river-well article")
    for article in article_list:
        a = article.find("div", class_="post-cont").find("a")
        title = a.get_text()
        article_url = url + a.get("href")
        image_url = article.find(
            "figure",
            class_="well-img").find("a").find("img").get("data-original")
        model = dataModel(title, article_url, image_url, "java")
        try:
            model.updateToInfo()
        except:
            continue
Example No. 3
# Scrape post titles, links and optional images from the JRebel RebelLabs blog.
def getjrebel():
    url = "https://jrebel.com/rebellabs/"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find_all("div", class_="post")
    for article in article_list:
        title = article.find("h1").find("a").get("title")
        article_url = article.find("h1").find("a").get("href")
        try:
            image_url = "https://jrebel.com" + article.find("img").get("src")
            model = dataModel(title, article_url, image_url, "java")
            print(title, article_url, image_url, "img")
        except:
            model = dataModel(title, article_url, "", "java")
            print(title, article_url)
        try:
            model.updateToInfo()
        except:
            continue
Example No. 4
# Scrape the featured repository cards from the GitHub Explore page.
def get_article():
    url = "https://github.com/explore"
    res = requests.get(url, headers=head)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find("div", class_="d-lg-flex gutter-lg-condensed").find_all("article")
    for article in article_list:
        article_url = article.find("a").get("href")
        img_url = article.find("img").get("src")
        title = article.find("h1").get_text()
        model = dataModel.dataModel(title.strip(), article_url.lstrip(), img_url, "github")
        model.printIt()
        try:
            model.updateToInfo()
        except:
            continue
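get_article() assumes every card on the Explore page contains an <a>, an <img> and an <h1>; if GitHub changes that markup, find() returns None and the loop stops with an AttributeError. A more defensive variant, reusing the names from the example (the guard logic is an illustration, not part of the original):

def get_article_safe():
    url = "https://github.com/explore"
    res = requests.get(url, headers=head, timeout=10)
    soup = BeautifulSoup(res.text, "html.parser")
    container = soup.find("div", class_="d-lg-flex gutter-lg-condensed")
    if container is None:
        return  # layout changed; nothing to scrape
    for article in container.find_all("article"):
        link = article.find("a")
        heading = article.find("h1")
        img = article.find("img")
        if link is None or heading is None:
            continue  # skip cards that do not match the expected markup
        model = dataModel.dataModel(
            heading.get_text().strip(),
            link.get("href").strip(),
            img.get("src") if img is not None else "",
            "github",
        )
        model.printIt()
        try:
            model.updateToInfo()
        except Exception:
            continue  # already stored; skip, as in the original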
Example No. 5
# Scrape the full post list (titles, links, thumbnails) from ImportNew.
def get_import_new():
    url = "http://www.importnew.com/all-posts"
    try:
        res = requests.get(url)
        soup = BeautifulSoup(res.text, "html.parser")
        article_list = soup.find_all("div", class_="post-thumb")
        for article in article_list:
            a = article.find("a")
            title = a.get("title")
            article_url = a.get("href")
            image_url = a.find("img").get("src")
            model = dataModel(title, article_url, image_url, "java")
            try:
                model.updateToInfo()
            except:
                continue
    except:
        print("connection error")
Example No. 6
# Scrape article titles, links and featured images from the OverOps blog.
def get_overops():
    url = "https://blog.overops.com/"
    res = requests.get(url, headers=head)
    soup = BeautifulSoup(res.text, "html.parser")
    article_list = soup.find_all("article")
    for article in article_list:
        title = article.find(
            "div", class_="col-md-7 col-sm-7 right-post-content").find(
                "h2").find("a").get_text()
        article_url = article.find(
            "div", class_="col-md-7 col-sm-7 right-post-content").find(
                "h2").find("a").get("href")
        img_url = article.find(
            "div", class_="col-md-5 col-sm-5 post-featured-image").find(
                "a").get("href")
        print(title, article_url, img_url)
        model = dataModel(title, article_url, img_url, "java")
        try:
            model.updateToInfo()
        except:
            continue
Example No. 7
# Scrape GitHub's daily trending repositories for several languages.
def github():
    trendingUrl = "https://github.com/trending/"
    categoryArray = ["python", "c++", "c", "swift", "java", "javascript"]
    for category in categoryArray:
        url = trendingUrl + category + "?since=daily"
        res = requests.get(url, headers=head)
        soup = BeautifulSoup(res.text, "html.parser")
        projectList = soup.find_all("li", class_="col-12 d-block width-full py-4 border-bottom")
        for project in projectList:
            title = project.find('h3').find('a').get("href")[1:]
            project_url = "https://github.com/" + title
            try:
                describe = project.find('p', class_="col-9 d-inline-block text-gray m-0 pr-4").get_text().strip() + " " + title
            except:
                describe = title
            model = dataModel.dataModel(describe, project_url, "", category)
            model.printIt()
            try:
                model.updateToInfo()
            except:
                continue
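The scrapers are independent of one another, so a driver that isolates failures per source keeps one broken or redesigned site from stopping the rest. A minimal sketch of such a driver (the function names come from the examples above; the error handling is an assumption):

def crawl_all():
    scrapers = (get_ibm, get_java_world, getjrebel, get_article,
                get_import_new, get_overops, github)
    for scraper in scrapers:
        try:
            scraper()
        except Exception as exc:
            # One failing site should not abort the whole crawl.
            print(scraper.__name__, "failed:", exc)


if __name__ == "__main__":
    crawl_all()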