def get_urls_pars(html):
    """Parse the fl.ru project list and upsert matching projects into Fl.

    html: raw HTML of the fl.ru projects page.  Every post whose title
    mentions parsing (keyword variants below) is created or updated in the
    Fl table, keyed by its absolute link.
    """
    soup = BeautifulSoup(html, 'lxml')
    posts = soup.find('div', id="projects-list").find_all('div', class_="b-post")
    # Case variants of "parse / parsing / parser" in Russian.
    keywords = ('Спарсить', 'спарсить', 'Парсинг', 'парсинг', 'Парсер', 'парсер')
    for post in posts:
        # Single anchor lookup, reused (the original queried the DOM twice).
        anchor = post.find('a', class_="b-post__link")
        show = anchor.text
        url = anchor.get('href')

        # Bug fix: the original repeated the whole upsert once per matching
        # keyword; one match is enough to process the post exactly once.
        if not any(re.search(k, show) for k in keywords):
            continue

        link = 'https://www.fl.ru' + url
        ref_link = refind_link(link)
        ref_link_2 = refind_link_2(link)
        ref_link_3 = refind_link_3(link)

        # Publication date/time reconstructed from the page text.
        date_p1 = refind_w(ref_link_2)
        time_p = refind_t(ref_link_2)
        date_p = (refind_name_y(date_p1) + '-' + refind_name_m(date_p1)
                  + '-' + refind_name_d(date_p1))

        price = ref_link_3
        image = 'images/fl.png'

        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            p.time_p = time_p
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print(link)
def get_urls_pars(html):
    """Parse the weblancer.net project list and upsert matching rows into Fl.

    html: raw HTML of the weblancer listing.  Rows whose title mentions
    parsing (keyword variants below) are created or updated, keyed by
    absolute link.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find_all('div', class_="row")
    keywords = ('Спарсить', 'спарсить', 'Парсинг', 'парсинг', 'Парсер', 'парсер')
    for row in rows:
        try:
            show = row.find('a', class_="text-bold show_visited").text.strip()
            date = row.find('span', class_="time_ago").get('title')
            status = row.find('span', class_="text-muted").text.strip()
            status_2 = row.find(
                'div',
                class_="float-left float-sm-none text_field").text.strip()
            field = row.find('p', class_="text_field").text.strip()
            url = row.find('a', class_="text-bold show_visited").get('href')
        except AttributeError:
            # Some element is missing in this row (was a bare `except:`,
            # which also hid real bugs) — fall back to placeholders.
            show = '-'
            date = '-'
            status = '-'
            status_2 = '-'
            field = '-'
            url = '-'

        # Bug fix: the original ran the upsert once per matching keyword.
        if not any(re.search(k, show) for k in keywords):
            continue

        link = 'https://www.weblancer.net' + url
        ref_link = field
        date_p1 = refind_w(date)
        time_p = refind_t(date)
        date_p = (refind_name_y(date_p1) + '-' + refind_name_m(date_p1)
                  + '-' + refind_name_d(date_p1))
        price = 'По договорённости'
        image = 'images/weblancer.png'

        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            p.time_p = time_p
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print(link)
def get_all_links(html):
    """Parse a youdo.com task page and upsert it into Fl.

    html: raw HTML of a single task page.
    Returns the task description text, or the '**-??????????????-**'
    sentinel when the page could not be parsed.
    """
    soup = BeautifulSoup(html, 'lxml')
    try:
        tds = soup.find('div', class_="b-task-block__description").text
        name = soup.find('h1', class_="b-task-block__header__title").text
        brief = soup.find('div', class_="js-task-item--brief")
        first_item = brief.find('li', class_="b-task-brief__item")
        # NOTE(review): computed but never used — kept in case refind_name_2
        # has side effects; confirm and drop if pure.
        ref_name_2 = refind_name_2(first_item.text)
        name_4 = soup.find('span', class_="js-budget-text").text
        ref_name_4 = refind_name_4(name_4)
        # The 5th brief item carries the task id fragment for the URL.
        fifth_item = (first_item.find_next_sibling('li')
                      .find_next_sibling('li')
                      .find_next_sibling('li')
                      .find_next_sibling('li'))
        ref_name_5 = 'https://youdo.com/t' + refind_name_5(fifth_item.text)

        link = ref_name_5
        show = name
        ref_link = tds
        now = datetime.now()
        date_p = now.strftime('%Y-%m-%d')  # was manual str-splitting
        time_p = now.strftime('%H:%M')
        price = ref_name_4
        image = 'images/youdo.png'

        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            # date_p / time_p deliberately not refreshed on update: the
            # original discovery timestamp is kept.
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print(link)

        return tds
    except Exception:
        # Page layout changed / element missing (was a bare `except:`).
        return '**-??????????????-**'
# --- Example #4 ---
def get_all_links(html):
    """Parse a freelansim task page and upsert it into Fl.

    html: raw HTML of a single task page.  Missing page elements degrade
    to the '!-----!-----!' sentinel instead of aborting.
    Returns the task description text.
    """
    soup = BeautifulSoup(html, 'lxml')
    sentinel = '!-----!-----!'
    try:
        tds = soup.find('div', class_="task__description").text
    except AttributeError:
        tds = sentinel
    try:
        name = soup.find('h2', class_="task__title").text
    except AttributeError:
        name = sentinel
    try:
        date = soup.find('div', class_="task__meta").text
    except AttributeError:
        date = sentinel
    ref_date_1 = refind_date_1(date)
    ref_date_2 = refind_date_2(date)  # NOTE(review): unused — confirm and drop
    ref_date_3 = refind_date_3(date)  # NOTE(review): unused — confirm and drop
    try:
        prise = soup.find('div', class_="task__finance").text
    except AttributeError:
        prise = sentinel
    try:
        url_l = soup.find('div', class_="dropdown__menu").find('a').get('href')
    except AttributeError:
        url_l = sentinel
    try:
        ref_url_l = 'https' + url_l.split('+http')[1]
    except IndexError:
        # No '+http' marker in the href (was a bare `except:`).
        ref_url_l = sentinel

    # Rebuild an ISO-like date from the parsed fragments.
    date_y = refind_name_y(ref_date_1)
    date_d = refind_name_d(ref_date_1)
    date_m = refind_name_m(ref_date_1)
    ref_date = date_y + '-' + date_m + '-' + date_d
    print(ref_date)

    link = ref_url_l
    show = name
    ref_link = tds
    date_p = ref_date
    time_p = refind_t(ref_date_1)
    price = prise
    image = 'images/freelansim.png'

    try:
        p = Fl.objects.get(link=link)
        p.show = show
        p.price = price
        p.ref_link = ref_link
        p.date_p = date_p
        p.time_p = time_p
        p.save()
    except Fl.DoesNotExist:
        Fl(
            link=link,
            show=show,
            price=price,
            ref_link=ref_link,
            date_p=date_p,
            time_p=time_p,
            image=image,
        ).save()
        print(link)

    return tds
def get_urls_pars(html):
    """Parse the freelancehunt project table and upsert each row into Fl.

    html: raw HTML of the project list page.  Rows whose date cell is a
    bare day-of-month number are skipped — presumably those are older than
    today, while today's rows show a time instead (TODO confirm).
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find('tbody').find_all('tr')
    # Day numbers '1'..'31' as strings, for the skip test below.
    day_numbers = {str(d) for d in range(1, 32)}
    for row in rows:
        try:
            name = row.find('td', class_="left").text
        except AttributeError:
            name = '---'
        try:
            url = row.find('td', class_="left").find('a').get('href')
        except AttributeError:
            url = '- - - - -'
        try:
            price = row.find('td', class_="text-center").find(
                'div', class_="text-green").text
        except AttributeError:
            price = 'Договорная'
        # Bug fix: the original left `date` unbound (NameError) when the
        # table had fewer than three centered cells.
        date = '++++++'
        for pos, cell in enumerate(row.find_all('td', class_="text-center")):
            if pos == 2:
                try:
                    date = cell.find('div',
                                     class_="with-tooltip").find('h2').text
                except AttributeError:
                    date = '++++++'

        ref_link = refind_links(url)

        if date in day_numbers:
            # Bare day-of-month -> not posted today; skip.
            continue

        link = url
        show = refind_name(name)
        date_p = str(datetime.now()).split(' ')[0]
        time_p = date
        image = 'images/freelancehunt.png'

        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            # date_p / time_p intentionally not refreshed on update.
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print(link)

    print('---**+++***--- && ---***+++**---')
# --- Example #6 ---
def get_urls_pars(html):
    """Parse the freelance.ru project feed and upsert matching items into Fl.

    html: raw HTML of the feed page.  Only projects whose title mentions
    parsing (keyword variants below) are stored, keyed by absolute link.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Iterating the container tag walks its direct children (one per project).
    projects = soup.find('div', class_="projects")
    keywords = ('Парсинг', 'парсинг', 'Парсер', 'парсер')
    for item in projects:
        try:
            name = item.find('div').text.strip()
        except AttributeError:
            name = '- - - - -'
        try:
            name_2 = item.find('a').get('href')
        except AttributeError:
            name_2 = '- - - - -'
        try:
            name_3 = item.find('li').text
        except AttributeError:
            name_3 = '- - - - -'
        try:
            # NOTE(review): name_4 / ref_name_5 / ref_name_6 are computed but
            # never used — kept in case the refind_* helpers have side
            # effects; confirm and drop if pure.
            name_4 = item.find('ul').find('i').text
        except AttributeError:
            name_4 = '- - - - -'
        try:
            ref_name_5 = refind_name_5(item.find('ul').text)
        except AttributeError:
            ref_name_5 = '- - - - -'
        try:
            ref_name_6 = refind_name_6(item.find('ul').text.strip())
        except AttributeError:
            ref_name_6 = '- - - - -'
        try:
            date_y = refind_name_y(name_3)
            date_m = refind_name_m(name_3)
            date_d = refind_name_d(name_3)
            date_pub = '20' + date_y + '-' + date_m + '-' + date_d
        except Exception:
            # Bug fix: the original only set a (unused) ref_date here,
            # leaving date_pub unbound and crashing on the next keyword
            # match with a NameError.
            date_pub = '00000000000'

        # Bug fix: the original ran the upsert once per matching keyword.
        if not any(re.search(k, name) for k in keywords):
            continue

        link = 'https://freelance.ru' + name_2
        ref_link = refind_link(link)
        show = name
        date_p = date_pub
        time_p = datetime.now().strftime('%H:%M')  # was manual str-splitting
        price = 'Договорная'
        image = 'images/freelance.png'

        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            # time_p intentionally not refreshed on update.
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print(link)
# --- Example #7 ---
def get_all_links(html, url):
    """Parse a freelance.ru project page and upsert it into Fl.

    html: page HTML; url: the project's absolute URL (used as the DB key).
    Returns the description paragraph text, or the business-account
    sentinel when the description is hidden.
    """
    soup = BeautifulSoup(html, 'lxml')

    # Defaults guard against NameError further down when any parsing step
    # fails (bug fix: the original left all of these unbound on failure).
    tds = ''
    td_1 = td_3 = td_4 = ''
    tdtu = '---------1////////'
    all_tds_z_u = []

    try:
        tds = soup.find('div', class_="s_box").find(
            'h1', class_="proj_tophead").text.strip()
        tds_z = soup.find('p', class_="txt href_me").text.strip()
    except AttributeError:
        tds_z = '**-Для Бизнес-аккаунтов-**'

    # Rows 0, 2 and 3 of the project table hold title, "report" cell and a
    # second description paragraph respectively.
    try:
        for idx, tr in enumerate(soup.find('div', class_="s_box").find_all('tr')):
            if idx == 0:
                td_1 = tr.find('td').text.strip()
            if idx == 2:
                td_3 = tr.find('td').text.strip()
            if idx == 3:
                td_4 = tr.find('td').find('p', class_="txt href_me").text.strip()
    except AttributeError:
        pass

    # Row 4 holds the attached-files section; presumably every third <a>
    # is the actual file href (TODO confirm against the markup).
    try:
        for idx, tr in enumerate(soup.find('div', class_="s_box").find_all('tr')):
            if idx == 4:
                try:
                    tdtu = tr.find('td').find('h4').text.strip()
                except AttributeError:
                    tdtu = '---------1////////'
                try:
                    tdnu = tr.find('td').text.strip()  # NOTE(review): unused
                except AttributeError:
                    tdnu = '---------2////////'
                try:
                    for a_idx, a in enumerate(tr.find('td').find_all('a')):
                        if a_idx in (2, 5, 8, 11, 14, 17, 20):
                            all_tds_z_u.append(a.get('href'))
                except AttributeError:
                    pass
    except AttributeError:
        pass

    myString = '\n'.join(all_tds_z_u)

    # Assemble the full description, appending attachments when present.
    if td_3 == 'Пожаловаться':
        tds_z_3 = td_1 + '\n\n' + tds_z
    elif tdtu == 'Присоединенные файлы':
        tds_z_3 = (td_1 + '\n\n' + tds_z + '\n\n' + td_3 + '\n\n' + td_4
                   + '\n\n' + tdtu + '\n\n' + myString)
    else:
        tds_z_3 = td_1 + '\n\n' + tds_z + '\n\n' + td_3 + '\n\n' + td_4

    if tds_z != '**-Для Бизнес-аккаунтов-**':
        price = ''
        date_1 = ''
        date_2 = ''
        for idx, tr in enumerate(
                soup.find('div', class_="col-lg-12").find_all('tr')):
            if idx == 0:
                price = tr.find('td').find_next_sibling('td').text
            if idx == 3:
                date_1 = tr.find('td').find_next_sibling('td').text
            if idx == 4:
                date_2 = tr.find('td').find_next_sibling('td').text

        link = url
        show = tds
        # Prefer the first timestamp row, fall back to the second.
        stamp = date_1 if date_1 != '' else date_2
        date_p = stamp.split(' ')[0]
        time_p = stamp.split(' ')[1]
        image = 'images/freelance.png'
        ref_link = tds_z_3 if tds_z_3 != '' else tds_z

        try:
            # Renamed from `p`, which shadowed the table-row loop variable.
            record = Fl.objects.get(link=link)
            record.show = show
            record.price = price
            record.ref_link = ref_link
            record.date_p = date_p
            record.time_p = time_p
            record.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
            print('2 ', link)
    return tds_z