Example #1
def getCoursesCards(soup, isSetCategories=False):
    selector = 'div.row.course-list.list > div'
    cards = soup.select(selector)

    data = []
    for card in cards:
        data.append(extractCourseCard(card))

    if isSetCategories:
        s = Scraper()

        categoriesUrls = getCategoriesURLs(soup)
        for category in categoriesUrls:
            name = category.get('name', '')
            url = category.get('url')

            s.get(url)
            tempSoup = s.html_soup()
            categoryCards = getCoursesCards(tempSoup)
            ids = {c['publicId'] for c in categoryCards}  # set for fast membership checks

            for course in data:
                if course['publicId'] in ids:
                    if course.get('categories'):
                        course['categories'].append(name)
                    else:
                        course['categories'] = [name]

    return data
Example #2
def teams_competing_this_week():
    # Create a list of team names
    teams = []

    # Get all weeks and their dates for the season
    scraper = Scraper()
    weeks = scraper.get_year_weeks(ScraperConstants.Men,
                                   datetime.datetime.now().year)
    # Get date to scrape this week
    # Fixes index error once in the postseason
    try:
        date = [week for week in weeks if int(week['current']) == 1][0]['date']
    except IndexError:
        return teams
    # Gets the schedule for this week
    schedule = scraper.get_schedule(ScraperConstants.Men, date)

    # For each day in the schedule with a meet on it
    for day in schedule:
        # For each meet on that day
        for meet in schedule[day]['meets']:
            # Collect the away and home team names for the meet
            if meet['away_teams'] is not None:
                for team in meet['away_teams'].split(", "):
                    teams.append(team)
            if meet['home_teams'] is not None:
                for team in meet['home_teams'].split(", "):
                    teams.append(team)
    return teams
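The current-week lookup in this example (a list comprehension guarded by try/except IndexError) can also be written with next() and a default; a minimal sketch of the same pattern, with a hypothetical helper name:

def current_week_date(weeks):
    # Date of the week flagged as current, or None once in the postseason
    return next((week['date'] for week in weeks if int(week['current']) == 1),
                None)

With a helper like this, the try/except collapses to an early return when the result is None.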
Example #3
    def handle(self, *args, **options):
        scraper = Scraper()
        start_time = round(time.time() * 1000)

        print("Getting list of all teams for %s" % options['year'])
        # Try to get the list of all teams
        try:
            teams = scraper.get_teams(ScraperConstants.Men, options['year'])
            print("Found %s teams" % len(teams))
        except Exception:
            traceback.print_exc()
            return

        # For each team
        new_gymnasts = []
        num_skipped = 0
        for team in teams:
            # Get the roster for the team
            try:
                print("Getting roster for %s" % team['team_name'])
                roster = scraper.get_roster(ScraperConstants.Men,
                                            options['year'], team['id'])
            except Exception:
                traceback.print_exc()
                return

            # For each gymnast on the roster, make a new Gymnast object and save it to the database
            for gymnast in roster:
                name = "%s %s" % (gymnast['fname'].strip(),
                                  gymnast['lname'].strip())
                g = Gymnast(name=name,
                            rtn_id=gymnast['id'],
                            team=team['team_name'],
                            year=GYMNAST_YEARS[gymnast['school_year']])

                # Check if the gymnast already exists in the database to avoid re-adding
                if Gymnast.objects.filter(rtn_id=gymnast['id']).exists():
                    num_skipped += 1
                else:
                    new_gymnasts.append(g)

        # Save new gymnasts to database
        for gymnast in new_gymnasts:
            gymnast.save()
            events = ['FX', 'PH', 'SR', 'VT', 'PB', 'HB']
            for event in events:
                Average.objects.create(gymnast=gymnast,
                                       score=decimal.Decimal(0.00),
                                       event=event,
                                       number_of_scores=0)

        print("")
        print("------ RESULTS ------")
        print("Added %s gymnasts" % len(new_gymnasts))
        print("Skipped %s existing gymnasts" % num_skipped)
        print("Took %s ms" % (round(time.time() * 1000) - start_time))
Example #4
def weekly_news(request):
    context = {}
    scraper = Scraper()
    context['current_week'] = int(
        scraper.get_current_and_max_week(ScraperConstants.Men,
                                         datetime.datetime.now().year)['week'])
    context['posts'] = Post.objects.filter(
        status=1, week=context['current_week']).order_by('-posted_at')
    context['lineup'] = 0
    context['platform'] = 1
    template_name = 'news/weekly_news.html'
    return render(request, template_name, context)
Example #5
def remove_gymnast_from_roster(request, team_pk, gymnast_pk):
    scraper = Scraper()
    current_week = int(
        scraper.get_current_and_max_week(ScraperConstants.Men,
                                         datetime.datetime.now().year)['week'])

    gymnast = get_object_or_404(Gymnast, pk=gymnast_pk)
    team = get_object_or_404(FantasyTeam, pk=team_pk)
    team.roster.remove(gymnast)
    league = team.league
    lineups = LineUp.objects.filter(team=team, week=current_week)
    for lineup in lineups:
        lineup.gymnasts.remove(gymnast)
    league.drafted.remove(gymnast)
    return redirect('view_team', pk=team_pk)
Example #6
    def get_context_data(self, **kwargs):
        scraper = Scraper()
        context = super().get_context_data(**kwargs)
        if context['object'].team2.user == self.request.user:
            context['team1'] = context['object'].team2
            context['team2'] = context['object'].team1
        else:
            context['team1'] = context['object'].team1
            context['team2'] = context['object'].team2
        lineups = (LineUp.objects.filter(team=context['team1'], week=context['object'].week)
                   | LineUp.objects.filter(team=context['team2'], week=context['object'].week))
        gymnasts = (Gymnast.objects.filter(LineUp__in=lineups)
                    | Gymnast.objects.filter(id__in=(context['team1'].roster.all()
                                                     | context['team2'].roster.all()))).distinct()
        current_week = int(
            scraper.get_current_and_max_week(ScraperConstants.Men,
                                             datetime.datetime.now().year)['week'])

        context['current_week'] = current_week
        context['teams_competing'] = teams_competing_this_week()
        
        context['meet_started'] = {}  # Could this be optimized?
        weeks = scraper.get_year_weeks(ScraperConstants.Men, datetime.datetime.now().year)
        date = [week for week in weeks
                if int(week['wk']) == int(context['object'].week)][0]['date']
        schedule = scraper.get_schedule(ScraperConstants.Men, date)
        
        # Loops through every meet day this week
        for day in schedule:
            # Loops through every meet on day
            for meet in schedule[day]['meets']:
                # Loops through gymnasts
                for gymnast in gymnasts:  # Could this be optimized?
                    # Checks if the gymnast's team is in this meet
                    if (gymnast.team in str(meet['home_teams'])
                            or gymnast.team in str(meet['away_teams'])):  # Could this be optimized?
                        # Checks if this is the gymnast's first meet of the week
                        if gymnast.name not in context['meet_started']:
                            # Meet start datetime
                            meet_datetime = datetime.datetime.strptime(
                                str(meet['d']) + " " + str(meet['time']),
                                "%Y-%m-%d %H:%M:%S")
                            # Current datetime (Eastern, because that's what RTN uses)
                            now = datetime.datetime.now(timezone('US/Eastern'))
                            if now.date() > meet_datetime.date():
                                context['meet_started'][gymnast.name] = True
                            elif now.date() == meet_datetime.date():
                                if meet_datetime.time() != datetime.time(0, 0, 0):
                                    if now.time() > meet_datetime.time():
                                        context['meet_started'][gymnast.name] = True
                                    else:
                                        context['meet_started'][gymnast.name] = False
                                else:
                                    if now.time() >= datetime.time(12, 0, 0):
                                        context['meet_started'][gymnast.name] = True
                                    else: 
                                        context['meet_started'][gymnast.name] = False
                            else:
                                context['meet_started'][gymnast.name] = False
        return context
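The meet_started branching above (it reappears in Example #19) boils down to a small predicate. A minimal sketch under the same assumption the original makes, that a 00:00:00 start time means no time was listed and noon Eastern is the cutoff; the helper name is hypothetical:

import datetime

def meet_has_started(now, meet_datetime):
    # On a different day, the meet has started iff its date has passed
    if now.date() != meet_datetime.date():
        return now.date() > meet_datetime.date()
    # Same day with a listed start time: compare clock times
    if meet_datetime.time() != datetime.time(0, 0, 0):
        return now.time() > meet_datetime.time()
    # Same day, no listed time (00:00:00): assume started by noon
    return now.time() >= datetime.time(12, 0, 0)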
Example #7
def test_description_dictionary():
    dict_ = {
        "Type": "Residential",
        "Style": "2 Storey",
        "Lot Size": "0.115 Ac",
        "MLS Number": "PW20120310",
        "Year Built": "2012",
        "Parking info": "2, Attached",
        "Zip": "92886",
        "School District": "Placentia-Yorba Linda Unified School District",
    }

    scraper = Scraper()
    assert scraper.description_dictionary(soup) == dict_
Example #8
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        context['teams'] = FantasyTeam.objects.filter(
            league_id=context['object'].id).order_by('-wins', 'name')
        scraper = Scraper()
        # Fetch the week info once and reuse it for both values
        week_info = scraper.get_current_and_max_week(
            ScraperConstants.Men, datetime.datetime.now().year)
        context['current_week'] = int(week_info['week'])
        context['max_week'] = int(week_info['max'])
        context['matchups'] = Matchup.objects.filter(
            team1__in=context['teams'])
        return context
Example #9
def extraBlogData(url):
    s = Scraper()
    s.get(url)
    soup = s.html_soup()

    data = {
        'publicId': url.split('/')[4],
    }

    thumbnailElm = soup.select_one('.post-content img')

    data.update({
        'thumbnail': thumbnailElm['src'] if thumbnailElm else None,
    })

    return data
Example #10
def test_demographics_dictionary():
    dict_ = {
        "Total population": "50,545",
        "Male population": "24,484",
        "Female population": "26,061",
        "Median age": "42.80",
        "Total households": "16,559",
        "Average people per household": "3.03",
        "Total housing units": "17,062",
        "Owner occupied": "13,469",
        "Renter occupied": "3,090",
        "Median year built": "1979",
        "Median household income": "123,737",
        "Average household income": "154,190",
    }

    scraper = Scraper()
    assert scraper.demographics_dictionary(soup) == dict_
Example #11
def getCoachingCards(soup):
    selector = 'div.row.services-row.list > div'
    cards = soup.select(selector)

    data = {
        'total': [],
        'coach': [],
        'live event': [],
        'course': [],
        'description': []
    }

    for card in cards:
        data['total'].append(extractCoachingCard(card))

    s = Scraper()

    for cardData in data['total']:
        url = cardData.get('url')
        s.get(url)
        tempSoup = s.html_soup()

        descriptionContentElm = tempSoup.select_one(
            '.course-block.custom_html')

        types = ['description', 'course', 'coach', 'live event']
        if not (descriptionContentElm
                and any(t in descriptionContentElm.text for t in types)):
            continue

        descriptionContent = descriptionContentElm.text
        cardData.update(parseCoaching(descriptionContent))

        card = handleCoachingCardData(cardData)
        data[card.pop('type')].append(card)

    data.pop('total')
    return data
Example #12
def getCourses():
    allCourses = []

    s = Scraper()
    pageNum = 1
    # Walk the paginated course list until an empty page comes back
    while True:
        coursesUrl = f'{protocol}://{domain}/courses?page={pageNum}'
        s.get(coursesUrl)
        coursesSoup = s.html_soup()

        courses = getCoursesCards(coursesSoup, isSetCategories=True)
        if not courses:
            break

        allCourses.extend(courses)
        pageNum += 1

    return allCourses
Example #13
def getTheGoodZoneDataAndMyPathes():

    s = Scraper()

    coursesUrl = f'{protocol}://{domain}/courses'
    s.get(coursesUrl)
    coursesSoup = s.html_soup()

    blogsUrl = f'{protocol}://{domain}/blog'
    s.get(blogsUrl)
    blogsSoup = s.html_soup()

    # Scrape the coaching data once and reuse it for each section below
    coaches = getCoaches(coursesSoup)

    return [
        {
            'delPath': '/delete/courses/',
            'setPath': '/set/course/',
            'items': getCourses()
        },
        {
            'delPath': '/delete/coaches/',
            'setPath': '/set/coach/',
            'items': coaches['coach']
        },
        {
            'delPath': '/delete/instructors/',
            'setPath': '/set/instructor/',
            'items': coaches['course']
        },
        {
            'delPath': '/delete/live-events/',
            'setPath': '/set/live-event/',
            'items': coaches['live event']
        },
        {
            'setPath': '/set-course-description/',
            'items': coaches['description']
        },
        {
            'delPath': '/delete/blogs/',
            'setPath': '/set/blog/',
            'items': getBlogs(blogsSoup)
        },
    ]
Example #14
def test_page_type():
    scraper = Scraper()
    assert isinstance(soup, BeautifulSoup)
Example #15
def test_get_price():
    scraper = Scraper()
    assert scraper.get_price(soup) == 998888
Example #16
def test_get_bedrooms():
    scraper = Scraper()
    assert scraper.get_bedrooms(soup) == 4
Example #17
def test_get_baths():
    scraper = Scraper()
    assert scraper.get_baths(soup) == 3
Example #18
def create_team(user, league):
    scraper = Scraper()
    team = FantasyTeam.objects.create(user=user,
                                      league=league,
                                      name=str(user.username) + "'s Team")
Example #19
    def get_context_data(self, **kwargs):
        scraper = Scraper()
        context = super().get_context_data(**kwargs)
        context["roster"] = context["object"].roster.all()
        context['current_week'] = int(
            scraper.get_current_and_max_week(
                ScraperConstants.Men,
                datetime.datetime.now().year)['week'])
        context["lineups"] = LineUp.objects.filter(
            team=context['object'],
            week=context['current_week']).order_by('pk')
        context['teams_competing'] = teams_competing_this_week()

        context['meet_started'] = {}  # Could this be optimized?
        weeks = scraper.get_year_weeks(ScraperConstants.Men,
                                       datetime.datetime.now().year)

        # Fixes index error once in the postseason
        try:
            date = [
                week for week in weeks
                if int(week['wk']) == context['current_week']
            ][0]['date']
        except IndexError:
            return context

        schedule = scraper.get_schedule(ScraperConstants.Men, date)
        gymnasts = context["roster"]
        # Loops through every meet day this week
        for day in schedule:
            # Loops through every meet on day
            for meet in schedule[day]['meets']:
                # Loops through gymnasts
                for gymnast in gymnasts:  # Could this be optimized?
                    # Checks if the gymnast's team is in this meet
                    if gymnast.team in str(
                            meet['home_teams']) or gymnast.team in str(
                                meet['away_teams']):  # Could this be optimized?
                        # Checks if this is the gymnast's first meet of the week
                        if gymnast.name not in context['meet_started']:
                            # Meet start datetime
                            meet_datetime = datetime.datetime.strptime(
                                str(meet['d']) + " " + str(meet['time']),
                                "%Y-%m-%d %H:%M:%S")
                            # Current datetime (Eastern, because that's what RTN uses)
                            now = datetime.datetime.now(timezone('US/Eastern'))
                            if now.date() > meet_datetime.date():
                                context['meet_started'][gymnast.name] = True
                            elif now.date() == meet_datetime.date():
                                if meet_datetime.time() != datetime.time(
                                        0, 0, 0):
                                    if now.time() > meet_datetime.time():
                                        context['meet_started'][
                                            gymnast.name] = True
                                    else:
                                        context['meet_started'][
                                            gymnast.name] = False
                                else:
                                    if now.time() >= datetime.time(12, 0, 0):
                                        context['meet_started'][
                                            gymnast.name] = True
                                    else:
                                        context['meet_started'][
                                            gymnast.name] = False
                            else:
                                context['meet_started'][gymnast.name] = False

        return context
Example #20
# Load authenticated session from file to prevent unnecessary logins:

from scraper.DBConnection import DBConnection
from scraper.Scraper import Scraper
import argparse

parser = argparse.ArgumentParser(description='Fetch questions with solutions from Quizduell Germany')
parser.add_argument('username', type=str, help='The username of the quizduell account')
parser.add_argument('password', type=str, help='The password of the quizduell account')

parser.add_argument('--db', metavar='--db', type=str, help='Path to the sqlite database', dest='db_file',
                    default='Quizduell.sqlite', required=False)

args = parser.parse_args()

scraper = Scraper(args.username, args.password)
connection = DBConnection(args.db_file)

while True:
    for i in range(10):
        if i == 0:
            scraper.give_up_all()
            connection.commit()
            print(connection.count_questions())
        game = scraper.fetch_game()
        connection.add_question(game)
Example #21
    def handle(self, *args, **options):
        scraper = Scraper()
        start_time = round(time.time() * 1000)

        print("Getting all matchups for week %s" % options['week'])
        try:
            # Get all weeks and their dates for the season
            weeks = scraper.get_year_weeks(ScraperConstants.Men,
                                           options['year'])
            # Get date to scrape specified week
            date = [
                week for week in weeks if int(week['wk']) == options['week']
            ][0]['date']
        except Exception:
            traceback.print_exc()
            return

        try:
            # Get schedule for the week
            schedule = scraper.get_schedule(ScraperConstants.Men, date)
        except Exception:
            traceback.print_exc()
            return

        # Create a list of (meet id, day of meet, meet name)
        meets = []
        # For each day in the schedule with a meet on it
        for day in schedule:
            # For each meet on that day
            for meet in schedule[day]['meets']:
                # Create a name for the meet depending on home vs. away teams or virtual
                away_teams = meet['away_teams']
                home_teams = meet['home_teams']
                if home_teams is None:
                    meet_name = "%s (Virtual)" % away_teams
                else:
                    meet_name = "%s @ %s" % (away_teams, home_teams)

                # Add to list of (meet id, day of meet, meet name)
                meets.append((meet['meet_id'], day, meet_name))

        # Keep track of new scores added and number skipped
        scores = []
        num_skipped = 0
        # Go through list of (meet id, day of meet, meet name)
        for meet_id, day, meet_name in meets:
            print("Getting meet results for %s" % meet_name)
            # Get the meet's results
            try:
                meet_results = scraper.get_meet_results(
                    ScraperConstants.Men, meet_id)
            except Exception:
                traceback.print_exc()
                return

            # Get the scores of every person who competed in the meet and save them
            # For each event in the meet's results
            for event_index_name in EVENT_NAMES_DICT:
                # Get each score for the event
                for result in meet_results[event_index_name]:
                    # Look up the gymnast who had the score
                    gymnast = Gymnast.objects.filter(
                        rtn_id=result['gid']).first()
                    # Create a new score object
                    score = Score(event=EVENT_NAMES_DICT[event_index_name],
                                  score=float(result['score']),
                                  gymnast=gymnast,
                                  date=day,
                                  meet=meet_name,
                                  week=options['week'])

                    # Check if the score already exists in the database
                    if Score.objects.filter(
                            gymnast=gymnast,
                            date=day,
                            event=EVENT_NAMES_DICT[event_index_name],
                            week=options['week']).exists():
                        num_skipped += 1
                    else:
                        scores.append(score)

        # Save new scores to the database
        for score in scores:
            average = Average.objects.get(gymnast=score.gymnast,
                                          event=score.event)
            average.number_of_scores += 1
            average.score = (
                (average.score * (average.number_of_scores - 1)) +
                decimal.Decimal(score.score)) / average.number_of_scores
            average.save()
            score.save()

        print("")
        print("------ RESULTS ------")
        print("Added %s scores" % len(scores))
        print("Skipped %s existing scores" % num_skipped)
        print("Took %s ms" % (round(time.time() * 1000) - start_time))
Example #22
def default():
    scraper = Scraper()
    return scraper.scrap()
Example #23
def scrape(source, sourceName):
    status = 1

    # scrape ElMostrador
    if source == "ElMostrador":
        scraper = Scraper()
        scraper.setScrapingStrategy(ElMostradorScrapingStrategy())
        try:
            scraper.doScrape(source, "ElMostrador")
        except Exception:
            print("Unexpected error:", sys.exc_info()[0])
            status = 2
    # scrape Emol
    elif source == "Emol":
        scraper = Scraper()
        scraper.setScrapingStrategy(EmolScrapingStrategy())
        try:
            scraper.doScrape(source, "Emol")
        except Exception:
            print("Unexpected error:", sys.exc_info()[0])
            status = 2
    # scrape generic
    else:
        scraper = Scraper()
        scraper.setScrapingStrategy(GenericScrapingStrategy())
        try:
            scraper.doScrape(source, sourceName)
        except Exception:
            print("Unexpected error:", sys.exc_info()[0])
            status = 2

    # On success return 1; on any error return 2
    return status
Example #24
def test_get_sqm():
    scraper = Scraper()
    assert scraper.get_sqm(soup) == 232.26
Example #25
def test_get_lot_size():
    scraper = Scraper()
    assert scraper.get_lot_size(soup) == 0.115
Example #26
def in_lineup_current_week(gymnast, team):
    scraper = Scraper()
    current_week = int(
        scraper.get_current_and_max_week(ScraperConstants.Men,
                                         datetime.now().year)['week'])
    return gymnast.LineUp.filter(week=current_week, team=team).exists()
Example #27
    def receive(self, text_data):
        text_data_json = json.loads(text_data)
        gymnast_pk = text_data_json['gymnast_pk']
        user = self.scope['user']
        # Get league
        league = League.objects.filter(pk=self.league_pk).first()
        # Get user's team
        team = FantasyTeam.objects.filter(user=user, league=self.league_pk).first()
        
        # Get the position that is up to draft
        currently_drafting = league.currently_drafting
        # Check if the user who sent the draft request is currently up
        if team.draft_position == currently_drafting and not league.draft_complete and league.draft_started:
            # Do something here with the gymnast_pk and the team
            gymnast = get_object_or_404(Gymnast, pk=gymnast_pk)
            if gymnast not in league.drafted.all() and team.roster.count() < league.roster_size:
                team.roster.add(gymnast)
                league = team.league
                league.drafted.add(gymnast)

                # Snake draft, initially going down
                num_teams = FantasyTeam.objects.filter(league=self.league_pk).count()
                if league.going_down:
                    # If last person is drafting
                    if league.currently_drafting == (num_teams - 1):
                        # Give them another turn and start going up
                        league.going_down = False
                    else:
                        league.currently_drafting += 1
                else:
                    # If first person is drafting on way back up
                    if league.currently_drafting == 0:
                        # Give first person another chance and start going down
                        league.going_down = True
                    else:
                        league.currently_drafting -= 1
                league.save()

                if league.drafted.count() == league.roster_size * num_teams:
                    # Drafting is done
                    scraper = Scraper()
                    year = datetime.date.today().year
                    num_weeks = int(scraper.get_current_and_max_week(ScraperConstants.Men, year)['max'])
                    matchups = round_robin_matchups(num_teams, num_weeks)
                    team_pks = [x.pk for x in list(FantasyTeam.objects.filter(league__pk=self.league_pk))]
                    # Creates matchups for entire season
                    for week in matchups:
                        for matchup in matchups[week]:
                            team1_pk = team_pks[matchup[0] - 1]
                            team2_pk = team_pks[matchup[1] - 1]
                            team1 = FantasyTeam.objects.filter(pk=team1_pk).first()
                            team2 = FantasyTeam.objects.filter(pk=team2_pk).first()
                            m = Matchup(team1=team1, team2=team2, league=league, week=week)
                            m.save()
                            # Creates lineups for entire season
                            events = ['FX', 'PH', 'SR', 'VT', 'PB', 'HB']
                            for event in events:
                                if not LineUp.objects.filter(team=team1, event=event, week=week).exists():
                                    LineUp.objects.create(team=team1, event=event, week=week)
                                if not LineUp.objects.filter(team=team2, event=event, week=week).exists():
                                    LineUp.objects.create(team=team2, event=event, week=week)
  

                    league.draft_complete = True
                    async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                        'type': 'draft_complete',
                    })

                # PERFORM CHECK AND AUTO DRAFT HERE
                league.save()

                # Send message to rest of draft group
                async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                    'type': 'gymnast_drafted',
                    'gymnast_pk': gymnast_pk,
                    'gymnast_name': gymnast.name,
                    'team_pk': team.pk,
                    'team_name': team.name,
                    'ncaa_team_name': gymnast.team,
                    'position_currently_drafting': league.currently_drafting,
                })
            else:
                print("DRAFTING ERROR")
                async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                    'type': 'gymnast_draft_error',
                    'error': 'Gymnast has already been drafted'
                })
        else:
            async_to_sync(self.channel_layer.group_send)(self.draft_group, {
                'type': 'gymnast_draft_error',
                'error': 'Not your turn to draft'
            })
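The going_down bookkeeping above implements a snake draft: the pick order reverses each round, so the team at either end drafts twice in a row at the turn. A minimal sketch of the resulting order; the helper name is hypothetical:

def snake_order(num_teams, num_rounds):
    # Draft positions 0..n-1, reversing direction every round
    order = []
    for rnd in range(num_rounds):
        picks = range(num_teams)
        order.extend(picks if rnd % 2 == 0 else reversed(picks))
    return order

# Four teams, two rounds: position 3 picks back-to-back at the turn
assert snake_order(4, 2) == [0, 1, 2, 3, 3, 2, 1, 0]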