Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _current_run_length(outcomes):
    """Return each value's 1-based position within its run of equal consecutive values."""
    # A new run starts wherever the value differs from the previous one.
    run_id = (outcomes != outcomes.shift()).cumsum()
    return outcomes.groupby(run_id).cumcount() + 1


def _add_side_metrics(result, side, teams, wins, scored, conceded, games):
    """Write the five ``<side>_*`` metric columns for one side into ``result``.

    ``teams``, ``wins``, ``scored`` and ``conceded`` are Series in chronological
    order that share ``result``'s index labels, so assigning the computed
    Series back to ``result`` realigns them to the original row order.
    """
    def per_team_rolling_mean(values):
        return values.groupby(teams).transform(
            lambda s: s.rolling(window=games, min_periods=1).mean()
        )

    result[side + '_recent_win_rate'] = per_team_rolling_mean(wins)
    result[side + '_avg_goals_scored'] = per_team_rolling_mean(scored)
    result[side + '_avg_goals_conceded'] = per_team_rolling_mean(conceded)

    # The run length counts consecutive identical outcomes, so it is only a
    # winning streak while the run is made of wins (and only a losing streak
    # while it is made of non-wins); the other column must be 0.
    run_length = wins.groupby(teams).transform(_current_run_length)
    result[side + '_winning_streak'] = run_length.where(wins == 1, 0)
    result[side + '_losing_streak'] = run_length.where(wins == 0, 0)


def calculate_recent_performance(df, games=10):
    """
    Calculate recent performance metrics for the home and away team of each game.

    For every row, the home team's metrics are computed over that team's home
    games only, and the away team's metrics over that team's away games only.
    Each metric includes the row's own game (nothing is shifted), so shift the
    columns per team if pre-game features are needed.

    Added columns, for ``side`` in ``home`` / ``away``:

    - ``<side>_recent_win_rate``, ``<side>_avg_goals_scored``,
      ``<side>_avg_goals_conceded``: rolling means over the last ``games``
      games (fewer while the team has less history).
    - ``<side>_winning_streak``: consecutive wins up to and including this
      game, 0 if this game was not a win.
    - ``<side>_losing_streak``: consecutive non-wins up to and including this
      game, 0 if this game was a win.

    An away win is taken to be ``home_win == 0``.
    NOTE(review): if ``home_win`` is 0 for draws, draws count as away wins --
    confirm against the data source.

    :param df: DataFrame containing the game data. Must provide the columns
        ``game_date``, ``home_team``, ``away_team``, ``home_win``,
        ``score_home`` and ``score_away``. It is not modified.
    :param games: Number of recent games to calculate metrics for.
    :return: New DataFrame with the rows of ``df`` in their original order, a
        fresh ``RangeIndex``, and the metric columns appended.
    """
    # Fresh 0..n-1 index: gives unique labels to align on and matches the
    # index the previous merge-based implementation returned.
    result = df.reset_index(drop=True)

    # Stable sort so several games on the same date keep their input order.
    # Assigning by index (instead of merging on date + team) keeps exactly one
    # output row per input row even when a team plays twice on one date.
    ordered = result.sort_values('game_date', kind='mergesort')

    home_wins = ordered['home_win']
    away_wins = (home_wins == 0).astype(int)

    _add_side_metrics(
        result, 'home', ordered['home_team'],
        home_wins, ordered['score_home'], ordered['score_away'], games,
    )
    _add_side_metrics(
        result, 'away', ordered['away_team'],
        away_wins, ordered['score_away'], ordered['score_home'], games,
    )
    return result
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement