import pandas as pd
gold_data = pd.read_parquet("../../files/datasets/too_messy_to_melt/gold_relegation_attendance.parquet")
print(gold_data.head())
           Observation_ID  Relegation_Event_ID         Team   Season  Tier  \
0  Aston Villa 2015-16_-1  Aston Villa 2015-16  Aston Villa  2014-15     1   
1   Aston Villa 2015-16_0  Aston Villa 2015-16  Aston Villa  2015-16     1   
2   Aston Villa 2015-16_1  Aston Villa 2015-16  Aston Villa  2016-17     2   
3   Aston Villa 2015-16_2  Aston Villa 2015-16  Aston Villa  2017-18     2   
4     Barnsley 1997-98_-1     Barnsley 1997-98     Barnsley  1996-97     2   

   Position  Attendance  Year_vs_Relegation Year_End_Outcome  \
0        17       34133                  -1         Survived   
1        20       33690                   0        Relegated   
2        13       32107                   1         Survived   
3         4       32097                   2         Survived   
4         2       11356                  -1         Promoted   

   Games_Played_History  ...  Covid_Baseline_Review_Flag  \
0                    38  ...                       False   
1                    38  ...                       False   
2                    46  ...                       False   
3                    46  ...                       False   
4                    46  ...                       False   

   Pct_Change_vs_Baseline  Pct_Change_vs_Baseline_Display  \
0                0.013149                         1.31493   
1                     0.0                             0.0   
2               -0.046987                       -4.698724   
3               -0.047284                       -4.728406   
4               -0.384465                      -38.446528   

   Pct_Change_Analysis Outcome_Group  Goals_For_Per_Game Points_Per_Game  \
0                 <NA>      Survived            0.815789             1.0   
1                 <NA>      Survived            0.710526        0.447368   
2            -4.698724      Survived            1.021739        1.347826   
3            -4.728406      Survived            1.565217        1.804348   
4                 <NA>      Survived            1.652174         1.73913   

  Is_Structural_Anomaly           Plot_Group_Label  Is_Baseline_Substituted  
0                 False  Survived (Still in Champ)                    False  
1                 False  Survived (Still in Champ)                    False  
2                 False  Survived (Still in Champ)                    False  
3                 False  Survived (Still in Champ)                    False  
4                 False  Survived (Still in Champ)                    False  

[5 rows x 35 columns]
gold_data["Relegation_Season_Index"] = (
    gold_data["Relegation_Event_ID"].astype(str)
    + " | Y"
    + gold_data["Year_vs_Relegation"].astype(str)
)
assert gold_data["Relegation_Season_Index"].is_unique
dupes = (
    gold_data
    .groupby(["Relegation_Event_ID", "Year_vs_Relegation"])
    .size()
    .reset_index(name="n")
    .query("n > 1")
    .sort_values("n", ascending=False)
)

print(dupes.head(100))
Empty DataFrame
Columns: [Relegation_Event_ID, Year_vs_Relegation, n]
Index: []
silver_data = pd.read_parquet("../../files/datasets/too_messy_to_melt/silver_relegation_attendance.parquet")
print(silver_data.head())
dupes_silver = (
    silver_data
    .groupby(["Relegation_Event_ID", "Year_vs_Relegation"])
    .size()
    .reset_index(name="n")
    .query("n > 1")
    .sort_values("n", ascending=False)
)

print(dupes_silver.head(100))
           Observation_ID  Relegation_Event_ID         Team   Season  Tier  \
0  Aston Villa 2015-16_-1  Aston Villa 2015-16  Aston Villa  2014-15     1   
1   Aston Villa 2015-16_0  Aston Villa 2015-16  Aston Villa  2015-16     1   
2   Aston Villa 2015-16_1  Aston Villa 2015-16  Aston Villa  2016-17     2   
3   Aston Villa 2015-16_2  Aston Villa 2015-16  Aston Villa  2017-18     2   
4     Barnsley 1997-98_-1     Barnsley 1997-98     Barnsley  1996-97     2   

   Position  Attendance  Year_vs_Relegation Year_End_Outcome  \
0        17       34133                  -1         Survived   
1        20       33690                   0        Relegated   
2        13       32107                   1         Survived   
3         4       32097                   2         Survived   
4         2       11356                  -1         Promoted   

   Games_Played_History  Wins_History  Goals_For_History  \
0                    38            10                 31   
1                    38             3                 27   
2                    46            16                 47   
3                    46            24                 72   
4                    46            22                 76   

   Goals_Against_History  Points_History  
0                     57              38  
1                     76              17  
2                     48              62  
3                     42              83  
4                     55              80  
Empty DataFrame
Columns: [Relegation_Event_ID, Year_vs_Relegation, n]
Index: []