# Importing the necessary packages to scrape the data

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup as bs
import requests
from tqdm import tqdm
import re


# Retrieving the URL that contains links to all fight data

url = "http://www.ufcstats.com/statistics/events/completed?page=all"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15"}


# Fetch the index page that lists the completed events.
html = requests.get(url, headers=headers)
# Fail loudly on an HTTP error rather than parsing an error page into a
# meaningless list of links.
html.raise_for_status()
soup = bs(html.content, "html.parser")


# Putting each link on the home page into a list

first_layer = [
    link['href']
    for link in tqdm(soup.find_all('a', href=True), desc="Scraping First Layer")
]

Scraping First Layer: 100%|███████████████| 694/694 [00:00<00:00, 293698.62it/s]


# Removing all links that do not contain relevant fight data
# NOTE(review): this drops the first 11 and the last 16 hrefs purely by
# position, so it depends on the page layout at scrape time (presumably
# navigation/footer links — confirm if the site changes).

first_layer = first_layer[11:-16]


# Extracting Data From the "first layer" of the UFC Stats site and creating a dataframe

# Collect one frame per event and concatenate once at the end: calling
# pd.concat inside the loop re-copies every previously scraped row on each
# iteration.
event_frames = []

for url in tqdm(first_layer, desc="Scraping First Layer"):
    html = requests.get(url, headers=headers)
    # Surface HTTP errors here instead of failing later on a missing table.
    html.raise_for_status()
    soup = bs(html.content, "html.parser")

    # Only the first <table> on the event page is parsed.
    table = soup.find("table")
    data = pd.read_html(str(table))
    adj_data = pd.concat(data)

    # Tag every row with the event title taken from the page heading.
    event = soup.find("h2", class_="b-content__title").text.strip()
    adj_data["Event"] = event

    event_frames.append(adj_data)

# An empty link list yields an empty frame, as the original accumulator did.
main_dataframe = (
    pd.concat(event_frames, ignore_index=True) if event_frames else pd.DataFrame()
)

main_dataframe.head()

Scraping First Layer: 100%|███████████████████| 667/667 [04:59<00:00,  2.22it/s]


# Persist the event-level table. to_csv writes the index as an unnamed first
# column by default; that is the "Unnamed: 0" column dropped after re-loading.
main_dataframe.to_csv('ufc_initial_file.csv')


# Getting the links for each fight

second_layer = []

with tqdm(first_layer, desc="Scraping Links") as layer:
    for event_url in layer:
        html = requests.get(event_url, headers=headers)
        # Surface HTTP errors instead of silently collecting no links.
        html.raise_for_status()
        soup = bs(html.content, "lxml")

        # Keep only hrefs that point at a fight-details page. The anchors get
        # their own name so the outer loop variable is not shadowed.
        second_layer.extend(
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if "fight-details" in anchor['href']
        )

Scraping Links: 100%|█████████████████████████| 667/667 [04:40<00:00,  2.37it/s]


# Creating a dataframe and scraping the data

# Columns kept from each fight page for this dataset.
totals_columns = ["Fighter", "Sig. str.", "Td", "Rev.", "Ctrl"]

# One pass over every fight link. The previous version repeated this loop for
# hand-written slices ending at index 7340, which silently skips any links
# beyond that position and re-copied the accumulated frame on every iteration.
totals_frames = []

for url in tqdm(second_layer, desc="Scraping Second Layer"):
    html = requests.get(url, headers=headers)
    # Surface HTTP errors here instead of failing later on a missing table.
    html.raise_for_status()
    soup = bs(html.content, "lxml")

    # Parse every <table> on the fight page into its own frame.
    table_dataframes = [
        pd.read_html(str(table))[0] for table in soup.find_all("table")
    ]

    event_name = soup.find("h2", class_="b-content__title").text.strip()

    # Stack the tables, keep the wanted columns and drop rows that do not
    # have all of them.
    combined_table = pd.concat(table_dataframes, ignore_index=True)[totals_columns].dropna()
    combined_table["Event"] = event_name

    totals_frames.append(combined_table)

# An empty link list yields an empty frame, as the original accumulator did.
main_df_1 = (
    pd.concat(totals_frames, ignore_index=True) if totals_frames else pd.DataFrame()
)

main_df_1.head()

Scraping Second Layer: 100%|████████████████| 1340/1340 [13:18<00:00,  1.68it/s]


# Write under the name the cleaning step reads back ("ufc_third.csv"); the
# previous target "ufc_second_csv" had no extension and was never loaded.
main_df_1.to_csv("ufc_third.csv")


# Scraping the strike-location / strike-position columns for every fight

# Columns kept from each fight page for this dataset. "Ctrl" is deliberately
# not included: one of the previous copy-pasted loops added it for a single
# slice, which left a mostly-empty "Ctrl" column here that collides with the
# "Ctrl" column of the other fight-level file when the two are merged.
strike_columns = ["Fighter", "Head", "Body", "Leg", "Distance", "Clinch", "Ground"]

# One pass over every fight link instead of hand-written slices ending at
# index 7340, collecting frames and concatenating once at the end.
strike_frames = []

for url in tqdm(second_layer, desc="Scraping Second Layer"):
    html = requests.get(url, headers=headers)
    # Surface HTTP errors here instead of failing later on a missing table.
    html.raise_for_status()
    soup = bs(html.content, "lxml")

    # Parse every <table> on the fight page into its own frame.
    table_dataframes = [
        pd.read_html(str(table))[0] for table in soup.find_all("table")
    ]

    event_name = soup.find("h2", class_="b-content__title").text.strip()

    # Stack the tables, keep the wanted columns and drop rows that do not
    # have all of them.
    combined_table = pd.concat(table_dataframes, ignore_index=True)[strike_columns].dropna()
    combined_table["Event"] = event_name

    strike_frames.append(combined_table)

# An empty link list yields an empty frame, as the original accumulator did.
main_dataframe_1 = (
    pd.concat(strike_frames, ignore_index=True) if strike_frames else pd.DataFrame()
)

main_dataframe_1.head()


main_dataframe_1.to_csv('ufc_compiled.csv')


data_1 = pd.read_csv("ufc_initial_file.csv")
data_1.head()


# Discard the saved index column plus the columns not used later, and trim
# surrounding whitespace from the fighter names
unused_columns = ["Unnamed: 0", "W/L", "Time"]
data_1 = data_1.drop(columns=unused_columns)
data_1["Fighter"] = data_1["Fighter"].str.strip()
data_1.head()


def convert_to_int(x):
    """Turn a list of tokens into a list of ints.

    String tokens other than '--' are converted with int(); '--' and any
    non-string token become 0. Anything that is not a list yields [0].
    """
    if not isinstance(x, list):
        return [0]

    converted = []
    for token in x:
        if isinstance(token, str) and token != '--':
            converted.append(int(token))
        else:
            converted.append(0)
    return converted

# Split each paired stat cell on whitespace and convert the pieces to ints
for stat_column in ("Kd", "Str", "Td", "Sub"):
    stat_tokens = data_1[stat_column].str.split()
    data_1[stat_column] = stat_tokens.apply(convert_to_int)

data_1.head()


data_1.dtypes

Fighter         object
Kd              object
Str             object
Td              object
Sub             object
Weight class    object
Method          object
Round            int64
Event           object
dtype: object


# Loading the second dataset
data_2 = pd.read_csv("ufc_compiled.csv")
data_2.head()


# Loading the third dataset
data_3 = pd.read_csv("ufc_third.csv")
data_3.head()


# Merging the datasets: keep only rows present in both files for the same
# fighter string and event
data_adj = data_2.merge(data_3, how='inner', on=['Fighter', "Event"])
data_adj.head()


# Renaming columns
data_adj.rename(columns={"Fighter": "Fighter_1"}, inplace=True)
data_adj["Fighter_1"] = data_adj["Fighter_1"].str.strip()
data_adj.head()


# Removing duplicates
# Keep the first row per (fighter string, event). De-duplicating on the
# fighter string alone also discarded every later event in which the same
# fighter string appears again (e.g. a rematch).
data_adj = data_adj.drop_duplicates(subset=["Fighter_1", "Event"])
data_adj = data_adj.reset_index(drop=True)
data_adj.head()


# Joining the datasets
# Build an order-insensitive key from the words of each fighter string so the
# two sources match even when the names are listed in a different order.
event_name_words = data_1['Fighter'].str.split()
data_1['key'] = event_name_words.apply(sorted).str.join(' ')

detail_name_words = data_adj['Fighter_1'].str.split()
data_adj['key'] = detail_name_words.apply(sorted).str.join(' ')

merged_df = pd.merge(data_1, data_adj, how='outer', on=['key', 'Event'])

# The key is only needed for the join itself.
merged_df = merged_df.drop(columns=['key'])

merged_df.head()


# Removing unavailable data
merged_df = merged_df.dropna()
merged_df.head()


# Dropping unnecessary columns
leftover_index_columns = ["Unnamed: 0_x", "Unnamed: 0_y"]
merged_df = merged_df.drop(columns=leftover_index_columns)
merged_df.head()


# Keep rows whose method mentions "DEC" (any case) and whose Round is 3 or 5
is_decision = merged_df['Method'].str.contains('DEC', case=False)
decision_data = merged_df[is_decision].reset_index(drop=True)
decision_data = decision_data[decision_data["Round"].isin([3, 5])]
decision_data.head()


def string_to_numbers(column):
    """Return every run of digits found in str(column) as a list of ints."""
    text = str(column)
    return [int(match.group()) for match in re.finditer(r'\d+', text)]

# Replace each text cell in these columns with the list of integers it contains
columns_with_counts = ["Head", "Body", "Leg", "Distance", "Clinch", "Ground",
                       "Sig. str.", "Td_y", "Rev."]
for count_column in columns_with_counts:
    decision_data[count_column] = decision_data[count_column].apply(string_to_numbers)

decision_data.head()


def _same_listing_order(row):
    """Return True when 'Fighter_1' and 'Fighter' match once spaces are ignored.

    This is the same space-insensitive comparison used for the Fighter_1_W
    label, so the stat orientation and the label cannot disagree when the two
    sources differ only in spacing.
    """
    return row['Fighter_1'].replace(" ", "") == row['Fighter'].replace(" ", "")


# Each source column holds a two-element list. When both sources list the
# fighters in the same order, element 0 belongs to fighter 1 and element 1 to
# fighter 2; otherwise the two elements are swapped.
for stat_name, source_column in (("Kd", "Kd"), ("Str", "Str"), ("Td", "Td_x"), ("Sub", "Sub")):
    decision_data[f'Fighter_1_{stat_name}'] = decision_data.apply(
        lambda row, col=source_column: row[col][0] if _same_listing_order(row) else row[col][1],
        axis=1,
    )
    decision_data[f'Fighter_2_{stat_name}'] = decision_data.apply(
        lambda row, col=source_column: row[col][1] if _same_listing_order(row) else row[col][0],
        axis=1,
    )
decision_data.head()


# The paired list columns have been unpacked into per-fighter columns
paired_columns = ["Kd", "Str", "Td_x", "Sub"]
decision_data = decision_data.drop(columns=paired_columns)


def extract_values_from_list(lst):
    """Return elements 0 and 2 of a four-element list, else (None, None)."""
    if len(lst) != 4:
        return None, None
    first_value, _, third_value, _ = lst
    return first_value, third_value

# Split each parsed strike column into one column per fighter
for strike_column in ("Head", "Body", "Leg", "Distance", "Clinch", "Ground"):
    per_fighter_names = [f"Fighter_1_{strike_column}", f"Fighter_2_{strike_column}"]
    decision_data[per_fighter_names] = decision_data[strike_column].apply(
        lambda parsed: pd.Series(extract_values_from_list(parsed))
    )

decision_data.head()


# Extracting Takedown Attempts

def extract_values_from_list_1(lst):
    """Return elements 1 and 3 of a four-element list, else (None, None)."""
    if len(lst) != 4:
        return None, None
    _, second_value, _, fourth_value = lst
    return second_value, fourth_value
    
def _td_attempt_pair(parsed):
    # Wrap the extracted pair in a Series so apply() expands it into two columns.
    return pd.Series(extract_values_from_list_1(parsed))


td_attempt_columns = ["Fighter_1_Td_Att", "Fighter_2_Td_Att"]
decision_data[td_attempt_columns] = decision_data["Td_y"].apply(_td_attempt_pair)
decision_data.head()


# Adjusting the Control Time Column: split the text cell on whitespace

ctrl_text = decision_data["Ctrl"]
decision_data["Ctrl"] = ctrl_text.str.split()
decision_data.head()


# Extracting Control Times

def extract_values_from_list_2(lst):
    """Return both elements of a two-element list, else (None, None)."""
    if len(lst) != 2:
        return None, None
    first_value, second_value = lst
    return first_value, second_value
    
ctrl_columns = ["Fighter_1_Ctrl", "Fighter_2_Ctrl"]
decision_data[ctrl_columns] = decision_data["Ctrl"].apply(
    lambda parts: pd.Series(extract_values_from_list_2(parts))
)
decision_data.head()


# Changing missing times

decision_data = decision_data.replace('--', '00:00')


decision_data.head()


# Continuing to adjust the control time columns

def _pad_to_hms(clock):
    # A four-character value gets "00:0" in front, anything else gets "00:",
    # so "m:ss" and "mm:ss" both end up as "hh:mm:ss".
    prefix = "00:0" if len(clock) == 4 else "00:"
    return prefix + clock


for ctrl_column in ctrl_columns:
    padded_clock = decision_data[ctrl_column].apply(_pad_to_hms)
    # Convert timedelta to total seconds and then to integers
    elapsed_seconds = pd.to_timedelta(padded_clock).dt.total_seconds()
    decision_data[ctrl_column] = elapsed_seconds.astype(int)

decision_data.head()


# Converting columns to numeric

# Every per-fighter strike column produced by the extraction step. The list
# previously skipped "Fighter_2_Body" and "Fighter_1_Leg", so those two were
# never passed through pd.to_numeric.
columns_to_convert = ["Fighter_1_Head", "Fighter_2_Head",
                     "Fighter_1_Body", "Fighter_2_Body",
                     "Fighter_1_Leg", "Fighter_2_Leg",
                     "Fighter_1_Distance", "Fighter_2_Distance",
                     "Fighter_1_Clinch", "Fighter_2_Clinch",
                     "Fighter_1_Ground", "Fighter_2_Ground"]


decision_data[columns_to_convert] = decision_data[columns_to_convert].apply(pd.to_numeric)


decision_data.dtypes

Fighter               object
Weight class          object
Method                object
Round                  int64
Event                 object
Fighter_1             object
Head                  object
Body                  object
Leg                   object
Distance              object
Clinch                object
Ground                object
Sig. str.             object
Td_y                  object
Rev.                  object
Ctrl                  object
Fighter_1_Kd           int64
Fighter_2_Kd           int64
Fighter_1_Str          int64
Fighter_2_Str          int64
Fighter_1_Td           int64
Fighter_2_Td           int64
Fighter_1_Sub          int64
Fighter_2_Sub          int64
Fighter_1_Head         int64
Fighter_2_Head         int64
Fighter_1_Body         int64
Fighter_2_Body         int64
Fighter_1_Leg          int64
Fighter_2_Leg          int64
Fighter_1_Distance     int64
Fighter_2_Distance     int64
Fighter_1_Clinch       int64
Fighter_2_Clinch       int64
Fighter_1_Ground       int64
Fighter_2_Ground       int64
Fighter_1_Td_Att       int64
Fighter_2_Td_Att       int64
Fighter_1_Ctrl         int64
Fighter_2_Ctrl         int64
dtype: object


# Fighter 1 minus fighter 2 for every per-fighter stat
diff_stats = ("Kd", "Str", "Td", "Sub", "Head", "Body", "Leg",
              "Distance", "Clinch", "Ground", "Ctrl")
for stat in diff_stats:
    decision_data[f"Diff_{stat}"] = decision_data[f"Fighter_1_{stat}"] - decision_data[f"Fighter_2_{stat}"]


def _fighter_1_label(row):
    # 1 when the two fighter strings are identical once spaces are removed, else 0.
    if row['Fighter_1'].replace(" ", "") == row['Fighter'].replace(" ", ""):
        return 1
    return 0


decision_data['Fighter_1_W'] = decision_data.apply(_fighter_1_label, axis=1)
decision_data.head()


# First two whitespace-separated words of the fighter string
decision_data["First_Fighter"] = [
    " ".join(full_name.split()[:2]) for full_name in decision_data["Fighter_1"]
]


model_columns = ["Method", "Weight class"]
model_columns += [f"Diff_{stat}" for stat in diff_stats]
model_columns.append('Fighter_1_W')
fighter_1 = decision_data[model_columns]

fighter_1.head()


fighter_1.to_csv("model_data_1.csv")


fighter_1.dtypes

Method           object
Weight class     object
Diff_Kd           int64
Diff_Str          int64
Diff_Td           int64
Diff_Sub          int64
Diff_Head         int64
Diff_Body         int64
Diff_Leg          int64
Diff_Distance     int64
Diff_Clinch       int64
Diff_Ground       int64
Diff_Ctrl         int64
Fighter_1_W       int64
dtype: object


# Creating a new dataframe with fights that ended in a decision

# Note: this rebinds decision_data to the decision rows of data_1
decision_mask = data_1['Method'].str.contains('DEC', case=False)
decision_data = data_1[decision_mask].reset_index(drop=True)
decision_data.head()


decision_data.dtypes

Fighter         object
Kd              object
Str             object
Td              object
Sub             object
Weight class    object
Method          object
Round            int64
Event           object
key             object
dtype: object


data_1 = pd.read_csv("ufc_initial_file.csv")
data_1.head()


# Creating a new dataframe with fights that ended in a decision

went_to_decision = data_1['Method'].str.contains('DEC', case=False)
decision_data_1 = data_1[went_to_decision].reset_index(drop=True)
decision_data_1.head()


decision_data_1.replace("--", 0, inplace = True)


# Split each paired stat at its first space into a winner and a loser column
paired_stats = ["Kd", "Str", "Td", "Sub"]
for stat in paired_stats:
    decision_data_1[[f'Winner_{stat}', f'Loser_{stat}']] = decision_data_1[stat].str.split(' ', n=1, expand=True)
decision_data_1.head()


# Placeholders can reappear after the split (with or without a leading space)
for placeholder in ("--", " --"):
    decision_data_1.replace(placeholder, 0, inplace=True)
split_columns = [f"{role}_{stat}" for stat in paired_stats for role in ("Winner", "Loser")]
decision_data_1[split_columns] = decision_data_1[split_columns].apply(pd.to_numeric)
decision_data_1.head()


# Winner minus loser for each stat
for stat in paired_stats:
    decision_data_1[f"Diff_{stat}"] = decision_data_1[f"Winner_{stat}"] - decision_data_1[f"Loser_{stat}"]
decision_data_1.head()


decision_data_1 = decision_data_1[decision_data_1["Round"].isin([3, 5])]


# Visualize Distribution of Winner Strike Differential in Unanimous Decision Fights

U_DEC = decision_data_1[decision_data_1["Method"] == "U-DEC"]
udec_strike_diff = U_DEC["Diff_Str"]
plt.hist(udec_strike_diff, bins = 40)

# Summary figures shown as text on the plot
mean_value = udec_strike_diff.mean()
std_value = udec_strike_diff.std()
greater_than_0 = (len(U_DEC[udec_strike_diff > 0])/len(U_DEC))*100
less_than_0 = (len(U_DEC[udec_strike_diff < 0])/len(U_DEC))*100

# Positions are in axes coordinates
annotations = (
    (0.85, 0.9, f'Mean: {mean_value:.2f}'),
    (0.85, 0.8, f'Std: {std_value:.2f}'),
    (0.75, 0.7, f'% Greater Than 0: {greater_than_0:.2f}'),
    (0.75, 0.6, f'% Less Than 0: {less_than_0:.2f}'),
)
for x_pos, y_pos, caption in annotations:
    plt.text(x_pos, y_pos, caption, fontsize=12, transform=plt.gca().transAxes, va='center', ha='center')

plt.axvline(x=0, color='red', linestyle='--', linewidth=2, label='Zero')
plt.xlabel('Significant Strike Differential')
plt.ylabel('Frequency')
plt.title('Unanimous Decision Winner Sig. Strike Differential (Per 3 Rounds)')
plt.show()


# Histogram of the winner's takedown differential in unanimous decisions
plt.figure(figsize=(9,4))
udec_td_diff = U_DEC["Diff_Td"]
plt.hist(udec_td_diff, bins = 30)

# Summary figures shown as text on the plot
mean_value = udec_td_diff.mean()
std_value = udec_td_diff.std()
greater_than_0 = (len(U_DEC[udec_td_diff > 0])/len(U_DEC))*100
less_than_0 = (len(U_DEC[udec_td_diff < 0])/len(U_DEC))*100

# Positions are in axes coordinates
annotations = (
    (0.85, 0.9, f'Mean: {mean_value:.2f}'),
    (0.85, 0.8, f'Std: {std_value:.2f}'),
    (0.85, 0.7, f'% Greater Than 0: {greater_than_0:.2f}'),
    (0.85, 0.6, f'% Less Than 0: {less_than_0:.2f}'),
)
for x_pos, y_pos, caption in annotations:
    plt.text(x_pos, y_pos, caption, fontsize=12, transform=plt.gca().transAxes, va='center', ha='center')

plt.axvline(x=0, color='red', linestyle='--', linewidth=2, label='Zero')
plt.xlabel('Takedown Differential')
plt.ylabel('Frequency')
plt.title('Unanimous Decision Winner Takedown Differential (Per 3 Rounds)')
plt.show()


# Visualize Distribution of Winner Strike Differential in Split Decision Fights

S_DEC = decision_data_1[decision_data_1["Method"] == "S-DEC"]
sdec_strike_diff = S_DEC["Diff_Str"]
plt.hist(sdec_strike_diff, bins = 30)

# Summary figures shown as text on the plot
mean_value = sdec_strike_diff.mean()
std_value = sdec_strike_diff.std()
greater_than_0 = (len(S_DEC[sdec_strike_diff > 0])/len(S_DEC))*100
less_than_0 = (len(S_DEC[sdec_strike_diff < 0])/len(S_DEC))*100

# Positions are in axes coordinates
annotations = (
    (0.85, 0.9, f'Mean: {mean_value:.2f}'),
    (0.85, 0.8, f'Std: {std_value:.2f}'),
    (0.75, 0.7, f'% Greater Than 0: {greater_than_0:.2f}'),
    (0.75, 0.6, f'% Less Than 0: {less_than_0:.2f}'),
)
for x_pos, y_pos, caption in annotations:
    plt.text(x_pos, y_pos, caption, fontsize=12, transform=plt.gca().transAxes, va='center', ha='center')

plt.axvline(x=0, color='red', linestyle='--', linewidth=2, label='Zero')
plt.xlabel("Significant Strike Differential")
plt.ylabel('Frequency')
plt.title('Split Decision Winner Sig. Strike Differential (Per 3 Rounds)')
plt.show()


# Histogram of the winner's takedown differential in split decisions
plt.figure(figsize=(8, 5))
sdec_td_diff = S_DEC["Diff_Td"]
plt.hist(sdec_td_diff, bins = 15)

# Summary figures shown as text on the plot
mean_value = sdec_td_diff.mean()
std_value = sdec_td_diff.std()
greater_than_0 = (len(S_DEC[sdec_td_diff > 0])/len(S_DEC))*100
less_than_0 = (len(S_DEC[sdec_td_diff < 0])/len(S_DEC))*100

# Positions are in axes coordinates
annotations = (
    (0.85, 0.9, f'Mean: {mean_value:.2f}'),
    (0.85, 0.8, f'Std: {std_value:.2f}'),
    (0.81, 0.7, f'% Greater than 0: {greater_than_0:.2f}'),
    (0.8, 0.6, f'% Less than 0: {less_than_0:.2f}'),
)
for x_pos, y_pos, caption in annotations:
    plt.text(x_pos, y_pos, caption, fontsize=12, transform=plt.gca().transAxes, va='center', ha='center')

plt.axvline(x=0, color='red', linestyle='--', linewidth=2, label='Zero')
plt.xlabel('Takedown Differential')
plt.ylabel('Frequency')
plt.title('Split Decision Winner Takedown Differential (Per 3 Rounds)')
plt.show()


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from statsmodels.tools import add_constant


# Import the Data

data = pd.read_csv("model_data_1.csv")
data.head()


# Removing unnecessary columns: drop whichever column comes first in the file

first_column = data.columns[0]
data = data.drop(first_column, axis = 1)
data.head()


# Creating the Correlation Matrix

# Pairwise correlations between all differential features
differential_columns = ["Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
                        "Diff_Body", "Diff_Leg", "Diff_Distance", "Diff_Clinch",
                        "Diff_Ground", "Diff_Ctrl"]
correlation_matrix = data[differential_columns].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5)
plt.title('Correlation Matrix')
plt.show()


# Variance inflation factor for every predictor (plus the added constant)

from statsmodels.stats.outliers_influence import variance_inflation_factor

# Diff_Distance is not part of this predictor set.
vif_columns = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch",
    "Diff_Ground", "Diff_Ctrl",
]
data_with_const = sm.add_constant(data[vif_columns])

# One VIF per column of the design matrix, in column order.
vif_scores = []
for column_position in range(data_with_const.shape[1]):
    vif_scores.append(variance_inflation_factor(data_with_const.values, column_position))

vif_data = pd.DataFrame()
vif_data["Variable"] = data_with_const.columns
vif_data["VIF"] = vif_scores

print(vif_data)

       Variable       VIF
0         const  1.755790
1       Diff_Kd  1.008553
2      Diff_Str  1.070263
3       Diff_Td  1.064902
4      Diff_Sub  1.005223
5     Diff_Head  1.646237
6     Diff_Body  1.354418
7      Diff_Leg  1.084286
8   Diff_Clinch  1.476568
9   Diff_Ground  2.235665
10    Diff_Ctrl  1.737613


# Split the data 70/30 into training and testing rows, stratified on the
# "Method" column; the fixed seed keeps the split reproducible

import sklearn
from sklearn.model_selection import train_test_split

split_options = {
    "test_size": 0.3,
    "stratify": data['Method'],
    "random_state": 42,
}
train, test = train_test_split(data, **split_options)


# Separate predictors (X) from the target (y) for both halves of the split

predictor_columns = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]
target_column = "Fighter_1_W"

X_train, y_train = train[predictor_columns], train[target_column]
X_test, y_test = test[predictor_columns], test[target_column]


# Baseline logistic regression on all ten standardised predictors

features_1 = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]

# Learn the scaling from the training rows only, then apply it to them.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_train_with_intercept = add_constant(X_train_scaled)

logit_model = sm.Logit(y_train, X_train_with_intercept)
result_1 = logit_model.fit()

# The design matrix is a bare array, so column names are supplied explicitly.
print(result_1.summary(xname=["const", *features_1]))

Optimization terminated successfully.
         Current function value: 0.346389
         Iterations 8
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 2312
Model:                          Logit   Df Residuals:                     2301
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.4753
Time:                        13:02:14   Log-Likelihood:                -800.85
converged:                       True   LL-Null:                       -1526.4
Covariance Type:            nonrobust   LLR p-value:                8.855e-306
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.8660      0.083     10.458      0.000       0.704       1.028
Diff_Kd         0.1662      0.066      2.520      0.012       0.037       0.295
Diff_Str        0.3626      0.103      3.509      0.000       0.160       0.565
Diff_Td         0.2000      0.072      2.784      0.005       0.059       0.341
Diff_Sub       -0.0691      0.067     -1.027      0.304      -0.201       0.063
Diff_Head       1.9469      0.123     15.783      0.000       1.705       2.189
Diff_Body       0.6200      0.083      7.481      0.000       0.458       0.782
Diff_Leg        0.5947      0.070      8.481      0.000       0.457       0.732
Diff_Clinch    -0.0030      0.088     -0.034      0.973      -0.175       0.169
Diff_Ground     0.3204      0.146      2.194      0.028       0.034       0.607
Diff_Ctrl       1.3975      0.099     14.151      0.000       1.204       1.591
===============================================================================


# Test-set accuracy of the baseline model

# Scale with the scaler fitted on the training set, then add the intercept column
X_test_scaled = scaler.transform(X_test)
X_test_with_intercept = add_constant(X_test_scaled)

# Predicted win probabilities for the test rows
y_pred = result_1.predict(X_test_with_intercept)

# Probabilities above 0.5 are classified as 1, everything else as 0
y_pred_binary = np.where(y_pred > 0.5, 1, 0)

# Share of test rows whose predicted class matches the observed one
accuracy_1 = (y_test == y_pred_binary).mean()
print(f"Accuracy: {accuracy_1:.2f}")

Accuracy: 0.86


# Training predictors with Diff_Clinch and Diff_Sub turned into 0/1 indicators

X_train_adj = X_train.copy()

# NOTE(review): despite the variable names, the cut-off is the 95th percentile,
# and it is computed over the full `data` frame (train and test rows together).
third_quartile_value = data["Diff_Clinch"].quantile(0.95)
clinch_above_cutoff = X_train_adj["Diff_Clinch"] > third_quartile_value
X_train_adj.loc[:, "Diff_Clinch"] = clinch_above_cutoff.astype(int)

third_quartile_value_1 = data["Diff_Sub"].quantile(0.95)
sub_above_cutoff = X_train_adj["Diff_Sub"] > third_quartile_value_1
X_train_adj.loc[:, "Diff_Sub"] = sub_above_cutoff.astype(int)


# Testing predictors with Diff_Clinch and Diff_Sub turned into 0/1 indicators

X_test_adj = X_test.copy()

# NOTE(review): despite the variable names, the cut-off is the 95th percentile
# of the column over the full `data` frame.
third_quartile_value = data["Diff_Clinch"].quantile(0.95)
clinch_above_cutoff = X_test_adj["Diff_Clinch"] > third_quartile_value
X_test_adj.loc[:, "Diff_Clinch"] = clinch_above_cutoff.astype(int)

third_quartile_value_1 = data["Diff_Sub"].quantile(0.95)
sub_above_cutoff = X_test_adj["Diff_Sub"] > third_quartile_value_1
X_test_adj.loc[:, "Diff_Sub"] = sub_above_cutoff.astype(int)


# Logistic regression using the indicator versions of Diff_Sub and Diff_Clinch

# Labels follow the column order of X_train_adj; "_Cat" marks the columns
# that were replaced by 0/1 indicators.
features = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub_Cat", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch_Cat", "Diff_Ground", "Diff_Ctrl",
]

scaler = StandardScaler().fit(X_train_adj)
X_train_adj_scaled = scaler.transform(X_train_adj)
X_train_adj_scaled_with_intercept = add_constant(X_train_adj_scaled)

logit_model_adj = sm.Logit(y_train, X_train_adj_scaled_with_intercept)
result_adj = logit_model_adj.fit()
print(result_adj.summary(xname=["const", *features]))

Optimization terminated successfully.
         Current function value: 0.346313
         Iterations 8
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 2312
Model:                          Logit   Df Residuals:                     2301
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.4755
Time:                        13:02:20   Log-Likelihood:                -800.67
converged:                       True   LL-Null:                       -1526.4
Covariance Type:            nonrobust   LLR p-value:                7.435e-306
===================================================================================
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.8726      0.083     10.460      0.000       0.709       1.036
Diff_Kd             0.1654      0.066      2.503      0.012       0.036       0.295
Diff_Str            0.3731      0.103      3.607      0.000       0.170       0.576
Diff_Td             0.2065      0.072      2.872      0.004       0.066       0.347
Diff_Sub_Cat        0.0469      0.079      0.594      0.552      -0.108       0.201
Diff_Head           1.9339      0.119     16.317      0.000       1.702       2.166
Diff_Body           0.6062      0.076      7.952      0.000       0.457       0.756
Diff_Leg            0.5910      0.069      8.539      0.000       0.455       0.727
Diff_Clinch_Cat     0.1005      0.101      0.991      0.322      -0.098       0.299
Diff_Ground         0.3204      0.145      2.214      0.027       0.037       0.604
Diff_Ctrl           1.3933      0.099     14.138      0.000       1.200       1.586
===================================================================================


# Finding Accuracy
#
# Test-set accuracy of the model that uses the Diff_Sub / Diff_Clinch
# indicator columns.

# Scale the test predictors with the scaler fitted on the training rows.
X_test_adj_scaled = scaler.transform(X_test_adj)

# has_constant="add" forces the intercept column to be prepended. With the
# default ("skip"), add_constant returns its input unchanged whenever it finds
# a non-zero constant column; a 0/1 indicator that takes a single value in the
# test rows becomes exactly that after scaling, and predict() would then fail
# on a column-count mismatch.
X_test_adj_with_intercept = add_constant(X_test_adj_scaled, has_constant="add")

# Predicted win probabilities for the test rows.
y_pred = result_adj.predict(X_test_adj_with_intercept)

# Probabilities above 0.5 are classified as 1, everything else as 0.
y_pred_binary = (y_pred > 0.5).astype(int)

# Calculate accuracy: share of test rows classified correctly.
accuracy_adj = (y_pred_binary == y_test).mean()
print(f"Accuracy: {accuracy_adj:.2f}")

Accuracy: 0.86


# Reduced model: refit without the Diff_Sub and Diff_Clinch columns

features_adj = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Ground", "Diff_Ctrl",
]

# After the drop, the remaining columns are in the same order as features_adj.
X_train_adj = X_train_adj.drop(columns=["Diff_Sub", "Diff_Clinch"])

scaler = StandardScaler().fit(X_train_adj)
X_train_adj_scaled = scaler.transform(X_train_adj)
X_train_adj_scaled_with_intercept = add_constant(X_train_adj_scaled)

logit_model_adj = sm.Logit(y_train, X_train_adj_scaled_with_intercept)
result_adj_1 = logit_model_adj.fit()
print(result_adj_1.summary(xname=["const", *features_adj]))

Optimization terminated successfully.
         Current function value: 0.346617
         Iterations 8
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 2312
Model:                          Logit   Df Residuals:                     2303
Method:                           MLE   Df Model:                            8
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.4750
Time:                        13:02:24   Log-Likelihood:                -801.38
converged:                       True   LL-Null:                       -1526.4
Covariance Type:            nonrobust   LLR p-value:                8.244e-308
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.8637      0.083     10.448      0.000       0.702       1.026
Diff_Kd         0.1662      0.066      2.516      0.012       0.037       0.296
Diff_Str        0.3665      0.103      3.552      0.000       0.164       0.569
Diff_Td         0.2020      0.072      2.815      0.005       0.061       0.343
Diff_Head       1.9475      0.118     16.515      0.000       1.716       2.179
Diff_Body       0.6177      0.075      8.217      0.000       0.470       0.765
Diff_Leg        0.5936      0.069      8.586      0.000       0.458       0.729
Diff_Ground     0.3152      0.145      2.174      0.030       0.031       0.599
Diff_Ctrl       1.3941      0.099     14.153      0.000       1.201       1.587
===============================================================================


# Test-set accuracy of the reduced model

# Give the test predictors the same columns the reduced model was trained on
X_test_adj = X_test_adj.drop(columns=["Diff_Sub", "Diff_Clinch"])

X_test_adj_scaled = scaler.transform(X_test_adj)
X_test_adj_with_intercept = add_constant(X_test_adj_scaled)

y_pred = result_adj_1.predict(X_test_adj_with_intercept)

# Probabilities above 0.5 are classified as 1, everything else as 0
y_pred_binary = np.where(y_pred > 0.5, 1, 0)

# Share of test rows classified correctly
accuracy_adj_1 = (y_test == y_pred_binary).mean()
print(f"Accuracy: {accuracy_adj_1:.2f}")

Accuracy: 0.86


# Keep only the rows of the existing split whose Method is "U-DEC"

ud_train = train.loc[train["Method"] == "U-DEC"]
ud_test = test.loc[test["Method"] == "U-DEC"]


# Predictors and target for the U-DEC subset

ud_predictor_columns = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]

ud_X_train, ud_y_train = ud_train[ud_predictor_columns], ud_train["Fighter_1_W"]
ud_X_test, ud_y_test = ud_test[ud_predictor_columns], ud_test["Fighter_1_W"]


# Logistic regression on the U-DEC training rows, all ten predictors

features_2 = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]

# The scaler is refitted on the U-DEC training rows only.
scaler = StandardScaler().fit(ud_X_train)
ud_X_train_scaled = scaler.transform(ud_X_train)
ud_X_train_with_intercept = add_constant(ud_X_train_scaled)

ud_logit_model = sm.Logit(ud_y_train, ud_X_train_with_intercept)
ud_result = ud_logit_model.fit()
print(ud_result.summary(xname=["const", *features_2]))

Optimization terminated successfully.
         Current function value: 0.233096
         Iterations 9
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 1778
Model:                          Logit   Df Residuals:                     1767
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.6433
Time:                        13:02:33   Log-Likelihood:                -414.44
converged:                       True   LL-Null:                       -1161.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           1.2235      0.146      8.392      0.000       0.938       1.509
Diff_Kd         0.1968      0.098      2.009      0.045       0.005       0.389
Diff_Str        0.5126      0.160      3.203      0.001       0.199       0.826
Diff_Td         0.2294      0.105      2.194      0.028       0.024       0.434
Diff_Sub       -0.0942      0.102     -0.920      0.357      -0.295       0.106
Diff_Head       2.9089      0.209     13.928      0.000       2.500       3.318
Diff_Body       0.8977      0.126      7.122      0.000       0.651       1.145
Diff_Leg        0.7792      0.100      7.799      0.000       0.583       0.975
Diff_Clinch    -0.2040      0.131     -1.554      0.120      -0.461       0.053
Diff_Ground     0.7814      0.255      3.058      0.002       0.281       1.282
Diff_Ctrl       1.7974      0.149     12.094      0.000       1.506       2.089
===============================================================================


# Test-set accuracy of the U-DEC model

ud_X_test_scaled = scaler.transform(ud_X_test)
ud_X_test_with_intercept = add_constant(ud_X_test_scaled)

# Predicted win probabilities, then a 0.5 cut-off to obtain classes
ud_y_pred = ud_result.predict(ud_X_test_with_intercept)
ud_y_pred_binary = np.where(ud_y_pred > 0.5, 1, 0)

# Share of U-DEC test rows classified correctly
accuracy_2 = (ud_y_test == ud_y_pred_binary).mean()
print(f"Accuracy: {accuracy_2:.2f}")

Accuracy: 0.90


# U-DEC training predictors with Diff_Clinch and Diff_Sub as 0/1 indicators

ud_X_train_adj = ud_X_train.copy()

# NOTE(review): despite the variable name, the cut-off is the 95th percentile
# of each column over the full `data` frame.
for ud_indicator_column in ("Diff_Clinch", "Diff_Sub"):
    third_quartile_value = data[ud_indicator_column].quantile(0.95)
    ud_above_cutoff = ud_X_train_adj[ud_indicator_column] > third_quartile_value
    ud_X_train_adj.loc[:, ud_indicator_column] = ud_above_cutoff.astype(int)


# U-DEC testing predictors with Diff_Clinch and Diff_Sub as 0/1 indicators

ud_X_test_adj = ud_X_test.copy()

# NOTE(review): despite the variable name, the cut-off is the 95th percentile
# of each column over the full `data` frame.
for ud_indicator_column in ("Diff_Clinch", "Diff_Sub"):
    third_quartile_value = data[ud_indicator_column].quantile(0.95)
    ud_above_cutoff = ud_X_test_adj[ud_indicator_column] > third_quartile_value
    ud_X_test_adj.loc[:, ud_indicator_column] = ud_above_cutoff.astype(int)


# U-DEC logistic regression using the indicator versions of Diff_Sub and Diff_Clinch

# Labels follow the column order of ud_X_train_adj; "_Cat" marks the columns
# that were replaced by 0/1 indicators.
features_3 = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub_Cat", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch_Cat", "Diff_Ground", "Diff_Ctrl",
]

scaler = StandardScaler().fit(ud_X_train_adj)
ud_X_train_adj_scaled = scaler.transform(ud_X_train_adj)
ud_X_train_adj_with_intercept = add_constant(ud_X_train_adj_scaled)

ud_logit_model_adj = sm.Logit(ud_y_train, ud_X_train_adj_with_intercept)
ud_result_adj = ud_logit_model_adj.fit()
print(ud_result_adj.summary(xname=["const", *features_3]))

Optimization terminated successfully.
         Current function value: 0.233651
         Iterations 8
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 1778
Model:                          Logit   Df Residuals:                     1767
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.6424
Time:                        13:02:40   Log-Likelihood:                -415.43
converged:                       True   LL-Null:                       -1161.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
===================================================================================
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               1.2198      0.146      8.341      0.000       0.933       1.506
Diff_Kd             0.1791      0.097      1.838      0.066      -0.012       0.370
Diff_Str            0.5444      0.160      3.396      0.001       0.230       0.859
Diff_Td             0.2424      0.105      2.303      0.021       0.036       0.449
Diff_Sub_Cat        0.0814      0.139      0.585      0.559      -0.191       0.354
Diff_Head           2.8149      0.199     14.141      0.000       2.425       3.205
Diff_Body           0.7904      0.112      7.060      0.000       0.571       1.010
Diff_Leg            0.7477      0.098      7.667      0.000       0.557       0.939
Diff_Clinch_Cat     0.1495      0.157      0.953      0.341      -0.158       0.457
Diff_Ground         0.7659      0.252      3.045      0.002       0.273       1.259
Diff_Ctrl           1.8119      0.149     12.121      0.000       1.519       2.105
===================================================================================


# Finding Accuracy
#
# Test-set accuracy of the U-DEC model that uses the Diff_Sub / Diff_Clinch
# indicator columns.

# Scale the test predictors with the scaler fitted on the U-DEC training rows.
ud_X_test_scaled_adj = scaler.transform(ud_X_test_adj)

# has_constant="add" forces the intercept column to be prepended. With the
# default ("skip"), add_constant returns its input unchanged whenever it finds
# a non-zero constant column; a 0/1 indicator that takes a single value in the
# test rows becomes exactly that after scaling, and predict() would then fail
# on a column-count mismatch.
ud_X_test_with_intercept_adj = add_constant(ud_X_test_scaled_adj, has_constant="add")

# Predicted win probabilities for the U-DEC test rows.
ud_y_pred = ud_result_adj.predict(ud_X_test_with_intercept_adj)

# Probabilities above 0.5 are classified as 1, everything else as 0.
ud_y_pred_binary = (ud_y_pred > 0.5).astype(int)

# Share of U-DEC test rows classified correctly.
accuracy_3 = (ud_y_pred_binary == ud_y_test).mean()
print(f"Accuracy: {accuracy_3:.2f}")

Accuracy: 0.91


# Reduced U-DEC model: refit without Diff_Sub, Diff_Clinch and Diff_Kd

features_adj_1 = [
    "Diff_Str", "Diff_Td", "Diff_Head", "Diff_Body",
    "Diff_Leg", "Diff_Ground", "Diff_Ctrl",
]

# After the drop, the remaining columns are in the same order as features_adj_1.
ud_X_train_adj = ud_X_train_adj.drop(columns=["Diff_Sub", "Diff_Clinch", "Diff_Kd"])

scaler = StandardScaler().fit(ud_X_train_adj)
ud_X_train_adj_scaled = scaler.transform(ud_X_train_adj)
ud_X_train_adj_with_intercept = add_constant(ud_X_train_adj_scaled)

ud_logit_model_adj = sm.Logit(ud_y_train, ud_X_train_adj_with_intercept)
ud_result_adj_1 = ud_logit_model_adj.fit()
print(ud_result_adj_1.summary(xname=["const", *features_adj_1]))

Optimization terminated successfully.
         Current function value: 0.234980
         Iterations 8
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 1778
Model:                          Logit   Df Residuals:                     1770
Method:                           MLE   Df Model:                            7
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.6404
Time:                        13:02:46   Log-Likelihood:                -417.79
converged:                       True   LL-Null:                       -1161.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           1.2145      0.145      8.390      0.000       0.931       1.498
Diff_Str        0.5227      0.160      3.277      0.001       0.210       0.835
Diff_Td         0.2160      0.104      2.083      0.037       0.013       0.419
Diff_Head       2.8266      0.198     14.268      0.000       2.438       3.215
Diff_Body       0.8029      0.111      7.216      0.000       0.585       1.021
Diff_Leg        0.7413      0.097      7.611      0.000       0.550       0.932
Diff_Ground     0.7550      0.250      3.025      0.002       0.266       1.244
Diff_Ctrl       1.7921      0.148     12.138      0.000       1.503       2.082
===============================================================================


# Test-set accuracy of the reduced U-DEC model

# Give the test predictors the same columns the reduced model was trained on
ud_X_test_adj = ud_X_test_adj.drop(columns=["Diff_Sub", "Diff_Clinch", "Diff_Kd"])

ud_X_test_scaled_adj = scaler.transform(ud_X_test_adj)
ud_X_test_with_intercept_adj = add_constant(ud_X_test_scaled_adj)

# Predicted win probabilities, then a 0.5 cut-off to obtain classes
ud_y_pred = ud_result_adj_1.predict(ud_X_test_with_intercept_adj)
ud_y_pred_binary = np.where(ud_y_pred > 0.5, 1, 0)

# Share of U-DEC test rows classified correctly
accuracy_adj_2 = (ud_y_test == ud_y_pred_binary).mean()
print(f"Accuracy: {accuracy_adj_2:.2f}")

Accuracy: 0.91


# Keep only the rows of the existing split whose Method is "S-DEC"

sd_train = train.loc[train["Method"] == "S-DEC"]
sd_test = test.loc[test["Method"] == "S-DEC"]


# Predictors and target for the S-DEC subset
sd_predictor_columns = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]

sd_X_train, sd_y_train = sd_train[sd_predictor_columns], sd_train["Fighter_1_W"]
sd_X_test, sd_y_test = sd_test[sd_predictor_columns], sd_test["Fighter_1_W"]


# Logistic regression on the S-DEC training rows, all ten predictors

features = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub", "Diff_Head",
    "Diff_Body", "Diff_Leg", "Diff_Clinch", "Diff_Ground", "Diff_Ctrl",
]

# The scaler is refitted on the S-DEC training rows only.
scaler = StandardScaler().fit(sd_X_train)
sd_X_train_scaled = scaler.transform(sd_X_train)
sd_X_train_with_intercept = add_constant(sd_X_train_scaled)

sd_logit_model = sm.Logit(sd_y_train, sd_X_train_with_intercept)
sd_result = sd_logit_model.fit()
print(sd_result.summary(xname=["const", *features]))

Optimization terminated successfully.
         Current function value: 0.615246
         Iterations 5
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                  477
Model:                          Logit   Df Residuals:                      466
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                 0.09707
Time:                        13:02:56   Log-Likelihood:                -293.47
converged:                       True   LL-Null:                       -325.02
Covariance Type:            nonrobust   LLR p-value:                 9.337e-10
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.3484      0.100      3.489      0.000       0.153       0.544
Diff_Kd         0.1818      0.105      1.732      0.083      -0.024       0.387
Diff_Str        0.0282      0.118      0.239      0.811      -0.203       0.259
Diff_Td         0.1484      0.109      1.367      0.172      -0.064       0.361
Diff_Sub        0.0334      0.104      0.320      0.749      -0.171       0.238
Diff_Head       0.4809      0.127      3.787      0.000       0.232       0.730
Diff_Body       0.1866      0.116      1.610      0.107      -0.041       0.414
Diff_Leg        0.1800      0.110      1.642      0.101      -0.035       0.395
Diff_Clinch     0.2843      0.122      2.333      0.020       0.045       0.523
Diff_Ground     0.1526      0.116      1.316      0.188      -0.075       0.380
Diff_Ctrl       0.4915      0.115      4.283      0.000       0.267       0.716
===============================================================================


# Test-set accuracy of the S-DEC model

# Reuse the scaler fitted on the S-DEC training rows, then add the intercept
sd_X_test_scaled = scaler.transform(sd_X_test)
sd_X_test_with_intercept = add_constant(sd_X_test_scaled)

# Predicted win probabilities for the S-DEC test rows
sd_y_pred = sd_result.predict(sd_X_test_with_intercept)

# Probabilities above 0.5 are classified as 1, everything else as 0
sd_y_pred_binary = np.where(sd_y_pred > 0.5, 1, 0)

# Share of S-DEC test rows classified correctly
accuracy = (sd_y_test == sd_y_pred_binary).mean()
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.61


# S-DEC training predictors with six columns turned into 0/1 indicators

sd_X_train_adj = sd_X_train.copy()

# Diff_Kd, Diff_Head, Diff_Clinch and Diff_Ctrl are left as they are.
# NOTE(review): despite the variable name, the cut-off is the 90th percentile
# of each column over the full `data` frame.
sd_indicator_columns = (
    "Diff_Str", "Diff_Td", "Diff_Sub",
    "Diff_Body", "Diff_Leg", "Diff_Ground",
)
for sd_indicator_column in sd_indicator_columns:
    third_quartile_value = data[sd_indicator_column].quantile(0.90)
    sd_above_cutoff = sd_X_train_adj[sd_indicator_column] > third_quartile_value
    sd_X_train_adj.loc[:, sd_indicator_column] = sd_above_cutoff.astype(int)


# S-DEC testing predictors with six columns turned into 0/1 indicators
sd_X_test_adj = sd_X_test.copy()

# Diff_Kd, Diff_Head, Diff_Clinch and Diff_Ctrl are left as they are.
# NOTE(review): despite the variable name, the cut-off is the 90th percentile
# of each column over the full `data` frame.
sd_indicator_columns = (
    "Diff_Str", "Diff_Td", "Diff_Sub",
    "Diff_Body", "Diff_Leg", "Diff_Ground",
)
for sd_indicator_column in sd_indicator_columns:
    third_quartile_value = data[sd_indicator_column].quantile(0.90)
    sd_above_cutoff = sd_X_test_adj[sd_indicator_column] > third_quartile_value
    sd_X_test_adj.loc[:, sd_indicator_column] = sd_above_cutoff.astype(int)


# Creating New Model
#
# Logistic regression on the S-DEC training rows using sd_X_train_adj, in
# which Diff_Str, Diff_Td, Diff_Sub, Diff_Body, Diff_Leg and Diff_Ground are
# 0/1 indicators.

# Summary labels, in the column order of sd_X_train_adj. Only the six
# dichotomised columns carry the "_Cat" suffix. Diff_Kd is not converted when
# sd_X_train_adj is built, so it is labelled as the plain column rather than
# "Diff_Kd_Cat".
features = ["Diff_Kd", "Diff_Str_Cat", "Diff_Td_Cat", "Diff_Sub_Cat", "Diff_Head", "Diff_Body_Cat", "Diff_Leg_Cat",
            "Diff_Clinch", "Diff_Ground_Cat", "Diff_Ctrl"]

# Standardise using statistics from the S-DEC training rows only.
scaler = StandardScaler()
sd_X_train_adj_scaled = scaler.fit_transform(sd_X_train_adj)
sd_X_train_adj_with_intercept = add_constant(sd_X_train_adj_scaled)

sd_adj_logit_model = sm.Logit(sd_y_train, sd_X_train_adj_with_intercept)
sd_result_adj = sd_adj_logit_model.fit()

# The design matrix is a bare array, so column names are supplied explicitly.
print(sd_result_adj.summary(xname=["const"] + features))

Optimization terminated successfully.
         Current function value: 0.619419
         Iterations 5
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                  477
Model:                          Logit   Df Residuals:                      466
Method:                           MLE   Df Model:                           10
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                 0.09095
Time:                        13:03:04   Log-Likelihood:                -295.46
converged:                       True   LL-Null:                       -325.02
Covariance Type:            nonrobust   LLR p-value:                 5.313e-09
===================================================================================
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               0.3509      0.099      3.533      0.000       0.156       0.546
Diff_Kd_Cat         0.1576      0.103      1.524      0.127      -0.045       0.360
Diff_Str_Cat       -0.0407      0.099     -0.410      0.682      -0.235       0.154
Diff_Td_Cat        -0.0416      0.100     -0.416      0.678      -0.237       0.154
Diff_Sub_Cat        0.1777      0.109      1.635      0.102      -0.035       0.391
Diff_Head           0.4444      0.113      3.941      0.000       0.223       0.665
Diff_Body_Cat       0.1346      0.105      1.279      0.201      -0.072       0.341
Diff_Leg_Cat        0.0788      0.104      0.759      0.448      -0.125       0.282
Diff_Clinch         0.3669      0.114      3.206      0.001       0.143       0.591
Diff_Ground_Cat    -0.0500      0.112     -0.446      0.656      -0.270       0.170
Diff_Ctrl           0.4914      0.108      4.553      0.000       0.280       0.703
===================================================================================


# Finding Accuracy
#
# Test-set accuracy of the S-DEC model that uses the indicator columns.

# Use the same scaler as on the training set
sd_X_test_scaled_adj = scaler.transform(sd_X_test_adj)

# has_constant="add" forces the intercept column to be prepended. With the
# default ("skip"), add_constant returns its input unchanged whenever it finds
# a non-zero constant column; a 0/1 indicator that takes a single value in the
# test rows becomes exactly that after scaling, and predict() would then fail
# on a column-count mismatch.
sd_X_test_with_intercept_adj = add_constant(sd_X_test_scaled_adj, has_constant="add")

# Predict on the test set
sd_y_pred = sd_result_adj.predict(sd_X_test_with_intercept_adj)

# Convert predicted probabilities to binary predictions (0 or 1)
sd_y_pred_binary = (sd_y_pred > 0.5).astype(int)

# Calculate accuracy: share of S-DEC test rows classified correctly
accuracy = (sd_y_pred_binary == sd_y_test).mean()
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.63


# Reduced S-DEC model: only Diff_Head, Diff_Clinch and Diff_Ctrl are kept

features_sd = ["Diff_Head", "Diff_Clinch", "Diff_Ctrl"]

sd_dropped_columns = [
    "Diff_Kd", "Diff_Str", "Diff_Td", "Diff_Sub",
    "Diff_Body", "Diff_Leg", "Diff_Ground",
]
sd_X_train_adj = sd_X_train_adj.drop(columns=sd_dropped_columns)

scaler = StandardScaler().fit(sd_X_train_adj)
sd_X_train_adj_scaled = scaler.transform(sd_X_train_adj)
sd_X_train_adj_with_intercept = add_constant(sd_X_train_adj_scaled)

# NOTE: this rebinds sd_adj_logit_model and sd_result_adj, replacing the
# ten-predictor fit that previously held those names.
sd_adj_logit_model = sm.Logit(sd_y_train, sd_X_train_adj_with_intercept)
sd_result_adj = sd_adj_logit_model.fit()
print(sd_result_adj.summary(xname=["const", *features_sd]))

Optimization terminated successfully.
         Current function value: 0.627974
         Iterations 5
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                  477
Model:                          Logit   Df Residuals:                      473
Method:                           MLE   Df Model:                            3
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                 0.07839
Time:                        13:03:12   Log-Likelihood:                -299.54
converged:                       True   LL-Null:                       -325.02
Covariance Type:            nonrobust   LLR p-value:                 4.991e-11
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.3420      0.098      3.483      0.000       0.150       0.534
Diff_Head       0.4052      0.107      3.774      0.000       0.195       0.616
Diff_Clinch     0.3990      0.111      3.602      0.000       0.182       0.616
Diff_Ctrl       0.4997      0.106      4.727      0.000       0.293       0.707
===============================================================================


# Finding Accuracy

# Keep only the predictors the reduced model was fitted on. Selecting by name
# (rather than dropping the other columns in place) keeps the cell re-runnable
# and fixes the column order to that of features_sd.
sd_X_test_adj = sd_X_test_adj[features_sd]

sd_X_test_scaled_adj = scaler.transform(sd_X_test_adj)  # Use the same scaler as on the training set
# has_constant="add" always prepends the intercept column; the default ("skip")
# would return the matrix without one if a test column were a non-zero
# constant, which would no longer line up with the fitted parameters.
sd_X_test_with_intercept_adj = add_constant(sd_X_test_scaled_adj, has_constant="add")

# Predict on the test set
sd_y_pred = sd_result_adj.predict(sd_X_test_with_intercept_adj)

# Convert predicted probabilities to binary predictions (0 or 1)
sd_y_pred_binary = (sd_y_pred > 0.5).astype(int)

# Accuracy = share of test rows whose predicted label equals the observed one
accuracy_adj_3 = (sd_y_pred_binary == sd_y_test).mean()
print(f"Accuracy: {accuracy_adj_3:.2f}")

Accuracy: 0.62


# Model: re-display the fitted summary of result_adj_1, labelling the intercept
# as "const" and every other coefficient with its name from features_adj.
print(result_adj_1.summary(xname=["const", *features_adj]))

                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 2312
Model:                          Logit   Df Residuals:                     2303
Method:                           MLE   Df Model:                            8
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.4750
Time:                        13:03:16   Log-Likelihood:                -801.38
converged:                       True   LL-Null:                       -1526.4
Covariance Type:            nonrobust   LLR p-value:                8.244e-308
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.8637      0.083     10.448      0.000       0.702       1.026
Diff_Kd         0.1662      0.066      2.516      0.012       0.037       0.296
Diff_Str        0.3665      0.103      3.552      0.000       0.164       0.569
Diff_Td         0.2020      0.072      2.815      0.005       0.061       0.343
Diff_Head       1.9475      0.118     16.515      0.000       1.716       2.179
Diff_Body       0.6177      0.075      8.217      0.000       0.470       0.765
Diff_Leg        0.5936      0.069      8.586      0.000       0.458       0.729
Diff_Ground     0.3152      0.145      2.174      0.030       0.031       0.599
Diff_Ctrl       1.3941      0.099     14.153      0.000       1.201       1.587
===============================================================================


# Accuracy for Model: report accuracy_adj_1 (computed earlier in the notebook),
# formatted to two decimal places.
print(f"Accuracy: {accuracy_adj_1:.2f}")

Accuracy: 0.86


# Model: re-display the fitted summary of ud_result_adj_1, labelling the
# intercept as "const" and the remaining coefficients from features_adj_1.
print(ud_result_adj_1.summary(xname=["const", *features_adj_1]))

                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                 1778
Model:                          Logit   Df Residuals:                     1770
Method:                           MLE   Df Model:                            7
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                  0.6404
Time:                        13:03:20   Log-Likelihood:                -417.79
converged:                       True   LL-Null:                       -1161.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           1.2145      0.145      8.390      0.000       0.931       1.498
Diff_Str        0.5227      0.160      3.277      0.001       0.210       0.835
Diff_Td         0.2160      0.104      2.083      0.037       0.013       0.419
Diff_Head       2.8266      0.198     14.268      0.000       2.438       3.215
Diff_Body       0.8029      0.111      7.216      0.000       0.585       1.021
Diff_Leg        0.7413      0.097      7.611      0.000       0.550       0.932
Diff_Ground     0.7550      0.250      3.025      0.002       0.266       1.244
Diff_Ctrl       1.7921      0.148     12.138      0.000       1.503       2.082
===============================================================================


# Accuracy for Model: report accuracy_adj_2 (computed earlier in the notebook),
# formatted to two decimal places.
print(f"Accuracy: {accuracy_adj_2:.2f}")

Accuracy: 0.91


# Model: re-display the fitted summary of sd_result_adj, labelling the
# intercept as "const" and the remaining coefficients from features_sd.
print(sd_result_adj.summary(xname=["const", *features_sd]))

                           Logit Regression Results                           
==============================================================================
Dep. Variable:            Fighter_1_W   No. Observations:                  477
Model:                          Logit   Df Residuals:                      473
Method:                           MLE   Df Model:                            3
Date:                Tue, 12 Dec 2023   Pseudo R-squ.:                 0.07839
Time:                        13:03:25   Log-Likelihood:                -299.54
converged:                       True   LL-Null:                       -325.02
Covariance Type:            nonrobust   LLR p-value:                 4.991e-11
===============================================================================
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.3420      0.098      3.483      0.000       0.150       0.534
Diff_Head       0.4052      0.107      3.774      0.000       0.195       0.616
Diff_Clinch     0.3990      0.111      3.602      0.000       0.182       0.616
Diff_Ctrl       0.4997      0.106      4.727      0.000       0.293       0.707
===============================================================================


# Accuracy for Model: report accuracy_adj_3, the test-set accuracy of
# sd_result_adj computed above, formatted to two decimal places.
print(f"Accuracy: {accuracy_adj_3:.2f}")

Accuracy: 0.62

	W/L	Fighter	Kd	Str	Td	Sub	Weight class	Method	Round	Time	Event
0	win	Islam Makhachev Alexander Volkanovski	1 0	24 4	0 0	0 0	Lightweight	KO/TKO Kick	1	3:06	UFC 294: Makhachev vs. Volkanovski 2
1	win	Khamzat Chimaev Kamaru Usman	0 0	38 36	4 0	1 0	Middleweight	M-DEC	3	5:00	UFC 294: Makhachev vs. Volkanovski 2
2	nc nc	Magomed Ankalaev Johnny Walker	0 0	13 14	1 0	0 0	Light Heavyweight	CNC	1	3:13	UFC 294: Makhachev vs. Volkanovski 2
3	win	Ikram Aliskerov Warlley Alves	1 0	26 12	0 0	0 0	Middleweight	KO/TKO Punches	1	2:07	UFC 294: Makhachev vs. Volkanovski 2
4	win	Said Nurmagomedov Muin Gafurov	0 0	5 1	0 0	1 0	Bantamweight	SUB Guillotine Choke	1	1:13	UFC 294: Makhachev vs. Volkanovski 2

	Fighter	Sig. str.	Td	Ctrl	Event
0	Islam Makhachev Alexander Volkanovski	24 of 29 4 of 6	0 of 1 0 of 0	0:19 0:49	UFC 294: Makhachev vs. Volkanovski 2
1	Kamaru Usman Khamzat Chimaev	36 of 66 38 of 70	0 of 1 4 of 12	0:04 7:16	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
3	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
4	Ikram Aliskerov Warlley Alves	26 of 36 12 of 16	0 of 0 0 of 0	0:00 0:00	UFC 294: Makhachev vs. Volkanovski 2

	Fighter	Sig. str.	Td	Ctrl	Event
0	Islam Makhachev Alexander Volkanovski	24 of 29 4 of 6	0 of 1 0 of 0	0:19 0:49	UFC 294: Makhachev vs. Volkanovski 2
1	Kamaru Usman Khamzat Chimaev	36 of 66 38 of 70	0 of 1 4 of 12	0:04 7:16	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
3	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
4	Ikram Aliskerov Warlley Alves	26 of 36 12 of 16	0 of 0 0 of 0	0:00 0:00	UFC 294: Makhachev vs. Volkanovski 2

	Fighter	Sig. str.	Td	Ctrl	Event
0	Islam Makhachev Alexander Volkanovski	24 of 29 4 of 6	0 of 1 0 of 0	0:19 0:49	UFC 294: Makhachev vs. Volkanovski 2
1	Kamaru Usman Khamzat Chimaev	36 of 66 38 of 70	0 of 1 4 of 12	0:04 7:16	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
3	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
4	Ikram Aliskerov Warlley Alves	26 of 36 12 of 16	0 of 0 0 of 0	0:00 0:00	UFC 294: Makhachev vs. Volkanovski 2

	Fighter	Sig. str.	Td	Ctrl	Event
0	Islam Makhachev Alexander Volkanovski	24 of 29 4 of 6	0 of 1 0 of 0	0:19 0:49	UFC 294: Makhachev vs. Volkanovski 2
1	Kamaru Usman Khamzat Chimaev	36 of 66 38 of 70	0 of 1 4 of 12	0:04 7:16	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
3	Magomed Ankalaev Johnny Walker	13 of 19 14 of 27	1 of 1 0 of 0	1:06 0:00	UFC 294: Makhachev vs. Volkanovski 2
4	Ikram Aliskerov Warlley Alves	26 of 36 12 of 16	0 of 0 0 of 0	0:00 0:00	UFC 294: Makhachev vs. Volkanovski 2

Exploring Decision Fights in the UFC

- John Collopy

Links to Each Section

Motivation

Further Reading

ETL

Scraping the First "Layer" of Data

Scraping the Second Layer of Data (Part 1)

Scraping the Second Layer of Data (Part 2)

Unifying the Datasets

EDA

Split Decision Analysis

Model Selection

Model for All Types of Fights

Checking for Multicollinearity

Unanimous Decision Model

Split Decision Model

Final Models

Model for All Fights

Model for Unanimous Decisions

Model for Split Decisions

Conclusions

Takeaways from the Logistic Regression Models

	Fighter	Head	Body	Leg	Distance	Clinch	Ground	Event
0	Islam Makhachev Alexander Volkanovski	13 of 16 0 of 1	9 of 11 1 of 1	2 of 2 3 of 4	7 of 11 3 of 5	8 of 9 1 of 1	9 of 9 0 of 0	UFC 294: Makhachev vs. Volkanovski 2
1	Kamaru Usman Khamzat Chimaev	27 of 52 27 of 53	6 of 10 3 of 9	3 of 4 8 of 8	33 of 63 22 of 53	1 of 1 0 of 0	2 of 2 16 of 17	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	7 of 13 4 of 15	3 of 3 1 of 2	3 of 3 9 of 10	7 of 11 14 of 27	1 of 1 0 of 0	5 of 7 0 of 0	UFC 294: Makhachev vs. Volkanovski 2
3	Magomed Ankalaev Johnny Walker	7 of 13 4 of 15	3 of 3 1 of 2	3 of 3 9 of 10	7 of 11 14 of 27	1 of 1 0 of 0	5 of 7 0 of 0	UFC 294: Makhachev vs. Volkanovski 2
4	Ikram Aliskerov Warlley Alves	20 of 29 3 of 6	6 of 6 1 of 1	0 of 1 8 of 9	26 of 36 12 of 16	0 of 0 0 of 0	0 of 0 0 of 0	UFC 294: Makhachev vs. Volkanovski 2

	Fighter	Kd	Str	Td	Sub	Weight class	Method	Round	Event
0	Islam Makhachev Alexander Volkanovski	[1, 0]	[24, 4]	[0, 0]	[0, 0]	Lightweight	KO/TKO Kick	1	UFC 294: Makhachev vs. Volkanovski 2
1	Khamzat Chimaev Kamaru Usman	[0, 0]	[38, 36]	[4, 0]	[1, 0]	Middleweight	M-DEC	3	UFC 294: Makhachev vs. Volkanovski 2
2	Magomed Ankalaev Johnny Walker	[0, 0]	[13, 14]	[1, 0]	[0, 0]	Light Heavyweight	CNC	1	UFC 294: Makhachev vs. Volkanovski 2
3	Ikram Aliskerov Warlley Alves	[1, 0]	[26, 12]	[0, 0]	[0, 0]	Middleweight	KO/TKO Punches	1	UFC 294: Makhachev vs. Volkanovski 2
4	Said Nurmagomedov Muin Gafurov	[0, 0]	[5, 1]	[0, 0]	[1, 0]	Bantamweight	SUB Guillotine Choke	1	UFC 294: Makhachev vs. Volkanovski 2

	Method	Weight class	Diff_Kd	Diff_Str	Diff_Td	Diff_Sub	Diff_Head	Diff_Body	Diff_Leg	Diff_Distance	Diff_Clinch	Diff_Ground	Diff_Ctrl	Fighter_1_W
0	M-DEC	Middleweight	0	-2	-4	-1	0	3	-5	11	1	-14	-432	0
1	U-DEC	Lightweight	0	-18	-3	1	-4	-2	-12	-9	-4	-5	-286	0
2	U-DEC	Middleweight	0	7	1	0	10	8	-11	0	3	4	-218	0
3	U-DEC	Featherweight	0	2	-2	0	6	-4	0	12	-6	-4	-238	0
4	U-DEC	Women's Strawweight	-1	-27	1	0	16	18	-7	23	11	-7	-262	1

	Fighter	Kd	Str	Td_x	Sub	Weight class	Method	Round	Event	Fighter_1	Head	Body	Leg	Distance	Clinch	Ground	Sig. str.	Td_y	Rev.	Ctrl
0	Khamzat Chimaev Kamaru Usman	[0, 0]	[38, 36]	[4, 0]	[1, 0]	Middleweight	M-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Kamaru Usman Khamzat Chimaev	[27, 52, 27, 53]	[6, 10, 3, 9]	[3, 4, 8, 8]	[33, 63, 22, 53]	[1, 1, 0, 0]	[2, 2, 16, 17]	[36, 66, 38, 70]	[0, 1, 4, 12]	[0, 0]	0:04 7:16
1	Trevor Peek Mohammad Yahya	[0, 0]	[61, 43]	[4, 1]	[0, 1]	Lightweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Mohammad Yahya Trevor Peek	[28, 70, 32, 61]	[14, 21, 16, 23]	[1, 1, 13, 13]	[41, 90, 50, 83]	[2, 2, 6, 9]	[0, 0, 5, 5]	[43, 92, 61, 97]	[1, 1, 4, 7]	[0, 0]	0:24 5:10
2	Sedriques Dumas Abu Azaitar	[0, 0]	[34, 41]	[1, 2]	[0, 0]	Middleweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Abu Azaitar Sedriques Dumas	[21, 78, 11, 45]	[13, 18, 5, 11]	[7, 10, 18, 23]	[31, 90, 31, 76]	[3, 6, 0, 0]	[7, 10, 3, 3]	[41, 106, 34, 79]	[2, 4, 1, 2]	[0, 0]	0:32 4:10
3	Muhammad Naimov Nathaniel Wood	[0, 0]	[48, 50]	[4, 2]	[0, 0]	Featherweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Nathaniel Wood Muhammad Naimov	[37, 57, 31, 60]	[6, 7, 10, 13]	[7, 8, 7, 8]	[36, 56, 24, 48]	[5, 6, 11, 14]	[9, 10, 13, 19]	[50, 72, 48, 81]	[2, 7, 4, 9]	[0, 1]	2:44 6:42
4	Viktoriia Dudakova Jinh Yu Frey	[1, 0]	[74, 47]	[0, 1]	[0, 0]	Women's Strawweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Viktoriia Dudakova Jinh Yu Frey	[47, 115, 31, 80]	[26, 30, 8, 13]	[1, 1, 8, 8]	[60, 131, 37, 89]	[14, 15, 3, 4]	[0, 0, 7, 8]	[74, 146, 47, 101]	[0, 3, 1, 3]	[0, 0]	0:18 4:40

	Fighter	Weight class	Method	Round	Event	Fighter_1	Head	Body	Leg	Distance	...	Fighter_1_Body	Fighter_2_Body	Fighter_1_Leg	Fighter_2_Leg	Fighter_1_Distance	Fighter_2_Distance	Fighter_1_Clinch	Fighter_2_Clinch	Fighter_1_Ground	Fighter_2_Ground
0	Khamzat Chimaev Kamaru Usman	Middleweight	M-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Kamaru Usman Khamzat Chimaev	[27, 52, 27, 53]	[6, 10, 3, 9]	[3, 4, 8, 8]	[33, 63, 22, 53]	...	6	3	3	8	33	22	1	0	2	16
1	Trevor Peek Mohammad Yahya	Lightweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Mohammad Yahya Trevor Peek	[28, 70, 32, 61]	[14, 21, 16, 23]	[1, 1, 13, 13]	[41, 90, 50, 83]	...	14	16	1	13	41	50	2	6	0	5
2	Sedriques Dumas Abu Azaitar	Middleweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Abu Azaitar Sedriques Dumas	[21, 78, 11, 45]	[13, 18, 5, 11]	[7, 10, 18, 23]	[31, 90, 31, 76]	...	13	5	7	18	31	31	3	0	7	3
3	Muhammad Naimov Nathaniel Wood	Featherweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Nathaniel Wood Muhammad Naimov	[37, 57, 31, 60]	[6, 7, 10, 13]	[7, 8, 7, 8]	[36, 56, 24, 48]	...	6	10	7	7	36	24	5	11	9	13
4	Viktoriia Dudakova Jinh Yu Frey	Women's Strawweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Viktoriia Dudakova Jinh Yu Frey	[47, 115, 31, 80]	[26, 30, 8, 13]	[1, 1, 8, 8]	[60, 131, 37, 89]	...	26	8	1	8	60	37	14	3	0	7

	Fighter	Weight class	Method	Round	Event	Fighter_1	Head	Body	Leg	Distance	...	Fighter_1_Leg	Fighter_2_Leg	Fighter_1_Distance	Fighter_2_Distance	Fighter_1_Clinch	Fighter_2_Clinch	Fighter_1_Ground	Fighter_2_Ground	Fighter_1_Td_Att	Fighter_2_Td_Att
0	Khamzat Chimaev Kamaru Usman	Middleweight	M-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Kamaru Usman Khamzat Chimaev	[27, 52, 27, 53]	[6, 10, 3, 9]	[3, 4, 8, 8]	[33, 63, 22, 53]	...	3	8	33	22	1	0	2	16	1	12
1	Trevor Peek Mohammad Yahya	Lightweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Mohammad Yahya Trevor Peek	[28, 70, 32, 61]	[14, 21, 16, 23]	[1, 1, 13, 13]	[41, 90, 50, 83]	...	1	13	41	50	2	6	0	5	1	7
2	Sedriques Dumas Abu Azaitar	Middleweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Abu Azaitar Sedriques Dumas	[21, 78, 11, 45]	[13, 18, 5, 11]	[7, 10, 18, 23]	[31, 90, 31, 76]	...	7	18	31	31	3	0	7	3	4	2
3	Muhammad Naimov Nathaniel Wood	Featherweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Nathaniel Wood Muhammad Naimov	[37, 57, 31, 60]	[6, 7, 10, 13]	[7, 8, 7, 8]	[36, 56, 24, 48]	...	7	7	36	24	5	11	9	13	7	9
4	Viktoriia Dudakova Jinh Yu Frey	Women's Strawweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Viktoriia Dudakova Jinh Yu Frey	[47, 115, 31, 80]	[26, 30, 8, 13]	[1, 1, 8, 8]	[60, 131, 37, 89]	...	1	8	60	37	14	3	0	7	3	3

	Fighter	Weight class	Method	Round	Event	Fighter_1	Head	Body	Leg	Distance	...	Fighter_1_Leg	Fighter_2_Leg	Fighter_1_Distance	Fighter_2_Distance	Fighter_1_Clinch	Fighter_2_Clinch	Fighter_1_Ground	Fighter_2_Ground	Fighter_1_Td_Att	Fighter_2_Td_Att
0	Khamzat Chimaev Kamaru Usman	Middleweight	M-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Kamaru Usman Khamzat Chimaev	[27, 52, 27, 53]	[6, 10, 3, 9]	[3, 4, 8, 8]	[33, 63, 22, 53]	...	3	8	33	22	1	0	2	16	1	12
1	Trevor Peek Mohammad Yahya	Lightweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Mohammad Yahya Trevor Peek	[28, 70, 32, 61]	[14, 21, 16, 23]	[1, 1, 13, 13]	[41, 90, 50, 83]	...	1	13	41	50	2	6	0	5	1	7
2	Sedriques Dumas Abu Azaitar	Middleweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Abu Azaitar Sedriques Dumas	[21, 78, 11, 45]	[13, 18, 5, 11]	[7, 10, 18, 23]	[31, 90, 31, 76]	...	7	18	31	31	3	0	7	3	4	2
3	Muhammad Naimov Nathaniel Wood	Featherweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Nathaniel Wood Muhammad Naimov	[37, 57, 31, 60]	[6, 7, 10, 13]	[7, 8, 7, 8]	[36, 56, 24, 48]	...	7	7	36	24	5	11	9	13	7	9
4	Viktoriia Dudakova Jinh Yu Frey	Women's Strawweight	U-DEC	3	UFC 294: Makhachev vs. Volkanovski 2	Viktoriia Dudakova Jinh Yu Frey	[47, 115, 31, 80]	[26, 30, 8, 13]	[1, 1, 8, 8]	[60, 131, 37, 89]	...	1	8	60	37	14	3	0	7	3	3