Summary

Survival analysis is a statistical method used to estimate the time until an event of interest occurs, such as death, failure, or relapse. It accounts for censored data, where the event has not occurred for some subjects during the observation period, allowing for more accurate predictions and insights into the factors influencing the timing of events.

In this notebook, survival analysis is applied to Telco Customer Churn data to predict which customers are likely to stop doing business with the company. Non-parametric and parametric techniques are applied, including the Kaplan-Meier estimator, the Weibull model, and the Cox proportional hazards model.

The Python functions and data files needed to run this notebook are available in my GitHub repository.

In [1]:
import warnings
warnings.filterwarnings('ignore')
import profiling
from profiling import *
import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import pickle
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn import set_config
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from lifelines.plotting import qq_plot
from lifelines import LogLogisticFitter
from lifelines import CoxPHFitter

from sklearn.preprocessing import FunctionTransformer

Table of Contents

  • 1  Introduction
    • 1.1  What is survival analysis?
    • 1.2  Time to Event
    • 1.3  Censoring
      • 1.3.1  Why is Censoring a Challenge?
    • 1.4  Survival Function
    • 1.5  Checking Data for Censoring
    • 1.6  Non-parametric vs parametric models
  • 2  Telco Customer Churn
    • 2.1  Data Processing
      • 2.1.1  Pipeline
  • 3  Kaplan-Meier
    • 3.1  How is the Kaplan-Meier survival curve constructed?
      • 3.1.1  Small Example
    • 3.2  Applying survival analysis to groups
    • 3.3  Hypothesis Testing
      • 3.3.1  Log-rank hypothesis testing
      • 3.3.2  Log-rank Test Overview
  • 4  WeibullFitter Package
    • 4.1  Introducing the Weibull distribution
      • 4.1.1  Key Properties of the Weibull Distribution
      • 4.1.2  Weibull Survival Function
      • 4.1.3  Plotting the Weibull Survival Function in Python
    • 4.2  Survival analysis with Weibull distribution
      • 4.2.1  Weibull with Covariates
      • 4.2.2  Survival regression
    • 4.3  Predict Survival Time
  • 5  Which model fits the data the best?
    • 5.1  The Akaike Information Criterion (AIC)
    • 5.2  find_best_parametric_model()
    • 5.3  Use QQ Plots for Model Selection
  • 6  CoxPHFitter Package
    • 6.1  Key Points
      • 6.1.1  Assumption
    • 6.2  Fit Cox PH model
      • 6.2.1  How to Interpret coefficients
    • 6.3  Visualize the Hazard Ratio
    • 6.4  Cross Validation
    • 6.5  How to Use the Kaplan-Meier curves
  • 7  Prediction with the Cox PH Model
    • 7.1  Concordance Index (C-Index)

Introduction¶

What is survival analysis?¶

Survival analysis is a statistical approach for analyzing the expected duration until one or more events occur, such as death, failure, or relapse. It is widely used in clinical trials, reliability engineering, and various fields where the time to an event is of interest. The analysis typically involves methods for handling censored data, where the event has not occurred for some subjects during the study period. Key techniques include Kaplan-Meier estimates, Cox proportional hazards models, and parametric survival models.

  • Example
    • Time until loan default
    • Time until equipment failure
    • Time until a tree's first fruit
    • Time until free-trial users convert to subscribers

The definition of Event, Survival and Survival duration are:

Event: Equipment failure, loan default, disease recurrence, user conversion, or other significant experience of interest.

Survival: The event of interest has not occurred.

Survival duration: Time until the event of interest occurs or until the end of our observations.

Time to Event¶


Event: Must be clear and unambiguous, and cannot partially occur.

There should be ample data where the event does happen for survival analysis to be effective.

Censoring¶

The Censoring problem occurs when the survival time is only partially known.

How Does Censoring Happen?

The event has not yet occurred by the end of the observation period. Example: A free trial user has not converted to a paid user by the end of an experiment.

The individual's data is missing due to dropout or loss of contact. Example: A free trial user declines to share data for the experiment.

[Figure: types of censoring]

Not Censored: The event occurred, and the survival duration is known.

Right-Censored: The survival duration exceeds the observed duration.

Left-Censored: The survival duration is less than the observed duration.

Interval-Censored: The survival duration falls within a certain range but is not exactly known.

Why is Censoring a Challenge?¶

Censoring can complicate survival analysis for several reasons:

  • Incomplete Data: Censoring leads to incomplete information about the survival time, making it difficult to accurately estimate the true survival distribution.
  • Bias: If censoring is not random and is related to the event of interest, it can introduce bias into the analysis, leading to inaccurate conclusions.
  • Reduced Statistical Power: Censoring reduces the number of events observed, which can diminish the statistical power of the study and make it harder to detect significant effects.
  • Complex Analysis: Handling censored data requires more sophisticated statistical methods and models, which can be more complex to implement and interpret.

Survival Function¶

The survival function is a fundamental concept in survival analysis. Here’s a detailed explanation:

  1. Does Not Impute Censored Data: The survival function does not attempt to guess or fill in the missing data for individuals whose event time is censored. Instead, it uses the available data to estimate survival probabilities without making assumptions about the exact time of censored events.
  2. Models the Probability of a Survival Duration Being Larger Than a Certain Value: The survival function, denoted as $S(t)$, represents the probability that an individual survives longer than a specified time $t$. Mathematically, it is defined as:

    $S(t)=P(T>t)$

    • $T$ : when the event of interest occurs
    • $t$: any point in time during an observation
    • $S(t)$: models the probability that the event of interest happens after t
    • $P(T > t)$: the survival probability
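To make this definition concrete, here is a minimal sketch (using a small made-up array of fully observed event times, not the Telco data) that estimates $S(t) = P(T > t)$ by simple counting; with censored data this naive estimate breaks down, which is why the Kaplan-Meier estimator introduced later is needed.

In [ ]:
import numpy as np

# Hypothetical, fully observed event times (no censoring), for illustration only
durations = np.array([2, 3, 3, 5, 8, 9, 12, 15])

def empirical_survival(t, durations):
    """Estimate S(t) = P(T > t) as the fraction of durations greater than t."""
    return np.mean(durations > t)

for t in [0, 5, 10, 20]:
    print(f"S({t}) = {empirical_survival(t, durations):.2f}")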

Checking Data for Censoring¶

Is there a way to identify which data points are censored?

Step 1) Check for Censoring Columns

Look for columns in your dataset that indicate whether an observation is censored. These columns are often preprocessed and labeled, for example, as censored or event_observed.

Step 2) Check the Proportion of Censored Data

Determine the proportion of data points that are censored. A common rule of thumb is to ensure that less than 50% of your data is censored. Excessive censoring can reduce the reliability and power of your analysis.

Step 3) Investigate the Causes of Censoring

Examine the reasons behind the censoring to confirm that it is non-informative and random. Non-informative censoring means that the fact an observation is censored does not provide any information about its survival time. If the causes of censoring are related to the survival time, it can bias the results.

In summary, to identify and assess censoring in your data:

Identify Censoring Indicators: Look for specific columns marking censored data. Evaluate the Proportion: Ensure the proportion of censored data is manageable. Assess Censoring Causes: Confirm that censoring is random and non-informative to avoid bias.
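As a hedged sketch of steps 1 and 2 on the Telco data used later in this notebook (assuming, as the rest of the notebook does, that Churn == 'No' marks a right-censored customer):

In [ ]:
import pandas as pd

df_check = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Step 1: 'Churn' acts as the event indicator ('Yes' = churned, 'No' = right-censored)
censored = (df_check['Churn'] == 'No')

# Step 2: proportion of censored observations (rule of thumb: ideally below ~50%)
print(f"Censored: {censored.mean():.1%} of {len(df_check)} customers")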

Non-parametric vs parametric models¶

Non-Parametric Modeling

  • Makes no assumptions about the shape of the data.

Parametric Modeling

  • Makes some assumptions about the shape of the data.
  • Described with a limited set of parameters. For example, the survival curve may be assumed to follow an exponential distribution.

Telco Customer Churn¶

Data Processing¶

The Telco Customer Churn data set is retrieved from Kaggle. The context is to predict which customers will likely stop doing business with the company and subsequently apply strategies to retain those customers.

  • Notebook
In [2]:
df_telco_churn = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
df_telco_churn
Out[2]:
customerID gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
0 7590-VHVEG Female 0 Yes No 1 No No phone service DSL No Yes No No No No Month-to-month Yes Electronic check 29.85 29.85 No
1 5575-GNVDE Male 0 No No 34 Yes No DSL Yes No Yes No No No One year No Mailed check 56.95 1889.5 No
2 3668-QPYBK Male 0 No No 2 Yes No DSL Yes Yes No No No No Month-to-month Yes Mailed check 53.85 108.15 Yes
3 7795-CFOCW Male 0 No No 45 No No phone service DSL Yes No Yes Yes No No One year No Bank transfer (automatic) 42.30 1840.75 No
4 9237-HQITU Female 0 No No 2 Yes No Fiber optic No No No No No No Month-to-month Yes Electronic check 70.70 151.65 Yes
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7038 6840-RESVB Male 0 Yes Yes 24 Yes Yes DSL Yes No Yes Yes Yes Yes One year Yes Mailed check 84.80 1990.5 No
7039 2234-XADUH Female 0 Yes Yes 72 Yes Yes Fiber optic No Yes Yes No Yes Yes One year Yes Credit card (automatic) 103.20 7362.9 No
7040 4801-JZAZL Female 0 Yes Yes 11 No No phone service DSL Yes No No No No No Month-to-month Yes Electronic check 29.60 346.45 No
7041 8361-LTMKD Male 1 Yes No 4 Yes Yes Fiber optic No No No No No No Month-to-month Yes Mailed check 74.40 306.6 Yes
7042 3186-AJIEK Male 0 No No 66 Yes No Fiber optic Yes No Yes Yes Yes Yes Two year Yes Bank transfer (automatic) 105.65 6844.5 No

7043 rows × 21 columns

  • Content

    Each row represents a customer, each column contains customer’s attributes described on the column Metadata.

The data set includes information about:

  • Customers who left within the last month – the column is called Churn
  • Services that each customer has signed up for – phone, multiple lines, internet, online security, online backup, device protection, tech support, and streaming TV and movies
  • Customer account information – how long they’ve been a customer, contract, payment method, paperless billing, monthly charges, and total charges
  • Demographic info about customers – gender, age range, and if they have partners and dependents

The main aim is to apply survival analysis to customer churn.


In [4]:
df_telco_churn = df_telco_churn.reindex(np.random.permutation(df_telco_churn.index))
df_telco_churn.reset_index(inplace=True, drop=True) # Reset index
In [5]:
df_telco_churn.columns
Out[5]:
Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')
In [6]:
# check if there is duplicates or not
assert len(df_telco_churn['customerID'].drop_duplicates()) == len(df_telco_churn['customerID'])
In [7]:
#df_telco_churn.drop(['customerID'], axis=1, inplace=True)
df_telco_churn['SeniorCitizen'] = df_telco_churn['SeniorCitizen'].replace({1: 'Yes', 0: 'No'})
In [8]:
df_telco_churn['TotalCharges'] = pd.to_numeric(df_telco_churn['TotalCharges'], errors='coerce').fillna(0)
df_telco_churn['MonthlyCharges'] = pd.to_numeric(df_telco_churn['MonthlyCharges'], errors='coerce').fillna(0)
In [9]:
# correct wrongly assigned dtypes in the Pandas DataFrame
feat_num = []
feat_obj = []
for iclm in df_telco_churn.columns.to_list():
    if iclm != 'customerID':
        if  df_telco_churn[iclm].dtypes=='dbdate':
            df_telco_churn[iclm] = [int(i.strftime('%y%j')) for i in df_telco_churn[iclm]] # convert 'dbdate' to Julian date
            feat_num.append(iclm)
        else:
            try:
                pd.to_numeric(df_telco_churn[iclm])
                feat_num.append(iclm)
            except (ValueError, TypeError):
                feat_obj.append(iclm)
        
df_telco_churn[feat_num] = df_telco_churn[feat_num].astype(float)  
clmns = feat_num + feat_obj
In [10]:
font = {'size'   : 9}
plt.rc('font', **font)
colors_map = plt.cm.get_cmap('jet')
colors = colors_map(np.linspace(0,0.8,len(clmns)))

#
miss_n = [None]*len(feat_num)
miss_p = [None]*len(feat_num)
p1 = [None]*len(feat_num)
p99 = [None]*len(feat_num)
sd_num = [None]*len(feat_num)
#      
nfig1 = np.arange(0, len(clmns),10)
nfig2 = np.arange(10, len(clmns)+10,10)
ino = 0
num_count = 0
for ig in zip(nfig1, nfig2):
    in_ = 0
    fig, ax = plt.subplots(figsize=(20, 5), dpi= 100, facecolor='w', edgecolor='k') 
    ax.axis('off')  # This turns off the axis lines and labels
    for ir in range(ig[0],ig[1]):
        if ir < len(clmns):
            ax1 = plt.subplot(2,5,in_+1) 
            
            if clmns[ir] in feat_num:
                val = df_telco_churn[clmns[ir]]
                _,_,_, miss_n[num_count], miss_p[num_count],_ ,_ ,_ ,_ ,_ ,\
                p1[num_count],_ ,_ ,p99[num_count] ,sd_num[num_count] = profiling.histplt(val,bins=8,
                                  title=f'{clmns[ir]}',xlabl=None,days=False,
                                  ylabl=None,xlimt=(np.nanmin(val),np.nanmax(val)*4),ylimt=False
                                  ,axt=ax1,nsplit=5,scale=0.95,font=9,loc=1,color=colors[ir])
                num_count += 1
                
            elif clmns[ir] in feat_obj:
                val_obj = df_telco_churn[clmns[ir]]
                profiling.bargraph (val_obj, title=f'{clmns[ir]}', ylabel='',titlefontsize=10, xfontsize=9.5,
                          yfontsize=7,percent=False,fontsizelable=8, font=10, xshift=0, width=0.2,
                                    color=colors[ir],scale=0.9,legend=True, ytick_rot=25, y_rot=90, axt=ax1)                
            in_ += 1

    ino += 1
    plt.subplots_adjust(hspace=0.6)
    plt.subplots_adjust(wspace=0.3)
    
    #plt.savefig(f'./1-histograms/histogram.png', dpi=200, bbox_inches='tight')
    plt.show()
  
In [11]:
label = 'Churn'
time = 'tenure'
In [12]:
df_telco_churn.set_index('customerID', inplace=True)
df_telco_churn
Out[12]:
gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
customerID
5609-IMCGG Female No No No 15.0 Yes Yes Fiber optic Yes No Yes No No No Month-to-month Yes Credit card (automatic) 84.35 1302.65 No
5980-NOPLP Female No Yes No 44.0 Yes Yes Fiber optic No No No No Yes Yes Month-to-month Yes Electronic check 96.10 4364.10 Yes
7963-SHNDT Female No No No 52.0 No No phone service DSL Yes Yes No Yes No No Two year No Mailed check 39.10 1982.10 No
0875-CABNR Female Yes No No 10.0 Yes No Fiber optic No No Yes No No Yes Month-to-month Yes Credit card (automatic) 84.60 865.55 Yes
0229-LFJAF Male No No No 72.0 Yes Yes DSL Yes Yes Yes Yes No No Two year No Bank transfer (automatic) 69.65 4908.25 No
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6849-WLEYG Male No No Yes 1.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Month-to-month No Mailed check 19.75 19.75 Yes
4176-RELJR Male Yes No No 67.0 Yes Yes No No internet service No internet service No internet service No internet service No internet service No internet service Two year No Credit card (automatic) 25.10 1672.15 No
0422-OHQHQ Female No Yes Yes 15.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service One year No Mailed check 20.55 295.95 No
3757-NJYBX Male Yes Yes No 32.0 Yes Yes Fiber optic No Yes Yes No Yes Yes Month-to-month No Bank transfer (automatic) 106.35 3520.75 Yes
1934-SJVJK Male No No No 1.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Month-to-month No Mailed check 20.15 20.15 Yes

7043 rows × 20 columns

In [13]:
# Training and Test
spt = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_idx, test_idx in spt.split(df_telco_churn, df_telco_churn[label]):
    train_set_strat = df_telco_churn.iloc[train_idx]
    test_set_strat  = df_telco_churn.iloc[test_idx]
In [14]:
train_set_strat
Out[14]:
gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
customerID
3545-CNWRG Female No Yes Yes 49.0 Yes Yes Fiber optic No Yes No No Yes Yes Month-to-month Yes Electronic check 98.35 4889.20 No
7673-BQGKU Female No Yes Yes 69.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Two year No Bank transfer (automatic) 20.15 1337.50 No
2541-YGPKE Male No Yes Yes 42.0 Yes No DSL Yes No No Yes No Yes One year No Credit card (automatic) 63.70 2763.35 No
6034-YMTOB Female No No No 5.0 Yes No Fiber optic No No No Yes No No Month-to-month Yes Mailed check 75.65 399.45 No
2984-RGEYA Female No Yes Yes 69.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Two year No Bank transfer (automatic) 19.75 1375.40 No
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0218-QNVAS Male No Yes Yes 71.0 Yes Yes Fiber optic No Yes No No Yes Yes One year No Bank transfer (automatic) 100.55 7113.75 No
2889-FPWRM Male No Yes No 72.0 Yes Yes Fiber optic Yes Yes Yes Yes Yes Yes One year Yes Bank transfer (automatic) 117.80 8684.80 Yes
4009-ALQFH Female No No No 25.0 Yes No Fiber optic No Yes Yes No Yes Yes Month-to-month Yes Electronic check 99.50 2369.05 Yes
8024-XNAFQ Female Yes No No 72.0 Yes Yes Fiber optic Yes Yes Yes Yes No Yes Two year Yes Bank transfer (automatic) 107.40 7748.75 No
8982-NHAVY Male No No No 27.0 Yes Yes Fiber optic No No Yes No Yes Yes One year Yes Bank transfer (automatic) 100.50 2673.45 No

5634 rows × 20 columns


In [16]:
#def data_processing(df, feat_num, feat_obj, label, time):
#    #df = df.drop([label]+[time], axis=1)
#    feat_num = [i for i in feat_num if i != label and i != time]
#    feat_obj = [i for i in feat_obj if i != label and i != time]
#    
#    # Convert to one-hot-encoding
#    X = pd.get_dummies(df, columns=feat_obj, prefix=feat_obj)
#    X.replace({True: 1, False: 0}, inplace=True)
#    
#    X[label].replace({'Yes': 1, 'No': 0}, inplace=True)
#    
#    # move time and label to far right
#    remaining_cols = [col for col in X.columns if col not in [label]+[time]]
#    
#    # Reorder columns
#    new_order = remaining_cols +[time] + [label]    
#    
#    return X[new_order]
In [17]:
#train_set_strat = data_processing(train_set_strat, feat_num, feat_obj, label, time)
#train_set_strat

Pipeline¶

In [18]:
feat_num = [i for i in feat_num if i != label and i != time]
feat_obj = [i for i in feat_obj if i != label and i != time]

# Define the transformations for numerical features
numerical_features = feat_num
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    #('scaler', StandardScaler())
])

# Define the transformations for categorical features
categorical_features = feat_obj
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformations using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

# Apply the transformations
df_processed = preprocessor.fit_transform(train_set_strat)

# Get feature names
numerical_feature_names = numerical_features
categorical_feature_names = preprocessor.named_transformers_['cat']['onehot'].get_feature_names_out(categorical_features)

# Combine feature names
feature_names = np.concatenate([numerical_feature_names, categorical_feature_names])

# Convert the result back to a DataFrame for better readability
df_processed = pd.DataFrame(df_processed, columns=feature_names, index=train_set_strat.index)

df_processed
Out[18]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check
customerID
3545-CNWRG 98.35 4889.20 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
7673-BQGKU 20.15 1337.50 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0
2541-YGPKE 63.70 2763.35 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0
6034-YMTOB 75.65 399.45 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
2984-RGEYA 19.75 1375.40 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0218-QNVAS 100.55 7113.75 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0
2889-FPWRM 117.80 8684.80 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0
4009-ALQFH 99.50 2369.05 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
8024-XNAFQ 107.40 7748.75 1.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0
8982-NHAVY 100.50 2673.45 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0

5634 rows × 45 columns

In [19]:
set_config(display="diagram")
preprocessor
Out[19]:
ColumnTransformer(transformers=[('num',
                                 Pipeline(steps=[('imputer', SimpleImputer())]),
                                 ['MonthlyCharges', 'TotalCharges']),
                                ('cat',
                                 Pipeline(steps=[('imputer',
                                                  SimpleImputer(strategy='most_frequent')),
                                                 ('onehot',
                                                  OneHotEncoder(handle_unknown='ignore'))]),
                                 ['gender', 'SeniorCitizen', 'Partner',
                                  'Dependents', 'PhoneService', 'MultipleLines',
                                  'InternetService', 'OnlineSecurity',
                                  'OnlineBackup', 'DeviceProtection',
                                  'TechSupport', 'StreamingTV',
                                  'StreamingMovies', 'Contract',
                                  'PaperlessBilling', 'PaymentMethod'])])

In [21]:
train_set_strat[label].replace({'Yes': 1, 'No': 0}, inplace=True)
train_set_strat
Out[21]:
gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
customerID
3545-CNWRG Female No Yes Yes 49.0 Yes Yes Fiber optic No Yes No No Yes Yes Month-to-month Yes Electronic check 98.35 4889.20 0
7673-BQGKU Female No Yes Yes 69.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Two year No Bank transfer (automatic) 20.15 1337.50 0
2541-YGPKE Male No Yes Yes 42.0 Yes No DSL Yes No No Yes No Yes One year No Credit card (automatic) 63.70 2763.35 0
6034-YMTOB Female No No No 5.0 Yes No Fiber optic No No No Yes No No Month-to-month Yes Mailed check 75.65 399.45 0
2984-RGEYA Female No Yes Yes 69.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service Two year No Bank transfer (automatic) 19.75 1375.40 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0218-QNVAS Male No Yes Yes 71.0 Yes Yes Fiber optic No Yes No No Yes Yes One year No Bank transfer (automatic) 100.55 7113.75 0
2889-FPWRM Male No Yes No 72.0 Yes Yes Fiber optic Yes Yes Yes Yes Yes Yes One year Yes Bank transfer (automatic) 117.80 8684.80 1
4009-ALQFH Female No No No 25.0 Yes No Fiber optic No Yes Yes No Yes Yes Month-to-month Yes Electronic check 99.50 2369.05 1
8024-XNAFQ Female Yes No No 72.0 Yes Yes Fiber optic Yes Yes Yes Yes No Yes Two year Yes Bank transfer (automatic) 107.40 7748.75 0
8982-NHAVY Male No No No 27.0 Yes Yes Fiber optic No No Yes No Yes Yes One year Yes Bank transfer (automatic) 100.50 2673.45 0

5634 rows × 20 columns

In [22]:
train_set_time_label = pd.merge(df_processed, train_set_strat[['tenure', 'Churn']], left_index=True, right_index=True, how='left')
train_set_time_label
Out[22]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure Churn
customerID
3545-CNWRG 98.35 4889.20 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 49.0 0
7673-BQGKU 20.15 1337.50 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0 69.0 0
2541-YGPKE 63.70 2763.35 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 42.0 0
6034-YMTOB 75.65 399.45 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 5.0 0
2984-RGEYA 19.75 1375.40 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0 69.0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0218-QNVAS 100.55 7113.75 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 71.0 0
2889-FPWRM 117.80 8684.80 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 72.0 1
4009-ALQFH 99.50 2369.05 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 25.0 1
8024-XNAFQ 107.40 7748.75 1.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 72.0 0
8982-NHAVY 100.50 2673.45 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 27.0 0

5634 rows × 47 columns


Kaplan-Meier¶

The lifelines package is a comprehensive library for survival analysis in Python. It provides tools and functions to handle various aspects of survival analysis, including fitting survival functions and plotting survival curves.

  • Plot Survival Curves Based on the Fitted Survival Functions: Once the survival functions are fitted to the data, lifelines can plot the survival curves. These plots visually represent the probability of survival over time, providing insights into the survival patterns and differences between groups.
In [23]:
from lifelines.plotting import plot_lifetimes

# Sample 35 customers once so the durations and event flags refer to the same rows
sample_35 = train_set_strat.sample(35, replace=False)
tenure = sample_35[time]
Churn = sample_35[label]
fig, ax = plt.subplots(figsize=(10, 6))
plot_lifetimes(tenure, Churn, ax=ax)

# Customize the appearance
ax.set_title("Customer subscription lifelines of Sample Data", fontsize=20, fontweight='bold')
ax.set_xlabel("Days subscribed", fontsize=14)
ax.set_ylabel("Customer ID", fontsize=14)
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
ax.axvline(x=tenure.mean(), color='r', linestyle='--', linewidth=3, label='Mean Duration')
ax.legend(loc=4,fontsize=12)

# Customize tick parameters
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)

# Adding a background color
ax.set_facecolor('#f7f7f7')

# Display the plot
plt.tight_layout()
plt.show()

The Kaplan-Meier estimator is a non-parametric statistic that estimates the survival function of time-to-event data. The survival function answers the question: "What is the probability that the event has not yet happened by time t?"

$S(t)=P(T>t)$

The Kaplan-Meier estimator estimates the survival function using time and event data. It is the simplest survival model, giving the probability that the event has not yet occurred by time t. The shaded region around the curve represents the confidence interval, indicating how confident you can be that the true survival probability lies within this range.

A 95% confidence interval (CI) means that you are 95% certain it contains the true population value, as it would be estimated from a much larger study.

Also known as

  • the product-limit estimator
  • the K-M estimator

How is the Kaplan-Meier survival curve constructed?¶

Step 1: Sort the data in ascending order. In cases of ties, place censored data after uncensored data.

Step 2: For each t , calculate $d_{i}$ , $n_{i}$ , and

$\large(1-\frac{d_{i}}{n_{i}})$

Step 3: For each t , multiply $\large (1-\frac{d_{i}}{n_{i}})$ with

$\large (1-\frac{d_{i-1}}{n_{i-1}}),\,(1-\frac{d_{i-2}}{n_{i-2}}),\,....,(1-\frac{d_{0}}{n_{0}})$

$\Large \hat{S}(t) = \prod_{t_i \le t} \left( 1 - \frac{d_i}{n_i} \right)$

where $t_{i}$ is a time at which at least one event occurred, $d_{i}$ is the number of events at $t_{i}$, and $n_{i}$ is the number of subjects at risk just before $t_{i}$.
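To make the product-limit formula concrete, here is a minimal sketch on toy data (not the Telco set) that computes $\hat{S}(t)$ step by step and cross-checks the result against lifelines:

In [ ]:
import pandas as pd
from lifelines import KaplanMeierFitter

# Toy data: durations and event flags (1 = event observed, 0 = right-censored)
toy = pd.DataFrame({'duration': [3, 4, 4, 6, 8, 8, 10, 12],
                    'event':    [1, 0, 1, 1, 0, 1, 1, 0]})

# Manual product-limit estimate: multiply (1 - d_i/n_i) over all event times <= t
surv = 1.0
for t in sorted(toy.loc[toy['event'] == 1, 'duration'].unique()):
    d_i = ((toy['duration'] == t) & (toy['event'] == 1)).sum()  # events at t
    n_i = (toy['duration'] >= t).sum()                          # at risk just before t
    surv *= 1 - d_i / n_i
    print(f"t={t}: d_i={d_i}, n_i={n_i}, S_hat(t)={surv:.3f}")

# Cross-check against the lifelines implementation
kmf_toy = KaplanMeierFitter().fit(toy['duration'], toy['event'])
print(kmf_toy.survival_function_)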

Small Example¶

[Figures: small worked example of the Kaplan-Meier calculation — the raw event/censoring data, the data sorted in ascending order, and the step-by-step computation.]

  • Why is the confidence interval useful?
    • A way to quantify how uncertain we are about each point estimate of survival probabilities
    • A wide confidence interval means we are less certain, often due to small sample size
    • A narrow confidence interval means we are more certain, often due to large sample size
In [24]:
train_set_strat[[time]+[label]]
Out[24]:
tenure Churn
customerID
3545-CNWRG 49.0 0
7673-BQGKU 69.0 0
2541-YGPKE 42.0 0
6034-YMTOB 5.0 0
2984-RGEYA 69.0 0
... ... ...
0218-QNVAS 71.0 0
2889-FPWRM 72.0 1
4009-ALQFH 25.0 1
8024-XNAFQ 72.0 0
8982-NHAVY 27.0 0

5634 rows × 2 columns

Plot survival function point estimates as a continuous line.

In [25]:
from lifelines import KaplanMeierFitter
from matplotlib import pyplot as plt

T = train_set_strat[time]
E = train_set_strat[label]
kmf = KaplanMeierFitter().fit(T, E, alpha=0.05)
#kmf.plot_survival_function()
kmf.survival_function_.plot()
# Improve the plot aesthetics
plt.ylim(0.6, 1)
plt.title('Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.show()
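Beyond the plot, the fitted object exposes the estimates directly; for example (attribute names as in recent lifelines versions):

In [ ]:
# Confidence bounds around the Kaplan-Meier estimate (95% here, since alpha=0.05)
print(kmf.confidence_interval_.head())

# Median survival time: smallest t with S(t) <= 0.5 (inf if the curve never drops that low)
print(kmf.median_survival_time_)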

Plot survival function as a stepped line without the confidence interval.

In [26]:
from lifelines import KaplanMeierFitter
from matplotlib import pyplot as plt

T = train_set_strat[time]
E = train_set_strat[label]
kmf = KaplanMeierFitter().fit(T, E, alpha=0.05)
kmf.plot(ci_show=False)
# Improve the plot aesthetics
plt.ylim(0.6, 1)
plt.title('Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.show()

Applying survival analysis to groups¶

Now we are going to answer this question:

Is there a difference in time to churn between Male and Female?

There are different approaches to compare survival analysis between two groups:

1. Are any point estimates or survival statistics different?

  • Compare two groups' survival probabilities at a specific time
  • Compare total proportion of survived subjects across two groups

2. Are the underlying distributions different?

  • Requires formal hypothesis testing

3. How much does an attribute affect survival?

  • Requires regression-based modeling frameworks

A simple first approach is to fit a Kaplan-Meier survival function to each group and visualize the survival curves side by side.

Benefits:

  • Easy to use and interpret.
  • Non-parametric, providing flexibility for various types of survival distributions.
  • Serves as an effective illustrative tool to demonstrate differences in survival functions.
In [27]:
train_set_strat_f = train_set_strat[train_set_strat.gender=='Female']
train_set_strat_m = train_set_strat[train_set_strat.gender=='Male']

ax = plt.subplot(111)
#
T = train_set_strat_f[time]
E = train_set_strat_f[label]
churn_kmf = KaplanMeierFitter()
churn_kmf.fit(T, E, label="Female")
churn_kmf.plot_survival_function(ax=ax)
#
T = train_set_strat_m[time]
E = train_set_strat_m[label]
churn_kmf = KaplanMeierFitter()
churn_kmf.fit(T, E, label="Male")
churn_kmf.plot_survival_function(ax=ax)

# Improve the plot aesthetics
plt.ylim(0.6, 1)
plt.title('Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.show()
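For approach 1 above (comparing survival probabilities at a specific time), a small follow-up sketch that reuses the two group subsets defined in the cell above and queries both curves at, say, 12 months of tenure:

In [ ]:
kmf_f = KaplanMeierFitter().fit(train_set_strat_f[time], train_set_strat_f[label], label='Female')
kmf_m = KaplanMeierFitter().fit(train_set_strat_m[time], train_set_strat_m[label], label='Male')

# Point estimates of the probability of still being a customer after 12 months
print('Female S(12):', kmf_f.predict(12))
print('Male   S(12):', kmf_m.predict(12))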

Hypothesis Testing¶

Hypothesis Testing is a statistical method used to make inferences or draw conclusions about a population based on sample data. It involves formulating two competing hypotheses: the null hypothesis (H₀) and the alternative hypothesis (H₁). Here’s an overview:

  1. Null Hypothesis (H₀): This is a statement that there is no effect or no difference. It represents the status quo or a baseline assumption that any observed effect is due to random chance. For example, "Female and Male have the same churn rate."
  2. Alternative Hypothesis (H₁): This is a statement that there is an effect or a difference. It represents what the researcher aims to prove. For example, "Females have a higher churn rate than males."
  3. P-value: The p-value measures the probability of obtaining test results at least as extreme as the observed data, assuming the null hypothesis is true. A low p-value (typically ≤ 0.05) indicates strong evidence against the null hypothesis, suggesting that it may be rejected in favor of the alternative hypothesis.
  4. Test Statistic: A value calculated from the sample data that is used to evaluate the plausibility of the null hypothesis. Different tests use different test statistics (e.g., t-statistic, z-statistic).
  5. Significance Level (α): A threshold chosen by the researcher (commonly 0.05) that determines whether the p-value is low enough to reject the null hypothesis.
  6. Decision: Based on the p-value and the significance level, the researcher decides whether to reject or fail to reject the null hypothesis. Rejecting the null hypothesis suggests that there is sufficient evidence to support the alternative hypothesis.


Log-rank hypothesis testing¶

  • Compares survival probabilities S between groups at each time t

$H_{0} : S_{a}(t) = S_{b}(t)$

$H_{1} : S_{a}(t) ≠ S_{b}(t)$

  • P-value: assuming $S_{a}(t) = S_{b}(t)$ (the null hypothesis), the probability of observing a difference between the two groups at least as extreme as the one in the data.
In [28]:
durations_A = train_set_strat_f[time]
event_observed_A = train_set_strat_f[label]
#
durations_B = train_set_strat_m[time]
event_observed_B = train_set_strat_m[label]
#
from lifelines.statistics import logrank_test
lrt = logrank_test(durations_A, durations_B, event_observed_A, event_observed_B)
In [29]:
lrt.print_summary()
t_0 -1
null_distribution chi squared
degrees_of_freedom 1
test_name logrank_test
test_statistic p -log2(p)
0 0.17 0.68 0.56
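The StatisticalResult returned by logrank_test also exposes these numbers directly, so the decision rule described in the hypothesis-testing section above can be applied in code (alpha = 0.05 is an assumed threshold):

In [ ]:
alpha = 0.05  # assumed significance level
print('test statistic:', lrt.test_statistic)
print('p-value:       ', lrt.p_value)

if lrt.p_value < alpha:
    print('Reject H0: the survival curves for Female and Male differ.')
else:
    print('Fail to reject H0: no evidence of a difference between Female and Male.')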

Log-rank Test Overview¶

  • Nature of Test:

    • Log-rank test is a non-parametric hypothesis test used to compare the survival distributions of two or more groups.
  • Data Requirements:

    • When using the lifelines library, the data must be right-censored. Right-censored data means that for some subjects, the event of interest has not occurred by the end of the study period (e.g., subject 3 is still alive at the last follow-up).

    • Censorship must be non-informative, meaning the reason for censorship should not be related to the likelihood of the event occurring.

  • Hypotheses:

    • Null Hypothesis (𝐻0): There is no difference in survival between the groups. The survival functions of the groups are equal.
    • Alternative Hypothesis (𝐻1): There is a difference in survival between the groups. The survival functions are not equal.
  • Procedure for Multiple Groups:

    • For a log-rank test between more than two groups (𝑛>2), use pairwise_logrank_test() or multivariate_logrank_test() from the lifelines library.
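As a hedged sketch of the multi-group case (assuming the three-level Contract column of the training set is used as the grouping variable):

In [ ]:
from lifelines.statistics import multivariate_logrank_test

res = multivariate_logrank_test(
    event_durations=train_set_strat[time],
    groups=train_set_strat['Contract'],    # Month-to-month / One year / Two year
    event_observed=train_set_strat[label],
)
res.print_summary()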

WeibullFitter Package¶

Introducing the Weibull distribution¶

The Weibull distribution is a continuous probability distribution commonly used in reliability engineering, failure analysis, and survival analysis. Named after Swedish mathematician Waloddi Weibull, it is particularly useful for modeling the time until an event occurs, such as the failure of a component or system.

Key Properties of the Weibull Distribution¶

  1. Probability Density Function (PDF): The PDF of the Weibull distribution is given by:
     $f(x; \lambda, k) = \begin{cases} \frac{k}{\lambda} \left(\frac{x}{\lambda}\right)^{k-1} e^{-(x/\lambda)^k} & x \geq 0 \\ 0 & x < 0 \end{cases}$
     where:
    • $\lambda$ (scale parameter): Controls the scale of the distribution.
    • $k$ (shape parameter): Determines the shape of the distribution.
  2. Cumulative Distribution Function (CDF): The CDF of the Weibull distribution is given by: $F(x; \lambda, k) = 1 - e^{-(x/\lambda)^k}$ for $x \geq 0$.
  3. Shape Parameter ($k$):
    • $k < 1$: The distribution has a decreasing hazard function (used for early-life failures).
    • $k = 1$: The distribution simplifies to the exponential distribution (constant hazard function, used for random failures).
    • $k > 1$: The distribution has an increasing hazard function (used for wear-out failures).
  4. Scale Parameter ($\lambda$):
    • This parameter stretches or compresses the distribution along the x-axis.
    • This parameter stretches or compresses the distribution along the x-axis.
Applications of the Weibull Distribution:

  1. Reliability Engineering: Predicting the lifespan of products and systems; analyzing failure rates and reliability.

  2. Survival Analysis: Modeling time-to-event data, such as survival times in medical research.

  3. Weather Forecasting: Modeling wind speeds and extreme weather events.

  4. Industrial Engineering: Maintenance scheduling and warranty analysis.

In [30]:
### Example in Python

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# Parameters
shape_k = 1.5  # Shape parameter
scale_lambda = 1  # Scale parameter

# Generate data
data = np.random.weibull(shape_k, 1000) * scale_lambda

# Plot histogram
plt.hist(data, bins=30, density=True, alpha=0.6, color='c', label='Histogram')

# Plot PDF
x = np.linspace(0, max(data), 100)
pdf = (shape_k / scale_lambda) * (x / scale_lambda)**(shape_k - 1) * np.exp(-(x / scale_lambda)**shape_k)
plt.plot(x, pdf, 'r-', label='Fitted Weibull distribution')

plt.title('Weibull Distribution')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()

To convert the Weibull distribution into a survival distribution, you need to focus on the survival function (also known as the reliability function), which is the complement of the cumulative distribution function (CDF). The survival function $S(x)$ gives the probability that the event of interest (e.g., failure of a component) has not occurred by time $x$.

Weibull Survival Function¶

The survival function $S(x)$ for the Weibull distribution is defined as: $ S(x; \lambda, k) = 1 - F(x; \lambda, k) = e^{-(x/\lambda)^k} $ where:

  • $\lambda $ is the scale parameter,
  • $ k $ is the shape parameter,
  • $ F(x; \lambda, k) $ is the CDF of the Weibull distribution.

Plotting the Weibull Survival Function in Python¶

Here's how you can plot the Weibull survival function using Python:

In [31]:
import numpy as np
import matplotlib.pyplot as plt

# Parameters
shape_k = 1.5  # Shape parameter
scale_lambda = 1  # Scale parameter

# Generate data
x = np.linspace(0, 5, 1000)

# Calculate survival function
survival_function = np.exp(-(x / scale_lambda)**shape_k)

# Plot survival function
plt.plot(x, survival_function, 'r-', label='Survival Function')
plt.title('Weibull Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.legend()
plt.grid(True)
plt.show()

Survival analysis with Weibull distribution¶

In [32]:
# Import the WeibullFitter class
from lifelines import WeibullFitter

#Instantiate a WeibullFitter class
wb = WeibullFitter()
In [33]:
# Call .fit() to fit the estimator to the data
wb.fit(tenure, Churn)
Out[33]:
<lifelines.WeibullFitter:"Weibull_estimate", fitted with 35 total observations, 27 right-censored observations>

Access .survival_function_ , .lambda_ , .rho_ , .summary , .predict()

In the context of the WeibullFitter class from the lifelines library, these attributes and methods are used to fit and analyze Weibull survival models. Here's a detailed explanation of each:

  • .survival_function_
    • Definition: This attribute provides the estimated survival function from the fitted Weibull model.
    • Usage: After fitting the Weibull model, survival_function_ gives the survival probabilities at different time points.
  • .lambda_
    • Definition: This attribute represents the scale parameter (λ) of the Weibull distribution after fitting the model.
    • Usage: It is a fitted parameter that scales the distribution along the time axis.
    • Example: print(wb.lambda_) prints the fitted scale parameter after calling wb.fit().
  • .rho_
    • Definition: This attribute represents the shape parameter (ρ) of the Weibull distribution after fitting the model.
    • Usage: The shape parameter determines the shape of the hazard function, indicating whether the hazard increases, decreases, or stays constant over time.
  • .summary
    • Definition: This attribute provides a summary of the fitted Weibull model, including parameter estimates, standard errors, confidence intervals, and other relevant statistics.
    • Usage: It is useful for interpreting the results of the model fitting.
    • Example: print(wb.summary) displays the fitted parameters with standard errors and confidence intervals.
  • .predict()
    • Definition: This method predicts survival probabilities or other related quantities at specified time points for the fitted Weibull model.
    • Usage: You can use predict() to estimate the survival probability at a particular time or the time at which a certain survival probability is reached.
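A short follow-up sketch (continuing with the wb model fitted above; the numbers depend on the random 35-customer sample) that predicts survival at several tenures at once and reads off the implied median survival time:

In [ ]:
# Survival probabilities at several tenures at once
print(wb.predict([12, 24, 48]))

# Median survival time implied by the fitted Weibull model
print(wb.median_survival_time_)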
In [34]:
wb.survival_function_.plot()

# Plot survival function
plt.title('Weibull Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.legend()
plt.grid(True)
plt.show()
In [35]:
print(wb.lambda_, wb.rho_)
78.65021981037957 2.653534865645372
In [36]:
print(wb.predict(50))
0.7403834568839308

Weibull with Covariates¶

We already applied the Kaplan-Meier estimator to compare groups; the graph below repeats the survival analysis for male and female customers.

In [37]:
train_set_strat_f = train_set_strat[train_set_strat.gender=='Female']
train_set_strat_m = train_set_strat[train_set_strat.gender=='Male']

ax = plt.subplot(111)
#
T = train_set_strat_f[time]
E = train_set_strat_f[label]
churn_kmf = KaplanMeierFitter()
churn_kmf.fit(T, E, label="Female")
churn_kmf.plot_survival_function(ax=ax)
#
T = train_set_strat_m[time]
E = train_set_strat_m[label]
churn_kmf = KaplanMeierFitter()
churn_kmf.fit(T, E, label="Male")
churn_kmf.plot_survival_function(ax=ax)

# Improve the plot aesthetics
plt.ylim(0.6, 1)
plt.title('Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.show()
In [38]:
durations_A = train_set_strat_f[time]
event_observed_A = train_set_strat_f[label]
#
durations_B = train_set_strat_m[time]
event_observed_B = train_set_strat_m[label]
#
from lifelines.statistics import logrank_test
lrt = logrank_test(durations_A, durations_B, event_observed_A, event_observed_B)
In [39]:
lrt.print_summary()
t_0 -1
null_distribution chi squared
degrees_of_freedom 1
test_name logrank_test
test_statistic p -log2(p)
0 0.17 0.68 0.56

The expression $-\log_2(p)$ represents the negative base-2 logarithm of a probability $p$. This quantity is often used in information theory and related fields to measure the amount of information or entropy associated with a probability $p$.
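For the p-value reported above, this quantity can be verified with a quick arithmetic sanity check:

In [ ]:
import numpy as np

p = 0.68
print(-np.log2(p))   # roughly 0.56, matching the -log2(p) column in the summary above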

Now the question is: how can we evaluate whether, and how much, one or more continuous variables influence the survival function?

Survival regression¶

Survival regression is a set of statistical methods used to investigate the relationship between survival times (the time until an event of interest occurs) and one or more predictor variables. These predictor variables can be continuous, categorical, or a mix of both. The primary goal of survival regression is to understand how different factors influence the time until the event occurs.

In [40]:
train_set_time_label
Out[40]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure Churn
customerID
3545-CNWRG 98.35 4889.20 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 49.0 0
7673-BQGKU 20.15 1337.50 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0 69.0 0
2541-YGPKE 63.70 2763.35 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 42.0 0
6034-YMTOB 75.65 399.45 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 5.0 0
2984-RGEYA 19.75 1375.40 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 0.0 69.0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0218-QNVAS 100.55 7113.75 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 71.0 0
2889-FPWRM 117.80 8684.80 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 72.0 1
4009-ALQFH 99.50 2369.05 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 25.0 1
8024-XNAFQ 107.40 7748.75 1.0 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 0.0 72.0 0
8982-NHAVY 100.50 2673.45 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 27.0 0

5634 rows × 47 columns

In [41]:
from lifelines import WeibullAFTFitter
aft = WeibullAFTFitter()
# Add a tiny offset: the Weibull AFT model requires strictly positive durations (tenure can be 0)
train_set_time_label['tenure'] = train_set_time_label['tenure'] + 0.0000001
aft.fit(df=train_set_time_label, duration_col="tenure", event_col="Churn")
Out[41]:
<lifelines.WeibullAFTFitter: fitted with 5634 total observations, 4139 right-censored observations>
In [42]:
aft.summary
Out[42]:
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% cmp to z p -log2(p)
param covariate
lambda_ Contract_Month-to-month -0.625910 0.534774 0.068717 -0.760592 -0.491228 0.467390 0.611874 0.0 -9.108573 8.347282e-20 63.377255
Contract_One year -0.117617 0.889036 0.074833 -0.264288 0.029054 0.767753 1.029480 0.0 -1.571719 1.160157e-01 3.107609
Contract_Two year 0.637203 1.891184 0.104509 0.432370 0.842036 1.540905 2.321087 0.0 6.097138 1.079842e-09 29.786532
Dependents_No -0.177750 0.837152 0.071699 -0.318277 -0.037222 0.727401 0.963463 0.0 -2.479100 1.317143e-02 6.246445
Dependents_Yes -0.094509 0.909820 0.072860 -0.237311 0.048293 0.788746 1.049478 0.0 -1.297138 1.945839e-01 2.361536
DeviceProtection_No -0.144414 0.865529 0.044978 -0.232569 -0.056259 0.792495 0.945294 0.0 -3.210794 1.323686e-03 9.561223
DeviceProtection_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
DeviceProtection_Yes -0.148935 0.861625 0.130892 -0.405478 0.107608 0.666658 1.113611 0.0 -1.137852 2.551824e-01 1.970399
InternetService_DSL 0.239567 1.270699 0.154473 -0.063193 0.542328 0.938762 1.720006 0.0 1.550872 1.209323e-01 3.047728
InternetService_Fiber optic -0.497227 0.608215 0.295886 -1.077154 0.082699 0.340563 1.086215 0.0 -1.680467 9.286652e-02 3.428698
InternetService_No 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
MonthlyCharges -0.000004 0.999996 0.017860 -0.035010 0.035001 0.965596 1.035621 0.0 -0.000231 9.998156e-01 0.000266
MultipleLines_No -0.078587 0.924422 0.023857 -0.125346 -0.031827 0.882192 0.968674 0.0 -3.294037 9.875951e-04 9.983793
MultipleLines_No phone service -0.161648 0.850740 0.028529 -0.217565 -0.105732 0.804475 0.899666 0.0 -5.666011 1.461602e-08 26.027874
MultipleLines_Yes -0.096340 0.908155 0.104498 -0.301152 0.108472 0.739965 1.114574 0.0 -0.921929 3.565654e-01 1.487762
OnlineBackup_No -0.165493 0.847476 0.044559 -0.252828 -0.078159 0.776602 0.924818 0.0 -3.714003 2.040063e-04 12.259099
OnlineBackup_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
OnlineBackup_Yes -0.125277 0.882253 0.130599 -0.381247 0.130693 0.683009 1.139618 0.0 -0.959247 3.374346e-01 1.567320
OnlineSecurity_No -0.245415 0.782380 0.051645 -0.346637 -0.144193 0.707062 0.865720 0.0 -4.751984 2.014307e-06 18.921285
OnlineSecurity_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
OnlineSecurity_Yes -0.037410 0.963282 0.137672 -0.307242 0.232423 0.735473 1.261653 0.0 -0.271729 7.858301e-01 0.347711
PaperlessBilling_No -0.053089 0.948295 0.062459 -0.175507 0.069329 0.839031 1.071788 0.0 -0.849983 3.953346e-01 1.338854
PaperlessBilling_Yes -0.181115 0.834339 0.061262 -0.301187 -0.061043 0.739940 0.940782 0.0 -2.956395 3.112585e-03 8.327671
Partner_No -0.173280 0.840902 0.060440 -0.291741 -0.054819 0.746962 0.946657 0.0 -2.866952 4.144456e-03 7.914602
Partner_Yes -0.053971 0.947460 0.059192 -0.169986 0.062044 0.843677 1.064009 0.0 -0.911785 3.618819e-01 1.466409
PaymentMethod_Bank transfer (automatic) 0.086271 1.090102 0.056765 -0.024986 0.197528 0.975323 1.218387 0.0 1.519795 1.285624e-01 2.959459
PaymentMethod_Credit card (automatic) 0.093706 1.098237 0.057591 -0.019170 0.206582 0.981012 1.229469 0.0 1.627097 1.037164e-01 3.269284
PaymentMethod_Electronic check -0.192595 0.824816 0.047560 -0.285811 -0.099379 0.751404 0.905399 0.0 -4.049516 5.132368e-05 14.250016
PaymentMethod_Mailed check -0.250926 0.778080 0.054606 -0.357953 -0.143900 0.699106 0.865974 0.0 -4.595186 4.323638e-06 17.819323
PhoneService_No -0.161648 0.850740 0.028529 -0.217565 -0.105732 0.804475 0.899666 0.0 -5.666011 1.461602e-08 26.027874
PhoneService_Yes -0.511905 0.599353 0.336254 -1.170950 0.147141 0.310072 1.158517 0.0 -1.522376 1.279149e-01 2.966743
SeniorCitizen_No -0.206465 0.813455 0.104921 -0.412107 -0.000823 0.662254 0.999178 0.0 -1.967805 4.909043e-02 4.348414
SeniorCitizen_Yes -0.211985 0.808977 0.105803 -0.419356 -0.004614 0.657470 0.995397 0.0 -2.003570 4.511610e-02 4.470214
StreamingMovies_No -0.068278 0.934000 0.022077 -0.111548 -0.025009 0.894449 0.975301 0.0 -3.092792 1.982832e-03 8.978222
StreamingMovies_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
StreamingMovies_Yes -0.221827 0.801054 0.172112 -0.559160 0.115506 0.571689 1.122441 0.0 -1.288853 1.974491e-01 2.340447
StreamingTV_No -0.045958 0.955082 0.021924 -0.088928 -0.002988 0.914911 0.997017 0.0 -2.096251 3.605990e-02 4.793461
StreamingTV_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
StreamingTV_Yes -0.245891 0.782008 0.173192 -0.585340 0.093558 0.556916 1.098075 0.0 -1.419762 1.556770e-01 2.683373
TechSupport_No -0.214250 0.807146 0.050140 -0.312523 -0.115978 0.731599 0.890495 0.0 -4.273035 1.928304e-05 15.662308
TechSupport_No internet service 0.071509 1.074128 0.064156 -0.054234 0.197252 0.947210 1.218051 0.0 1.114610 2.650178e-01 1.915839
TechSupport_Yes -0.074763 0.927963 0.137096 -0.343466 0.193940 0.709307 1.214023 0.0 -0.545335 5.855233e-01 0.772202
TotalCharges 0.000717 1.000717 0.000020 0.000678 0.000755 1.000679 1.000756 0.0 36.507606 8.399247e-292 966.932744
gender_Female -0.114614 0.891711 0.058029 -0.228348 -0.000879 0.795847 0.999121 0.0 -1.975113 4.825527e-02 4.373170
gender_Male -0.112292 0.893783 0.059452 -0.228815 0.004231 0.795475 1.004240 0.0 -1.888792 5.891966e-02 4.085107
Intercept 5.323952 205.193302 0.027924 5.269222 5.378683 194.264759 216.736641 0.0 190.657013 0.000000e+00 inf
rho_ Intercept 0.353859 1.424554 0.022236 0.310277 0.397441 1.363803 1.488011 0.0 15.913804 5.082928e-57 187.004242
In [43]:
fig, ax = plt.subplots(figsize=(10, 8))  # Set the figure size

# Extract and modify the index
new_index = [(col[0].replace("lambda_", "").replace("Intercept", "").strip(), col[1]) for col in aft.summary.index]
aft.summary.index = pd.MultiIndex.from_tuples(new_index)
aft.plot(ax=ax)

# Beautify the plot
ax.set_title("Weibull AFT Model Coefficients", fontsize=16, fontweight='bold')
ax.set_xlabel("Coefficient Value", fontsize=14)
ax.set_ylabel("Covariates", fontsize=14)
ax.grid(True, linestyle='--', linewidth=0.5)  # Add a grid with custom style
ax.axvline(x=0, color='k', linestyle='--', linewidth=1)  # Add a vertical line at x=0
ax.spines['top'].set_visible(False)  # Hide the top spine
ax.spines['right'].set_visible(False)  # Hide the right spine

# Customize tick parameters
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.tight_layout()  # Adjust subplots to fit into figure area.

# Show the plot
plt.show()

The plot_partial_effects_on_outcome() method in the WeibullAFTFitter class from the lifelines package is used to visualize the impact of specific covariate values on the survival function. This method helps in understanding how changes in covariates affect the predicted survival probability over time.

  1. covariates:

    • This parameter specifies which covariates you want to analyze. It takes a list of strings where each string is the name of a covariate.
    • Example: covariates=['x1', 'x2']
  2. values:

    • This parameter specifies the values of the covariates you want to plot. It can take different forms depending on your needs:
      • A list of dictionaries where each dictionary specifies a set of covariate values.
      • A DataFrame where each row represents a set of covariate values.
    • Example: values=[{'x1': 1, 'x2': 3}, {'x1': 2, 'x2': 5}]

Baseline Survival Curve

  • The baseline survival curve is the survival function when all covariates are at their reference levels (often zero or the mean value depending on the data standardization/normalization).
  • In the context of plot_partial_effects_on_outcome(), the baseline survival curve serves as a reference to compare how the survival probabilities change when covariate values are modified.
In [44]:
# Plot partial effects on outcome
fig, ax = plt.subplots(figsize=(8, 5))  # Set the figure size
aft.plot_partial_effects_on_outcome(covariates=['TotalCharges'], values=[100, 300, 500, 700, 1000, 2000, 3000], ax=ax)

# Beautify the plot
ax.set_title("Partial Effects of TotalCharges on Survival Outcome", fontsize=16, fontweight='bold')
ax.set_xlabel("Timeline (months)", fontsize=14)
ax.set_ylabel("Survival Probability", fontsize=14)
ax.grid(True, linestyle='--', linewidth=0.5)  # Add a grid with custom style
ax.spines['top'].set_visible(False)  # Hide the top spine
ax.spines['right'].set_visible(False)  # Hide the right spine

# Customize tick parameters
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.legend(title="TotalCharges Values", title_fontsize='13', fontsize='11')  # Customize the legend

plt.tight_layout()  # Adjust subplots to fit into figure area

# Show the plot
plt.show()
In [45]:
train_set_time_label['Contract_Month-to-month']
Out[45]:
customerID
3545-CNWRG    1.0
7673-BQGKU    0.0
2541-YGPKE    0.0
6034-YMTOB    1.0
2984-RGEYA    0.0
             ... 
0218-QNVAS    0.0
2889-FPWRM    0.0
4009-ALQFH    1.0
8024-XNAFQ    0.0
8982-NHAVY    0.0
Name: Contract_Month-to-month, Length: 5634, dtype: float64
In [46]:
# Plot partial effects on outcome
fig, ax = plt.subplots(figsize=(8, 5))  # Set the figure size
aft.plot_partial_effects_on_outcome(covariates=['Contract_Two year'], values=[0, 1], ax=ax)

# Beautify the plot
ax.set_title("Partial Effects of Contract_Two year on Survival Outcome", fontsize=16, fontweight='bold')
ax.set_xlabel("Timeline (months)", fontsize=14)
ax.set_ylabel("Survival Probability", fontsize=14)
ax.grid(True, linestyle='--', linewidth=0.5)  # Add a grid with custom style
ax.spines['top'].set_visible(False)  # Hide the top spine
ax.spines['right'].set_visible(False)  # Hide the right spine

# Customize tick parameters
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.legend(title="Contract_Two year", title_fontsize='13', fontsize='11')  # Customize the legend

plt.tight_layout()  # Adjust subplots to fit into figure area

# Show the plot
plt.show()

Predict Survival Time¶

  • .predict_survival_function(): Predict the survival function of individuals based on their covariate values.

    • Arguments: covariates as an np array or DataFrame; if a DataFrame, columns can be in any order.
    • conditional_after (optional): a time point after which the survival probabilities are conditioned, i.e., the probabilities are recalculated given that the individual has already survived up to conditional_after time units.
  • .predict_median(): Predict the median survival duration of individuals based on their covariate values.
    • Arguments: covariates as an np array or DataFrame; if a DataFrame, columns can be in any order.
In [47]:
# Apply the transformations
# (note: fit_transform refits the preprocessor on the test set; reusing the preprocessor
#  already fitted on the training set with .transform() would avoid this)
df_processed = preprocessor.fit_transform(test_set_strat)

# Get feature names
numerical_feature_names = numerical_features
categorical_feature_names = preprocessor.named_transformers_['cat']['onehot'].get_feature_names_out(categorical_features)

# Combine feature names
feature_names = np.concatenate([numerical_feature_names, categorical_feature_names])

# Convert the result back to a DataFrame for better readability
df_processed = pd.DataFrame(df_processed, columns=feature_names, index=test_set_strat.index)

df_processed
Out[47]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check
customerID
0023-UYUPN 25.20 1306.30 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0
7610-TVOPG 26.35 378.60 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
8050-DVOJX 81.35 4060.90 0.0 1.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0
8189-DUKMV 20.50 79.05 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0
2911-WDXMV 80.55 1406.65 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8777-PVYGU 64.30 3410.60 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
6519-CFDBX 45.40 80.95 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
6384-VMJHP 73.00 5265.20 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0
3441-CGZJH 60.40 2640.55 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0
5274-XHAKY 94.30 3893.60 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0

1409 rows × 45 columns

In [48]:
test_set_strat[label].replace({'Yes': 1, 'No': 0}, inplace=True)
test_set_strat
Out[48]:
gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod MonthlyCharges TotalCharges Churn
customerID
0023-UYUPN Female Yes Yes No 50.0 Yes Yes No No internet service No internet service No internet service No internet service No internet service No internet service One year No Electronic check 25.20 1306.30 0
7610-TVOPG Male No No No 15.0 Yes Yes No No internet service No internet service No internet service No internet service No internet service No internet service Month-to-month Yes Electronic check 26.35 378.60 0
8050-DVOJX Male Yes No No 49.0 Yes Yes DSL Yes Yes Yes Yes No Yes Two year No Credit card (automatic) 81.35 4060.90 0
8189-DUKMV Female No Yes No 4.0 Yes No No No internet service No internet service No internet service No internet service No internet service No internet service One year No Bank transfer (automatic) 20.50 79.05 0
2911-WDXMV Male No No Yes 18.0 Yes Yes Fiber optic No No Yes No No No Month-to-month Yes Credit card (automatic) 80.55 1406.65 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8777-PVYGU Female No Yes No 52.0 Yes No DSL Yes No Yes No Yes No One year Yes Mailed check 64.30 3410.60 0
6519-CFDBX Female No No No 2.0 Yes No DSL No No No No No No Month-to-month Yes Electronic check 45.40 80.95 0
6384-VMJHP Female No No No 72.0 Yes No DSL Yes Yes Yes Yes No Yes Two year Yes Credit card (automatic) 73.00 5265.20 0
3441-CGZJH Female No Yes Yes 43.0 No No phone service DSL Yes No Yes Yes Yes Yes One year Yes Credit card (automatic) 60.40 2640.55 0
5274-XHAKY Female No Yes Yes 41.0 Yes Yes Fiber optic Yes Yes No No Yes No Month-to-month Yes Bank transfer (automatic) 94.30 3893.60 0

1409 rows × 20 columns

In [49]:
df_processed
Out[49]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check
customerID
0023-UYUPN 25.20 1306.30 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0
7610-TVOPG 26.35 378.60 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
8050-DVOJX 81.35 4060.90 0.0 1.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0
8189-DUKMV 20.50 79.05 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0
2911-WDXMV 80.55 1406.65 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8777-PVYGU 64.30 3410.60 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
6519-CFDBX 45.40 80.95 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0
6384-VMJHP 73.00 5265.20 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0
3441-CGZJH 60.40 2640.55 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0
5274-XHAKY 94.30 3893.60 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0

1409 rows × 45 columns

In [50]:
test_set_strat_time_label = pd.merge(df_processed, test_set_strat[['tenure', 'Churn']], 
                                     left_index=True, right_index=True, how='left')
test_set_strat_time_label.head()
Out[50]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure Churn
customerID
0023-UYUPN 25.20 1306.30 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 50.0 0
7610-TVOPG 26.35 378.60 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 15.0 0
8050-DVOJX 81.35 4060.90 0.0 1.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 49.0 0
8189-DUKMV 20.50 79.05 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 4.0 0
2911-WDXMV 80.55 1406.65 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 18.0 0

Predict the survival function for individuals, given their covariates. This assumes that the individual just entered the study (that is, we do not condition on how long they have already lived for).

In [51]:
#aft.predict_survival_function(test_set_strat_time_label.iloc[0], conditional_after=10)
customer_1 = aft.predict_survival_function(test_set_strat_time_label.iloc[:2])
customer_1.head()
Out[51]:
customerID 0023-UYUPN 7610-TVOPG
1.000000e-07 1.000000 1.000000
1.000000e+00 0.999420 0.995666
2.000000e+00 0.998443 0.988410
3.000000e+00 0.997228 0.979442
4.000000e+00 0.995827 0.969191
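As a minimal sketch (assuming the fitted aft model and test_set_strat_time_label from the cells above), the conditional_after argument recomputes these probabilities given that each customer has already survived a given number of months:

In [ ]:
# Survival functions for the first two customers, conditional on having
# already survived 10 months (probabilities are re-based at t = 10)
cond_surv = aft.predict_survival_function(test_set_strat_time_label.iloc[:2],
                                          conditional_after=[10, 10])
cond_surv.head()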
In [52]:
aft.predict_median(test_set_strat_time_label)
Out[52]:
0023-UYUPN     144.567957
7610-TVOPG      35.194502
8050-DVOJX    1050.752765
8189-DUKMV      81.133199
2911-WDXMV      16.136747
                 ...     
8777-PVYGU     177.297121
6519-CFDBX       9.189431
6384-VMJHP    2238.554347
3441-CGZJH     201.703757
5274-XHAKY     112.877190
Length: 1409, dtype: float64

In survival analysis, comparing predicted median survival times directly with actual times can be challenging because the actual times include both censored and uncensored data. Here's how you can handle this situation:

Filter Uncensored Data: Only use the uncensored data for calculating the Mean Absolute Error (MAE). For censored data, the exact survival time is unknown, making it inappropriate to include them directly in the MAE calculation.

Calculate MAE for Uncensored Data: Compute the MAE between the predicted median survival times and the actual times for the uncensored subset of your test data.

In [53]:
uncensored_tenure_test = test_set_strat_time_label[test_set_strat_time_label.Churn==1]
In [54]:
x = uncensored_tenure_test.tenure.values
In [55]:
y = aft.predict_median(uncensored_tenure_test).values
In [56]:
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(x, y)
print(f"Mean Absolute Error: {mae}")
Mean Absolute Error: 124.89845413868007
In [57]:
from matplotlib.offsetbox import AnchoredText  # needed for the annotation box below

def CrossPlot (x:list,y:list,title:str,xlabl:str,ylabl:str,loc:int,
               xlimt:list,ylimt:list,axt=None,scale: float=0.8,alpha: float=0.6,
               markersize: float=6,marker: str='ro',fit_ : bool=True, 
               markeredgecolor='white', color='black') -> None:
    #
    ax1 = axt or plt.axes()
    x=np.array(x)
    y=np.array(y)    
    no_nan=np.where((~np.isnan(x)) & (~np.isnan(y)))[0]
    Mean_x=np.mean(x)
    SD_x=np.sqrt(np.var(x)) 
    #
    n_x=len(x)
    n_y=len(y)
    Mean_y=np.mean(y)
    SD_y=np.sqrt(np.var(y)) 
    corr=np.corrcoef(x[no_nan],y[no_nan])
    n_=len(no_nan)

    txt = r'$\rho_{x,y}$=%.2f' + '\n $n$=%.0f \n $\mu_{x}$=%.0f  \n '
    txt += '$\mu_{y}$=%.2f '
    anchored_text = AnchoredText(txt %(corr[1,0], n_x,Mean_x,Mean_y), loc=loc,
                            prop={ 'size': font['size']*scale, 'fontweight': 'bold'})    
        
    ax1.add_artist(anchored_text)
    Lfunc1=np.polyfit(x,y,1)
    vEst=Lfunc1[0]*x+Lfunc1[1]    
    try:
        title
    except NameError:
        pass  # do nothing! 
    else:
        plt.title(title,fontsize=font['size']*(scale))   
#
    try:
        xlabl
    except NameError:
        pass  # do nothing! 
    else:
        plt.xlabel(xlabl,fontsize=font['size']*scale)            
#
    try:
        ylabl
    except NameError:
        pass  # do nothing! 
    else:
        plt.ylabel(ylabl,fontsize=font['size']*scale)        
        
    try:
        xlimt
    except NameError:
        pass  # do nothing! 
    else:
        plt.xlim(xlimt)   
#        
    try:
        ylimt
    except NameError:
        pass  # do nothing! 
    else:
        plt.ylim(ylimt)   
      
    plt.plot(x,y,marker,markersize=markersize, alpha=alpha, markeredgecolor=markeredgecolor, color=color) 
    if fit_: ax1.plot(x, vEst,'k-',linewidth=2)   
    ax1.grid(linewidth='0.1') 
    plt.xticks(fontsize=font['size']*scale)    
    plt.yticks(fontsize=font['size']*scale)            
In [58]:
font = {'size'   : 12}
plt.rc('font', **font)
fig, ax1 = plt.subplots(figsize=(6, 6), dpi= 80, facecolor='w', edgecolor='k')

CrossPlot(x,y,title='Actual vs Predicted Survival Time (uncensored test set)',
          xlabl='Test set Survival Time',
          ylabl='Predicted Survival Time',
          loc=2,
          xlimt=(np.nanmin(x)-np.nanmin(x)*0.01,np.nanmax(x)+np.nanmax(x)*0.01),
          ylimt=(0, 1000),
          axt=ax1,
          scale=1.1,
          alpha=1, 
          fit_=False,
          markersize=8,
          marker='o',
          color='g',
          markeredgecolor='black')


plt.show()

Which model fits the data the best?¶

When choosing between non-parametric and parametric models for survival analysis, it's important to weigh their respective advantages and limitations. Here’s a detailed comparison:

Non-parametric Modeling (e.g., Kaplan-Meier Model)

  • Advantages:
    • Distribution-free: The Kaplan-Meier model does not assume any specific distribution for the survival times, making it flexible and robust.
    • Describes Data Accurately: It provides an empirical estimate of the survival function, which can accurately reflect the observed data without imposing any assumptions about the underlying distribution.
  • Disadvantages:
    • Not Smooth/Continuous/Differentiable: The survival function estimated by the Kaplan-Meier model is a step function, which can be less smooth and harder to interpret compared to parametric models. This can make it difficult to derive certain statistical properties and to make predictions for times not directly observed in the data.

Parametric Modeling (e.g., Weibull Model)

  • Advantages:
    • More Information: Parametric models can provide more detailed insights and predictions about the survival process. They often allow for the derivation of various statistics (e.g., hazard functions, cumulative hazard functions) that are not directly obtainable from non-parametric models.
    • Smooth and Continuous: Parametric models typically provide smooth, continuous, and differentiable estimates of the survival function, which can be more interpretable and useful for further statistical analysis.
  • Disadvantages:
    • Model Assumptions: Parametric models assume that the survival times follow a specific distribution (e.g., Weibull, exponential, log-normal). If the chosen model does not fit the actual data well, this can lead to biased estimates and incorrect conclusions.
    • Model Selection: Selecting the correct parametric model requires careful consideration and validation, which can be challenging.

Common parametric survival models

  • The Weibull model
    from lifelines import WeibullFitter
    
  • The Exponential model
    from lifelines import ExponentialFitter
    
  • The Log Normal model
    from lifelines import LogNormalFitter
    
  • The Log Logistic model
    from lifelines import LogLogisticFitter
    
  • The Gamma model
    from lifelines import GeneralizedGammaFitter
    

The Akaike Information Criterion (AIC)¶

AIC: an estimator of prediction error and the relative quality of statistical models for a given dataset. It is defined as $AIC = 2k - 2\ln(\hat{L})$, where $k$ is the number of estimated parameters and $\hat{L}$ is the maximized likelihood of the model.

  • Estimates Information Loss:

    • The AIC estimates the relative amount of information lost by a given model.
    • It includes a penalty for the number of parameters estimated, discouraging overfitting.
    • The goal is to find a model that loses the least amount of information while being as simple as possible.
  • Model Quality:

    • A model that loses less information is considered higher quality.
    • Simpler models (with fewer parameters) are preferred, as they are considered higher quality if they achieve a similar level of accuracy.
  • Model Selection:

    • When comparing a set of candidate models for the data, the model with the lowest AIC value is preferred.

To perform model selection using the Akaike Information Criterion (AIC) with parametric models in the lifelines library, you can follow these steps:

Step 1: Fit Parametric Models in Lifelines

First, you need to fit several parametric survival models using the `lifelines` library. Lifelines provides various parametric models like the Exponential, Weibull, and Log-Normal models.

Step 2: Print and Compare Each Model's AIC_ Property

After fitting the models, you can access the `AIC_` property of each model to get their AIC values. The AIC is a measure of the relative quality of statistical models for a given dataset. Lower AIC values indicate a better fit.

Step 3: Select the Model with the Lowest AIC Value

The model with the lowest AIC value is considered the preferred model.
In [59]:
from lifelines import WeibullFitter,\
ExponentialFitter,LogNormalFitter
In [60]:
T = train_set_time_label['tenure'] 
E = train_set_time_label['Churn'] 
In [61]:
wb = WeibullFitter().fit(T, E)
exp = ExponentialFitter().fit(T, E)
log = LogNormalFitter().fit(T, E)
In [62]:
print(f'AIC_ for WeibullFitter: {int(wb.AIC_)}')
print(f'AIC_ for ExponentialFitter: {int(exp.AIC_)}')
print(f'AIC_ for LogNormalFitter: {int(log.AIC_)}')
AIC_ for WeibullFitter: 16898
AIC_ for ExponentialFitter: 17327
AIC_ for LogNormalFitter: 16804

Among these three parametric fitters, the log-normal model has the lowest AIC and is therefore the preferred fit, followed by the Weibull and the exponential models.

find_best_parametric_model()¶

find_best_parametric_model() is a built-in lifelines function to automate AIC comparisons between parametric models. It iterates through each parametric model available in lifelines.

In [63]:
from lifelines.utils import find_best_parametric_model

best_model, best_aic_ = find_best_parametric_model(event_times=T,
                                                   event_observed=E,
                                                   scoring_method="AIC")

print(best_model)
<lifelines.SplineFitter:"SplineFitter: 2 internal knot", fitted with 5634 total observations, 4139 right-censored observations>
In [64]:
from lifelines import SplineFitter

# Fit a SplineFitter model
# knot_locations specifies where the spline knots are placed; here, at the
# minimum and maximum of the observed (uncensored) event times
knots = np.percentile(T.loc[E.astype(bool)], [0, 100])
spline_fitter = SplineFitter(knot_locations=knots)
spline_fitter.fit(T, E)

# Print model summary
print(spline_fitter.summary)
            coef  se(coef)  coef lower 95%  coef upper 95%  cmp to          z  \
phi_0_ -3.475976  0.059837       -3.593254       -3.358698     0.0 -58.091151   
phi_1_  0.646012  0.014859        0.616890        0.675135     0.0  43.477111   

          p  -log2(p)  
phi_0_  0.0       inf  
phi_1_  0.0       inf  
In [65]:
# Plot the survival function
spline_fitter.plot_survival_function()
plt.title('Survival Function')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.show()

Use QQ Plots for Model Selection¶

  1. Fit parametric models using lifelines.
  2. Plot the QQ plot for each model.
  3. Choose the model with the QQ plot that is closest to y = x.
In [66]:
font = {'size'   : 12}
plt.rc('font', **font)
fig, axs = plt.subplots(2, 2, figsize=(12, 9), dpi=80, facecolor='w', edgecolor='k')

ax1 = axs[0, 0]
model = WeibullFitter()
model.fit(T, E)
qq_plot(model, ax=ax1)
ax1.set_title("WeibullFitter",fontsize=15)

ax2 = axs[0, 1]
model = LogNormalFitter()
model.fit(T, E)
qq_plot(model, ax=ax2)
ax2.set_title("LogNormalFitter",fontsize=15)

ax3 = axs[1, 0]
model = LogLogisticFitter()
model.fit(T, E)
qq_plot(model, ax=ax3)
ax3.set_title("LogLogisticFitter",fontsize=15)


ax4 = axs[1, 1]
model = ExponentialFitter()
model.fit(T, E)
qq_plot(model, ax=ax4)
ax4.set_title("ExponentialFitter",fontsize=15)

plt.subplots_adjust(wspace=0.2)
plt.subplots_adjust(hspace=0.4)
plt.show()

CoxPHFitter Package¶

The hazard function and hazard rate are key concepts in survival analysis, a branch of statistics that deals with analyzing the expected duration of time until one or more events happen, such as death in biological organisms or failure in mechanical systems.

Hazard Function ($h(t)$)

  • The hazard function describes the instantaneous rate at which the event of interest is expected to happen at time $t$, given that the event has not yet occurred before time $t$.
  • In simpler terms, the hazard function gives the rate at which the event is occurring at time $t$, provided that the subject has survived (or the event has not occurred) until time $t$.

Hazard Rate

  • The hazard rate is synonymous with the hazard function: it is the instantaneous rate at which the event occurs at time $t$, given survival up to $t$: $h(t) = \lim_{\Delta t \to 0} \frac{P(t \le T < t + \Delta t \mid T \ge t)}{\Delta t}$

The hazard function $h(t)$ and the survival function $S(t)$ can be derived from each other: $h(t) = -\frac{d}{dt}\ln S(t)$, or equivalently $S(t) = \exp\left(-\int_0^t h(u)\,du\right)$.

The proportional hazards assumption is a key concept in the Cox proportional hazards model, which is widely used in survival analysis. This assumption states that the hazard functions for different individuals are proportional to each other over time. Here is a detailed explanation:

Proportional Hazards Assumption

  • Definition: The proportional hazards assumption posits that the hazard function for any individual is a scaled version of the baseline hazard function. This means that for any two individuals, their hazard functions are proportional across time.

Key Points¶

Consider two individuals, A and B, with hazard functions $h_A(t)$ and $h_B(t)$, respectively. According to the proportional hazards assumption, their relationship can be expressed as:

$h_A(t) = c \cdot h_B(t)$

where:

  • $h_A(t)$ is the hazard function for individual A.
  • $h_B(t)$ is the hazard function for individual B.
  • $c$ is a positive constant (scaling factor) that does not depend on time $t$.

This implies that the ratio of the hazard functions for any two individuals is constant over time, i.e., the hazards are proportional.

Assumption¶

  1. Baseline Hazard Function:

    • The Cox model assumes the existence of a baseline hazard function $h_0(t)$ (changes over time), which represents the hazard for an individual with all covariates set to zero (or at their baseline levels).

    • For any individual $i$, the hazard function is modeled as: $h_i(t) = h_0(t) \cdot \exp(\beta_1 X_{i1} + \beta_2 X_{i2} + \dots + \beta_p X_{ip})$. The linear relationship between covariates and the log of hazard, does NOT change with time.

      where $X_{ij}$ are the covariates for individual $i$, and $\beta_j$ are the corresponding coefficients.

    • The hazards for different individuals differ by a multiplicative factor, $\exp(\beta_1 X_{i1} + \beta_2 X_{i2} + \dots + \beta_p X_{ip})$, but this factor is independent of time.

  2. Time-Invariance (Relative Survival Impact):
    • The relative impact of covariates on survival (the effect size represented by the coefficients $\beta_j$) is time-invariant. This means that the effect of a covariate on the hazard rate does not change over time.
    • For instance, if smoking increases the hazard rate by a factor of 2, it does so consistently across all time points, not just at a specific time.

The Cox Proportional Hazards (Cox PH) model is a **regression** model that relates **covariates** to the time until an event occurs.

Data requirements for the Cox PH model include:

  • Durations: The time or duration until the event occurs for each individual.
  • Events: An indicator showing whether the event was observed (1 = Yes, 0 = No, censored). If this information is not provided, the model assumes that no subjects are censored.
  • Covariates: Continuous variables or categorical variables that have been one-hot encoded for use in the regression.

Fit Cox PH model¶

In [67]:
coxph = CoxPHFitter(penalizer=0.1)
coxph.fit(train_set_time_label, duration_col='tenure', event_col='Churn', show_progress=True)
Iteration 1: norm_delta = 8.84e-01, step_size = 0.9500, log_lik = -12184.15103, newton_decrement = 1.39e+03, seconds_since_start = 0.0
Iteration 2: norm_delta = 2.81e-01, step_size = 0.9500, log_lik = -10779.98964, newton_decrement = 7.76e+01, seconds_since_start = 0.1
Iteration 3: norm_delta = 5.50e-02, step_size = 0.9500, log_lik = -10698.26734, newton_decrement = 2.26e+00, seconds_since_start = 0.1
Iteration 4: norm_delta = 1.50e-03, step_size = 1.0000, log_lik = -10695.96813, newton_decrement = 1.63e-03, seconds_since_start = 0.1
Iteration 5: norm_delta = 1.12e-06, step_size = 1.0000, log_lik = -10695.96650, newton_decrement = 9.01e-10, seconds_since_start = 0.2
Convergence success after 5 iterations.
Out[67]:
<lifelines.CoxPHFitter: fitted with 5634 total observations, 4139 right-censored observations>
In [68]:
coxph.print_summary()
model lifelines.CoxPHFitter
duration col 'tenure'
event col 'Churn'
penalizer 0.1
l1 ratio 0.0
baseline estimation breslow
number of observations 5634
number of events observed 1495
partial log-likelihood -10695.97
time fit was run 2024-08-15 03:25:50 UTC
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95% cmp to z p -log2(p)
MonthlyCharges 0.00 1.00 0.00 -0.00 0.00 1.00 1.00 0.00 1.67 0.10 3.38
TotalCharges -0.00 1.00 0.00 -0.00 -0.00 1.00 1.00 0.00 -23.80 <0.005 413.64
gender_Female 0.01 1.01 0.06 -0.11 0.14 0.89 1.15 0.00 0.20 0.84 0.25
gender_Male -0.01 0.99 0.06 -0.14 0.11 0.87 1.12 0.00 -0.20 0.84 0.25
SeniorCitizen_No -0.00 1.00 0.09 -0.17 0.17 0.84 1.18 0.00 -0.00 1.00 0.01
SeniorCitizen_Yes 0.00 1.00 0.09 -0.17 0.17 0.85 1.18 0.00 0.00 1.00 0.01
Partner_No 0.17 1.19 0.07 0.04 0.30 1.05 1.35 0.00 2.65 0.01 6.94
Partner_Yes -0.17 0.84 0.07 -0.30 -0.04 0.74 0.96 0.00 -2.65 0.01 6.94
Dependents_No 0.06 1.06 0.07 -0.09 0.20 0.92 1.22 0.00 0.76 0.45 1.16
Dependents_Yes -0.06 0.95 0.07 -0.20 0.09 0.82 1.09 0.00 -0.76 0.45 1.16
PhoneService_No -0.05 0.95 0.12 -0.30 0.19 0.74 1.21 0.00 -0.42 0.68 0.57
PhoneService_Yes 0.05 1.05 0.12 -0.19 0.30 0.83 1.34 0.00 0.42 0.68 0.57
MultipleLines_No 0.11 1.12 0.06 -0.01 0.24 0.99 1.27 0.00 1.76 0.08 3.67
MultipleLines_No phone service -0.05 0.95 0.12 -0.30 0.19 0.74 1.21 0.00 -0.42 0.68 0.57
MultipleLines_Yes -0.10 0.91 0.06 -0.22 0.03 0.80 1.03 0.00 -1.51 0.13 2.92
InternetService_DSL -0.23 0.79 0.07 -0.37 -0.10 0.69 0.91 0.00 -3.33 <0.005 10.19
InternetService_Fiber optic 0.30 1.34 0.07 0.16 0.43 1.17 1.54 0.00 4.27 <0.005 15.65
InternetService_No -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
OnlineSecurity_No 0.27 1.31 0.07 0.14 0.40 1.15 1.49 0.00 3.98 <0.005 13.80
OnlineSecurity_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
OnlineSecurity_Yes -0.23 0.79 0.07 -0.37 -0.09 0.69 0.91 0.00 -3.23 <0.005 9.68
OnlineBackup_No 0.22 1.25 0.07 0.10 0.35 1.10 1.42 0.00 3.42 <0.005 10.62
OnlineBackup_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
OnlineBackup_Yes -0.15 0.86 0.07 -0.29 -0.02 0.75 0.98 0.00 -2.29 0.02 5.52
DeviceProtection_No 0.14 1.16 0.07 0.02 0.27 1.02 1.31 0.00 2.20 0.03 5.15
DeviceProtection_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
DeviceProtection_Yes -0.07 0.94 0.07 -0.20 0.06 0.82 1.07 0.00 -1.00 0.32 1.65
TechSupport_No 0.21 1.23 0.07 0.08 0.34 1.08 1.40 0.00 3.07 <0.005 8.88
TechSupport_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
TechSupport_Yes -0.15 0.86 0.07 -0.29 -0.01 0.75 0.99 0.00 -2.14 0.03 4.96
StreamingTV_No 0.01 1.01 0.07 -0.12 0.14 0.89 1.15 0.00 0.16 0.88 0.19
StreamingTV_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
StreamingTV_Yes 0.08 1.08 0.07 -0.05 0.21 0.95 1.23 0.00 1.16 0.24 2.03
StreamingMovies_No 0.05 1.05 0.07 -0.08 0.17 0.92 1.19 0.00 0.70 0.48 1.05
StreamingMovies_No internet service -0.12 0.89 0.10 -0.31 0.07 0.73 1.07 0.00 -1.24 0.21 2.23
StreamingMovies_Yes 0.04 1.04 0.07 -0.09 0.17 0.91 1.19 0.00 0.61 0.54 0.89
Contract_Month-to-month 0.73 2.08 0.07 0.60 0.87 1.82 2.38 0.00 10.76 <0.005 87.35
Contract_One year -0.25 0.78 0.08 -0.39 -0.10 0.67 0.91 0.00 -3.22 <0.005 9.61
Contract_Two year -0.77 0.46 0.08 -0.93 -0.61 0.40 0.54 0.00 -9.60 <0.005 70.05
PaperlessBilling_No -0.11 0.90 0.07 -0.24 0.02 0.79 1.03 0.00 -1.59 0.11 3.15
PaperlessBilling_Yes 0.11 1.11 0.07 -0.02 0.24 0.98 1.27 0.00 1.59 0.11 3.15
PaymentMethod_Bank transfer (automatic) -0.25 0.78 0.07 -0.38 -0.11 0.68 0.89 0.00 -3.59 <0.005 11.58
PaymentMethod_Credit card (automatic) -0.27 0.76 0.07 -0.41 -0.13 0.67 0.87 0.00 -3.90 <0.005 13.33
PaymentMethod_Electronic check 0.24 1.27 0.06 0.12 0.36 1.13 1.43 0.00 3.97 <0.005 13.75
PaymentMethod_Mailed check 0.20 1.22 0.07 0.06 0.33 1.06 1.39 0.00 2.86 <0.005 7.89

Concordance 0.90
Partial AIC 21481.93
log-likelihood ratio test 2976.37 on 45 df
-log2(p) of ll-ratio test inf
In [69]:
fig, ax = plt.subplots(figsize=(10, 9))  # Set the figure size

coxph.plot()

# Beautify the plot
ax.set_title("Cox PH Coefficients", fontsize=16, fontweight='bold')
ax.set_xlabel("Coefficient Value", fontsize=14)
ax.set_ylabel("Covariates", fontsize=14)
ax.grid(True, linestyle='--', linewidth=0.5)  # Add a grid with custom style
ax.axvline(x=0, color='k', linestyle='--', linewidth=1)  # Add a vertical line at x=0
ax.spines['top'].set_visible(False)  # Hide the top spine
ax.spines['right'].set_visible(False)  # Hide the right spine

# Customize tick parameters
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.tight_layout()  # Adjust subplots to fit into figure area.

# Show the plot
plt.show()

How to Interpret coefficients¶

Coefficient Sign

  • Positive Coefficient:
    • Indicates that as the covariate increases, the hazard of the event occurring increases.
    • The individual is at a higher risk of experiencing the event sooner.
  • Negative Coefficient:
    • Indicates that as the covariate increases, the hazard of the event decreases.
    • The individual is at a lower risk of experiencing the event, implying longer survival or delay in the event occurrence.
  • Zero Coefficient:
    • Indicates no effect of the covariate on the hazard rate.

Hazard Ratio (HR):

  • The hazard ratio measures how much the hazard increases or decreases relative to the baseline hazard. It is calculated as $e^{\beta}$, where $\beta$ is the coefficient.
  • Interpreting the Hazard Ratio:
    • HR > 1: Indicates an increased hazard (higher risk) associated with the covariate. For example, if $\beta = 0.7 $, then $HR = e^{0.7} \approx 2.01$, meaning the hazard is twice as high for individuals with a one-unit increase in the covariate.
    • HR < 1: Indicates a decreased hazard (lower risk). For example, if $\beta = -0.5$, then $HR = e^{-0.5} \approx 0.6$, meaning the hazard is reduced by about 39% with a one-unit increase in the covariate.
    • HR = 1: Suggests no effect of the covariate on the hazard.
Calculation Example

  • Coefficient: $\beta = 0.521$
  • Hazard ratio: $e^{0.521} \approx 1.684$
  • Hazard interpretation: $1.684 - 1 = 0.684$, i.e., a 68.4% increase in hazard
  • Survival time interpretation: $\frac{1}{1.684} - 1 \approx -0.406$, i.e., roughly a 40.6% decrease in survival time
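Since the hazard ratio is just $e^{\beta}$, the ratios for the fitted Cox model can be obtained by exponentiating its coefficients (a minimal sketch, assuming the coxph model fitted in the cells above):

In [ ]:
import numpy as np
# Hazard ratios = exp(coefficients); values > 1 increase the hazard, values < 1 decrease it
hazard_ratios = np.exp(coxph.params_)
print(hazard_ratios.sort_values(ascending=False).head())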
In [70]:
import matplotlib.pyplot as plt

font = {'size': 12}
plt.rc('font', **font)

fig, axs = plt.subplots(1, 2, figsize=(12, 4), dpi=80, facecolor='w', edgecolor='k')

ax1 = axs[0]
coxph.baseline_hazard_.plot(ax=ax1, color='blue')
ax1.set_title("Baseline hazard function", fontsize=15)
ax1.set_xlabel("Timeline (months)", fontsize=14)
ax1.set_ylabel("Baseline Hazard", fontsize=14)
ax1.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax1.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

ax2 = axs[1]
coxph.baseline_survival_.plot(ax=ax2, color='green')
ax2.set_title("Baseline survival function", fontsize=15)
ax2.set_xlabel("Timeline (months)", fontsize=14)
ax2.set_ylabel("Survival Probability", fontsize=14)
ax2.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax2.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.subplots_adjust(wspace=0.2, hspace=0.4)
plt.show()

Visualize the Hazard Ratio¶

In [71]:
# Plot partial effects on outcome
fig, ax = plt.subplots(figsize=(8, 5))  # Set the figure size

coxph.plot_partial_effects_on_outcome(covariates=["TotalCharges"],
                                      values=[500, 1000, 2000, 3000, 5000, 10000],ax=ax)
# Beautify the plot
ax.set_title("Visualize the hazard ratio for TotalCharges", fontsize=16, fontweight='bold')
ax.set_xlabel("Timeline (months)", fontsize=14)
ax.set_ylabel("Survival Probability", fontsize=14)
ax.grid(True, linestyle='--', linewidth=0.5)  # Add a grid with custom style
ax.spines['top'].set_visible(False)  # Hide the top spine
ax.spines['right'].set_visible(False)  # Hide the right spine

# Customize tick parameters
ax.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
ax.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)

plt.legend(title="TotalCharges", title_fontsize='13', fontsize='11')  # Customize the legend

plt.tight_layout()  # Adjust subplots to fit into figure area

# Show the plot
plt.show()

Cross Validation¶

In [72]:
import numpy as np
from lifelines import AalenAdditiveFitter, CoxPHFitter
from lifelines.datasets import load_regression_dataset
from lifelines.utils import k_fold_cross_validation

df = load_regression_dataset()

#create the three models we'd like to compare.
aaf_1 = AalenAdditiveFitter(coef_penalizer=0.5)
aaf_2 = AalenAdditiveFitter(coef_penalizer=10)
cph = CoxPHFitter()

print(np.mean(k_fold_cross_validation(cph, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
print(np.mean(k_fold_cross_validation(aaf_1, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
print(np.mean(k_fold_cross_validation(aaf_2, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
0.5590243798738769
0.5644669239008682
0.5726651568632664
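The toy dataset above is only for illustration. As a minimal sketch (assuming the train_set_time_label DataFrame built earlier), the same k-fold procedure can score the penalized Cox model on the churn data:

In [ ]:
# 5-fold cross-validated concordance index for the penalized Cox model on the churn data
cph_churn = CoxPHFitter(penalizer=0.1)
scores = k_fold_cross_validation(cph_churn, train_set_time_label,
                                 duration_col='tenure', event_col='Churn',
                                 scoring_method="concordance_index", k=5)
print(np.mean(scores))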

How to Use the Kaplan-Meier curves¶

  • Kaplan-Meier Curves are used in survival analysis to estimate the survival function, which shows the probability of an event (like death or failure) occurring over time.
  • These curves are typically plotted for different groups within your data, where each group corresponds to different values of a covariate (independent variable).
  • Covariate with Few Values: If your covariate (e.g., a variable like treatment type, gender, etc.) only has a few distinct values or categories, it's feasible to create and inspect Kaplan-Meier curves for each group separately.
  • For example, if you have a binary covariate (e.g., "Treatment A" vs. "Treatment B"), you would plot a Kaplan-Meier curve for each treatment group.
  • Curves Intersecting: When you inspect these Kaplan-Meier curves, you might observe that they intersect at some points. This means that the survival probabilities for the different groups cross each other over time.
  • Intersection of Curves: If the curves intersect, the relative risk (hazard) between the groups is not consistent over time. For example, one group might have a higher survival probability at the beginning, but this advantage may shrink or even reverse as time goes on. In this case, the proportional hazards assumption fails.
  • If the curves have similar shapes and remain roughly parallel, the proportional hazards assumption is satisfied (see the sketch below for a visual check on the churn data).
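A minimal sketch of this visual check for a categorical covariate, assuming train_set_strat and the time/label column names from the earlier Kaplan-Meier cells are available:

In [ ]:
from lifelines import KaplanMeierFitter

# One Kaplan-Meier curve per contract type; crossing curves would suggest
# that the proportional hazards assumption does not hold for this covariate
fig, ax = plt.subplots(figsize=(8, 5))
for contract in train_set_strat['Contract'].unique():
    subset = train_set_strat[train_set_strat['Contract'] == contract]
    kmf = KaplanMeierFitter()
    kmf.fit(subset[time], subset[label], label=contract)
    kmf.plot_survival_function(ax=ax)
ax.set_title('Kaplan-Meier Curves by Contract Type')
ax.set_xlabel('Tenure (months)')
ax.set_ylabel('Survival Probability')
plt.show()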

For continuous covariates, we can use the .check_assumptions() method as below:

In [73]:
coxph.check_assumptions(train_set_time_label, p_value_threshold=0.05)
The ``p_value_threshold`` is set at 0.05. Even under the null hypothesis of no violations, some
covariates will be below the threshold by chance. This is compounded when there are many covariates.
Similarly, when there are lots of observations, even minor deviances from the proportional hazard
assumption will be flagged.

With that in mind, it's best to use a combination of statistical tests and visual tests to determine
the most serious violations. Produce visual plots using ``check_assumptions(..., show_plots=True)``
and looking for non-constant lines. See link [A] below for a full example.

null_distribution chi squared
degrees_of_freedom 1
model <lifelines.CoxPHFitter: fitted with 5634 total observations, 4139 right-censored observations>
test_name proportional_hazard_test
test_statistic p -log2(p)
Contract_Month-to-month km 0.00 0.97 0.05
rank 0.00 0.96 0.05
Contract_One year km 16.36 <0.005 14.22
rank 11.24 <0.005 10.29
Contract_Two year km 11.30 <0.005 10.33
rank 8.30 <0.005 7.98
Dependents_No km 0.07 0.80 0.33
rank 0.04 0.84 0.24
Dependents_Yes km 0.07 0.80 0.33
rank 0.04 0.84 0.24
DeviceProtection_No km 0.01 0.93 0.10
rank 0.00 0.95 0.07
DeviceProtection_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
DeviceProtection_Yes km 0.99 0.32 1.65
rank 0.99 0.32 1.65
InternetService_DSL km 1.11 0.29 1.78
rank 1.21 0.27 1.88
InternetService_Fiber optic km 3.12 0.08 3.69
rank 3.35 0.07 3.89
InternetService_No km 0.67 0.41 1.28
rank 0.71 0.40 1.33
MonthlyCharges km 3.59 0.06 4.11
rank 3.96 0.05 4.42
MultipleLines_No km 0.70 0.40 1.31
rank 0.97 0.32 1.62
MultipleLines_No phone service km 0.35 0.56 0.85
rank 0.45 0.50 0.99
MultipleLines_Yes km 1.53 0.22 2.21
rank 2.09 0.15 2.75
OnlineBackup_No km 0.23 0.63 0.66
rank 0.21 0.65 0.63
OnlineBackup_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
OnlineBackup_Yes km 2.01 0.16 2.68
rank 2.03 0.15 2.70
OnlineSecurity_No km 0.00 0.97 0.05
rank 0.05 0.82 0.28
OnlineSecurity_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
OnlineSecurity_Yes km 0.98 0.32 1.64
rank 1.53 0.22 2.21
PaperlessBilling_No km 0.04 0.84 0.26
rank 0.05 0.83 0.27
PaperlessBilling_Yes km 0.04 0.84 0.26
rank 0.05 0.83 0.27
Partner_No km 0.28 0.60 0.74
rank 0.42 0.52 0.95
Partner_Yes km 0.28 0.60 0.74
rank 0.42 0.52 0.95
PaymentMethod_Bank transfer (automatic) km 0.75 0.39 1.37
rank 1.24 0.27 1.91
PaymentMethod_Credit card (automatic) km 0.52 0.47 1.09
rank 0.90 0.34 1.54
PaymentMethod_Electronic check km 0.00 0.95 0.07
rank 0.03 0.86 0.22
PaymentMethod_Mailed check km 2.14 0.14 2.80
rank 3.20 0.07 3.77
PhoneService_No km 0.35 0.56 0.85
rank 0.45 0.50 0.99
PhoneService_Yes km 0.35 0.56 0.85
rank 0.45 0.50 0.99
SeniorCitizen_No km 0.01 0.92 0.13
rank 0.02 0.88 0.19
SeniorCitizen_Yes km 0.01 0.92 0.13
rank 0.02 0.88 0.19
StreamingMovies_No km 0.69 0.41 1.30
rank 0.98 0.32 1.63
StreamingMovies_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
StreamingMovies_Yes km 2.87 0.09 3.47
rank 3.52 0.06 4.04
StreamingTV_No km 0.85 0.36 1.49
rank 0.96 0.33 1.61
StreamingTV_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
StreamingTV_Yes km 3.24 0.07 3.80
rank 3.56 0.06 4.08
TechSupport_No km 0.10 0.75 0.41
rank 0.13 0.72 0.48
TechSupport_No internet service km 0.67 0.41 1.28
rank 0.71 0.40 1.33
TechSupport_Yes km 1.70 0.19 2.38
rank 1.93 0.17 2.60
TotalCharges km 2.28 0.13 2.93
rank 0.32 0.57 0.81
gender_Female km 0.01 0.93 0.10
rank 0.01 0.93 0.10
gender_Male km 0.01 0.93 0.10
rank 0.01 0.93 0.10

1. Variable 'MonthlyCharges' failed the non-proportional test: p-value is 0.0466.

   Advice 1: the functional form of the variable 'MonthlyCharges' might be incorrect. That is, there
may be non-linear terms missing. The proportional hazard test used is very sensitive to incorrect
functional forms. See documentation in link [D] below on how to specify a functional form.

   Advice 2: try binning the variable 'MonthlyCharges' using pd.cut, and then specify it in
`strata=['MonthlyCharges', ...]` in the call in `.fit`. See documentation in link [B] below.

   Advice 3: try adding an interaction term with your time variable. See documentation in link [C]
below.


2. Variable 'Contract_One year' failed the non-proportional test: p-value is 0.0001.

   Advice: with so few unique values (only 2), you can include `strata=['Contract_One year', ...]`
in the call in `.fit`. See documentation in link [E] below.

3. Variable 'Contract_Two year' failed the non-proportional test: p-value is 0.0008.

   Advice: with so few unique values (only 2), you can include `strata=['Contract_Two year', ...]`
in the call in `.fit`. See documentation in link [E] below.

---
[A]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html
[B]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Bin-variable-and-stratify-on-it
[C]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Introduce-time-varying-covariates
[D]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Modify-the-functional-form
[E]  https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html#Stratification

Out[73]:
[]

If the proportional hazards assumption fails for some covariates, consider the remedies suggested in the diagnostics above (stratification, binning, or time-varying terms), or switch to an alternative framework such as the Weibull AFT model and compare the models' AIC scores.
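The stratification fix can be applied when refitting the model. The sketch below is illustrative only: it assumes the training DataFrame is named train_set_strat_time_label and that the model was fit with tenure as the duration column and Churn as the event column; adapt these names to your own variables (and, depending on how the dummies were encoded, you may need to drop one redundant Contract level).

    # Minimal sketch of the stratification advice (assumed variable/column names, not a definitive fix)
    from lifelines import CoxPHFitter

    coxph_strat = CoxPHFitter()
    coxph_strat.fit(
        train_set_strat_time_label,                          # assumed name of the training DataFrame
        duration_col='tenure',
        event_col='Churn',
        strata=['Contract_One year', 'Contract_Two year'],   # the covariates that violated the assumption
    )
    coxph_strat.print_summary()

    # Re-run the diagnostics to check whether the violations are resolved
    coxph_strat.check_assumptions(train_set_strat_time_label, p_value_threshold=0.05)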

Prediction with the Cox PH Model¶

  1. Model Fitting (.fit()):
    • Before predicting survival times, the model must first be trained on the survival data. This is done with the .fit() method, which takes the training DataFrame (covariates plus the duration and event columns) and estimates the model parameters.
  2. Prediction of Median Survival Times (.predict_median()):
    • Once the model is fitted, the .predict_median() method predicts the median survival time for new subjects (i.e., those not in the training set).
    • Median Lifetime: In survival analysis, the median lifetime is the time at which the survival probability reaches 0.5, meaning that half of the subjects are expected to have survived up to that point and half are expected to have experienced the event.
  3. Behavior When the Survival Curve Does Not Cross 0.5:
    • The survival curve represents the probability of survival over time. If a subject's predicted survival curve never drops to 0.5 within the observed time frame, the median survival time is reported as infinite (inf): the subject's survival probability stays above 0.5 over the entire observed horizon, so the median lies beyond it.

Parameters

  • X:

    • The DataFrame of subjects for which to predict median survival times. It must contain the same feature columns used during model training.
  • conditional_after:

    • This parameter is an array or list that specifies how long the subjects have already lived. It represents the "conditional" survival time; in other words, if a subject has already survived a certain amount of time, the prediction is based on their survival from that point forward.
    • For example, if a subject has already survived 2 years, the prediction would estimate how much longer they are expected to live, conditional on having survived those 2 years.

In summary, .predict_median() is a method used in survival models to estimate the median survival time for subjects, taking into account any time they've already survived, and it handles cases where the median survival time might be infinite.
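As a rough sketch of how conditional_after changes the prediction, the call below conditions on each customer's observed tenure so far and returns the median remaining lifetime. The coxph model and test_set_strat_time_label come from this notebook; the intermediate variable names are only for illustration.

    # Sketch: median *remaining* lifetime, given the tenure each customer has already reached
    already_survived = test_set_strat_time_label['tenure'].values   # months already survived

    median_remaining = coxph.predict_median(
        test_set_strat_time_label,
        conditional_after=already_survived,   # condition on the time already survived
    )
    median_remaining.head()   # inf where the conditional survival curve never reaches 0.5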

In [74]:
test_set_strat_time_label.head()
Out[74]:
MonthlyCharges TotalCharges gender_Female gender_Male SeniorCitizen_No SeniorCitizen_Yes Partner_No Partner_Yes Dependents_No Dependents_Yes PhoneService_No PhoneService_Yes MultipleLines_No MultipleLines_No phone service MultipleLines_Yes InternetService_DSL InternetService_Fiber optic InternetService_No OnlineSecurity_No OnlineSecurity_No internet service OnlineSecurity_Yes OnlineBackup_No OnlineBackup_No internet service OnlineBackup_Yes DeviceProtection_No DeviceProtection_No internet service DeviceProtection_Yes TechSupport_No TechSupport_No internet service TechSupport_Yes StreamingTV_No StreamingTV_No internet service StreamingTV_Yes StreamingMovies_No StreamingMovies_No internet service StreamingMovies_Yes Contract_Month-to-month Contract_One year Contract_Two year PaperlessBilling_No PaperlessBilling_Yes PaymentMethod_Bank transfer (automatic) PaymentMethod_Credit card (automatic) PaymentMethod_Electronic check PaymentMethod_Mailed check tenure Churn
customerID
0023-UYUPN 25.20 1306.30 1.0 0.0 0.0 1.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 50.0 0
7610-TVOPG 26.35 378.60 0.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 15.0 0
8050-DVOJX 81.35 4060.90 0.0 1.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 49.0 0
8189-DUKMV 20.50 79.05 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 4.0 0
2911-WDXMV 80.55 1406.65 0.0 1.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 18.0 0
In [75]:
coxph.predict_median(test_set_strat_time_label)
Out[75]:
0023-UYUPN     inf
7610-TVOPG    51.0
8050-DVOJX     inf
8189-DUKMV     inf
2911-WDXMV    28.0
              ... 
8777-PVYGU    71.0
6519-CFDBX    10.0
6384-VMJHP     inf
3441-CGZJH     inf
5274-XHAKY    67.0
Name: 0.5, Length: 1409, dtype: float64
In [76]:
# Keep only churned (uncensored) customers: for them, the observed tenure is the true survival time
uncensored_tenure_test = test_set_strat_time_label[test_set_strat_time_label.Churn==1]
In [77]:
from sklearn.metrics import mean_absolute_error

x = uncensored_tenure_test.tenure.values                        # observed survival times
y = coxph.predict_median(uncensored_tenure_test).values         # predicted median survival times

# Drop customers whose predicted median is infinite (survival curve never crosses 0.5)
finite = np.isfinite(y)
x, y = x[finite], y[finite]

mae = mean_absolute_error(x, y)
print(f"Mean Absolute Error: {mae}")
Mean Absolute Error: 13.485549208092484
In [78]:
font = {'size'   : 12}
plt.rc('font', **font)
fig, ax1 = plt.subplots(figsize=(6, 6), dpi= 80, facecolor='w', edgecolor='k')

CrossPlot(x,y,title=f'crossplot',
          xlabl='Test set Survival Time',
          ylabl='Predicted Survival Time',
          loc=2,
          xlimt=(np.nanmin(x)-np.nanmin(x)*0.01,np.nanmax(x)+np.nanmax(x)*0.01),
          ylimt=(0, 100),
          axt=ax1,
          scale=1.1,
          alpha=1, 
          fit_=False,
          markersize=8,
          marker='o',
          color='g',
          markeredgecolor='black')


plt.show()

Measuring the performance of a survival analysis model on a test set involves evaluating how well the model predicts survival times or survival probabilities. The Concordance Index (C-index) is widely used and provides a good measure of the model's discriminative ability:

Concordance Index (C-Index)¶

  • Definition: The C-index measures the model's ability to correctly rank pairs of subjects based on their predicted survival times. It's akin to the area under the ROC curve (AUC) for binary classification.
  • Interpretation:
    • A C-index of 0.5 indicates random predictions (no predictive power).
    • A C-index of 1.0 indicates perfect predictive accuracy.
  • Calculation:

    • For a pair of subjects, if one subject is observed to survive longer than the other, the model's predictions are concordant when it also assigns that subject the longer predicted survival time. The C-index is the proportion of comparable pairs that are concordant (a toy pair-counting sketch follows below). In lifelines it can be computed along these lines, where the score passed in should be larger for subjects expected to survive longer:
    from lifelines.utils import concordance_index
    c_index = concordance_index(test_data["time"], model.predict_median(test_data), test_data["event"])
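To make the pair-counting definition concrete, the toy sketch below (made-up numbers, all events observed, no censoring) counts concordant pairs by brute force; in practice lifelines' concordance_index should be used, since it also handles censoring and ties.

    # Toy illustration of the pair-counting definition (not the lifelines implementation)
    import itertools
    import numpy as np

    times  = np.array([5, 8, 12, 3])    # observed survival times (toy data, all events observed)
    scores = np.array([4, 9, 10, 2])    # predicted scores: larger = expected to survive longer

    concordant = comparable = 0
    for i, j in itertools.combinations(range(len(times)), 2):
        if times[i] == times[j]:
            continue                     # skip tied times in this toy example
        comparable += 1
        longer, shorter = (i, j) if times[i] > times[j] else (j, i)
        concordant += scores[longer] > scores[shorter]

    print(concordant / comparable)       # 1.0 here: every pair is ranked correctly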
    
In [79]:
from lifelines.utils import concordance_index
c_index = concordance_index(test_set_strat_time_label["tenure"], 
                            coxph.predict_median(test_set_strat_time_label), test_set_strat_time_label["Churn"])
In [80]:
c_index
Out[80]:
0.9005230055534491
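For comparison, recent lifelines versions also let the fitted model score itself directly. Assuming the same coxph model and a held-out DataFrame that still contains the tenure and Churn columns, a call along these lines should return a similar number (it may differ slightly from the value above because the ranking is based on the partial hazard rather than the predicted median):

    # Sketch (requires a reasonably recent lifelines release)
    c_index_alt = coxph.score(test_set_strat_time_label, scoring_method="concordance_index")
    print(c_index_alt)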
In [81]:
#aft.predict_survival_function(test_set_strat_time_label.iloc[0], conditional_after=10)
customer_1 = coxph.predict_survival_function(test_set_strat_time_label.iloc[0])
customer_2 = coxph.predict_survival_function(test_set_strat_time_label.iloc[10])
customer_3 = coxph.predict_survival_function(test_set_strat_time_label.iloc[20])
customer_4 = coxph.predict_survival_function(test_set_strat_time_label.iloc[30])
customer_1.head()
Out[81]:
0023-UYUPN
1.000000e-07 1.000000
1.000000e+00 0.995600
2.000000e+00 0.993766
3.000000e+00 0.992209
4.000000e+00 0.990654
In [82]:
font = {'size'   : 12}
plt.rc('font', **font)
fig, axs = plt.subplots(2, 2, figsize=(12, 9), dpi=80, facecolor='w', edgecolor='k')

ax1 = axs[0, 0]
customer_1.plot(ax=ax1, color='blue')
ax1.set_title("Customer 1: Predicted Survival Function")
ax1.set_xlabel("Time")
ax1.set_ylabel("Survival Probability")

#
ax2 = axs[0, 1]
customer_2.plot(ax=ax2, color='green')
ax2.set_title("Customer 2: Predicted Survival Function")
ax2.set_xlabel("Time")
ax2.set_ylabel("Survival Probability")

#
ax3 = axs[1, 0]
customer_3.plot(ax=ax3, color='red')
ax3.set_title("Customer 3: Predicted Survival Function")
ax3.set_xlabel("Time")
ax3.set_ylabel("Survival Probability")

#
ax4 = axs[1, 1]
customer_4.plot(ax=ax4, color='yellow')
ax4.set_title("Customer 4: Predicted Survival Function")
ax4.set_xlabel("Time")
ax4.set_ylabel("Survival Probability")
#
plt.subplots_adjust(wspace=0.2)
plt.subplots_adjust(hspace=0.4)
plt.show()

Why are survival predictions useful?

  • Enable proactive failure prevention and enhance forecasting models.
  • Estimate the time until an event occurs.
  • Assess how various factors influence the timing of events.