Summary
Streamlit is an open-source Python library that streamlines the creation of web applications for machine learning and data science. Tailored for data scientists and ML engineers, it enables the quick development of interactive, data-driven web interfaces without requiring in-depth web development knowledge. Streamlit integrates well with various data visualization libraries and offers features like real-time updates, interactive widgets, and customization options, making it an excellent choice for showcasing and exploring data insights.
In this notebook, we’ll start with an introduction to Streamlit, covering data loading and visualization, followed by deploying machine learning models. Here are two deployed Streamlit models for customer churn prediction:
This app uses a pre-trained model to predict the likelihood of customer churn: Customer Churn Prediction App
This app provides a complete walkthrough for training a machine learning model for customer churn prediction: End-to-End Churn Model Training App
Python code and data files needed to run this notebook are available via this link.
Streamlit is the fastest way to make data apps. It is an open-source Python library that helps you build web applications to be used for sharing analytical results, building complex interactive experiences, and iterating on top of new machine learning models. On top of that, developing and deploying Streamlit apps is incredibly fast and flexible, often reducing the application development time from days to hours.
Over the past decade, data scientists have become increasingly valuable assets for companies and nonprofits. They assist in making data-driven decisions, enhancing process efficiency, and deploying machine learning models to optimize these decisions on a larger scale. One pain point for data scientists is the process just after they have found a new insight or made a new model. What is the best way to show a dynamic result, a new model, or a complicated piece of analytics to a data scientist’s colleagues? They can send a static visualization, which works in some cases but fails for complicated analyses that build on each other or on anything that requires user input. They can create a Word document (or export their Jupyter notebook as a document) that combines text and visualizations, which also doesn’t incorporate user input and makes reproducible results much harder. Another option still is to build out an entire web application from scratch using a framework such as Flask or Django, and then figure out how to deploy the entire app in a cloud provider.
None of these options are particularly effective. Many are slow, lack user input capabilities, or are suboptimal for the decision-making process crucial to data science.
Streamlit is all about speed and interaction. It is a web application framework that helps you build and develop Python web applications. It has built-in and convenient methods for everything from taking in user inputs like text and dates to showing interactive graphs using the most popular and powerful Python graphing libraries.
Retrieved from Streamlit for Data Science, Second Edition by Tyler Richards.
To run Streamlit apps, you must first install Streamlit using a package manager like pip or brew. The book will guide you on when to use terminal commands and when to write Python scripts, providing clear instructions for both. To install Streamlit, execute the specified code in a terminal.
pip install streamlit
streamlit hello
import streamlit as st
import time
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# Initialize the progress bar and status text in the sidebar
progress_bar = st.sidebar.progress(0)
status_text = st.sidebar.empty()
# Generate initial random data for the line chart
last_rows = np.random.randn(1, 1)
chart = st.line_chart(last_rows)
# Update the line chart in a loop
for i in range(1, 101):
    new_rows = last_rows[-1, :] + np.random.randn(5, 1).cumsum(axis=0)
    status_text.text(f"{i}% Complete")
    chart.add_rows(new_rows)
    progress_bar.progress(i)
    last_rows = new_rows
    time.sleep(0.5)
# Clear the progress bar after completion
progress_bar.empty()
# Add a button to rerun the script
st.button("Re-run")
streamlit run example.py
Make a new folder called stltm_app, and toss in a new file named stltm_app_demo.py.
mkdir stltm_app
cd stltm_app
touch stltm_app_demo.py
touch creates an empty Python file. The touch command is used in Unix-like operating systems (such as Linux and macOS) to create a new, empty file in the current directory. If the file already exists, touch updates the file's "last modified" timestamp to the current date and time.
Streamlit provides unique functions for different types of content, such as text, graphs, pictures, and other media, which serve as building blocks for apps. The function st.write() is one of the first you'll use; it takes a string or various Python objects (like dictionaries) and writes them directly into the web app in the order they are called. As Streamlit processes the Python script, it assigns a sequential slot to each function, making it easy to integrate content into your app simply by calling st.write().
import streamlit as st
st.write('Hello World')
The URL localhost:8501 indicates that the app is hosted locally on your computer via port 8501, meaning it is not accessible on the internet. The hamburger icon at the top right provides additional options when clicked.
We have already discussed the st.write() function for displaying text. Next, st.pyplot() lets us use Matplotlib to create and display graphs in Streamlit. For example, the app below simulates 500 coin flips, repeatedly draws samples of 100 flips with replacement, computes each sample's mean, repeats this 1,000 times, and plots a histogram of the means, producing a bell-shaped distribution.
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
# Simulate 500 coin flips, then compute 1,000 sample means of 100 flips each
binom_dist = np.random.binomial(1, .5, 500)
list_of_means = []
for i in range(0, 1000):
    list_of_means.append(np.random.choice(binom_dist, 100, replace=True).mean())
fig, ax = plt.subplots()
ax.hist(list_of_means)
st.pyplot(fig)
# Add a button to rerun the script
st.button("Re-run")
Currently, our app only displays visualizations, but most web apps require user input and dynamic content. Streamlit offers many functions for user input, such as st.text_input() for text, st.radio() for radio buttons, and st.number_input() for numeric input, among others. We will explore these throughout the book, starting with numeric input. For example, we can let users decide the probability of heads in a coin flip and use that as input to our binomial distribution. The st.number_input() function takes a label, minimum and maximum values, and a default value.
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
# User input for the probability of heads
prob_heads = st.number_input('Chance of Coins Landing on Heads', min_value=0.0, max_value=1.0, value=0.5)
# User input for the graph title
graph_title = st.text_input('Graph Title')
# User input for the number of samples to draw from the binomial distribution
num_samples = st.radio('Number of Samples', options=[50, 100, 200], index=1)
# Generate a binomial distribution
binomial_dist = np.random.binomial(n=1, p=prob_heads, size=1000)
# Calculate the means of random samples from the binomial distribution
means_list = [np.random.choice(binomial_dist, size=num_samples, replace=True).mean() for _ in range(1000)]
# Create a histogram of the means
fig, ax = plt.subplots()
fig.set_size_inches(5, 2) # Adjust the figure size
ax.hist(means_list, bins=np.arange(0, 1.1, 0.05), range=[0, 1])
ax.set_title(graph_title)
# Display the plot in Streamlit
st.pyplot(fig)
Our app works, but it lacks some finishing touches. We've discussed the versatility of st.write(), which works with almost any content and should be our default option. Additionally, we can use st.title(), st.header(), st.markdown(), and st.subheader() to format text easily and maintain consistency in larger apps. These functions place text with different font sizes, and st.markdown() accepts Markdown for familiar formatting. Let's try some of these in the following code:
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
st.title('Illustrating the Central Limit Theorem with Streamlit')
st.subheader('An App by Tyler Richards')
st.write(('This app simulates a thousand coin flips using the chance of heads input below, '
          'and then samples with replacement from that population and plots the histogram of the'
          ' means of the samples in order to illustrate the central limit theorem!'))
# User input for the probability of heads
prob_heads = st.number_input('Chance of Coins Landing on Heads', min_value=0.0, max_value=1.0, value=0.5)
# User input for the graph title
graph_title = st.text_input('Graph Title')
# User input for the number of samples to draw from the binomial distribution
num_samples = st.radio('Number of Samples', options=[50, 100, 200], index=1)
# Generate a binomial distribution
binomial_dist = np.random.binomial(n=1, p=prob_heads, size=1000)
# Calculate the means of random samples from the binomial distribution
means_list = [np.random.choice(binomial_dist, size=num_samples, replace=True).mean() for _ in range(1000)]
# Create a histogram of the means
fig, ax = plt.subplots()
fig.set_size_inches(5, 2) # Adjust the figure size
ax.hist(means_list, bins=np.arange(0, 1.1, 0.05), range=[0, 1])
ax.set_title(graph_title)
# Display the plot in Streamlit
st.pyplot(fig)
# Add a button to rerun the script
st.button("Re-run")
We can find various real datasets on the UCI Machine Learning repository, which are preprocessed and ready for use with Machine Learning algorithms. I found the Energy Efficiency Data Set particularly useful. This dataset includes energy analyses of 768 simulated building shapes based on 8 features such as Wall Area, Overall Height, Glazing Area, and Orientation, aimed at predicting Heating Load and Cooling Load. The work, published by Tsanas and Xifara in 2012 in the Energy and Buildings Journal, can be used for both regression and classification tasks. In this lecture, we will focus on binary classification of Heating Load, which measures the heating required to maintain indoor temperature at set levels. I have added two columns to the dataset, dividing Heating Load into binary and multiclass categories. Let's examine the dataset.
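Since two label columns were added to the dataset, here is a hedged sketch of how such columns could be derived from Heating Load. The column names 'Binary Classes' and 'Multi Classes' and the median/quartile thresholds are illustrative assumptions, not necessarily the ones actually used:
import pandas as pd

df = pd.read_csv('energyeff.csv')

# Hypothetical binary label: split Heating Load at its median
df['Binary Classes'] = (df['Heating Load'] > df['Heating Load'].median()).astype(int)

# Hypothetical multiclass label: bin Heating Load into four quartiles (0-3)
df['Multi Classes'] = pd.qcut(df['Heating Load'], q=4, labels=False)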
mkdir energy_eff
cd energy_eff
touch energyeff.py
In the last section, we learned about a Streamlit input called st.number_input(). This won’t help us here, but Streamlit has a very similar function called st.selectbox(), which asks the user to pick one option from several and returns whatever the user selects. We will use it to get the three inputs for our scatterplot:
import streamlit as st
import pandas as pd
import altair as alt
st.title("Energy Efficiency Dataset")
st.markdown('Use this Streamlit app to create scatterplots between different attributes!')
st.markdown('Here is the head of the dataset:')
# Load data
df_energy_eff = pd.read_csv('energyeff.csv')
# Display the first few rows of the dataset
st.write(df_energy_eff.head())
# Dropdown for selecting orientation
selected_orientation = st.selectbox('Select an Orientation to visualize:', [2, 3, 4, 5])
# Dropdown for selecting the x variable
selected_x_var = st.selectbox('Choose the x variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Dropdown for selecting the y variable
selected_y_var = st.selectbox('Choose the y variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Filter the dataset based on the selected orientation
df_filtered = df_energy_eff[df_energy_eff['Orientation'] == selected_orientation]
# Create and display the Altair scatterplot
scatterplot = alt.Chart(
df_filtered, title=f"Scatterplot of Orientation {selected_orientation} for Energy Efficiency"
).mark_circle().encode(
x=selected_x_var,
y=selected_y_var
)
st.altair_chart(scatterplot)
# Add a button to rerun the script
st.button("Re-run")
This looks great, but we can make a few more improvements. Currently, we can't zoom into our chart, leaving much of it blank. We can address this by either adjusting the axes in Altair or making the Altair chart interactive, allowing users to zoom in on any part of the graph. The following code makes the Altair chart zoomable and extends the graph to fit the entire screen using the use_container_width parameter:
import streamlit as st
import pandas as pd
import altair as alt
st.title("Energy Efficiency Dataset")
st.markdown('Use this Streamlit app to create scatterplots between different attributes!')
st.markdown('Here is the head of the dataset:')
# Load data
df_energy_eff = pd.read_csv('energyeff.csv')
# Display the first few rows of the dataset
st.write(df_energy_eff.head())
# Dropdown for selecting orientation
selected_orientation = st.selectbox('Select an Orientation to visualize:', [2, 3, 4, 5])
# Dropdown for selecting the x variable
selected_x_var = st.selectbox('Choose the x variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Dropdown for selecting the y variable
selected_y_var = st.selectbox('Choose the y variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Filter the dataset based on the selected orientation
df_filtered = df_energy_eff[df_energy_eff['Orientation'] == selected_orientation]
# Create and display the Altair scatterplot
scatterplot = (alt.Chart(
df_filtered, title=f"Scatterplot of Orientation {selected_orientation} for Energy Efficiency"
).mark_circle().encode(
x=selected_x_var,
y=selected_y_var,
color="Heating Load"
).interactive()
)
st.altair_chart(scatterplot, use_container_width=True)
# Add a button to rerun the script
st.button("Re-run")
The final step for this app is to allow users to upload their own data. This enables the research team to upload data at any time and view the results, or for multiple research groups with different data and column names to use a common method. We’ll tackle this step-by-step, starting with accepting data from users.
Streamlit's st.file_uploader() function allows users to upload files up to 200 MB by default. Unlike other interactive widgets, st.file_uploader() defaults to None until the user interacts with it, as there can't be a pre-existing default file.
This introduces an important concept in Streamlit development: flow control. Flow control involves carefully planning each step of the application. Without explicit instructions, Streamlit will attempt to run the entire app at once, so we need to ensure that the app waits for a user to upload a file before creating a graphic or manipulating a DataFrame.
As we discussed earlier, there are two solutions to the default data upload situation. We can either provide a default file to use until the user uploads their own, or we can pause the app until a file is uploaded. Let's start with the first option. The following code uses the st.file_uploader() function within an if statement. If the user uploads a file, the app uses that file; otherwise, it defaults to the pre-existing file we have been using:
import streamlit as st
import pandas as pd
import altair as alt
st.title("Energy Efficiency Dataset")
st.markdown('Use this Streamlit app to create scatterplots between attributes!')
df_energy_file = st.file_uploader("Select Local Energy Efficiency CSV (default provided)")
if df_energy_file is not None:
    df_energy_eff = pd.read_csv(df_energy_file)
else:
    df_energy_eff = pd.read_csv('energyeff.csv')
# Display the first few rows of the dataset
st.write(df_energy_eff.head())
# Dropdown for selecting orientation
selected_orientation = st.selectbox('Select an Orientation to visualize:', [2, 3, 4, 5])
# Dropdown for selecting the x variable
selected_x_var = st.selectbox('Choose the x variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Dropdown for selecting the y variable
selected_y_var = st.selectbox('Choose the y variable:',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
# Filter the dataset based on the selected orientation
df_filtered = df_energy_eff[df_energy_eff['Orientation'] == selected_orientation]
# Create and display the Altair scatterplot
scatterplot = (alt.Chart(
df_filtered, title=f"Scatterplot of Orientation {selected_orientation} for Energy Efficiency"
).mark_circle().encode(
x=selected_x_var,
y=selected_y_var,
color="Heating Load"
).interactive()
)
st.altair_chart(scatterplot, use_container_width=True)
# Add a button to rerun the script
st.button("Re-run")
The clear advantage of this approach is that there are always results shown in this application, but the results may not be useful to the user! For larger applications, this is a subpar solution as well because any data stored inside the app, regardless of use, is going to slow the application down.
Our second option is to halt the application entirely until the user uploads a file. For this, we can use the Streamlit function st.stop(), which halts the app's execution wherever it is called. This approach is also useful for flagging errors and prompting users to take action or report issues. While it isn't necessary for our current scenario, it's a valuable technique for future applications. The following code demonstrates how to use an if-else statement with st.stop() in the else block to prevent the app from running until st.file_uploader() has been used:
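Here is a minimal sketch of that pattern, reusing the DataFrame names from the app above (the "(default provided)" label is dropped, since this variant has no default):
import streamlit as st
import pandas as pd

df_energy_file = st.file_uploader("Select Local Energy Efficiency CSV")
if df_energy_file is not None:
    df_energy_eff = pd.read_csv(df_energy_file)
else:
    # Halt the script here; nothing below runs until a file is uploaded
    st.stop()
st.write(df_energy_eff.head())
The rest of the app (the select boxes and the Altair scatterplot) would follow unchanged after the st.write() call.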
As we develop more computationally intensive Streamlit apps and handle larger datasets, it's important to focus on runtime efficiency. One effective way to improve efficiency is through caching, which involves storing results in memory to avoid repeated computations.
Caching in Streamlit works similarly to human short-term memory, where frequently accessed information is kept readily available. When a function's result is cached, Streamlit retrieves the stored result from memory if the function is called again with the same parameters, rather than recomputing it.
To demonstrate caching, we'll create a function for data upload and use the time library to artificially delay its execution. We'll then use st.cache_data to see whether it improves the app's performance. Note that Streamlit also has st.cache_resource for caching resources like database connections and machine learning models. For now, we'll focus on caching data. The following code defines a load_file() function that simulates a delay of 3 seconds to test whether caching effectively speeds up the app.
import streamlit as st
import pandas as pd
import altair as alt
import seaborn as sns
import time
st.title("Energy Efficiency Dataset")
st.markdown('Use this Streamlit app to create scatterplots between attributes!')
energy_file = st.file_uploader("Select Local Energy Efficiency CSV (default provided)")
def load_file(energy_file):
    time.sleep(3)
    if energy_file is not None:
        df = pd.read_csv(energy_file)
    else:
        df = pd.read_csv('energyeff.csv')
    return df
df_energy_eff = load_file(energy_file)
sns.set_style('darkgrid')
markers = {2: "X", 3: "s", 4:'o', 5:'*'}
selected_ori = st.selectbox('What Orientation would you like to visualize?',
[2, 3, 4, 5])
selected_x_var = st.selectbox('What do you want the x variable to be?',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
selected_y_var = st.selectbox('What about the y?',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
df_energy_eff = df_energy_eff[df_energy_eff['Orientation'] == selected_ori]
alt_chart = (
alt.Chart(df_energy_eff, title=f"Scatterplot of Orientation {selected_ori} for Energy Efficiency")
.mark_circle()
.encode(
x=selected_x_var,
y=selected_y_var,
color="Orientation"
)
.interactive()
)
st.altair_chart(alt_chart, use_container_width=True)
Now, let’s run this app and then select the hamburger icon in the top right and press the rerun button (we can also just press the R key to rerun).
We notice that each time we rerun the app, it takes at least 3 seconds. Now, let's add our cache decorator on top of the load_file() function and run the app again:
import streamlit as st
import pandas as pd
import altair as alt
import seaborn as sns
import time
st.title("Energy Efficiency Dataset")
st.markdown('Use this Streamlit app to create scatterplots between attributes!')
energy_file = st.file_uploader("Select Local Energy Efficiency CSV (default provided)")
@st.cache_data()
def load_file(energy_file):
    time.sleep(3)
    if energy_file is not None:
        df = pd.read_csv(energy_file)
    else:
        df = pd.read_csv('energyeff.csv')
    return df
df_energy_eff = load_file(energy_file)
sns.set_style('darkgrid')
markers = {2: "X", 3: "s", 4:'o', 5:'*'}
selected_ori = st.selectbox('What Orientation would you like to visualize?',
[2, 3, 4, 5])
selected_x_var = st.selectbox('What do you want the x variable to be?',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
selected_y_var = st.selectbox('What about the y?',
['Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height',
'Glazing Area', 'Glazing Area Distribution', 'Heating Load'])
df_energy_eff = df_energy_eff[df_energy_eff['Orientation'] == selected_ori]
alt_chart = (
alt.Chart(df_energy_eff, title=f"Scatterplot of Orientation {selected_ori} for Energy Efficiency")
.mark_circle()
.encode(
x=selected_x_var,
y=selected_y_var,
color="species"
)
.interactive()
)
st.altair_chart(alt_chart, use_container_width=True)
In the code provided, the @st.cache_data() decorator is used to cache the results of the load_file() function. Here's what happens with @st.cache_data():

- Caching: Streamlit stores the results of the load_file() function in memory. If the function is called again with the same energy_file parameter, Streamlit retrieves the cached result rather than re-running the function, which reduces redundant computation and speeds up the app.
- Simulated delay: the time.sleep(3) line in the function simulates a delay, which is only experienced during the initial run. If the function is called again with the same parameters, the cached result is used and the delay is avoided.
- Cache invalidation: if energy_file changes, the cache is not used, and the function runs again to load the new file.

Overall, @st.cache_data() improves the app's performance by avoiding unnecessary re-computation and data loading delays for the same input parameters.
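Though not needed here, st.cache_resource follows the same decorator pattern for objects that should be created once and shared across reruns, such as models or database connections. A minimal sketch, assuming the pickled model file used later in this notebook:
import pickle
import streamlit as st

@st.cache_resource
def load_model(path):
    # Load the model once; subsequent calls with the same path reuse the cached object
    with open(path, "rb") as f:
        return pickle.load(f)

model = load_model("random_forest_churn.pickle")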
One of the most challenging aspects for new developers working with Streamlit is dealing with these two facts:

- Streamlit reruns your entire script from top to bottom on every user interaction.
- Ordinary Python variables are re-created on each rerun, so nothing persists between runs by default.
These two facts make it difficult to build certain types of apps! This is best shown with an example. Let's say we want to make a to-do app that makes it easy to add items to a to-do list. Adding user input in Streamlit is really simple, so we can create one quickly in a new file called session_state_example.py that looks like the following:
import streamlit as st
st.title('My To-Do List Creator')
my_todo_list = ["Go to Costco", "Got to swimming pool", "Learn English"]
st.write('My current To-Do list is:', my_todo_list)
new_todo = st.text_input("What do you need to do?")
if st.button('Add the new To-Do item'):
    st.write('Adding a new item to the list')
    my_todo_list.append(new_todo)
    st.write('My new To-Do list is:', my_todo_list)
Once you try to add more than one item to the list, you will notice that it resets the original list and forgets what the first item you entered was!
Enter st.session_state. Session State is a Streamlit feature that acts as a global dictionary persisting throughout a user's session. It lets us overcome the two issues above by storing user input in this global dictionary. To add Session State, we first check whether our to-do list is already in the session_state dictionary and, if not, set the default value. On each button click, we update the list stored in the session_state dictionary.
import streamlit as st
st.title('My To-Do List Creator')
if 'my_todo_list' not in st.session_state:
    st.session_state.my_todo_list = ["Go to Costco", "Go to swimming pool", "Learn English"]
new_todo = st.text_input("What do you need to do?")
if st.button('Add the new To-Do item'):
    st.write('Adding a new item to the list')
    st.session_state.my_todo_list.append(new_todo)
st.write('My To-Do list is:', st.session_state.my_todo_list)
Whenever you want to keep information from the user across runs, st.session_state can help you out.
Visualization is an essential tool for modern data scientists, often serving as the primary method for understanding elements such as statistical models (e.g., through an AUC chart), the distribution of key variables (via histograms), or important business metrics.
We explored two popular Python graphing libraries, Matplotlib and Altair, through various examples. This session expands on that by introducing a wider range of Python graphing libraries, including some native to Streamlit. We will cover Streamlit's built-in charting functions (st.line_chart(), st.bar_chart(), st.area_chart(), and st.map()), Plotly, Matplotlib and Seaborn, Bokeh, Altair, and PyDeck.
Environment and Climate Change Canada requires certain facilities across Canada to report their greenhouse gas (GHG) emissions annually through the Greenhouse Gas Reporting Program. The reported emissions can be accessed freely at https://www.canada.ca/en/environment-climate-change/services/climate-change/greenhouse-gas-emissions/facility-reporting/data. The values are in Megatonnes CO2/year. All facilities that emit the equivalent of 50 kilotonnes (kt) or more of GHGs in carbon dioxide equivalent units (CO2 eq.) per year are required to submit a report.
Streamlit offers four built-in functions for graphing: st.line_chart(), st.bar_chart(), st.area_chart(), and st.map(). These functions automatically infer the variables you want to graph and display them as a line chart, bar chart, area chart, or map.
import streamlit as st
import pandas as pd
st.title('Canada GHG Emission')
st.write(
"""This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
ghg_df = pd.read_csv('./GHG/GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince']).sum()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
st.line_chart(data=ghg_2022, x='FacilityProvince', y='TotalEmissions', color='#ff2b2b')
st.bar_chart(data=ghg_2022, x='FacilityProvince', y='TotalEmissions', color='#09ab3b')
st.area_chart(data=ghg_2022, x='FacilityProvince', y='TotalEmissions', color='#0068c9')
Each of these charts is also interactive by default! We can zoom in or out, roll the mouse over points/bars/lines to see each data point, and even view the full screen out of the box. These Streamlit functions call a popular graphing library called Altair.
There is one more built-in Streamlit graphing function we should discuss: st.map(). Like the previous functions, it wraps another Python graphing library, this time PyDeck instead of Altair. It searches the DataFrame for columns named longitude, long, latitude, or lat to identify the coordinates, plots each row as a point on a map, auto-zooms and focuses the map, and displays it in our Streamlit app. Note that visualizing detailed maps is much more computationally intensive than the other visualizations we've used so far, so we will sample 1,000 random rows from our DataFrame, remove null values, and call st.map() with the following code:
import streamlit as st
import pandas as pd
st.title('Canada GHG Emission')
st.write(
"""This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince',
'latitude',
'longitude']).mean()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
ghg_2022 = ghg_2022.dropna(subset=['longitude', 'latitude'])
ghg_2022 = ghg_2022.sample(n = 1000)
st.map(ghg_2022)
As we've observed, these built-in functions are useful for quickly creating Streamlit apps, but there is a trade-off between speed and customizability. In practice, we seldom use these functions when developing Streamlit apps, although we often rely on them for quick visualizations of data within Streamlit. For production, more powerful libraries like Matplotlib, Plotly, Seaborn, and PyDeck provide the flexibility and customizability we need.
Plotly is an interactive visualization library widely used by data scientists to visualize data in Jupyter notebooks, either locally in the browser or on a web platform such as Dash (created by the developers of Plotly). This library shares a similar purpose with Streamlit, primarily focusing on creating internal or external dashboards (hence the name Dash).
Streamlit allows us to integrate Plotly graphs within Streamlit apps using the st.plotly_chart() function, making it easy to port any Plotly or Dash dashboards. We'll demonstrate this by creating a histogram of the 2022 total emissions, similar to the graph we made earlier. The following code produces our Plotly histogram:
import streamlit as st
import pandas as pd
import plotly.express as px
st.title('Canada GHG Emission')
st.write(
"""This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince']).mean()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
fig = px.histogram(ghg_2022['TotalEmissions'], nbins=500)
st.plotly_chart(fig)
Matplotlib and Seaborn figures can also be embedded with st.pyplot(). The following code plots the same histogram of 2022 total emissions with both libraries:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
st.title('Canada GHG Emission')
st.write(
"""This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince']).mean()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
st.subheader('Seaborn Chart for GHG Emission 2022')
fig_sb, ax_sb = plt.subplots(figsize=(4, 2))
ax_sb = sns.histplot(ghg_2022['TotalEmissions'])
plt.xlabel('TotalEmissions_2022')
st.pyplot(fig_sb)
st.subheader('Matplotlib Chart for GHG Emission 2022')
fig_mpl, ax_mpl = plt.subplots(figsize=(4, 2))
ax_mpl = plt.hist(ghg_2022['TotalEmissions'])
plt.xlabel('TotalEmissions_2022')
st.pyplot(fig_mpl)
Bokeh is another web-based interactive visualization library that also offers dashboarding capabilities built on top of it. It is a direct competitor to Plotly and is quite similar in use, with some stylistic differences. Bokeh is an extremely popular Python visualization package that many Python users find comfortable to use.
We can integrate Bokeh graphs into Streamlit in a manner similar to Plotly. First, we create the Bokeh graph, and then use the st.bokeh_chart() function to display it in the Streamlit app. In Bokeh, we need to instantiate a Bokeh figure object and configure its properties before plotting it. It's important to note that any changes made to the Bokeh figure object after calling the st.bokeh_chart() function will not be reflected in the graph displayed in the Streamlit app.
import streamlit as st
import pandas as pd
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
st.title('Canada GHG Emission')
st.write(
    """This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
st.subheader('Bokeh Chart')
ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince']).mean()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
# Create the blank plot
provinces = list(ghg_2022.FacilityProvince.drop_duplicates())
histogram = figure(x_range=provinces,
                   title='Bokeh_TotalEmissions',
                   x_axis_label='FacilityProvince',
                   y_axis_label='TotalEmissions')
histogram.vbar(x=provinces, top=ghg_2022.TotalEmissions, width=0.9)
histogram.xgrid.grid_line_color = None
histogram.y_range.start = 0
st.bokeh_chart(histogram)
Altair has already been used indirectly through built-in Streamlit functions such as st.line_chart(), and it can be used directly through st.altair_chart():
import streamlit as st
import pandas as pd
import altair as alt
st.title('Canada GHG Emission')
st.write(
"""This app analyzes GHG emission for Environment and Climate Change of Canada."""
)
ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df_grouped = pd.DataFrame(ghg_df.groupby(['ReferenceYear',
'FacilityProvince']).sum()['TotalEmissions']).reset_index()
ghg_2022 = ghg_df_grouped[ghg_df_grouped.ReferenceYear==2022]
fig = alt.Chart(ghg_2022).mark_bar().encode(x ='FacilityProvince',
y='TotalEmissions').properties(
width=800,
height=500
)
st.altair_chart(fig)
Streamlit also allows us to use more complex visualization libraries, such as PyDeck for geographical mapping. In fact, we already used PyDeck indirectly through the native st.map() function.
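Streamlit also exposes PyDeck directly through st.pydeck_chart(). Here is a hedged sketch of plotting the facility coordinates ourselves; the layer type, point radius, and initial view below are illustrative choices, and the latitude/longitude columns are assumed from the st.map() example above:
import streamlit as st
import pandas as pd
import pydeck as pdk

ghg_df = pd.read_csv('GHGEmissions2004-Present.csv')
ghg_df = ghg_df.dropna(subset=['longitude', 'latitude'])

# One point per facility; the radius is an arbitrary illustrative value
layer = pdk.Layer(
    'ScatterplotLayer',
    data=ghg_df[['longitude', 'latitude']],
    get_position=['longitude', 'latitude'],
    get_radius=20000,
)
# Roughly center the initial view on Canada
view_state = pdk.ViewState(latitude=56.1, longitude=-106.3, zoom=2.5)
st.pydeck_chart(pdk.Deck(layers=[layer], initial_view_state=view_state))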
Data scientists often face a common challenge at the end of the model creation process: figuring out how to convince non-data scientists of their model's value. They might have performance metrics and static visualizations but lack an easy way to allow others to interact with their model.
Before Streamlit, the main options were creating a full-fledged app with Flask or Django or turning a model into an Application Programming Interface (API) for developers. While effective, these methods are time-consuming and not ideal for quick prototyping.
The incentives for teams can be misaligned. Data scientists aim to create the best models, but building a Flask or Django app requires significant time (a day or two, or a few hours for experienced developers), making it impractical to do so until the modeling process is nearly complete. However, it would be beneficial for data scientists to involve stakeholders early and often to ensure they are building solutions that meet actual needs.
Streamlit simplifies this process, turning the arduous task of app creation into a seamless experience. In this section, we'll cover how to create Machine Learning (ML) prototypes in Streamlit, add user interaction to ML apps, and interpret ML results. We'll use popular ML libraries such as PyTorch, Hugging Face, OpenAI, and scikit-learn.
The first step in creating an app that utilizes machine learning (ML) is developing the ML model itself. There are numerous popular workflows for creating ML models, and you likely already have your own! This process consists of two main parts:
If the plan is to train the model once and then use it in our Streamlit app, the best approach is to create the model outside of Streamlit first (e.g., in a Jupyter notebook or a standard Python file), and then incorporate the model into the app.
If the plan involves using user input to train the model within the app, the model must be trained inside the Streamlit app rather than externally.
We will begin by building our ML models outside of Streamlit and then progress to training our models within Streamlit apps.
Retrieved from Streamlit for Data Science, Second Edition by Tyler Richards.
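For context, the pre-trained model and scaler loaded in the app below could have been produced offline by a script along these lines. This is a hedged sketch: the preprocessing mirrors the app's input handling, but the CSV file name and the hyperparameters are assumptions, and the actual training script is not shown here.
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

df = pd.read_csv('Churn_Modelling.csv')  # Bank Turnover Dataset from Kaggle (file name assumed)

# One-hot encode Geography and binarize Gender, mirroring the app's preprocessing
geog = pd.get_dummies(df['Geography'], prefix='Is')
gender = (df['Gender'] == 'Male').astype(int)

# Standardize the continuous predictors only
scaler = StandardScaler()
cont = scaler.fit_transform(df[['CreditScore', 'Age', 'Tenure', 'Balance', 'EstimatedSalary']])

# Feature order matches the app: standardized columns first, then the rest
X = np.concatenate([cont,
                    geog[['Is_France', 'Is_Germany', 'Is_Spain']].values,
                    gender.values.reshape(-1, 1),
                    df[['NumOfProducts', 'HasCrCard', 'IsActiveMember']].values], axis=1)
y = df['Exited'].values

rfc = RandomForestClassifier(random_state=42).fit(X, y)

# Persist the model and scaler for the Streamlit app
with open('random_forest_churn.pickle', 'wb') as f:
    pickle.dump(rfc, f)
with open('std.pickle', 'wb') as f:
    pickle.dump(scaler, f)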
import pickle
import streamlit as st
import numpy as np
st.title('Customer Churn Prediction')
st.write(
    """This app is created with [Streamlit](https://streamlit.io/) and uses a pre-trained model to predict the likelihood that bank
    customers will churn next cycle. A random forest classifier was trained on the Bank Turnover Dataset from
    [Kaggle](https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling/version/1).
    The model uses 10 inputs (predictors): `Geography`, `CreditScore`,
    `Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`."""
)
rf_pickle = open("random_forest_churn.pickle", "rb")
rfc = pickle.load(rf_pickle)
rf_pickle.close()
Geography = st.selectbox("Geography", options=["France", "Germany", "Spain"])
CreditScore = st.number_input("CreditScore", min_value=300)
Gender = st.selectbox("Gender", options=['Male', 'Female'])
Age = st.number_input("Age", min_value=18)
Tenure = st.number_input("Tenure", min_value=2)
Balance = st.number_input("Balance", min_value=500)
NumOfProducts = st.number_input("NumOfProducts", min_value=1)
HasCrCard = st.selectbox("HasCrCard", options=[0, 1])
IsActiveMember = st.selectbox("IsActiveMember", options=[0, 1])
EstimatedSalary = st.number_input("EstimatedSalary", min_value=1000)
user_inputs = [Geography, CreditScore, Gender, Age, Tenure, Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary]
st.write("The user inputs are `Geography`, `CreditScore`, `Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`")
Is_France, Is_Germany, Is_Spain = 0, 0, 0
if Geography == 'France':
    Is_France = 1
elif Geography == 'Germany':
    Is_Germany = 1
elif Geography == 'Spain':
    Is_Spain = 1
if Gender == 'Male':
    Gender = 1
elif Gender == 'Female':
    Gender = 0
std_pickle = open("std.pickle", "rb")
scaler = pickle.load(std_pickle)
std_pickle.close()
clmn_std = np.array([CreditScore, Age, Tenure, Balance, EstimatedSalary]).reshape(1, 5)
clmn_not_std = np.array([Is_France, Is_Germany, Is_Spain,
Gender, NumOfProducts, HasCrCard, IsActiveMember]).reshape(1, 7)
feat_std = scaler.transform(clmn_std)
to_pred = np.concatenate((feat_std, clmn_not_std), axis=1)
if st.button("Predict", type="primary"):
y_pred = int(rfc.predict_proba(to_pred)[0][0]*100)
st.markdown(f"""<p style='font-size:24px;'>The likelihood of churn for this customer is <strong>{y_pred}%</strong></p>.""", unsafe_allow_html=True)
We often want user input to influence how our model is trained, whether it's by providing their own data, selecting specific features, or even choosing the type of machine learning algorithm. Streamlit makes all these options possible.
As noted earlier, if a model only needs to be trained once, it's usually better to train it outside of Streamlit and then import the trained model. However, consider a scenario where churn prediction data is stored locally, or the user knows how to retrain the model and has the data in the correct format. In such cases, we can use the st.file_uploader() feature to let users upload their own data and get a custom model deployed without writing any code.
The code below allows users to upload their data and run the preprocessing/training steps to create a unique model for them. It's important to note that this will only work if the user's data matches the exact format and style we used, which may not always be the case. A potential improvement would be to display the required data format to the user, ensuring the app can train the model correctly.
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split
from ydata_profiling import ProfileReport
import shap
import time
from streamlit_pandas_profiling import st_profile_report
from matplotlib.ticker import PercentFormatter
st.title('Customer Churn Prediction')
st.write(
    """This app is created with [Streamlit](https://streamlit.io/) to train a model that predicts
    customer turnover for the next cycle. A random forest classifier is trained on the Bank Turnover Dataset from
    [Kaggle](https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling/version/1).
    The model is trained on up to 10 inputs (predictors): `Geography`, `CreditScore`,
    `Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`."""
)
st.image('DataTable.jpg')
with st.form('input'):
    churn_file = st.file_uploader('Upload your churn data')
    st.form_submit_button()
if churn_file is None:
    st.stop()  # Halt the app until the user uploads a file
df = pd.read_csv(churn_file)
# Shuffle the data
np.random.seed(42)
df  # Streamlit "magic": a bare variable is rendered in the app
if st.button("profiling", type="primary"):
    profile = df.profile_report(title='Pandas Profiling Report')
    st_profile_report(profile)
df = df.reindex(np.random.permutation(df.index))
df.reset_index(inplace=True, drop=True)  # Reset index
# Remove 'RowNumber', 'CustomerId', 'Surname' features
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=False)
# Training and test split
st.subheader("Split Data to Training and Test")
test_size = st.number_input("proportion of test data", min_value=0.1, max_value=0.4)
spt = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=42)
for train_idx, test_idx in spt.split(df, df['Exited']):
    train_set_strat = df.loc[train_idx]
    test_set_strat = df.loc[test_idx]
train_set_strat.reset_index(inplace=True, drop=True)  # Reset index
test_set_strat.reset_index(inplace=True, drop=True)  # Reset index
st.subheader("Select Input")
features = ["Geography", "CreditScore", "Gender", "Age", "Tenure",
"Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"]
selected_features = st.multiselect('Select features:', features)
selected_features_test = selected_features.copy()
st.subheader("RandomForest Hyperparameters")
n_estimators = st.number_input("n_estimators", min_value=10, max_value=200)
max_depth = st.number_input("max_depth", min_value=5, max_value=30)
min_samples_split = st.number_input("min_samples_split", min_value=5, max_value=30)
bootstrap = st.selectbox("bootstrap", options=[True, False])
random_state = st.number_input("random_state", min_value=1)
# Random Forest for training set
if st.button("Train Random Forest", type="primary"):
    # Text handling
    # Convert Geography to one-hot encoding
    clmn = []
    # Convert Gender to 0 and 1
    if 'Gender' in selected_features:
        ordinal_encoder = OrdinalEncoder()
        clmn.append('Gender')
        train_set_strat['Gender'] = ordinal_encoder.fit_transform(train_set_strat[['Gender']])
    # Remove 'Geography'
    if 'Geography' in selected_features:
        Geog_1hot = pd.get_dummies(train_set_strat['Geography'], prefix='Is')
        clmn.append(list(Geog_1hot.columns))
        selected_features.remove('Geography')
        train_set_strat = train_set_strat.drop(['Geography'], axis=1, inplace=False)
        train_set_strat = pd.concat([Geog_1hot, train_set_strat], axis=1)  # Concatenate columns
    if 'NumOfProducts' in selected_features:
        clmn.append('NumOfProducts')
    if 'HasCrCard' in selected_features:
        clmn.append('HasCrCard')
    if 'IsActiveMember' in selected_features:
        clmn.append('IsActiveMember')
    # Standardization
    # Make training features and target
    X_train = train_set_strat.drop("Exited", axis=1)
    y_train = train_set_strat["Exited"].values
    selected_features_con = [i for i in selected_features if i not in clmn]
    if len(selected_features_con) == 0 and len(clmn) == 0:
        raise ValueError("Please select at least one input!")
    # Divide into two training sets (with and without standardization)
    X_train_for_std = X_train[selected_features_con]
    clmn = [item for sublist in clmn for item in (sublist if isinstance(sublist, list) else [sublist])]
    X_train_not_std = X_train[clmn]
    st.session_state.clmns_all = selected_features_con + clmn
    #
    scaler = StandardScaler()
    scaler.fit(X_train_for_std)
    #
    df_train_std = scaler.transform(X_train_for_std)
    X_train_std = np.concatenate((df_train_std, X_train_not_std), axis=1)
    # Initialize the progress bar
    progress_bar = st.progress(0)
    progress_step = 100 / 3  # Assuming 3 main steps in the process
    rnd = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                 min_samples_split=min_samples_split, bootstrap=bootstrap,
                                 random_state=random_state)
    rnd.fit(X_train_std, y_train)
    st.session_state.X_train_std = X_train_std
    st.session_state.y_train = y_train
    progress_bar.progress(100)  # Complete the progress bar
    # Convert Gender to 0 and 1
    if 'Gender' in selected_features_test:
        ordinal_encoder = OrdinalEncoder()
        test_set_strat['Gender'] = ordinal_encoder.fit_transform(test_set_strat[['Gender']])
    # Remove 'Geography'
    if 'Geography' in selected_features_test:
        Geog_1hot = pd.get_dummies(test_set_strat['Geography'], prefix='Is')
        test_set_strat = test_set_strat.drop(['Geography'], axis=1, inplace=False)
        test_set_strat = pd.concat([Geog_1hot, test_set_strat], axis=1)  # Concatenate columns
    # Standardize data
    X_test = test_set_strat.drop("Exited", axis=1)
    y_test = test_set_strat["Exited"].values
    #
    X_test_for_std = X_test[selected_features_con]
    X_test_not_std = X_test[clmn]
    #
    df_test_std = scaler.transform(X_test_for_std)
    X_test_std = np.concatenate((df_test_std, X_test_not_std), axis=1)
    # Random Forest for test set
    y_test_pred = rnd.predict(X_test_std)
    y_test_proba_rnd = rnd.predict_proba(X_test_std)
    score = accuracy_score(y_test_pred, y_test)
    st.markdown(f"""<p style='font-size:24px;'>Random Forest
    model was trained. The accuracy score for the test set is <strong>{int(score*100)}%</strong></p>""", unsafe_allow_html=True)
    st.session_state.rnd = rnd
# Apply feature importance with Shapley values (SHAP)
if st.button("Shapley Feature Importance", type="secondary"):
    # Plot the importance of features
    font = {'size': 7}
    plt.rc('font', **font)
    fig, ax1 = plt.subplots(figsize=(6, 3), dpi=180, facecolor='w', edgecolor='k')
    progress_step = 100 / 2
    explainer = shap.TreeExplainer(st.session_state.rnd)
    X_train_std = pd.DataFrame(st.session_state.X_train_std,
                               columns=st.session_state.clmns_all)
    shap_values = explainer(X_train_std)
    shap_values_for_class = shap_values[..., 0]
    progress_bar = st.progress(int(progress_step))
    shap.plots.beeswarm(shap_values_for_class,
                        max_display=len(st.session_state.clmns_all))
    st.pyplot(fig)
    # Complete the progress bar
    progress_bar.progress(100)
# Apply feature importance with Random Forest
if st.button("Random Forest Feature Importance", type="secondary"):
    class prfrmnce_plot(object):
        """Plot performance of features to predict a target"""
        def __init__(self, importance: list, title: str, ylabel: str, clmns: str,
                     titlefontsize: int = 10, xfontsize: int = 5, yfontsize: int = 8) -> None:
            self.importance = importance
            self.title = title
            self.ylabel = ylabel
            self.clmns = clmns
            self.titlefontsize = titlefontsize
            self.xfontsize = xfontsize
            self.yfontsize = yfontsize
        #########################
        def bargraph(self, select: bool = False, fontsizelable: bool = False, xshift: float = -0.1, nsim: int = False,
                     yshift: float = 0.01, perent: bool = False, xlim: list = False, axt=None,
                     ylim: list = False, y_rot: int = 0, graph_float: bool = True) -> pd.DataFrame:
            ax1 = axt or plt.axes()
            if not nsim:
                # Make all negative coefficients positive
                sort_score = sorted(zip(abs(self.importance), self.clmns), reverse=True)
                Clmns_sort = [sort_score[i][1] for i in range(len(self.clmns))]
                sort_score = [sort_score[i][0] for i in range(len(self.clmns))]
            else:
                importance_agg = []
                importance_std = []
                for iclmn in range(len(self.clmns)):
                    tmp = []
                    for isim in range(nsim):
                        tmp.append(abs(self.importance[isim][iclmn]))
                    importance_agg.append(np.mean(tmp))
                    importance_std.append(np.std(tmp))
                # Make all negative coefficients positive
                sort_score = sorted(zip(importance_agg, self.clmns), reverse=True)
                Clmns_sort = [sort_score[i][1] for i in range(len(self.clmns))]
                sort_score = [sort_score[i][0] for i in range(len(self.clmns))]
            index1 = np.arange(len(self.clmns))
            # Select the most important features
            if (select):
                Clmns_sort = Clmns_sort[:select]
                sort_score = sort_score[:select]
            ax1.bar(Clmns_sort, sort_score, width=0.6, align='center', alpha=1, edgecolor='k', capsize=4, color='b')
            plt.title(self.title, fontsize=self.titlefontsize)
            ax1.set_ylabel(self.ylabel, fontsize=self.yfontsize)
            ax1.set_xticks(np.arange(len(Clmns_sort)))
            ax1.set_xticklabels(Clmns_sort, fontsize=self.xfontsize, rotation=90, y=0.02)
            if (perent): plt.gca().yaxis.set_major_formatter(PercentFormatter(1))
            ax1.xaxis.grid(color='k', linestyle='--', linewidth=0.2)
            if (xlim): plt.xlim(xlim)
            if (ylim): plt.ylim(ylim)
            if (fontsizelable):
                for ii in range(len(sort_score)):
                    if (perent):
                        plt.text(xshift+ii, sort_score[ii]+yshift, f'{"{0:.1f}".format(sort_score[ii]*100)}%',
                                 fontsize=fontsizelable, rotation=y_rot, color='k')
                    else:
                        if graph_float:
                            plt.text(xshift+ii, sort_score[ii]+yshift, f'{"{0:.3f}".format(sort_score[ii])}',
                                     fontsize=fontsizelable, rotation=y_rot, color='k')
                        else:
                            plt.text(xshift+ii, sort_score[ii]+yshift, f'{"{0:.0f}".format(sort_score[ii])}',
                                     fontsize=fontsizelable, rotation=y_rot, color='k')
            dic_Clmns = {}
            for i in range(len(Clmns_sort)):
                dic_Clmns[Clmns_sort[i]] = sort_score[i]

    # Plot the importance of features
    font = {'size': 7}
    plt.rc('font', **font)
    fig, ax1 = plt.subplots(figsize=(6, 3), dpi=180, facecolor='w', edgecolor='k')
    # Calculate importance
    importance = abs(st.session_state.rnd.feature_importances_)
    df_most_important = prfrmnce_plot(importance, title='Feature Importance by Random Forest',
                                      ylabel='Random Forest Score', clmns=st.session_state.clmns_all, titlefontsize=9,
                                      xfontsize=7, yfontsize=8).bargraph(perent=True, fontsizelable=8, xshift=-0.25, axt=ax1,
                                                                         yshift=0.01, ylim=[0, max(importance)+0.05],
                                                                         xlim=[-0.5, len(st.session_state.clmns_all)+0.5], y_rot=0)
    st.pyplot(fig)
Up to this point, we've focused on building Streamlit apps, covering everything from creating intricate visualizations to developing and deploying machine learning models. Now, let's turn our attention to deployment, enabling these applications to be shared with anyone who has internet access. Deployment is essential for Streamlit apps because, without it, users may face obstacles accessing your work. If we believe that Streamlit removes barriers between creating data science analyses/products/models and sharing them, then the ability to share these apps widely is just as crucial as ease of development.
There are three primary methods for deploying Streamlit apps:

- On Streamlit Community Cloud
- On Hugging Face Spaces
- With a cloud provider such as AWS or Heroku
AWS and Heroku are paid options, while Streamlit Community Cloud and Hugging Face Spaces are free! The most straightforward and popular choice for many Streamlit users is Streamlit Community Cloud, so we’ll focus on it first and cover Hugging Face Spaces afterward.
Streamlit Community Cloud is Streamlit's streamlined solution for quick and easy deployment, and it is highly recommended for deploying Streamlit applications. After initially enjoying the experience of developing and deploying apps locally with Streamlit, I found trying to deploy on AWS somewhat discouraging. Then Streamlit introduced its own deployment solution, now known as Streamlit Community Cloud. I was initially skeptical about its simplicity, but it turned out to be as easy as pushing code to a GitHub repository and linking Streamlit to it; Streamlit takes care of the rest.
Though there are cases where configuring storage or memory may be needed, Streamlit Community Cloud usually handles deployment, resources, and sharing, simplifying the development workflow.
The goal now is to deploy the customer churn model on Streamlit Community Cloud. Before we start, remember that Streamlit Community Cloud integrates with GitHub. If you're already familiar with Git and GitHub, you can skip ahead.
git init
git add Churn_Modelling.py
git commit -m 'our first repo commit'
git branch -M main
git remote add origin https://github.com/MehdiRezvandehy/Customer_Chrun_Prediction.git
git push -u origin main
Once the app is built, it’s fully deployed as a Streamlit app. Any changes made to the GitHub repository will automatically be reflected in the app. For instance, the following code snippet changes the app's title (only the essential commands are shown for brevity):
git add .
git commit -m 'updated the title'
git push
The app will have its own unique URL, and if you ever need to locate your Streamlit apps, you can always find them at share.streamlit.io. The top of the app should now appear as shown in the following screenshot
With all the necessary files now in the GitHub repository, we're almost ready to deploy our application. The remaining steps, described below, complete the deployment.
When deploying to Streamlit Community Cloud, Streamlit uses its own servers to host the app. This requires us to specify the Python libraries our app needs to run. The following commands install the helpful pipreqs library and create a requirements.txt file in the format required by Streamlit:
pip install pipreqs
pipreqs .
!pipreqs . # in jupyter notebook
To generate the requirements.txt file, pipreqs scans all the Python files, identifies the imported libraries, and lists them with the specific versions needed. This ensures that Streamlit installs the correct libraries, minimizing the risk of errors.
Since the requirements.txt file is new, we need to add it to the GitHub repository. Use the following commands to do so:
git add requirements.txt
git commit -m 'add requirements file'
git push
The final step is to sign up for Streamlit Community Cloud at https://share.streamlit.io/. Once logged in, click on the New App button. For more details, refer to Deploy your app. You can then point Streamlit Community Cloud to the Python file that contains your app's code, which in this case is Churn_Modelling_Train.py.
The requirements file is very important. Make sure it pins the right versions of the libraries used by this script:
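As an illustration, a requirements.txt for the prediction app might look like the following. The package names match the script's imports (scikit-learn is needed to unpickle the model), but the version pins are placeholders, so substitute the versions you actually used:
streamlit==1.31.0
numpy==1.26.4
scikit-learn==1.4.0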
Here is the app: https://customerchrunprediction-59.streamlit.app/. And here is the churn_modelling_app.py Python script in GitHub used to make this app:
import pickle
import streamlit as st
import numpy as np
st.title('Customer Churn Prediction')
# Typing effect that stops at the author's name length and repeats from the beginning
st.markdown(
"""
<style>
.author-title {
font-size: 1.3em;
font-weight: bold;
color: #007acc; /* Color for "Author:" */
white-space: nowrap;
vertical-align: middle; /* Ensures alignment with animated text */
}
.author-name {
font-size: 1.2em;
font-weight: bold;
color: red; /* Color for the author's name */
overflow: hidden;
white-space: nowrap;
border-right: 3px solid;
display: inline-block;
vertical-align: middle; /* Aligns with the static "Author:" text */
animation: typing 5s steps(20, end) infinite, blink-caret 0.75s step-end infinite;
max-width: 10ch; /* Limit width to fit text length */
}
/* Typing effect */
@keyframes typing {
0% { max-width: 0; }
50% { max-width: 30ch; } /* Adjust to match the name's length */
100% { max-width: 0; } /* Reset back to zero */
}
/* Blinking cursor animation for the author's name */
@keyframes blink-caret {
from, to { border-color: transparent; }
50% { border-color: red; }
}
</style>
<p><span class="author-title">Author:</span> <span class="author-name">Mehdi Rezvandehy</span></p>
""",
unsafe_allow_html=True
)
st.write("""""")
st.write(
    """This app is created with [Streamlit](https://streamlit.io/) to predict the likelihood that bank
    customers will churn next cycle. A random forest classifier was trained on the Bank Turnover Dataset from
    [Kaggle](https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling/version/1).
    The model uses 10 inputs (predictors): `Geography`, `CreditScore`,
    `Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`."""
)
st.image('DataTable.jpg')
# load the trained random forest classifier
with open("random_forest_churn.pickle", "rb") as rf_pickle:
    rfc = pickle.load(rf_pickle)
st.write('')
col1, col2, col3 = st.columns(3)
col1.subheader("Input Data")
Geography = col1.selectbox("Geography", options=["France", "Germany", "Spain"])
CreditScore = col1.number_input("CreditScore", min_value=300)
Gender = col1.selectbox("Gender", options=['Male', 'Female'])
Age = col1.number_input("Age", min_value=18)
Tenure = col1.number_input("Tenure", min_value=2)
col2.subheader(" ")
col2.subheader(" ")
Balance = col2.number_input("Balance", min_value=500)
NumOfProducts = col2.number_input("NumOfProducts", min_value=1)
HasCrCard = col2.selectbox("HasCrCard", options=[0, 1])
IsActiveMember = col2.selectbox("IsActiveMember", options=[0, 1])
EstimatedSalary = col2.number_input("EstimatedSalary", min_value=1000)
user_inputs = [Geography, CreditScore, Gender, Age, Tenure, Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary]
Is_France, Is_Germany, Is_Spain = 0, 0, 0
if Geography == 'France':
    Is_France = 1
elif Geography == 'Germany':
    Is_Germany = 1
elif Geography == 'Spain':
    Is_Spain = 1
if Gender == 'Male':
    Gender = 1
elif Gender == 'Female':
    Gender = 0
# load the scaler used to standardize the numeric features during training
with open("std.pickle", "rb") as std_pickle:
    scaler = pickle.load(std_pickle)
clmn_std = np.array([CreditScore, Age, Tenure, Balance, EstimatedSalary]).reshape(1, 5)
clmn_not_std = np.array([Is_France, Is_Germany, Is_Spain,
Gender, NumOfProducts, HasCrCard, IsActiveMember]).reshape(1, 7)
feat_std = scaler.transform(clmn_std)
to_pred = np.concatenate((feat_std, clmn_not_std), axis=1)
# predict_proba returns [P(class 0), P(class 1)]; assuming the model was trained with
# the dataset's Exited column (1 = churned) as the positive class, churn probability is column 1
y_pred = int(rfc.predict_proba(to_pred)[0][1] * 100)
col3.subheader("Prediction")
col3.write(f"The likelihood of churn for this customer is predicted **{y_pred}**%")
When building and deploying Streamlit apps, you might need to incorporate sensitive information, like a password or API key, that should remain hidden from users. However, Streamlit Community Cloud defaults to public GitHub repositories, where all code, data, and models are visible. If you need to work with a private API key—required by many APIs such as Twitter's scraping API or Google Maps—or if you need to programmatically access data from a password-protected database, or even password-protect your Streamlit app, it’s essential to securely expose this private data to Streamlit. Streamlit addresses this with Streamlit Secrets, allowing you to set hidden, private “secrets” for each app. Let’s begin by setting a password to secure our existing Streamlit application.
First, we can modify the beginning of our app to prompt users to enter a password before loading the rest of the application. Using the st.stop() function, we can halt the app if the password entered is incorrect, as shown in the following code:
import pickle
import streamlit as st
import numpy as np
st.title('Customer Churn Prediction')
password_guess = st.text_input('What is the Password?')
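# Optional refinement (not in the original script): st.text_input can mask the
# password as the user types via its type parameter:
# password_guess = st.text_input('What is the Password?', type='password')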
if password_guess != st.secrets["password"]:
    st.stop()
st.write(
    """This app was created with [streamlit](https://streamlit.io/) to predict the likelihood that bank
customers will churn in the next cycle. A random forest classifier was trained on the Bank Turnover dataset from
[Kaggle](https://www.kaggle.com/barelydedicated/bank-customer-churn-modeling/version/1).
The model uses 10 inputs (predictors): `Geography`, `CreditScore`,
`Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`."""
)
st.image('DataTable.jpg')
with open("random_forest_churn.pickle", "rb") as rf_pickle:
    rfc = pickle.load(rf_pickle)
This code creates a password-protected app using a Streamlit Secret, which we set as follows:
To create a Streamlit Secret, go to the Streamlit Community Cloud main page at https://share.streamlit.io/ and select the "Edit secrets" option, as illustrated in the following screenshot:
The password for this website is churn-modeling-banking-7968
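In the secrets editor, secrets are written in TOML format. The minimal entry matching the st.secrets["password"] lookup in our code is a single line:
password = "churn-modeling-banking-7968"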
In this section, we’ll explore elements like sidebars, tabs, columns, and colors to enhance our ability to create visually appealing Streamlit applications.
We can split our Streamlit app into multiple columns of varying widths using st.columns(), with each column acting as a distinct container in our app to display text, graphs, images, or any other elements we choose.
In Streamlit, columns are addressed using with notation. This notation creates self-contained code blocks, specifying exactly where to position elements within the app’s layout.
import streamlit as st
st.title("Churn prediction")
st.write(
    """
    This app predicts the likelihood of a customer leaving the business
    """
)
col1, col2, col3 = st.columns(3)
with col1:
    st.write("Column 1")
with col2:
    st.write("Column 2")
with col3:
    st.write("Column 3")
st.columns() also accepts a tuple of relative widths, so we can let the user control the column proportions interactively:
import streamlit as st
f_width = st.number_input('First Width', min_value=5, value=5)
s_width = st.number_input('Second Width', min_value=5, value=5)
t_width = st.number_input('Third Width', min_value=5, value=5)
col1, col2, col3 = st.columns((f_width, s_width, t_width))
with col1:
    st.write('First column')
with col2:
    st.write('Second column')
with col3:
    st.write('Third column')
Columns can hold more than just text; the next example plots a histogram of a user-selected variable in each of three columns:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
st.title('Customer Churn')
st.write(
    """This app analyzes the user inputs `Geography`, `CreditScore`,
`Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`"""
)
df = pd.read_csv('Churn_Modelling.csv')
col1, col2, col3 = st.columns(3)
col1.subheader("Input 1")
selected_var1 = col1.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var1")
with col1:
    col1.write(df[selected_var1].head())
    # use a separate figure per column so the histograms do not stack up
    fig1, ax1 = plt.subplots()
    ax1.hist(df[selected_var1])
    ax1.set_xlabel(selected_var1)
    col1.pyplot(fig1)
col2.subheader("Input 2")
selected_var2 = col2.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var2")
with col2:
    col2.write(df[selected_var2].head())
    fig2, ax2 = plt.subplots()
    ax2.hist(df[selected_var2])
    ax2.set_xlabel(selected_var2)
    col2.pyplot(fig2)
col3.subheader("Input 3")
selected_var3 = col3.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var3")
with col3:
    col3.write(df[selected_var3].head())
    fig3, ax3 = plt.subplots()
    ax3.hist(df[selected_var3])
    ax3.set_xlabel(selected_var3)
    col3.pyplot(fig3)
Tabs are helpful when content is too wide to fit neatly into columns, even in wide mode, or when you want to focus the user’s attention on a single piece of content at a time.
st.tabs functions similarly to st.columns, but instead of specifying the number of tabs, we provide the tab names and use the familiar with statements to add content to each tab. The following code transforms the columns in our recent Streamlit app into tabs:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
st.title('Customer Churn')
st.write(
    """This app analyzes the user inputs `Geography`, `CreditScore`,
`Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`"""
)
df = pd.read_csv('Churn_Modelling.csv')
tab1, tab2, tab3 = st.tabs(["Input 1", "Input 2", "Input 3"])
tab1.subheader("Input 1")
selected_var1 = tab1.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var1")
with tab1:
    # one figure per tab so each tab shows only its own histogram
    fig1, ax1 = plt.subplots()
    ax1.hist(df[selected_var1])
    ax1.set_xlabel(selected_var1)
    tab1.pyplot(fig1)
tab2.subheader("Input 2")
selected_var2 = tab2.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var2")
with tab2:
    fig2, ax2 = plt.subplots()
    ax2.hist(df[selected_var2])
    ax2.set_xlabel(selected_var2)
    tab2.pyplot(fig2)
tab3.subheader("Input 3")
selected_var3 = tab3.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var3")
with tab3:
    fig3, ax3 = plt.subplots()
    ax3.hist(df[selected_var3])
    ax3.set_xlabel(selected_var3)
    tab3.pyplot(fig3)
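A side note on the wide mode mentioned above: Streamlit apps default to a centered, fixed-width layout, but you can opt into the full browser width with st.set_page_config, which must be the first Streamlit command in the script:
import streamlit as st
st.set_page_config(layout="wide")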
We might want to give users the ability to alter or edit the underlying data in a very user-friendly way. To help solve this, Streamlit released st.data_editor. See the code below:
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
st.title("Customer Churn")
st.write(
    """This app analyzes the user inputs `Geography`, `CreditScore`,
`Gender`, `Age`, `Tenure`, `Balance`, `NumOfProducts`, `HasCrCard`, `IsActiveMember`, `EstimatedSalary`"""
)
df = pd.read_csv('Churn_Modelling.csv')
# st.data_editor displays the DataFrame as an editable grid and returns the edited copy
df = st.data_editor(df)
col1, col2, col3 = st.columns(3)
col1.subheader("Input 1")
selected_var1 = col1.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var1")
with col1:
    col1.write(df[selected_var1].head())
    fig1, ax1 = plt.subplots()
    ax1.hist(df[selected_var1])
    ax1.set_xlabel(selected_var1)
    col1.pyplot(fig1)
col2.subheader("Input 2")
selected_var2 = col2.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var2")
with col2:
    col2.write(df[selected_var2].head())
    fig2, ax2 = plt.subplots()
    ax2.hist(df[selected_var2])
    ax2.set_xlabel(selected_var2)
    col2.pyplot(fig2)
col3.subheader("Input 3")
selected_var3 = col3.selectbox('What do you want the x variable to be?',
                               ["Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary"],
                               key="selected_var3")
with col3:
    col3.write(df[selected_var3].head())
    fig3, ax3 = plt.subplots()
    ax3.hist(df[selected_var3])
    ax3.set_xlabel(selected_var3)
    col3.pyplot(fig3)
if st.button("Save data and overwrite:"):
    df.to_csv("data.csv", index=False)
    st.write("Saved!")
This section highlights community-driven development using Streamlit Components. When building Streamlit, the team developed a structured way, called Components, for other developers to create additional features on top of the existing open-source Streamlit framework. Streamlit Components give developers the flexibility to create tools that are essential to their workflows or just enjoyable and innovative.
As Streamlit’s popularity as a framework has grown, so has the range of its Components. It seems like there’s always a new, interesting component to explore and try in my own apps! This section will focus on discovering and using community-made Streamlit Components:
streamlit-aggrid
streamlit-plotly-events
streamlit-lottie
pandas-profiling
st-folium
streamlit-extras
We've already explored a few methods for displaying DataFrames in our Streamlit apps, like the built-in st.write and st.dataframe functions. However, streamlit-aggrid takes it a step further by providing a visually appealing, interactive, and editable version of st.dataframe. This library is built on top of the JavaScript framework AgGrid (https://www.ag-grid.com/).
The best way to get familiar with streamlit-aggrid is to dive in and give it a try! Let’s start with an example using the churn dataset, aiming to create an interactive and editable DataFrame, something AgGrid excels at.
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid
st.title("Streamlit Churn Example")
df = pd.read_csv("Churn.csv")
AgGrid(df)
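AgGrid(df) renders a read-only grid by default. Below is a short sketch of enabling cell editing with the GridOptionsBuilder helper that ships with streamlit-aggrid (usage as I understand the library's API; check the docs for your installed version):
from st_aggrid import AgGrid, GridOptionsBuilder
gb = GridOptionsBuilder.from_dataframe(df)
gb.configure_default_column(editable=True)  # allow in-place cell edits
grid_response = AgGrid(df, gridOptions=gb.build())
edited_df = grid_response["data"]  # the grid returns the (possibly edited) data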
We previously discussed deploying Streamlit apps with Streamlit Community Cloud, learning how to make deployment quick, easy, and effective for most applications. However, Streamlit Community Cloud has limitations, such as a maximum of 1 GB RAM per app, making it unsuitable for resource-intensive applications.
This brings us to an alternative approach: integrating Streamlit with Snowflake. The paid Streamlit version is now part of the Snowflake ecosystem, which may initially seem restrictive. However, Snowflake’s popularity and performance can be advantageous, especially if your organization already utilizes Snowflake. Beyond Snowflake, we will also look at two other deployment options, Heroku and Hugging Face; their prerequisites are listed below.
The prerequisites for Heroku deployment are:
Heroku account: Heroku is a widely used platform for hosting applications, models, and APIs, and is owned by Salesforce. To create a free account, go to https://signup.heroku.com.
Heroku Command-Line Interface (CLI): The Heroku CLI is essential for running Heroku commands. Download it by following the instructions at https://devcenter.heroku.com/articles/heroku-cli.
The prerequisite for Hugging Face deployment is a Hugging Face account, which you can create for free at https://huggingface.co/join.
At a high level, deploying a Streamlit application for internet users to access essentially means renting a remote computer and instructing it to launch your app. Deciding which platform to use can be challenging without experience in system deployment or testing each option firsthand, but a few general guidelines can help.
The two primary considerations in this choice are system flexibility and setup time. These factors are often inversely related. With Streamlit Community Cloud, for instance, you can’t specify “Run this on GPUs with 30 GiB of memory,” but in return, you get a streamlined process—simply point Streamlit Community Cloud to your GitHub repository, and it handles the setup. In contrast, Hugging Face and Heroku offer more flexibility via paid plans but require a bit more initial configuration (as you’ll see!).
In short, if you’re already using a platform (such as Snowflake, Hugging Face, or Heroku), it’s best to continue with that. If you’re not yet on a platform or are a hobbyist programmer, Streamlit Community Cloud is an excellent choice.
For applications needing higher computational power in machine learning or NLP, Hugging Face is ideal. If you need extensive compute resources on a versatile platform with many integrations, Heroku is a great choice.
Let’s dive into setting up with Hugging Face!
Hugging Face provides a comprehensive suite of tools tailored to machine learning, widely favored by machine learning engineers and NLP professionals. It enables developers to easily access pre-trained models via the transformers library (which we’ve already used!) and also supports hosting custom models, datasets, and even data apps through its feature called Hugging Face Spaces. A Space is essentially a place to deploy an app on Hugging Face’s infrastructure, making it simple to get started.
To begin, visit https://huggingface.co/spaces and click on the "Create new Space" button.
Once logged in, you'll see several setup options. Here, you can name your Space, select a license, pick the Space type (Gradio is another popular data app tool owned by Hugging Face), choose the hardware (both free and paid options are available), and decide whether to make the Space public or private. The screenshot below shows the options I've selected (you can choose any name for your Space, but the remaining settings should align with these).
Now, click the Create Space button at the bottom of the page (the Space creation form lives at https://huggingface.co/new-space). Once the Space is created, clone it to your personal computer using the following Git command:
git clone https://huggingface.co/spaces/mehdi59/new-churn-modeling
Create your Streamlit app.py file, then commit and push it:
git add app.py
git commit -m "Add application file"
git push
Note that you must create a new Hugging Face access token to use as your Git credential. You may also need to add the token to the remote URL in .git/config inside your repo; see https://discuss.huggingface.co/t/cant-push-to-new-space/35319.
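A sketch of that workaround, with <user> and <token> as placeholders for your Hugging Face username and access token:
git remote set-url origin https://<user>:<token>@huggingface.co/spaces/mehdi59/new-churn-modeling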
If we go back to our code and look at the README.md file, we will notice that there are a bunch of useful configuration options, such as changing the emoji or the title. Hugging Face also allows us to specify other parameters, like the Python version. The full documentation is linked in your README.md.
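For reference, the front matter at the top of a Streamlit Space's README.md looks roughly like this (the values shown are illustrative for our Space):
---
title: New Churn Modeling
emoji: 📊
colorFrom: blue
colorTo: green
sdk: streamlit
sdk_version: 1.10.0
app_file: app.py
pinned: false
---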
Here is the deployed app on Hugging Face: https://huggingface.co/spaces/mehdi59/new-churn-modeling
And that is it for deploying Streamlit apps on Hugging Face!
One drawback you might notice with deploying on Hugging Face Spaces is that it involves a few more setup steps than Streamlit Community Cloud, and Hugging Face's branding occupies a significant portion of the app’s layout. This is understandable, as Hugging Face aims to ensure that users recognize the app as hosted on their platform. For those already using Hugging Face, this branding can be beneficial, as it allows them to easily clone the Space and explore popular models. However, when sharing apps with non-ML colleagues or friends, this branding may detract from the viewing experience.
Another limitation of Hugging Face Spaces is that they often lag behind in supporting the latest Streamlit versions. Currently, Spaces are using Streamlit 1.10.0, while the latest release is version 1.16.0. So, if you’re looking for the latest Streamlit features, Spaces might not yet support them. Although this won’t impact most Streamlit apps, it’s something to consider when selecting a deployment platform.