import sys
import os
parent_dir = os.path.abspath("../../libraries/surrogate_modelling")
sys.path.insert(0, parent_dir)

from distributions import *
from simparameter import SimParameter
from simparameter_set import SimParamSet
from surrogate_model import SurrogateModel
import pandas as pd
import matplotlib.pyplot as plt
from digital_twin import DigitalTwin
import utils

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

param_names = ["e1", "e2", "e3", "g1", "g2", 'q']

P1 = SimParameter("e1", UniformDistribution(6, 12))
P2 = SimParameter("e2", UniformDistribution(10, 13))
P3 = SimParameter("e3", UniformDistribution(6, 12))
P4 = SimParameter("g1", UniformDistribution(400, 750))
P5 = SimParameter("g2", UniformDistribution(200, 500))
P6 = SimParameter('q', UniformDistribution(5, 100))

# Create a set to store simulation parameters.
Q = SimParamSet()  

# Add the defined parameters (mass and stiffness) to the parameter set.
Q.add(P1)
Q.add(P2)
Q.add(P3)
Q.add(P4)
Q.add(P5)
Q.add(P6)

Q_df = pd.read_csv('../../demo/data/YB_data/x_df.csv')

QoI_df = pd.read_csv('../../demo/data/YB_data/y_df.csv')
QoI_names = QoI_df.columns.to_list()

method = "gPCE"

# Model configurations
match method:
    # DNN model configurations
    case "DNN":
        config = {  
            'init_config' : {
                'layers': [
                    {'neurons': 512, 'activation': 'relu', 'dropout': 0.2},
                    {'neurons': 256, 'activation': 'sigmoid', 'dropout': 0.2},
                    {'neurons': 256, 'activation': 'sigmoid', 'dropout': 0.2},
                    {'neurons': 128, 'activation': 'relu', 'dropout': None},
                    ],
                'outputAF': 'tanh'
                },
            'train_config' : {
                'optimizer': 'Adam',
                'loss': 'MSE',
                'epochs': 100,
                'batch_size': 32,
                'k_fold': None,
                'early_stopping': {
                    'patience': 25,
                    'min_delta': 0.0001}
                }
                }
    # gPCE model configurations
    case "gPCE":
        config = {
            'init_config' : {
            'p' : 4
            }, 
            'train_config' : {
                'k_fold': 9
                }
        }
    # GBT model configurations
    case "GBT":
        config = {
            'init_config' : {
                'gbt_method': 'xgboost'
            },
            'train_config' : {
                'max_depth': 6,
                'num_of_iter': 250,
                'k_fold': 9
                }
        }

split_config = {
        'train_test_ratio': 0.8, 
        'random_seed': 1997,
        'split_type': 'no_shuffle'
        }

# Initialize surrogate model
# This creates an instance of the SurrogateModel class using the provided sampling data (Q), 
model = SurrogateModel(Q, QoI_names, method, **config['init_config'])
# Split data into training and testing sets
model.train_test_split(Q_df, QoI_df, **split_config)
# Train the model
model.train(model.X_train, model.y_train, **config['train_config'])

----- Training started for 'gPCE' model -----
Fold 1/9
Fold 2/9
Fold 3/9
Fold 4/9
Fold 5/9
Fold 6/9
Fold 7/9
Fold 8/9
Fold 9/9
Average train loss: 0.00000005712744, Average valid loss: 0.00000006250216
----- Training ended for 'gPCE' model -----

# get mean and variance of surrogate model
mean, var = model.get_mean_and_var()
mean, var

(tensor([ 2.7743,  2.8505,  2.9367,  3.7722,  0.4081,  0.2468,  0.3018,  0.3478,
          0.2118,  0.2759,  0.1688,  0.2010,  0.1236,  0.1246,  0.0767,  0.0526,
          0.0324, -0.3133,  0.0303, -0.1712, -0.2671,  0.0265, -0.2112,  0.0208,
         -0.1516,  0.0167, -0.0911,  0.0115, -0.0339,  0.0055,  0.2243,  0.2363,
          0.2331,  0.1932,  0.2008,  0.1547,  0.1584,  0.1139,  0.1140,  0.0715,
          0.0690,  0.0314,  0.0280,  0.3136,  0.3077,  0.3117,  0.2718,  0.2689,
          0.2185,  0.2162,  0.1605,  0.1627,  0.0989,  0.1046,  0.0395,  0.0466,
          0.2118,  0.3243,  0.2859,  0.1817,  0.2752,  0.1448,  0.2164,  0.1064,
          0.1565,  0.0670,  0.0955,  0.0294,  0.0393, -0.1446, -0.3790, -0.2396,
         -0.1278, -0.3313, -0.1053, -0.2664, -0.0796, -0.2003, -0.0506, -0.1286,
         -0.0201, -0.0572, -0.1038, -0.0831, -0.0668, -0.0943, -0.0683, -0.0776,
         -0.0522, -0.0585, -0.0362, -0.0377, -0.0202, -0.0183, -0.0057, -0.5091,
         -0.1766, -0.3811, -0.4516, -0.1582, -0.3720, -0.1317, -0.2782, -0.1008,
         -0.1738, -0.0657, -0.0682, -0.0298], dtype=torch.float64),
 tensor([3.2473e-02, 3.7043e-02, 3.9851e-02, 4.3552e-02, 9.3087e-04, 2.1419e-03,
         1.7075e-03, 7.6325e-04, 1.5919e-03, 5.4404e-04, 1.0245e-03, 3.3861e-04,
         5.6002e-04, 1.5773e-04, 2.2288e-04, 3.9395e-05, 4.2350e-05, 8.9520e-03,
         4.5643e-03, 6.8811e-03, 6.5400e-03, 3.4919e-03, 4.0899e-03, 2.2733e-03,
         2.1184e-03, 1.2820e-03, 7.6736e-04, 5.2940e-04, 1.0734e-04, 1.0608e-04,
         7.6296e-03, 8.1514e-03, 8.0058e-03, 5.4964e-03, 5.8307e-03, 3.4205e-03,
         3.5764e-03, 1.7885e-03, 1.8292e-03, 6.7552e-04, 6.5817e-04, 1.2105e-04,
         1.0551e-04, 3.0554e-03, 3.1084e-03, 3.0582e-03, 2.4081e-03, 2.4837e-03,
         1.6466e-03, 1.6868e-03, 9.5205e-04, 9.9477e-04, 3.9916e-04, 4.3052e-04,
         6.9199e-05, 9.0503e-05, 2.1812e-03, 2.9197e-03, 2.7060e-03, 1.6645e-03,
         2.2218e-03, 1.1013e-03, 1.4667e-03, 6.2305e-04, 8.3035e-04, 2.5909e-04,
         3.4550e-04, 5.4786e-05, 6.9389e-05, 4.7126e-03, 3.2287e-03, 4.0428e-03,
         3.4347e-03, 2.3601e-03, 2.1595e-03, 1.4560e-03, 1.1243e-03, 7.7828e-04,
         4.0912e-04, 2.9888e-04, 6.0352e-05, 5.3782e-05, 4.2110e-05, 6.1928e-05,
         1.2479e-05, 3.6459e-05, 5.1016e-05, 2.7151e-05, 3.5769e-05, 1.6897e-05,
         2.1179e-05, 7.3721e-06, 8.9293e-06, 9.8074e-07, 1.7253e-06, 1.6884e-05,
         1.5419e-04, 1.7589e-05, 1.4200e-05, 9.5525e-05, 1.3663e-05, 4.9475e-05,
         1.3617e-05, 2.1475e-05, 1.1192e-05, 6.7341e-06, 4.4793e-06, 1.0324e-06],
        dtype=torch.float64))

model.model.mean()

tensor([ 2.7743,  2.8505,  2.9367,  3.7722,  0.4081,  0.2468,  0.3018,  0.3478,
         0.2118,  0.2759,  0.1688,  0.2010,  0.1236,  0.1246,  0.0767,  0.0526,
         0.0324, -0.3133,  0.0303, -0.1712, -0.2671,  0.0265, -0.2112,  0.0208,
        -0.1516,  0.0167, -0.0911,  0.0115, -0.0339,  0.0055,  0.2243,  0.2363,
         0.2331,  0.1932,  0.2008,  0.1547,  0.1584,  0.1139,  0.1140,  0.0715,
         0.0690,  0.0314,  0.0280,  0.3136,  0.3077,  0.3117,  0.2718,  0.2689,
         0.2185,  0.2162,  0.1605,  0.1627,  0.0989,  0.1046,  0.0395,  0.0466,
         0.2118,  0.3243,  0.2859,  0.1817,  0.2752,  0.1448,  0.2164,  0.1064,
         0.1565,  0.0670,  0.0955,  0.0294,  0.0393, -0.1446, -0.3790, -0.2396,
        -0.1278, -0.3313, -0.1053, -0.2664, -0.0796, -0.2003, -0.0506, -0.1286,
        -0.0201, -0.0572, -0.1038, -0.0831, -0.0668, -0.0943, -0.0683, -0.0776,
        -0.0522, -0.0585, -0.0362, -0.0377, -0.0202, -0.0183, -0.0057, -0.5091,
        -0.1766, -0.3811, -0.4516, -0.1582, -0.3720, -0.1317, -0.2782, -0.1008,
        -0.1738, -0.0657, -0.0682, -0.0298], dtype=torch.float64)

# set max_index for Sobol sensitivity analysis
max_index = 2

partial_variance, sobol_index = model.get_sobol_sensitivity(max_index)

# Plot Sobol Sensitivity Index
# The 'plot_sobol_sensitivity' method generates a plot of Sobol Sensitivity indices, which quantify 
# the contribution of each input parameter to the output uncertainty. 
# - max_index: Specifies the maximum index of parameters to consider in the plot.
fig = model.plot_sobol_sensitivity(max_index=max_index)

# 1 point from the data
q_sample_df = model.X_test.iloc[100:101]

# Calculate SHAP (Shapley Additive Explanations) values
# The 'get_shap_values' method computes the SHAP values for the model's test data (model.X_test). 
# SHAP values explain the contribution of each input feature to the prediction for each test sample.
# - model.X_test.iloc[:100]: The first 100 samples from the test set are selected for SHAP value computation.
# SHAP values help to interpret the model's decisions and understand the impact of each input parameter on the output.
shap_values = await model.get_shap_values(model.X_test.iloc[:100])

Message: sample size for shap values is set to 100.

  0%|          | 0/100 [00:00<?, ?it/s]

# Plot SHAP Single Waterfall Plot
# The 'plot_shap_single_waterfall' method generates a SHAP waterfall plot for a single test sample, 
# visualizing how each feature contributes to the model's prediction for that sample.
# - q_sample_df: A DataFrame containing the sample data (e.g., a specific parameter set) for which the SHAP values are calculated.
# - param_name: The name of the quantity of interest (QoI) being analyzed ('f.4' in this case).
# The SHAP waterfall plot shows the cumulative effect of each feature on the model's prediction, helping to interpret individual predictions.
fig = await model.plot_shap_single_waterfall(q=q_sample_df, param_name='f.4')

  0%|          | 0/1 [00:00<?, ?it/s]

# Generate SHAP (Shapley values) waterfall plots for multiple features
# 'q_sample_df' contains the uncertain parameters sample, which will be passed as input to the SHAP model.
# The 'plot_shap_multiple_waterfalls' function will generate individual waterfall plots

fig = await model.plot_shap_multiple_waterfalls(q=q_sample_df, param_name='freq')

  0%|          | 0/1 [00:00<?, ?it/s]

fig = await model.plot_shap_multiple_waterfalls(q=q_sample_df, param_name='S.1')

  0%|          | 0/1 [00:00<?, ?it/s]

# Plot SHAP Beeswarm Plot
# The 'plot_shap_beeswarm' method generates a SHAP beeswarm plot, which visualizes the distribution of SHAP values
# for a range of test samples, showing the impact of each feature on the model’s predictions.
# - q: The test data (model.X_test.iloc[:100]) is selected for which SHAP values are calculated and visualized.
# - param_name: The name of the quantity of interest (QoI) for which SHAP values are computed ('f.4' in this case).
# The SHAP beeswarm plot shows how different input features influence the predictions across multiple samples,
# helping to understand the global feature importance and the relationships between features and the target.
fig = await model.plot_shap_beeswarm(q=model.X_test.iloc[:100], param_name='f.4')

  0%|          | 0/100 [00:00<?, ?it/s]

# Make a prediction using the surrogate model
# - q_sample_df: A DataFrame containing the input parameter values for which the model will generate a prediction.
model.predict(q_sample_df)

array([[ 2.81226645,  2.87816751,  2.955402  ,  3.80484901,  0.43711232,
         0.29100068,  0.34135724,  0.37310518,  0.24920781,  0.29674105,
         0.19839353,  0.21698349,  0.14509418,  0.13520874,  0.08995678,
         0.05764575,  0.03804062, -0.24890221,  0.06929687, -0.11736139,
        -0.21170799,  0.0602394 , -0.16731891,  0.04793349, -0.12009466,
         0.03676505, -0.07225739,  0.0242405 , -0.02706651,  0.01116259,
         0.16636797,  0.17602263,  0.1735658 ,  0.14414382,  0.14987356,
         0.11620243,  0.11860772,  0.08623765,  0.08567523,  0.05466003,
         0.0522007 ,  0.02436639,  0.02139055,  0.36278906,  0.3568296 ,
         0.36063495,  0.31462165,  0.31220404,  0.25340066,  0.25172673,
         0.18666124,  0.18959405,  0.11573366,  0.12203901,  0.04640015,
         0.05451802,  0.24861801,  0.36805008,  0.32781299,  0.21344075,
         0.31242606,  0.17018085,  0.24604221,  0.12527097,  0.17831911,
         0.07899243,  0.10927322,  0.03482369,  0.04530489, -0.10027328,
        -0.34564293, -0.19992995, -0.09030418, -0.30264769, -0.07589632,
        -0.24419855, -0.05880264, -0.18408011, -0.03844267, -0.11859847,
        -0.01560799, -0.05299593, -0.10443864, -0.09068785, -0.07012373,
        -0.09471332, -0.07488756, -0.0777761 , -0.05759732, -0.05860807,
        -0.04029862, -0.0378872 , -0.02301609, -0.01878813, -0.00701539,
        -0.51136677, -0.16833269, -0.37839354, -0.45309181, -0.15155505,
        -0.37317291, -0.12681651, -0.27936321, -0.0976401 , -0.17501807,
        -0.06395373, -0.06936329, -0.02916887]])

qoi_m_df = pd.read_csv('../../demo/data/YB_data/z_m_df.csv')

# For Update
sigma = pd.read_csv('../../demo/data/YB_data/sigma.csv')

# Generate the simulation parameter set (random noise) based on the updated measurement error (sigma)
E = utils.generate_stdrn_simparamset(sigma.values.flatten())

nwalkers = 64  # Number of walkers (parallel chains) in the MCMC process
nburn = 1000  # Number of burn-in steps: these are discarded, as they help the MCMC chain reach equilibrium
niter = 100  # Total number of iterations for the MCMC process

# Initialize the Digital Twin model with the surrogate model and simulation parameter set (E)
DT = DigitalTwin(model, E)  # Create a DigitalTwin instance that integrates the surrogate model and the random noise (E)

# Update the Digital Twin model with the synthetic measurement data and run MCMC sampling
DT.update(qoi_m_df, nwalkers=nwalkers, nburn=nburn, niter=niter)  
# The 'update' method updates the Digital Twin using the synthetic measurement data (qoi_m_df) 
# and performs MCMC sampling with the specified parameters (number of walkers, burn-in steps, and iterations)

MCMC creating
Burning period

100%|██████████| 1000/1000 [04:24<00:00,  3.78it/s]

MCMC running

100%|██████████| 100/100 [00:24<00:00,  4.01it/s]

--- 289.75980615615845 seconds ---

# Get the posterior mean and variance from the Digital Twin model
mean_of_post, variance_of_post = DT.get_mean_and_var_of_posterior()  
# This returns the mean and variance of the posterior distribution after updating the model with data.

# Get the Maximum A Posteriori (MAP) estimate of the parameters
map = DT.get_MAP()  
# This retrieves the most likely parameter values based on the updated model.

# Plot the MCMC sampling results along with the mean and MAP values
fig = utils.plot_MCMC(model, DT, nwalkers=nwalkers, map_point=map)

Tall Timber Building with Bayesian Digital Twinning¶

System Overview¶

Probabilistic Framework¶

Key Innovations¶

Visualizations & Results¶

Conclusion¶

Imports¶

Generate data¶

Create SimParameters and SimParameterSet¶

Sampling¶

Model¶

Choosing the Model¶

Define model¶

Calculate Sobol Sensitivities¶

Calculate SHAP values¶

Update¶

Synthetic measurement¶

Set parameters¶