MAIF · guillaume-vignal · Jul 4, 2024 · Apr 29, 2024 · Apr 30, 2024 · Apr 30, 2024
diff --git a/docs/_static/tutorial/tuto-webapp01-additional_filtered.png b/docs/_static/tutorial/tuto-webapp01-additional_filtered.png
diff --git a/docs/_static/tutorial/tuto-webapp01-additional_identity_card.png b/docs/_static/tutorial/tuto-webapp01-additional_identity_card.png
diff --git a/docs/_static/tutorial/tuto-webapp01-additional_in_dataset.png b/docs/_static/tutorial/tuto-webapp01-additional_in_dataset.png
diff --git a/docs/_static/tutorial/tuto-webapp01-additional_picking.png b/docs/_static/tutorial/tuto-webapp01-additional_picking.png
diff --git a/shapash/explainer/consistency.py b/shapash/explainer/consistency.py
@@ -25,7 +25,7 @@ def tuning_colorscale(self, values):
         Parameters
         ----------
         values: 1 column pd.DataFrame
-            values whose quantiles must be calculated
+            values whose quantiles must be calculated
         """
         desc_df = values.describe(percentiles=np.arange(0.1, 1, 0.1).tolist())
         min_pred, max_init = list(desc_df.loc[["min", "max"]].values)

diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py
diff --git a/shapash/report/project_report.py b/shapash/report/project_report.py
@@ -56,7 +56,7 @@ class ProjectReport:
         Information about the project (author, description, ...).
     x_train : pd.DataFrame
         DataFrame used for training the model.
-    y_test : pd.Series or pd.DataFrame
+    y_train : pd.Series or pd.DataFrame
         Series of labels in the train set.
     y_test : pd.Series or pd.DataFrame
         Series of labels in the test set.
@@ -393,6 +393,10 @@ def display_model_explainability(self):
             for feature_label in sorted(list_cols_labels):
                 feature = self.explainer.inv_features_dict.get(feature_label, feature_label)
                 fig = self.explainer.plot.contribution_plot(feature, label=label, max_points=200)
+                # Apparently markers are not supported during conversion into HTML
+                for el in fig.data:
+                    if el.type == "bar":
+                        el.marker.color = "lightgrey"
                 explain_contrib_data.append(
                     {
                         "feature_index": int(inv_columns_dict[feature]),

diff --git a/shapash/style/colors.json b/shapash/style/colors.json
@@ -27,6 +27,7 @@
       "rgb(0, 98, 128)",
       "rgb(0, 70, 92)"
     ],
+    "contrib_distribution": "rgb(211, 211, 211)",
     "featureimp_bar": {
       "1": "rgba(0, 154, 203, 1)",
       "2": "rgba(223, 103, 0, 0.8)"
@@ -126,6 +127,7 @@
       "rgb(255, 123, 38)",
       "rgb(255, 77, 7)"
     ],
+    "contrib_distribution": "rgb(211, 211, 211)",
     "featureimp_bar": {
       "1": "rgba(244, 192, 0, 1.0)",
       "2": "rgba(52, 55, 54, 0.7)"

diff --git a/shapash/style/style_utils.py b/shapash/style/style_utils.py
@@ -102,6 +102,7 @@ def define_style(palette):
     }
     style_dict["featureimp_groups"] = list(palette["featureimp_groups"].values())
     style_dict["init_contrib_colorscale"] = palette["contrib_colorscale"]
+    style_dict["contrib_distribution"] = palette["contrib_distribution"]
     style_dict["violin_area_classif"] = list(palette["violin_area_classif"].values())
     style_dict["prediction_plot"] = list(palette["prediction_plot"].values())
     style_dict["violin_default"] = palette["violin_default"]

diff --git a/shapash/utils/model.py b/shapash/utils/model.py
@@ -102,7 +102,7 @@ def predict_error(y_target, y_pred, case):
     """
     prediction_error = None
     if y_target is not None and y_pred is not None and case == "regression":
-        if (y_target == 0).any()[0]:
+        if (y_target == 0).any().iloc[0]:
             prediction_error = abs(y_target.values - y_pred.values)
         else:
             prediction_error = abs((y_target.values - y_pred.values) / y_target.values)

diff --git a/shapash/webapp/smart_app.py b/shapash/webapp/smart_app.py
@@ -4,7 +4,7 @@
 import copy
 import random
 import re
-from math import log10
+from math import isfinite, log10
 
 import dash
 import dash_bootstrap_components as dbc
@@ -193,7 +193,7 @@ def init_data(self, rows=None):
             typ = self.dataframe[col].dtype
             if typ == float:
                 std = self.dataframe[col].std()
-                if std != 0:
+                if isfinite(std) and std != 0:
                     digit = max(round(log10(1 / std) + 1) + 2, 0)
                     self.round_dataframe[col] = self.dataframe[col].map(f"{{:.{digit}f}}".format).astype(float)
 
@@ -1778,7 +1778,7 @@ def update_feature_selector(feature, data, label, click_zoom, points, violin, gf
             if feature is not None and feature["points"][0]["curveNumber"] == 0 and len(gfi_figure["data"]) == 2:
                 subset = get_indexes_from_datatable(data, list_index)
             else:
-                subset = None
+                subset = self.list_index
 
             fs_figure = self.explainer.plot.contribution_plot(
                 col=selected_feature,
@@ -1834,13 +1834,16 @@ def update_index_id(
             """
             ctx = dash.callback_context
             selected = None
-            if ctx.triggered[0]["prop_id"] == "feature_selector.clickData":
-                selected = click_data["points"][0]["customdata"][1]
-            elif ctx.triggered[0]["prop_id"] == "prediction_picking.clickData":
-                selected = prediction_picking["points"][0]["customdata"]
-            elif ctx.triggered[0]["prop_id"] == "dataset.active_cell":
-                selected = data[cell["row"]]["_index_"]
-            elif ("del_dropdown_button" in ctx.triggered[0]["prop_id"]) & (None in nclicks_del):
+            try:
+                if ctx.triggered[0]["prop_id"] == "feature_selector.clickData":
+                    selected = click_data["points"][0]["customdata"][1]
+                elif ctx.triggered[0]["prop_id"] == "prediction_picking.clickData":
+                    selected = prediction_picking["points"][0]["customdata"]
+                elif ctx.triggered[0]["prop_id"] == "dataset.active_cell":
+                    selected = data[cell["row"]]["_index_"]
+                elif ("del_dropdown_button" in ctx.triggered[0]["prop_id"]) & (None in nclicks_del):
+                    selected = current_index_id
+            except KeyError:
                 selected = current_index_id
             return selected, True
 

diff --git a/shapash/webapp/utils/explanations.py b/shapash/webapp/utils/explanations.py
@@ -41,7 +41,10 @@ def __init__(self):
                                 feature positively impacts the prediction. \n
                                 Positive impact means that the variable favors a higher probability
                                 returned by the model or
-                                increases the predicted value (in case of regression problem).
+                                increases the predicted value (in case of regression problem).\n
+                                In gray, the distribution of feature values is represented, either
+                                by a curve if the values are considered continuous or by bars if
+                                they are considered discrete.
                                 """
         self.prediction_picking = """
                                 **What are the samples with correct or wrong predictions?**

diff --git a/shapash/webapp/utils/utils.py b/shapash/webapp/utils/utils.py
@@ -1,4 +1,5 @@
 import pandas as pd
+from pandas.api.types import is_any_real_numeric_dtype
 
 
 def round_to_k(x, k):
@@ -37,7 +38,7 @@ def get_index_type(data):
     str
         Type numeric or text of the dataset index
     """
-    if data.index.is_numeric():
+    if is_any_real_numeric_dtype(data.index):
         return "number"
     else:
         return "text"