Quickstart

[13]:
from typing import Union, Optional, List, Dict, Tuple, Any
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
import statsmodels.formula.api as smf

import myforestplot as mfp

# Print the run date and the Python, IPython and imported-package versions
# (see the output below) so readers can reproduce the environment.
%load_ext watermark
%watermark -n -u -v -iv -w -p graphviz
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Last updated: Thu Sep 22 2022

Python implementation: CPython
Python version       : 3.9.7
IPython version      : 8.0.1

graphviz: not installed

numpy       : 1.21.5
matplotlib  : 3.5.1
pandas      : 1.4.1
statsmodels : 0.13.2
myforestplot: 0.2.2

Watermark: 2.3.1

Create pretty statsmodels result dataframe

The Titanic dataset can be downloaded from https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv and is read below from a local file named titanic.csv.

[14]:
# Load the local CSV, keep only the columns of interest, and drop every row
# that has a missing value in any of them.
titanic = (
    pd.read_csv("titanic.csv")
    .loc[:, ["survived", "pclass", "sex", "age", "embark_town"]]
    .dropna()
)
[15]:
# Logistic regression of survival on sex, age and town of embarkation.
survival_model = smf.logit("survived ~ sex + age + embark_town", data=titanic)
res = survival_model.fit()
Optimization terminated successfully.
         Current function value: 0.509889
         Iterations 6
[16]:
# Display the summary table of the fitted logit model.
res.summary()
[16]:
Logit Regression Results
Dep. Variable: survived No. Observations: 712
Model: Logit Df Residuals: 707
Method: MLE Df Model: 4
Date: Thu, 22 Sep 2022 Pseudo R-squ.: 0.2444
Time: 22:33:05 Log-Likelihood: -363.04
converged: True LL-Null: -480.45
Covariance Type: nonrobust LLR p-value: 1.209e-49
coef std err z P>|z| [0.025 0.975]
Intercept 2.2046 0.322 6.851 0.000 1.574 2.835
sex[T.male] -2.4760 0.191 -12.976 0.000 -2.850 -2.102
embark_town[T.Queenstown] -1.8156 0.535 -3.393 0.001 -2.864 -0.767
embark_town[T.Southampton] -1.0069 0.237 -4.251 0.000 -1.471 -0.543
age -0.0081 0.007 -1.233 0.217 -0.021 0.005

After fitting, convert the result into a tidy dataframe.

[17]:
# Variables to include in the result table, in the order they should appear.
order = ["age", "sex", "embark_town"]
# Variables passed as continuous (the remaining ones in `order` are shown
# per item in the output table).
cont_cols = ["age"]
# NOTE(review): the `risk_pretty` column displayed below shows two decimals
# although fml=".3f" is requested here — confirm the format string is honoured
# by the installed myforestplot version, or re-run to refresh the output.
df_sum = mfp.statsmodels_pretty_result_dataframe(titanic, res,
                                                 order=order,
                                                 cont_cols=cont_cols,
                                                 fml=".3f",
                                                 )
[18]:
# Show the dataframe returned by mfp.statsmodels_pretty_result_dataframe.
df_sum
[18]:
category item 0 1 risk pvalues nobs risk_pretty
3 age age 0.979300 1.004771 0.991954 2.174575e-01 NaN 0.99 (0.98, 1.00)
0 sex male 0.057848 0.122213 0.084082 1.668679e-38 453.0 0.08 (0.06, 0.12)
4 sex female NaN NaN NaN NaN 259.0 Ref.
1 embark_town Queenstown 0.057027 0.464428 0.162742 6.902284e-04 28.0 0.16 (0.06, 0.46)
2 embark_town Southampton 0.229654 0.581167 0.365332 2.125424e-05 554.0 0.37 (0.23, 0.58)
5 embark_town Cherbourg NaN NaN NaN NaN 130.0 Ref.

The forest plot requires a dataframe with the structure shown above. If you prepare such a dataframe yourself, you do not need to use `mfp.statsmodels_pretty_result_dataframe`.

Create a forest plot

[19]:
# Work on a copy so the plotting-specific edits below leave df_sum untouched.
df = df_sum.copy()
[20]:
# Rows with no count (NaN in "nobs") receive the full sample size; the column
# is then cast to integer.
total_n = titanic.shape[0]
df["nobs"] = df["nobs"].fillna(total_n).astype(int)
[21]:
plt.rcParams["font.size"] = 8
fp = mfp.SimpleForestPlot(df=df, ratio=(8, 3), figsize=(7, 3), dpi=150)

# ax2: error bars for the odds ratios, with a reference line at OR = 1.
fp.errorbar(errorbar_kwds=None)
or_axis = fp.ax2
or_axis.set_xlim([0, 1.5])
or_axis.set_xticks([0, 0.5, 1, 1.5])
or_axis.set_xlabel("OR")
or_axis.axvline(x=1, ymin=0, ymax=1.0, color="black", alpha=0.5)

# ax1: text columns, each placed at an x position inside the limits set here.
fp.ax1.set_xlim([0.15, 1])
fp.embed_strings(
    "category",
    0.1,
    header="Category",
    duplicate_hide=True,
    text_kwds={"fontweight": "bold"},
    header_kwds={"fontweight": "bold"},
)
# The "age" item label is blanked because it only repeats the category name.
fp.embed_strings("item", 0.36, header="", replace={"age": ""})
for column, x_pos, title in (("nobs", 0.60, "N"),
                             ("risk_pretty", 0.72, "OR (95% CI)")):
    fp.embed_strings(column, x_pos, header=title)
fp.horizontal_variable_separators()
plt.show()
../_images/notebooks_1_quickstart_13_0.png

With `vertical_align=True`, each category name is placed just above its item values.

[22]:
plt.rcParams["font.size"] = 8
fp = mfp.SimpleForestPlot(
    df=df,
    ratio=(8, 3),
    figsize=(5, 3),
    dpi=150,
    vertical_align=True,
)

# ax2: error bars for the odds ratios, with a reference line at OR = 1.
fp.errorbar(errorbar_kwds=None)
or_axis = fp.ax2
or_axis.set_xlim([0, 1.5])
or_axis.set_xticks([0, 0.5, 1, 1.5])
or_axis.set_xlabel("OR")
or_axis.axvline(x=1, ymin=0, ymax=1.0, color="black", alpha=0.5)

# ax1: text columns; category labels are written with embed_cate_strings
# in this vertically aligned layout.
fp.ax1.set_xlim([0.35, 1])
fp.embed_cate_strings(
    "category",
    0.30,
    header="Category",
    text_kwds={"fontweight": "bold"},
    header_kwds={"fontweight": "bold"},
)
# The "age" item label is blanked because it only repeats the category name.
fp.embed_strings("item", 0.36, header="", replace={"age": ""})
for column, x_pos, title in (("nobs", 0.60, "N"),
                             ("risk_pretty", 0.72, "OR (95% CI)")):
    fp.embed_strings(column, x_pos, header=title)
fp.horizontal_variable_separators()
plt.show()
../_images/notebooks_1_quickstart_15_0.png

An “All participants” row can be added manually.

[23]:
plt.rcParams["font.size"] = 8
fp = mfp.SimpleForestPlot(ratio=(8,3), dpi=150, figsize=(5,3),
                        df=df, vertical_align=True)
fp.errorbar(errorbar_kwds=None)
fp.ax2.set_xlim([0, 1.5])
fp.ax2.set_xticks([0, 0.5, 1, 1.5])
fp.ax2.set_xlabel("OR")
# Reference line at OR = 1.
fp.ax2.axvline(x=1, ymin=0, ymax=1.0, color="black", alpha=0.5)

fp.ax1.set_xlim([0.35, 1])
# Explicit y position shared by the column headers of this cell.
y_header=1.7
fp.embed_cate_strings("category", 0.30, header="Category",
                 text_kwds=dict(fontweight="bold"),
                 header_kwds=dict(fontweight="bold"),
                 y_header=y_header
                 )
# No y_header here: the "All participants" label is left at the default
# header position instead of the explicit one used for the other columns.
fp.embed_strings("item", 0.36, header="All participants", replace={"age":""})
fp.embed_strings("nobs", 0.60, header="N", y_header=y_header)
# Total sample size written in the "N" column (same x position). It is derived
# from the data instead of being hard-coded, so it stays correct if the
# dataset or the row filtering changes.
fp.ax1.text(0.60, 0.8, str(titanic.shape[0]))
fp.embed_strings("risk_pretty", 0.72, header="OR (95% CI)", y_header=y_header)
fp.horizontal_variable_separators()
plt.show()
../_images/notebooks_1_quickstart_17_0.png

Customize your forestplot

SimpleForestPlot is a simplified version of ForestPlot with one text field and one figure field. ForestPlot lets you customize multiple text and figure fields.

[24]:
plt.rcParams["font.size"] = 8
# Three panels with width ratio 5:5:3; fig_ax_index marks panel 2 as the
# figure panel, and the text columns below are embedded in panels 1 and 3.
fp = mfp.ForestPlot(df=df,
                    ratio=[5,5,3],
                    fig_ax_index=[2],
                    dpi=150,
                    figsize=(6,3),
                    yticks_show=False,
                    vertical_align=True)
fp.errorbar(index=2, errorbar_kwds=None)
fp.axd[2].set_xlim([0, 1.5])
fp.axd[2].set_xticks([0, 0.5, 1, 1.5])
fp.axd[2].set_xlabel("OR")
# Reference line at OR = 1.
fp.axd[2].axvline(x=1, ymin=0, ymax=1.0, color="black", alpha=0.5)

fp.axd[1].set_xlim([0.50, 1])
fp.embed_cate_strings(1, "category", 0.5, header="Category",
                 text_kwds=dict(fontweight="bold"),
                 header_kwds=dict(fontweight="bold"),
                 )
fp.embed_strings(1, "item", 0.55, header="", replace={"age":""})
fp.embed_strings(1, "nobs", 0.86, header="N")
# The formatted OR (95% CI) text goes into panel 3 rather than panel 1.
fp.embed_strings(3, "risk_pretty", 0.1, header="OR (95% CI)")
fp.horizontal_variable_separators()
# Render explicitly, as the other plotting cells do, so the cell also works
# outside the inline backend.
plt.show()
../_images/notebooks_1_quickstart_19_0.png

You can see various forest plot patterns on the Gallery page.

[ ]: