I am trying to plot two variables with different ranges using split violinplots in seaborn.
Here's what I have done so far:
# Question script: draw metric1 on the left y-axis and metric2 on a twin
# right y-axis, clipping each violin to one half so the two can share a slot.
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd  # required by the pd.read_csv calls below

df1 = pd.read_csv('dummy_metric1.csv')
df2 = pd.read_csv('dummy_metric2.csv')

fig, ax2 = plt.subplots()
sns.set_style('white')
palette1 = 'Set2'
palette2 = 'Set1'
colors_list = ['#78C850', '#F08030', '#6890F0', '#A8B820', '#F8D030', '#E0C068', '#C03028', '#F85888', '#98D8D8']

# First metric: no ax= is given, so seaborn draws on the current axes.
ax1 = sns.violinplot(y=df1.Value, x=df1.modality, hue=df1.metric, palette=palette1, inner="stick")
xlim = ax1.get_xlim()
ylim = ax1.get_ylim()
for violin in ax1.collections:
    # Clip every violin body to the left half of its own bounding box.
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax1.transData))
ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
ax1.set_title("dummy")
ax1.set_ylabel("metric1")
ax1.set_xlabel("Modality")
ax1.set_xticklabels(ax1.get_xticklabels(), rotation=45, ha='right')
ax1.legend_.remove()

# Second metric on a twin y-axis so it gets its own value range.
ax2 = ax1.twinx()
ax2 = sns.violinplot(y=df2.Value, x=df2.modality, hue=df2.metric, palette=palette2, inner=None)
xlim = ax2.get_xlim()
ylim = ax2.get_ylim()
for violin in ax2.collections:
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    # NOTE: this rectangle again starts at x0, i.e. it keeps the LEFT half;
    # this is the behaviour the question is asking about.
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax2.transData))
ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
ax2.set_ylabel("Metric2")
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')
ax2.legend_.remove()

fig.tight_layout()
plt.show()
However, I am unable to use the right portion of the ax2 violins. Here is the output.
When I do violin.set_clip_path(plt.Rectangle((width/2, y0), width / 2, height, transform=ax2.transData)) I get this result:
Can someone explain what I am missing? Also, how can I manage having inner="stick"?
TIA
CodePudding user response:
Here is an approach using split=True and dummy data to force a split with empty halves. For the left half, metric is set to 1 for the real data, and to 2 for the dummy data. And vice versa for the right half. We need to make sure all dataframes use the same categorical order for the modality column, to avoid a mix-up.
# Split violins on twin axes: each real dataset is concatenated with a single
# NaN "dummy" row carrying the other hue level, so that split=True draws the
# real data on one side and leaves the other side empty.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

sns.set_style('white')

# Left half: real data has metric == 1, the dummy row has metric == 2.
# All frames share the same categorical order ['a', 'b', 'c'] for `modality`.
df1 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 30), ['a', 'b', 'c']),
                    'Value': np.random.rand(30) * 25 + 50})
df1['metric'] = 1
df1_dummy = pd.DataFrame({'modality': pd.Categorical.from_codes([0], ['a', 'b', 'c']), 'Value': [np.nan]})
df1_dummy['metric'] = 2

# Right half: real data has metric == 2, the dummy row has metric == 1.
df2 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 100), ['a', 'b', 'c']),
                    'Value': np.random.randn(100).cumsum() / 10 + 1})
df2['metric'] = 2
df2_dummy = pd.DataFrame({'modality': pd.Categorical.from_codes([0], ['a', 'b', 'c']), 'Value': [np.nan]})
df2_dummy['metric'] = 1

ax1 = sns.violinplot(y='Value', x='modality', hue='metric', palette=['turquoise', 'red'],
                     inner="stick", split=True, data=pd.concat([df1, df1_dummy]))
ax1.legend_.remove()
ax1.set_ylabel('metric 1')

# Second metric goes on a twin y-axis; ax= directs seaborn to that axes.
ax2 = ax1.twinx()
sns.violinplot(y='Value', x='modality', hue='metric', palette=['turquoise', 'red'],
               inner="stick", split=True, data=pd.concat([df2, df2_dummy]), ax=ax2)
ax2.set_ylabel('metric 2')

plt.tight_layout()
plt.show()
PS: Here is a possible adaptation of the original code:
- using `plt.Rectangle((x0 + width/2, y0), width/2, height)` to clip the violins on `ax2`
- using the `ax=` parameter of `sns.violinplot()` to indicate the correct subplot
- not changing the xlim and ylim of either ax
- making sure both dataframes use the same categorical order for `modality`
- to clip the "inner" lines, for `ax1`: loop through the lines, get their `x0` and `x1`, and shorten the line to `x0` and `(x0 + x1)/2`
- similar for `ax2`: loop through the lines, get their `x0` and `x1`, and shorten the line to `(x0 + x1)/2` and `x1`
- update the legend for `ax2` combining it with the legend of `ax1`, and then remove `ax1`'s legend
# Adaptation of the question's approach: draw full violins on two twin axes,
# then clip the ax1 violins (and their inner sticks) to the left half and the
# ax2 violins (and sticks) to the right half.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Both frames use the same categorical order ['a', 'b', 'c'] for `modality`.
df1 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 30), ['a', 'b', 'c']),
                    'Value': np.random.rand(30) * 25 + 50})
df1['metric'] = 1
df2 = pd.DataFrame({'modality': pd.Categorical.from_codes(np.random.randint(0, 3, 100), ['a', 'b', 'c']),
                    'Value': np.random.randn(100).cumsum() / 10 + 1})
df2['metric'] = 2

fig, ax1 = plt.subplots()
sns.set_style('white')
palette1 = 'Set2'
palette2 = 'Set1'

sns.violinplot(y=df1.Value, x=df1.modality, hue=df1.metric, palette=palette1, inner="stick", ax=ax1)
for violin in ax1.collections:
    # Keep only the left half of each violin's bounding box.
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax1.transData))
for line in ax1.lines:
    # Shorten each inner stick to run from its left end to its midpoint.
    x = line.get_xdata()
    line.set_xdata([x[0], np.mean(x)])
ax1.set_ylabel("metric1")
ax1.set_xlabel("Modality")

ax2 = ax1.twinx()
sns.violinplot(y=df2.Value, x=df2.modality, hue=df2.metric, palette=palette2, inner="stick", ax=ax2)
for violin in ax2.collections:
    # Keep only the right half: the clip rectangle starts at the box centre.
    bbox = violin.get_paths()[0].get_extents()
    x0, y0, width, height = bbox.bounds
    violin.set_clip_path(plt.Rectangle((x0 + width / 2, y0), width / 2, height, transform=ax2.transData))
for line in ax2.lines:
    # Shorten each inner stick to run from its midpoint to its right end.
    x = line.get_xdata()
    line.set_xdata([np.mean(x), x[1]])
ax2.set_ylabel("Metric2")
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')

# Merge both legends into one on ax2, then drop ax1's own legend.
# NOTE: Matplotlib 3.7 renamed `Legend.legendHandles` to `legend_handles`;
# on newer versions use that attribute instead.
ax2.legend(handles=ax1.legend_.legendHandles + ax2.legend_.legendHandles, title='Metric')
ax1.legend_.remove()

fig.tight_layout()
plt.show()



