I have the following dataframe
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib 
print('matplotlib: {}'.format(matplotlib.__version__))
# 3.5.3
df=pd.DataFrame({'Type': [ 'Sentence', 'Array', 'String', '-','-', 'Sentence', 'Array', 'String', '-','-', 'Sentence'],
                 'Length': [42,21,11,6,6,42,21,11,6,6,42],
                 'label': [1,1,0,0,0,1,1,0,0,0,1],
                 })
print(df)
#       Type     Length  label
#0   Sentence      42      1
#1      Array      21      1
#2     String      11      0
#3          -       6      0
#4          -       6      0
#5   Sentence      42      1
#6      Array      21      1
#7     String      11      0
#8          -       6      0
#9          -       6      0
#10  Sentence      42      1
I want to plot stacked bar chart for the arbitrary column within dataframe (either numerical e.g. Length column or categorical e.g. Type column) and stack with respect to label column using annotations of both count/percentage, where small values of rare observations are also displayed. The following script gives me the wrong results:
ax = df.plot.bar(stacked=True)
#ax = df[["Type","label"]].plot.bar(stacked=True)
#ax = df.groupby('Type').size().plot(kind='bar', stacked=True)
ax.legend(["0: normanl", "1: Anomaly"])
for p in ax.patches:
    width, height = p.get_width(), p.get_height()
    x, y = p.get_xy() 
    ax.text(x+width/2, 
            y+height/2, 
            '{:.0f} %'.format(height), 
            horizontalalignment='center', 
            verticalalignment='center')
I can imagine that somehow I need to calculate the counts of the selected column with respect to label column:
## counts will be used for the labels
counts = df.apply(lambda x: x.value_counts())
## percents will be used to determine the height of each bar
percents = counts.div(counts.sum(axis=1), axis=0)
I tried to solve the problem by using df.groupby(['selcted column', 'label'] unsuccessfully. I collected all possible solutions in this Google Colab Notebook nevertheless I couldn't find a straightforward way to adapt into dataframe.
So far I have tried following solution inspired by this post to solve the problem by using df.groupby(['selcted column', 'label'] unsuccessfully and I got TypeError: unsupported operand type(s) for +: 'int' and 'str' for total = sum(dff.sum()) can't figure out what is the problem? in indexing or df transformation.
BTW I collected all possible solutions in this Google Colab Notebook nevertheless I couldn't find a straightforward way to adapt into dataframe via Mathplotlib. So I'm looking for an elegant way of using Seaborn or plotly.
df = df.groupby(["Type","label"]).count()
#dfp_Type   = df.pivot_table(index='Type', columns='label', values= 'Length', aggfunc='mean') 
dfp_Type   = df.pivot_table(index='Type', columns='label', values= df.Type.size(), aggfunc='mean') 
#dfp_Length = df.pivot_table(index='Length', columns='label', values= df.Length.size(), aggfunc='mean') 
ax = dfp_Type.plot(kind='bar', stacked=True, rot=0) 
# iterate through each bar container
for c in ax.containers: labels = [v.get_height() if v.get_height() > 0 else '' for v in c]
# add the annotations
ax.bar_label(c, fmt='%0.0f%%', label_type='center')
# move the legend
ax.legend(title='Class', bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()
output:

Expected output:

 
     
    



