Data visualization: matplotlib & seaborn
Basic plots
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# inline plot (for jupyter)
%matplotlib inline
# Sample 50 evenly spaced points on [0, 10] and evaluate the sine at each one.
# `x` and `sinus` are reused by the following cells.
x = np.linspace(0, 10, num=50)
sinus = np.sin(x)

# Wide, short canvas (9 x 3 inches); draw the samples as a connected line.
plt.figure(figsize=(9, 3))
plt.plot(x, sinus)
plt.show()
data:image/s3,"s3://crabby-images/90c69/90c69b34c7bd78ff5e35c27abf03747d362338b3" alt="../_images/scipy_matplotlib_1_0.png"
# Same data as the previous figure, drawn with the "o" format string:
# circle markers only, no connecting line.
plt.figure(figsize=(9, 3))
plt.plot(x, sinus, "o")
plt.show()
# See the plt.plot documentation (e.g. help(plt.plot)) for the full list of
# color / marker abbreviations accepted in the format string.
data:image/s3,"s3://crabby-images/77c70/77c7096024833dc4d21e692e38827f5ea81c2118" alt="../_images/scipy_matplotlib_2_0.png"
# Rapid multiplot: several (x, y, format) triplets passed to a single
# plt.plot call draw several series on the same axes.
plt.figure(figsize=(9, 3))
cosinus = np.cos(x)  # reused by the "Step by step" cell below
# Format strings: "-b" solid blue line, "ob" blue circles,
# "-r" solid red line, "or" red circles.
plt.plot(x, sinus, "-b", x, sinus, "ob", x, cosinus, "-r", x, cosinus, "or")
# Axis labels and title apply to the current axes.
plt.xlabel('this is x!')
plt.ylabel('this is y!')
plt.title('My First Plot')
plt.show()
data:image/s3,"s3://crabby-images/4cf77/4cf7769c2e7893df752c98bc4efa2688239da9c7" alt="../_images/scipy_matplotlib_3_0.png"
# Step by step: one plt.plot call per curve, with every visual property
# spelled out as a keyword argument instead of a compact format string.
plt.figure(figsize=(9, 3))
plt.plot(
    x, sinus,
    color='blue', linestyle='--', linewidth=2,
    label='sinus',
)
plt.plot(
    x, cosinus,
    color='red', linestyle='-', linewidth=2,
    label='cosinus',
)
# The legend entries come from the `label` given to each curve.
plt.legend()
plt.show()
data:image/s3,"s3://crabby-images/55e3d/55e3d6836680c0402042f114e63b8cf485c6a960" alt="../_images/scipy_matplotlib_4_0.png"
Scatter (2D) plots
Load dataset
import pandas as pd
# Load the salary table, preferring the local copy and falling back to the
# copy hosted on GitHub when the local file cannot be read.
try:
    salary = pd.read_csv("../datasets/salary_table.csv")
except OSError:
    # Only I/O failures (missing file, permission problem, ...) trigger the
    # fallback. A bare `except:` would also swallow KeyboardInterrupt and
    # hide genuine bugs such as a typo in the code above.
    url = 'https://github.com/duchesnay/pystatsml/raw/master/datasets/salary_table.csv'
    salary = pd.read_csv(url)

# `df` is a second name for the same DataFrame (no copy is made).
df = salary
print(df.head())
salary experience education management
0 13876 1 Bachelor Y
1 11608 1 Ph.D N
2 18701 1 Ph.D Y
3 11283 1 Master N
4 11767 1 Ph.D N
Simple scatter with colors
# Small square canvas; scatterplot is an axes-level function, so it draws
# into the figure created here. Points are colored by education level.
plt.figure(figsize=(3, 3), dpi=100)
_ = sns.scatterplot(
    data=salary,
    x="experience",
    y="salary",
    hue="education",
)
data:image/s3,"s3://crabby-images/12124/12124b4981fff54975bef8ed7c4ba1e5c1532fc2" alt="../_images/scipy_matplotlib_8_0.png"
Legend outside
# relplot is a figure-level function: it creates its own figure, places the
# legend outside the plotting area, and returns a FacetGrid (bound to `ax`
# here, although it is not a matplotlib Axes).
ax = sns.relplot(
    data=salary,
    x="experience",
    y="salary",
    hue="education",
)
data:image/s3,"s3://crabby-images/5efc2/5efc251e5128cba098fa00707c76eaf90308192f" alt="../_images/scipy_matplotlib_10_0.png"
Linear model
# Scatter plot with one fitted regression line per education level.
# lmplot is figure-level and returns a FacetGrid (bound to `ax` here).
ax = sns.lmplot(
    data=salary,
    x="experience",
    y="salary",
    hue="education",
)
data:image/s3,"s3://crabby-images/58df3/58df3349cca1ebffed3d4ea2bce4cb4290f8599b" alt="../_images/scipy_matplotlib_12_0.png"
Scatter plot with colors and symbols
# Two categorical variables on one scatter plot: color encodes education,
# marker shape encodes management status.
ax = sns.relplot(
    data=salary,
    x="experience",
    y="salary",
    hue="education",
    style='management',
)
data:image/s3,"s3://crabby-images/3e83a/3e83a61f53d4fde8e4484687f97025ee07f17b94" alt="../_images/scipy_matplotlib_14_0.png"
Saving Figures
# Save the same sine curve in three formats; savefig picks the format from
# the file extension.
#   - sinus.png: bitmap format
#   - sinus.svg: vector format (SVG: Scalable Vector Graphics). Prefer vector
#     formats: they can be edited with Inkscape, Adobe Illustrator, Blender, etc.
#   - sinus.pdf: or PDF
for filename in ("sinus.png", "sinus.svg", "sinus.pdf"):
    plt.plot(x, sinus)
    plt.savefig(filename)
    # Close the figure so the next file starts from a fresh one.
    plt.close()
Boxplot and violin plot: one factor
Box plots are non-parametric: they display variation in samples of a statistical population without making any assumptions about the underlying statistical distribution.
# Salary distribution per management status as box plots...
ax = sns.boxplot(data=salary, x="management", y="salary")
# ...with the individual observations overlaid as jittered black points.
ax = sns.stripplot(
    data=salary,
    x="management",
    y="salary",
    color="black",
    jitter=True,
)
data:image/s3,"s3://crabby-images/f2b40/f2b40f258c29468c8f5276d18b6a548a6a598be0" alt="../_images/scipy_matplotlib_18_0.png"
# Same comparison with violin plots...
ax = sns.violinplot(data=salary, x="management", y="salary")
# ...with the raw observations overlaid as jittered white points.
ax = sns.stripplot(
    data=salary,
    x="management",
    y="salary",
    color="white",
    jitter=True,
)
data:image/s3,"s3://crabby-images/1326c/1326cd4f1e30338df7e010f78892a35e001cac9d" alt="../_images/scipy_matplotlib_19_0.png"
Boxplot and violin plot: two factors
# Second factor via `hue`: one box per (management, education) combination.
ax = sns.boxplot(data=salary, x="management", y="salary", hue="education")
# dodge=True separates the points by hue level so they line up with the
# corresponding boxes; linewidth=1 outlines each point.
ax = sns.stripplot(
    data=salary,
    x="management",
    y="salary",
    hue="education",
    dodge=True,
    jitter=True,
    linewidth=1,
)
data:image/s3,"s3://crabby-images/0b9c4/0b9c48c2f205e657e2b597fb85fce5646a564b6a" alt="../_images/scipy_matplotlib_21_0.png"
# Second factor via `hue`: one violin per (management, education) combination.
ax = sns.violinplot(data=salary, x="management", y="salary", hue="education")
# dodge=True separates the points by hue level so they line up with the
# corresponding violins; linewidth=1 outlines each point.
ax = sns.stripplot(
    data=salary,
    x="management",
    y="salary",
    hue="education",
    dodge=True,
    jitter=True,
    linewidth=1,
)
data:image/s3,"s3://crabby-images/67d15/67d15082beb1b8e71830c4208c927a97d69062d3" alt="../_images/scipy_matplotlib_22_0.png"
Distributions and density plot
# Filled kernel density estimate of salary, one curve per management status.
# displot is figure-level and returns a FacetGrid (bound to `ax` here).
ax = sns.displot(
    data=salary,
    x="salary",
    hue="management",
    kind="kde",
    fill=True,
)
data:image/s3,"s3://crabby-images/c38cb/c38cb869facecbf2c2292014505c45ff72c908cd" alt="../_images/scipy_matplotlib_24_0.png"
Multiple axis
# One stacked panel per education level, sharing the x (salary) axis so the
# densities can be compared vertically. The 3 rows assume the dataset has
# three education levels (Bachelor, Master and Ph.D appear in the printed head).
fig, axes = plt.subplots(3, 1, figsize=(9, 9), sharex=True)

# Group by the column name itself rather than a one-element list: with a list
# key, pandas >= 2.0 yields 1-tuples such as ('Bachelor',) as group names, and
# the panel titles would show the tuple repr instead of the level name.
for i, (edu, d) in enumerate(salary.groupby("education")):
    sns.kdeplot(x="salary", hue="management", data=d, fill=True, ax=axes[i], palette="muted")
    axes[i].set_title(edu)
data:image/s3,"s3://crabby-images/46ac0/46ac04c3cacd50cee88b11247666bfcf3d74723d" alt="../_images/scipy_matplotlib_26_0.png"
Pairwise scatter plots
# Grid of pairwise relationships between the variables of the table, colored
# by management status. pairplot returns a PairGrid (bound to `ax` here).
ax = sns.pairplot(data=salary, hue="management")
data:image/s3,"s3://crabby-images/13eed/13eedafa5a8e35e2bed1df8a3f634990d72051dc" alt="../_images/scipy_matplotlib_28_0.png"
Time series
import seaborn as sns
sns.set(style="darkgrid")
# Load an example dataset with long-form data
# (load_dataset fetches it from seaborn's online data repository if it is not
# already cached locally).
fmri = sns.load_dataset("fmri")
# Plot the responses for different events and regions.
# lineplot is used instead of pointplot because pointplot has no `style`
# parameter: older seaborn ignored the argument and current versions raise
# an error. lineplot maps `hue` to line color and `style` to dash pattern,
# and draws the mean signal per timepoint with a confidence band.
ax = sns.lineplot(x="timepoint", y="signal",
                  hue="region", style="event",
                  data=fmri)
data:image/s3,"s3://crabby-images/e60a4/e60a463058fe9cfb608e38f2ecb78265c7e9c3be" alt="../_images/scipy_matplotlib_30_0.png"