# Set up the libraries
%matplotlib inline
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Switch on seaborn's default theme; color_codes=True asks seaborn to remap
# matplotlib's single-letter colour codes to its own palette
sns.set(color_codes=True)

# Load both example datasets again from the vega-datasets repository
weather_url = "https://raw.githubusercontent.com/vega/vega-datasets/gh-pages/data/weather.csv"
cars_url = "https://github.com/vega/vega-datasets/raw/gh-pages/data/cars.json"
weather_df = pd.read_csv(weather_url)
cars_df = pd.read_json(cars_url)
# Plain matplotlib scatter: Acceleration on x, Miles_per_Gallon on y
plt.scatter(x=cars_df["Acceleration"], y=cars_df["Miles_per_Gallon"])
<matplotlib.collections.PathCollection at 0x1190e8358>
# The same scatter drawn by seaborn; fit_reg=False leaves out the regression line
sns.lmplot(
    data=cars_df,
    x="Acceleration",
    y="Miles_per_Gallon",
    fit_reg=False,
)
<seaborn.axisgrid.FacetGrid at 0x1194161d0>
# Without fit_reg=False, seaborn also fits a regression line
# that shows the direction of the relationship
sns.lmplot(
    data=cars_df,
    x="Acceleration",
    y="Miles_per_Gallon",
)
<seaborn.axisgrid.FacetGrid at 0x118eccb00>
# Plotting 3 variables: Cylinders is encoded as the point colour via hue
sns.lmplot(
    data=cars_df,
    x="Acceleration",
    y="Miles_per_Gallon",
    hue="Cylinders",
    fit_reg=False,
)
<seaborn.axisgrid.FacetGrid at 0x118c5cb70>
# matplotlib can colour the points by Cylinders as well,
# but getting a proper legend is not easy: no series carries a label here,
# so the bare legend() call below has nothing to list
plt.scatter(
    x=cars_df["Acceleration"],
    y=cars_df["Miles_per_Gallon"],
    c=cars_df["Cylinders"],
)
plt.legend()
<matplotlib.legend.Legend at 0x11be56780>
# Same thing with matplotlib, this time with a usable legend:
# each cylinder count is drawn as its own labelled scatter series.
# Maps a Cylinders value to a one-letter matplotlib colour code.
colors = {
    2: 'r',
    3: 'g',
    4: 'b',
    5: 'y',
    6: 'w',
    8: 'k',
}
# We draw each cylinder plot separately so that legend() gets one entry per value
for x in sorted(set(cars_df.Cylinders)):
    # Rows of cars_df belonging to the current cylinder count
    d = cars_df[cars_df.Cylinders == x]
    # colors.get(x) is None for a value missing from the mapping
    plt.scatter(d.Acceleration, d.Miles_per_Gallon, c=colors.get(x), label=x)
# Called once after the loop, when every labelled series exists
plt.legend(title="Cylinders")
<matplotlib.legend.Legend at 0x11bb848d0>
Use seaborn whenever possible
# Replace hue with col: each Cylinders value gets its own column of the grid
sns.lmplot(
    data=cars_df,
    x="Acceleration",
    y="Miles_per_Gallon",
    col="Cylinders",
    fit_reg=False,
)
<seaborn.axisgrid.FacetGrid at 0x11c8a5d30>
# Now with row: each Cylinders value gets its own row of the grid
sns.lmplot(
    data=cars_df,
    x="Acceleration",
    y="Miles_per_Gallon",
    row="Cylinders",
    fit_reg=False,
)
<seaborn.axisgrid.FacetGrid at 0x11ce27d30>
# Let's examine the relationship between wind and precipitation
g = sns.lmplot(
    data=weather_df,
    x="wind",
    y="precipitation",
    fit_reg=False,
)
# Distribution of precipitation
# NOTE(review): newer seaborn releases deprecate distplot in favour of
# histplot/displot -- confirm against the installed seaborn version
sns.distplot(weather_df["precipitation"])
# The values are bunched up close to zero
<matplotlib.axes._subplots.AxesSubplot at 0x120b8b978>
# Using a log scale on the y axis will make the plot clearer
# NOTE(review): a value of exactly zero cannot be placed on a log axis --
# check whether zero-precipitation rows drop out of this plot
g = sns.lmplot(
    data=weather_df,
    x="wind",
    y="precipitation",
    fit_reg=False,
)
g.set(yscale="log")
# No clear relationship
<seaborn.axisgrid.FacetGrid at 0x120e752e8>
# Let's see if it is the same in all locations: colour the points by location
g = sns.lmplot(
    data=weather_df,
    x="wind",
    y="precipitation",
    hue="location",
    fit_reg=False,
)
g.set(yscale="log")
# Hue does not make the comparison easy
<seaborn.axisgrid.FacetGrid at 0x120e17f28>
# Separate plots per location (col) are better for comparison
g = sns.lmplot(
    data=weather_df,
    x="wind",
    y="precipitation",
    col="location",
    fit_reg=False,
)
g.set(yscale="log")
# Still no clear relation
<seaborn.axisgrid.FacetGrid at 0x1215d0ac8>
# Let's also break it down by weather type: location in columns, weather in rows
g = sns.lmplot(
    data=weather_df,
    x="wind",
    y="precipitation",
    col="location",
    row="weather",
    fit_reg=False,
)
g.set(yscale="log")
<seaborn.axisgrid.FacetGrid at 0x11e558860>
# Grid of pairwise plots for the weather_df columns
sns.pairplot(data=weather_df)
<seaborn.axisgrid.PairGrid at 0x105f0a2b0>
# Try to plot 2 different plots of each (10 plots in total),
# where you set the y and then the hue, to see how the
# plots behave differently
# Your turn to show off factorplot (called catplot from seaborn 0.9 onward)