# Notice the new import statements today!
from datascience import *
import numpy as np
import seaborn as sns
# Run this cell.
# Load seaborn's Titanic sample data, drop every row that has a missing value,
# and convert the resulting pandas DataFrame into a datascience Table.
full_titanic = Table.from_df(
    sns.load_dataset('titanic').dropna()
)
# Keep only the three columns used in the rest of the notebook.
titanic = full_titanic.select(
    'sex',
    'age',
    'fare',
)
sex | age | fare |
female | 38 | 71.2833 |
female | 35 | 53.1 |
male | 54 | 51.8625 |
female | 4 | 16.7 |
female | 58 | 26.55 |
male | 34 | 13 |
male | 28 | 35.5 |
male | 19 | 263 |
female | 49 | 76.7292 |
male | 65 | 61.9792 |
... (172 rows omitted)
Visualizations are for humans!
# Scatter plot of fare (vertical axis) against age (horizontal axis).
# NOTE(review): show=False presumably returns the figure instead of rendering
# it immediately, so the notebook displays it as the cell's value - confirm.
titanic.select('age', 'fare').scatter(
    'age',
    'fare',
    title='Fare vs. age for Titanic passengers',
    width=500,
    height=500,
    show=False,
)
Visualize, then quantify!
# Anscombe's quartet from seaborn's sample datasets, converted from a pandas
# DataFrame into a datascience Table with columns 'dataset', 'x' and 'y'.
ans = Table.from_df(
    sns.load_dataset('anscombe')
)
dataset | x | y |
I | 10 | 8.04 |
I | 8 | 6.95 |
I | 13 | 7.58 |
I | 9 | 8.81 |
I | 11 | 8.33 |
I | 14 | 9.96 |
I | 6 | 7.24 |
I | 4 | 4.26 |
I | 12 | 10.84 |
I | 7 | 4.82 |
... (34 rows omitted)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Draw the four Anscombe datasets as a 2x2 grid of scatter plots.
# With start_cell="bottom-left", row 1 is the bottom row of the grid, so the
# (row, col) positions below place datasets I and II in the top row and
# III and IV in the bottom row.
fig = make_subplots(rows=2, cols=2, start_cell="bottom-left")
for ds, pos in zip(['I', 'II', 'III', 'IV'], [(2, 1), (2, 2), (1, 1), (1, 2)]):
    # Filter once per dataset instead of once per axis.
    subset = ans.where('dataset', ds)
    fig.add_trace(
        go.Scatter(
            x=subset.column('x'),
            y=subset.column('y'),
            mode='markers',
            name='Dataset ' + ds,
        ),
        row=pos[0],
        col=pos[1],
    )
fig.update_layout(title="Anscombe's Quartet")
# Small two-column lookup table pairing each Bay Area city with its area code.
# with_columns takes alternating (label, values) arguments.
bay_codes = Table().with_columns(
    'city', np.array(['Berkeley', 'San Francisco', 'Palo Alto']),
    'area code', np.array([510, 415, 650]),
)
city | area code |
Berkeley | 510 |
San Francisco | 415 |
Palo Alto | 650 |
# Horizontal bar chart of mean passenger age for each sex.
# group('sex', np.mean) averages every other column within each sex; only the
# resulting 'age mean' column is kept for plotting.
(
    titanic
    .group('sex', np.mean)
    .select('sex', 'age mean')
    .barh(
        'sex',
        'age mean',
        title='Average age of females and males on the Titanic',
        show=False,
    )
)