# Notebook setup: the datascience star-import supplies Table (and helpers such
# as `are`) used throughout the cells below.
from datascience import *
import numpy as np
# Switch datascience's Table plotting into its interactive mode before any
# charts are drawn.
Table.interactive_plots()
import plotly.express as px
import seaborn as sns
# Load the gapminder sample data that ships with plotly.express and wrap the
# resulting DataFrame in a datascience Table. Per the displayed output, each
# row is one (country, year) observation with lifeExp, pop and gdpPercap.
world = Table.from_df(px.data.gapminder())
# Bare expression so the notebook renders a preview of the table.
world
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
---|---|---|---|---|---|---|---|
Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445 | AFG | 4 |
Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853 | AFG | 4 |
Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.101 | AFG | 4 |
Afghanistan | Asia | 1967 | 34.02 | 11537966 | 836.197 | AFG | 4 |
Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981 | AFG | 4 |
Afghanistan | Asia | 1977 | 38.438 | 14880372 | 786.113 | AFG | 4 |
Afghanistan | Asia | 1982 | 39.854 | 12881816 | 978.011 | AFG | 4 |
Afghanistan | Asia | 1987 | 40.822 | 13867957 | 852.396 | AFG | 4 |
Afghanistan | Asia | 1992 | 41.674 | 16317921 | 649.341 | AFG | 4 |
Afghanistan | Asia | 1997 | 41.763 | 22227415 | 635.341 | AFG | 4 |
... (1694 rows omitted)
# Animated bubble chart: one bubble per country and one animation frame per
# year. Bubble area encodes population, colour encodes continent, and GDP per
# capita is drawn on a log-scaled x-axis.
px.scatter(
    world.to_df(),
    x='gdpPercap',
    y='lifeExp',
    hover_name='country',
    color='continent',
    size='pop',
    size_max=60,
    log_x=True,
    # The y-range is pinned so the axis stays put across animation frames.
    # The floor is 20 (the same lower bound the life-expectancy histogram
    # uses) because early frames contain values below 30 -- e.g. Afghanistan
    # in 1952 at 28.801 -- which a floor of 30 would push out of view.
    range_y=[20, 90],
    animation_frame='year',
    title='Life Expectancy, GDP Per Capita, and Population over Time',
)
# Animated histogram of life expectancy, one frame per year. Both axis ranges
# are fixed so the frames are directly comparable.
px.histogram(
    world.to_df(),
    x='lifeExp',
    range_x=[20, 90],
    range_y=[0, 50],
    animation_frame='year',
    title='Distribution of Life Expectancy over Time',
)
# Keep only the 2007 rows; the static charts below are drawn from this
# single-year snapshot.
world_latest = world.where('year', are.equal_to(2007))
# Bare expression so the notebook renders a preview of the snapshot.
world_latest
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
---|---|---|---|---|---|---|---|
Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.58 | AFG | 4 |
Albania | Europe | 2007 | 76.423 | 3600523 | 5937.03 | ALB | 8 |
Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.37 | DZA | 12 |
Angola | Africa | 2007 | 42.731 | 12420476 | 4797.23 | AGO | 24 |
Argentina | Americas | 2007 | 75.32 | 40301927 | 12779.4 | ARG | 32 |
Australia | Oceania | 2007 | 81.235 | 20434176 | 34435.4 | AUS | 36 |
Austria | Europe | 2007 | 79.829 | 8199783 | 36126.5 | AUT | 40 |
Bahrain | Asia | 2007 | 75.635 | 708573 | 29796 | BHR | 48 |
Bangladesh | Asia | 2007 | 64.062 | 150448339 | 1391.25 | BGD | 50 |
Belgium | Europe | 2007 | 79.441 | 10392226 | 33692.6 | BEL | 56 |
... (132 rows omitted)
# One box per continent showing the spread of 2007 life expectancy; hovering
# a point reveals the country it belongs to.
px.box(
    world_latest.to_df(),
    x='continent',
    y='lifeExp',
    color='continent',
    hover_name='country',
    title='Distribution of Life Expectancy in 2007 by Continent',
)
# Preview the 2007 rows whose continent is 'Americas' (displayed, not stored).
world_latest.where('continent', are.equal_to('Americas'))
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
---|---|---|---|---|---|---|---|
Argentina | Americas | 2007 | 75.32 | 40301927 | 12779.4 | ARG | 32 |
Bolivia | Americas | 2007 | 65.554 | 9119152 | 3822.14 | BOL | 68 |
Brazil | Americas | 2007 | 72.39 | 190010647 | 9065.8 | BRA | 76 |
Canada | Americas | 2007 | 80.653 | 33390141 | 36319.2 | CAN | 124 |
Chile | Americas | 2007 | 78.553 | 16284741 | 13171.6 | CHL | 152 |
Colombia | Americas | 2007 | 72.889 | 44227550 | 7006.58 | COL | 170 |
Costa Rica | Americas | 2007 | 78.782 | 4133884 | 9645.06 | CRI | 188 |
Cuba | Americas | 2007 | 78.273 | 11416987 | 8948.1 | CUB | 192 |
Dominican Republic | Americas | 2007 | 72.235 | 9319622 | 6025.37 | DOM | 214 |
Ecuador | Americas | 2007 | 74.994 | 13755680 | 6873.26 | ECU | 218 |
... (15 rows omitted)
# Pie chart of 2007 population with one slice per country in the Americas.
px.pie(
    world_latest.where('continent', 'Americas').to_df(),
    names='country',
    values='pop',
    title='Population of the Americas',
)
# Total 2007 population per continent. group() applies `sum` to every
# remaining column and labels each result '<column> sum', so only the
# continent key and the summed population are kept afterwards.
world_for_pie = (
    world_latest
    .group('continent', sum)
    .select('continent', 'pop sum')
)
# Bare expression so the notebook renders the aggregated table.
world_for_pie
continent | pop sum |
---|---|
Africa | 929539692 |
Americas | 898871184 |
Asia | 3811953827 |
Europe | 586098529 |
Oceania | 24549947 |
# Pie chart of 2007 world population with one slice per continent.
px.pie(
    world_for_pie.to_df(),
    names='continent',
    values='pop sum',
    title='World Population by Continent',
)
# Rows for the timeline chart. Each entry is
# [phase name, start date, end date, location], with dates written as
# 'YYYY-MM-DD' strings.
phases = [
    ['Newborn', '1998-11-26', '1999-11-26', 'Canada'],
    ['Toddler, Preschooler', '1999-11-26', '2005-09-03', 'US'],
    ['Elementary School Student', '2005-09-03', '2009-06-30', 'Canada'],
    ['Middle School Student', '2009-09-15', '2012-06-15', 'Canada'],
    ['High School Student', '2012-09-05', '2016-05-30', 'Canada'],
    ['Undergrad @ UC Berkeley', '2016-08-22', '2020-05-15', 'US'],
    ['Masters @ UC Berkeley', '2020-08-25', '2021-05-14', 'Canada'],
    ['Teaching Data 94', '2021-01-20', '2021-05-14', 'Canada'],
]

# Start from an empty, labelled Table and append the rows above in order.
phases_table = Table(
    labels=['Phase', 'Start', 'End', 'Location'],
).with_rows(phases)
# Bare expression so the notebook renders the table.
phases_table
Phase | Start | End | Location |
---|---|---|---|
Newborn | 1998-11-26 | 1999-11-26 | Canada |
Toddler, Preschooler | 1999-11-26 | 2005-09-03 | US |
Elementary School Student | 2005-09-03 | 2009-06-30 | Canada |
Middle School Student | 2009-09-15 | 2012-06-15 | Canada |
High School Student | 2012-09-05 | 2016-05-30 | Canada |
Undergrad @ UC Berkeley | 2016-08-22 | 2020-05-15 | US |
Masters @ UC Berkeley | 2020-08-25 | 2021-05-14 | Canada |
Teaching Data 94 | 2021-01-20 | 2021-05-14 | Canada |
# Gantt-style timeline: one horizontal bar per phase spanning Start..End,
# annotated with the location.
(
    px.timeline(
        phases_table.to_df(),
        x_start='Start',
        x_end='End',
        y='Phase',
        text='Location',
        title='My Life Trajectory',
    )
    # Flip the y-axis so the phases read top-to-bottom in table order.
    .update_yaxes(autorange='reversed')
)
# Re-display the 2007 snapshot as a reminder of its columns (notably
# iso_alpha, which the map below uses as the location key).
world_latest
country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
---|---|---|---|---|---|---|---|
Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.58 | AFG | 4 |
Albania | Europe | 2007 | 76.423 | 3600523 | 5937.03 | ALB | 8 |
Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.37 | DZA | 12 |
Angola | Africa | 2007 | 42.731 | 12420476 | 4797.23 | AGO | 24 |
Argentina | Americas | 2007 | 75.32 | 40301927 | 12779.4 | ARG | 32 |
Australia | Oceania | 2007 | 81.235 | 20434176 | 34435.4 | AUS | 36 |
Austria | Europe | 2007 | 79.829 | 8199783 | 36126.5 | AUT | 40 |
Bahrain | Asia | 2007 | 75.635 | 708573 | 29796 | BHR | 48 |
Bangladesh | Asia | 2007 | 64.062 | 150448339 | 1391.25 | BGD | 50 |
Belgium | Europe | 2007 | 79.441 | 10392226 | 33692.6 | BEL | 56 |
... (132 rows omitted)
# World map shaded by 2007 life expectancy. Countries are matched to map
# regions through the codes in the 'iso_alpha' column.
px.choropleth(
    world_latest.to_df(),
    locations='iso_alpha',
    color='lifeExp',
    color_continuous_scale=px.colors.sequential.tempo,
    hover_name='country',
    title='Life Expectancy Per Country',
)
# Load the Walmart store data from a CSV file; the path is relative to the
# notebook's working directory. Per the displayed output, rows carry a store
# number, opening date, address fields, state (STRSTATE) and LAT/LON.
wm = Table.read_table('data/walmart.csv')
# Bare expression so the notebook renders a preview of the table.
wm
storenum | OPENDATE | date_super | conversion | st | county | STREETADDR | STRCITY | STRSTATE | ZIPCODE | type_store | LAT | LON | MONTH | DAY | YEAR |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 7/1/62 | 3/1/97 | 1 | 5 | 7 | 2110 WEST WALNUT | Rogers | AR | 72756 | Supercenter | 36.3422 | -94.0714 | 7 | 1 | 1962 |
2 | 8/1/64 | 3/1/96 | 1 | 5 | 9 | 1417 HWY 62/65 N | Harrison | AR | 72601 | Supercenter | 36.237 | -93.0935 | 8 | 1 | 1964 |
4 | 8/1/65 | 3/1/02 | 1 | 5 | 7 | 2901 HWY 412 EAST | Siloam Springs | AR | 72761 | Supercenter | 36.1799 | -94.5021 | 8 | 1 | 1965 |
8 | 10/1/67 | 3/1/93 | 1 | 5 | 29 | 1621 NORTH BUSINESS 9 | Morrilton | AR | 72110 | Supercenter | 35.1565 | -92.7586 | 10 | 1 | 1967 |
7 | 10/1/67 | nan | nan | 5 | 119 | 3801 CAMP ROBINSON RD. | North Little Rock | AR | 72118 | Wal-Mart | 34.8133 | -92.3023 | 10 | 1 | 1967 |
10 | 7/1/68 | 3/1/98 | 1 | 40 | 21 | 2020 SOUTH MUSKOGEE | Tahlequah | OK | 74464 | Supercenter | 35.9237 | -94.9719 | 7 | 1 | 1968 |
13 | 11/1/68 | 3/1/96 | 1 | 29 | 97 | 2705 GRAND AVE | Carthage | MO | 64836 | Supercenter | 37.169 | -94.3116 | 11 | 1 | 1968 |
12 | 7/1/68 | 3/1/94 | 1 | 40 | 131 | 1500 LYNN RIGGS BLVD | Claremore | OK | 74017 | Supercenter | 36.3271 | -95.6119 | 7 | 1 | 1968 |
11 | 3/1/68 | 2/20/02 | 1 | 5 | 5 | 65 WAL-MART DRIVE | Mountain Home | AR | 72653 | Supercenter | 36.329 | -92.3578 | 3 | 1 | 1968 |
9 | 3/1/68 | 3/1/00 | 1 | 29 | 143 | 1303 SOUTH MAIN | Sikeston | MO | 63801 | Supercenter | 36.8912 | -89.5836 | 3 | 1 | 1968 |
... (2982 rows omitted)
# Number of Walmarts per state: group() with no aggregation function counts
# the rows for each distinct STRSTATE value, producing the two-column
# (STRSTATE, count) table shown in the output.
wm_per_state = wm.group('STRSTATE')
# Bare expression so the notebook renders the counts.
wm_per_state
STRSTATE | count |
---|---|
AL | 90 |
AR | 81 |
AZ | 55 |
CA | 159 |
CO | 56 |
DE | 8 |
FL | 175 |
GA | 114 |
IA | 55 |
ID | 17 |
... (31 rows omitted)
# US map shaded by store count. STRSTATE holds two-letter state codes, so
# locations are interpreted in 'USA-states' mode and the view is limited to
# the USA.
px.choropleth(
    wm_per_state.to_df(),
    locations='STRSTATE',
    locationmode='USA-states',
    scope='usa',
    color='count',
    title='Number of Walmarts Per State',
)
# Load seaborn's 'penguins' example dataset and wrap the DataFrame in a
# datascience Table. The displayed output shows that some rows contain nan
# (missing) measurements and/or sex.
penguins = Table.from_df(sns.load_dataset('penguins'))
# Bare expression so the notebook renders a preview of the table.
penguins
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex |
---|---|---|---|---|---|---|
Adelie | Torgersen | 39.1 | 18.7 | 181 | 3750 | Male |
Adelie | Torgersen | 39.5 | 17.4 | 186 | 3800 | Female |
Adelie | Torgersen | 40.3 | 18 | 195 | 3250 | Female |
Adelie | Torgersen | nan | nan | nan | nan | nan |
Adelie | Torgersen | 36.7 | 19.3 | 193 | 3450 | Female |
Adelie | Torgersen | 39.3 | 20.6 | 190 | 3650 | Male |
Adelie | Torgersen | 38.9 | 17.8 | 181 | 3625 | Female |
Adelie | Torgersen | 39.2 | 19.6 | 195 | 4675 | Male |
Adelie | Torgersen | 34.1 | 18.1 | 193 | 3475 | nan |
Adelie | Torgersen | 42 | 20.2 | 190 | 4250 | nan |
... (334 rows omitted)
# 3-D scatter of three penguin body measurements, coloured by species;
# hovering a point shows the island it was recorded on.
px.scatter_3d(
    penguins.to_df(),
    x='bill_length_mm',
    y='bill_depth_mm',
    z='flipper_length_mm',
    color='species',
    hover_name='island',
    title='Flipper Length vs. Bill Depth vs. Bill Length',
)