from datascience import *
import numpy as np
# Load the California universities data (data/cal_unis.csv) into a Table
schools = Table.read_table('data/cal_unis.csv')
# Display the table; only the first 10 rows are shown, the rest are omitted
schools
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
University of California, Riverside | Riverside | Riverside | 23,278 | 1954 |
University of California, San Diego | San Diego | San Diego | 38,798 | 1960 |
University of California, Santa Barbara | Santa Barbara | Santa Barbara | 24,346 | 1891 |
University of California, Santa Cruz | Santa Cruz | Santa Cruz | 19,700 | 1965 |
California State University Maritime Academy | Vallejo | Solano | 1,017 | 1929 |
... (22 rows omitted)
schools.num_rows
32
schools.num_columns
5
# A subset of schools just for illustration purposes:
# take(np.arange(5)) keeps the rows at indices 0 through 4 (the first five)
some_schools = schools.take(np.arange(5))
some_schools
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
some_schools.column('City')
array(['Berkeley', 'Davis', 'Irvine', 'Los Angeles', 'Merced'], dtype='<U15')
# A column can also be fetched by its zero-based index; index 2 is 'County'
some_schools.column(2)
array(['Alameda', 'Yolo', 'Orange', 'Los Angeles', 'Merced'], dtype='<U15')
# Load the US state capitals data (data/us-state-capitals.csv).
# NOTE(review): the name `countries` is misleading — the rows are US states,
# with the capital city in the `description` column; consider renaming the
# variable here and at its later use
countries = Table.read_table('data/us-state-capitals.csv')
countries
name | description | latitude | longitude |
---|---|---|---|
Alabama | Montgomery | 32.3777 | -86.3006 |
Alaska | Juneau | 58.3016 | -134.42 |
Arizona | Phoenix | 33.4481 | -112.097 |
Arkansas | Little Rock | 34.7466 | -92.289 |
California | Sacramento | 38.5767 | -121.494 |
Colorado | Denver | 39.7392 | -104.985 |
Connecticut | Hartford | 41.764 | -72.6822 |
Delaware | Dover | 39.1573 | -75.5197 |
Hawaii | Honolulu | 21.3074 | -157.857 |
Florida | Tallahassee | 30.4381 | -84.2813 |
... (40 rows omitted)
countries.column('latitude')
array([32.377716, 58.301598, 33.448143, 34.746613, 38.576668, 39.739227, 41.764046, 39.157307, 21.307442, 30.438118, 33.749027, 43.617775, 39.798363, 39.768623, 41.591087, 39.048191, 38.186722, 30.457069, 44.307167, 38.978764, 42.358162, 42.733635, 44.955097, 32.303848, 38.579201, 46.585709, 40.808075, 39.163914, 43.206898, 40.220596, 35.68224 , 35.78043 , 46.82085 , 42.652843, 39.961346, 35.492207, 44.938461, 40.264378, 41.830914, 34.000343, 44.367031, 36.16581 , 30.27467 , 40.777477, 44.262436, 37.538857, 47.035805, 38.336246, 43.074684, 41.140259])
# `select` and `drop`
some_schools
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
some_schools.select('Name', 'Enrollment')
Name | Enrollment |
---|---|
University of California, Berkeley | 42,519 |
University of California, Davis | 39,152 |
University of California, Irvine | 35,220 |
University of California, Los Angeles | 45,428 |
University of California, Merced | 8,544 |
some_schools.drop('Founded', 'County')
Name | City | Enrollment |
---|---|---|
University of California, Berkeley | Berkeley | 42,519 |
University of California, Davis | Davis | 39,152 |
University of California, Irvine | Irvine | 35,220 |
University of California, Los Angeles | Los Angeles | 45,428 |
University of California, Merced | Merced | 8,544 |
# some_schools still has all five columns: select and drop return new
# tables rather than modifying the table they are called on
some_schools
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
# `with_columns`
some_schools
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
# with_columns takes a label followed by an array of values, one per row,
# and produces a table with the extra column appended on the right
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM']),
)
Name | City | County | Enrollment | Founded | Nickname |
---|---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | Cal |
University of California, Davis | Davis | Yolo | 39,152 | 1905 | UCD |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 | UCI |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | UCLA |
University of California, Merced | Merced | Merced | 8,544 | 2005 | UCM |
# Several columns can be added in one call by alternating labels and arrays.
# 'Years Old' is each school's age as of 2021, computed from 'Founded'
# with element-wise array arithmetic
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM']),
    'Years Old', 2021 - some_schools.column('Founded'),
)
Name | City | County | Enrollment | Founded | Nickname | Years Old |
---|---|---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | Cal | 152 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 | UCD | 116 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 | UCI | 56 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | UCLA | 139 |
University of California, Merced | Merced | Merced | 8,544 | 2005 | UCM | 16 |
Table()
# Build a table from scratch: start with an empty Table() and attach
# (label, values) pairs; each array here has five entries, one per row
states = Table().with_columns(
    'State', np.array(['California', 'New York', 'Florida', 'Texas', 'Pennsylvania']),
    'Code', np.array(['CA', 'NY', 'FL', 'TX', 'PA']),
    # NOTE(review): units are not stated; presumably millions of residents — confirm
    'Population', np.array([39.3, 19.3, 21.7, 29.3, 12.8]),
)
states
State | Code | Population |
---|---|---|
California | CA | 39.3 |
New York | NY | 19.3 |
Florida | FL | 21.7 |
Texas | TX | 29.3 |
Pennsylvania | PA | 12.8 |
states
State | Code | Population |
---|---|---|
California | CA | 39.3 |
New York | NY | 19.3 |
Florida | FL | 21.7 |
Texas | TX | 29.3 |
Pennsylvania | PA | 12.8 |
# Chain two table methods: drop removes Population, then with_columns
# appends a FedVote label for each state
quick_check2 = states.drop('Population').with_columns(
    'FedVote', np.array(['D', 'D', 'R', 'R', 'D']),
)
quick_check2
State | Code | FedVote |
---|---|---|
California | CA | D |
New York | NY | D |
Florida | FL | R |
Texas | TX | R |
Pennsylvania | PA | D |
# `show`
schools.show(3)
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
... (29 rows omitted)
schools.show()
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
University of California, Riverside | Riverside | Riverside | 23,278 | 1954 |
University of California, San Diego | San Diego | San Diego | 38,798 | 1960 |
University of California, Santa Barbara | Santa Barbara | Santa Barbara | 24,346 | 1891 |
University of California, Santa Cruz | Santa Cruz | Santa Cruz | 19,700 | 1965 |
California State University Maritime Academy | Vallejo | Solano | 1,017 | 1929 |
California Polytechnic State University | San Luis Obispo | San Luis Obispo | 21,812 | 1901 |
California State Polytechnic University, Pomona | Pomona | Los Angeles | 26,443 | 1938 |
California State University, Bakersfield | Bakersfield | Kern | 10,493 | 1965 |
California State University Channel Islands | Camarillo | Ventura | 7,095 | 2002 |
California State University, Chico | Chico | Butte | 17,488 | 1887 |
California State University, Dominguez Hills | Carson | Los Angeles | 15,741 | 1960 |
California State University, East Bay | Hayward | Alameda | 14,525 | 1959 |
California State University, Fresno | Fresno | Fresno | 24,995 | 1911 |
California State University, Fullerton | Fullerton | Orange | 39,774 | 1957 |
California State University, Long Beach | Long Beach | Los Angeles | 36,846 | 1949 |
California State University, Los Angeles | Los Angeles | Los Angeles | 27,685 | 1947 |
California State University, Monterey Bay | Seaside-Marina | Monterey | 7,079 | 1994 |
California State University, Northridge | Northridge | Los Angeles | 38,716 | 1958 |
California State University, Sacramento | Sacramento | Sacramento | 31,131 | 1947 |
California State University, San Bernardino | San Bernardino | San Bernardino | 19,973 | 1965 |
California State University San Marcos | San Marcos | San Diego | 14,511 | 1988 |
California State University, Stanislaus | Turlock | Stanislaus | 10,214 | 1957 |
Humboldt State University | Arcata | Humboldt | 7,774 | 1913 |
San Diego State University | San Diego | San Diego | 34,881 | 1897 |
San Francisco State University | San Francisco | San Francisco | 29,586 | 1899 |
San Jose State University | San Jose | Santa Clara | 32,828 | 1857 |
Sonoma State University | Rohnert Park | Sonoma | 9,201 | 1960 |
# `labels`
schools.show(5)
Name | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
... (27 rows omitted)
# labels is an attribute, not a method, so there are no parentheses.
# Its value is a "tuple" – think of it as a basic list that cannot be changed
schools.labels
('Name', 'City', 'County', 'Enrollment', 'Founded')
schools.relabeled('Name', 'University').show(5)
University | City | County | Enrollment | Founded |
---|---|---|---|---|
University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 |
University of California, Davis | Davis | Yolo | 39,152 | 1905 |
University of California, Irvine | Irvine | Orange | 35,220 | 1965 |
University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 |
University of California, Merced | Merced | Merced | 8,544 | 2005 |
... (27 rows omitted)
# Load the WNBA player statistics from data/wnba-2020.csv
wnba = Table.read_table('data/wnba-2020.csv')
# Display the table; only the first 10 of its rows are shown
wnba
Player | Tm | Pos | G | GS | MP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | TRB | AST | STL | BLK | TOV | PF | PTS |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Natalie Achonwa | IND | F | 18 | 11 | 364 | 57 | 115 | 0.496 | 0 | 10 | 0 | 57 | 105 | 0.543 | 0.496 | 26 | 32 | 0.813 | 26 | 99 | 31 | 9 | 11 | 33 | 46 | 140 |
Jaylyn Agnew | ATL | F | 12 | 0 | 71 | 4 | 15 | 0.267 | 3 | 13 | 0.231 | 1 | 2 | 0.5 | 0.367 | 4 | 5 | 0.8 | 0 | 5 | 3 | 0 | 0 | 5 | 7 | 15 |
Bella Alarie | DAL | C-F | 22 | 3 | 309 | 24 | 66 | 0.364 | 1 | 13 | 0.077 | 23 | 53 | 0.434 | 0.371 | 10 | 12 | 0.833 | 23 | 63 | 11 | 14 | 18 | 11 | 35 | 59 |
Kayla Alexander | MIN | C | 16 | 0 | 89 | 16 | 30 | 0.533 | 0 | 0 | nan | 16 | 30 | 0.533 | 0.533 | 5 | 8 | 0.625 | 9 | 15 | 3 | 2 | 3 | 6 | 14 | 37 |
Julie Allemand | IND | G | 22 | 22 | 716 | 61 | 134 | 0.455 | 44 | 92 | 0.478 | 17 | 42 | 0.405 | 0.619 | 22 | 30 | 0.733 | 8 | 100 | 128 | 25 | 9 | 57 | 57 | 188 |
Lindsay Allen | LVA | G | 21 | 21 | 284 | 28 | 66 | 0.424 | 6 | 17 | 0.353 | 22 | 49 | 0.449 | 0.47 | 8 | 10 | 0.8 | 6 | 23 | 51 | 7 | 1 | 16 | 20 | 70 |
Kristine Anigwe | LAS | F-C | 17 | 1 | 197 | 32 | 53 | 0.604 | 0 | 0 | nan | 32 | 53 | 0.604 | 0.604 | 14 | 26 | 0.538 | 18 | 45 | 4 | 11 | 7 | 16 | 33 | 78 |
Ariel Atkins | WAS | G | 22 | 22 | 682 | 110 | 251 | 0.438 | 44 | 107 | 0.411 | 66 | 144 | 0.458 | 0.526 | 62 | 70 | 0.886 | 18 | 64 | 53 | 40 | 4 | 42 | 58 | 326 |
Seimone Augustus | LAS | G-F | 21 | 0 | 332 | 54 | 110 | 0.491 | 12 | 22 | 0.545 | 42 | 88 | 0.477 | 0.545 | 4 | 6 | 0.667 | 3 | 37 | 25 | 12 | 2 | 8 | 20 | 124 |
Rachel Banham | MIN | G | 20 | 1 | 339 | 48 | 104 | 0.462 | 25 | 53 | 0.472 | 23 | 51 | 0.451 | 0.582 | 16 | 20 | 0.8 | 2 | 25 | 47 | 9 | 2 | 20 | 40 | 137 |
... (152 rows omitted)
wnba.num_rows, wnba.num_columns
(162, 27)
wnba.num_rows
162
wnba.num_columns
27
# Narrow the 27-column table down to the five columns used below:
# the Player, Tm and Pos columns plus games (G) and points (PTS)
wnba_pts = wnba.select('Player', 'Tm', 'Pos', 'G', 'PTS')
wnba_pts
Player | Tm | Pos | G | PTS |
---|---|---|---|---|
Natalie Achonwa | IND | F | 18 | 140 |
Jaylyn Agnew | ATL | F | 12 | 15 |
Bella Alarie | DAL | C-F | 22 | 59 |
Kayla Alexander | MIN | C | 16 | 37 |
Julie Allemand | IND | G | 22 | 188 |
Lindsay Allen | LVA | G | 21 | 70 |
Kristine Anigwe | LAS | F-C | 17 | 78 |
Ariel Atkins | WAS | G | 22 | 326 |
Seimone Augustus | LAS | G-F | 21 | 124 |
Rachel Banham | MIN | G | 20 | 137 |
... (152 rows omitted)
# Points per game for every player: dividing the PTS array by the G array
# is element-wise, so ppg holds one value per row of wnba_pts
ppg = wnba_pts.column('PTS') / wnba_pts.column('G')
# Largest points-per-game value across all players
max(ppg)
22.772727272727273
min(ppg)
0.0
np.mean(ppg)
7.485120421862195
# Store the per-game scoring averages computed earlier (ppg) as a new PPG
# column; wnba_pts is rebound to the table that with_columns returns
wnba_pts = wnba_pts.with_columns('PPG', ppg)
wnba_pts
Player | Tm | Pos | G | PTS | PPG |
---|---|---|---|---|---|
Natalie Achonwa | IND | F | 18 | 140 | 7.77778 |
Jaylyn Agnew | ATL | F | 12 | 15 | 1.25 |
Bella Alarie | DAL | C-F | 22 | 59 | 2.68182 |
Kayla Alexander | MIN | C | 16 | 37 | 2.3125 |
Julie Allemand | IND | G | 22 | 188 | 8.54545 |
Lindsay Allen | LVA | G | 21 | 70 | 3.33333 |
Kristine Anigwe | LAS | F-C | 17 | 78 | 4.58824 |
Ariel Atkins | WAS | G | 22 | 326 | 14.8182 |
Seimone Augustus | LAS | G-F | 21 | 124 | 5.90476 |
Rachel Banham | MIN | G | 20 | 137 | 6.85 |
... (152 rows omitted)