from datascience import *
import numpy as np
schools = Table.read_table('data/cal_unis.csv')
schools
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
| University of California, Riverside | Riverside | Riverside | 23,278 | 1954 | 
| University of California, San Diego | San Diego | San Diego | 38,798 | 1960 | 
| University of California, Santa Barbara | Santa Barbara | Santa Barbara | 24,346 | 1891 | 
| University of California, Santa Cruz | Santa Cruz | Santa Cruz | 19,700 | 1965 | 
| California State University Maritime Academy | Vallejo | Solano | 1,017 | 1929 | 
... (22 rows omitted)
schools.num_rows
32
schools.num_columns
5
# A small subset of schools, used for illustration: take() is given the
# row indices 0 through 4 (np.arange(5)), i.e. the first five rows.
some_schools = schools.take(np.arange(5))
some_schools
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
some_schools.column('City')
array(['Berkeley', 'Davis', 'Irvine', 'Los Angeles', 'Merced'],
      dtype='<U15')
some_schools.column(2)
array(['Alameda', 'Yolo', 'Orange', 'Los Angeles', 'Merced'], dtype='<U15')
# NOTE(review): despite the variable name, this file holds US state capitals
# (state name, capital city, latitude, longitude), not countries. The name is
# left unchanged here because other cells refer to `countries`.
countries = Table.read_table('data/us-state-capitals.csv')
countries
| name | description | latitude | longitude | 
|---|---|---|---|
| Alabama | Montgomery | 32.3777 | -86.3006 | 
| Alaska | Juneau | 58.3016 | -134.42 | 
| Arizona | Phoenix | 33.4481 | -112.097 | 
| Arkansas | Little Rock | 34.7466 | -92.289 | 
| California | Sacramento | 38.5767 | -121.494 | 
| Colorado | Denver | 39.7392 | -104.985 | 
| Connecticut | Hartford | 41.764 | -72.6822 | 
| Delaware | Dover | 39.1573 | -75.5197 | 
| Hawaii | Honolulu | 21.3074 | -157.857 | 
| Florida | Tallahassee | 30.4381 | -84.2813 | 
... (40 rows omitted)
countries.column('latitude')
array([32.377716, 58.301598, 33.448143, 34.746613, 38.576668, 39.739227,
       41.764046, 39.157307, 21.307442, 30.438118, 33.749027, 43.617775,
       39.798363, 39.768623, 41.591087, 39.048191, 38.186722, 30.457069,
       44.307167, 38.978764, 42.358162, 42.733635, 44.955097, 32.303848,
       38.579201, 46.585709, 40.808075, 39.163914, 43.206898, 40.220596,
       35.68224 , 35.78043 , 46.82085 , 42.652843, 39.961346, 35.492207,
       44.938461, 40.264378, 41.830914, 34.000343, 44.367031, 36.16581 ,
       30.27467 , 40.777477, 44.262436, 37.538857, 47.035805, 38.336246,
       43.074684, 41.140259])
## select and drop
some_schools
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
some_schools.select('Name', 'Enrollment')
| Name | Enrollment | 
|---|---|
| University of California, Berkeley | 42,519 | 
| University of California, Davis | 39,152 | 
| University of California, Irvine | 35,220 | 
| University of California, Los Angeles | 45,428 | 
| University of California, Merced | 8,544 | 
some_schools.drop('Founded', 'County')
| Name | City | Enrollment | 
|---|---|---|
| University of California, Berkeley | Berkeley | 42,519 | 
| University of California, Davis | Davis | 39,152 | 
| University of California, Irvine | Irvine | 35,220 | 
| University of California, Los Angeles | Los Angeles | 45,428 | 
| University of California, Merced | Merced | 8,544 | 
some_schools
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
## with_columns
some_schools
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
# Add a 'Nickname' column; the array supplies one value per row, in row order.
# The result is only displayed, not assigned, so the name `some_schools` is
# not rebound by this cell.
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM'])
)
| Name | City | County | Enrollment | Founded | Nickname | 
|---|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | Cal | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | UCD | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | UCI | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | UCLA | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | UCM | 
# with_columns takes alternating label/values arguments, so several columns
# can be added in a single call.
# NOTE: 2021 is a hard-coded reference year, so 'Years Old' is each school's
# age as of 2021 rather than as of the day the notebook is run.
some_schools.with_columns(
    'Nickname', np.array(['Cal', 'UCD', 'UCI', 'UCLA', 'UCM']),
    'Years Old', 2021 - some_schools.column('Founded')
)
| Name | City | County | Enrollment | Founded | Nickname | Years Old | 
|---|---|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | Cal | 152 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | UCD | 116 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | UCI | 56 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | UCLA | 139 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | UCM | 16 | 
Table()
# Build a table from scratch: start from an empty Table() and attach three
# columns, each given as a label followed by an array of five values.
# NOTE(review): 'Population' looks like it is in millions of people — confirm.
states = Table().with_columns(
    'State', np.array(['California', 'New York', 'Florida', 'Texas', 'Pennsylvania']),
    'Code', np.array(['CA', 'NY', 'FL', 'TX', 'PA']),
    'Population', np.array([39.3, 19.3, 21.7, 29.3, 12.8])
)
states
| State | Code | Population | 
|---|---|---|
| California | CA | 39.3 | 
| New York | NY | 19.3 | 
| Florida | FL | 21.7 | 
| Texas | TX | 29.3 | 
| Pennsylvania | PA | 12.8 | 
states
| State | Code | Population | 
|---|---|---|
| California | CA | 39.3 | 
| New York | NY | 19.3 | 
| Florida | FL | 21.7 | 
| Texas | TX | 29.3 | 
| Pennsylvania | PA | 12.8 | 
# Quick check: remove 'Population' from `states`, then attach a 'FedVote'
# column holding one code per state, in row order.
quick_check2 = (
    states
    .drop('Population')
    .with_columns('FedVote', np.array(['D', 'D', 'R', 'R', 'D']))
)
quick_check2
| State | Code | FedVote | 
|---|---|---|
| California | CA | D | 
| New York | NY | D | 
| Florida | FL | R | 
| Texas | TX | R | 
| Pennsylvania | PA | D | 
## show
schools.show(3)
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
... (29 rows omitted)
schools.show()
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
| University of California, Riverside | Riverside | Riverside | 23,278 | 1954 | 
| University of California, San Diego | San Diego | San Diego | 38,798 | 1960 | 
| University of California, Santa Barbara | Santa Barbara | Santa Barbara | 24,346 | 1891 | 
| University of California, Santa Cruz | Santa Cruz | Santa Cruz | 19,700 | 1965 | 
| California State University Maritime Academy | Vallejo | Solano | 1,017 | 1929 | 
| California Polytechnic State University | San Luis Obispo | San Luis Obispo | 21,812 | 1901 | 
| California State Polytechnic University, Pomona | Pomona | Los Angeles | 26,443 | 1938 | 
| California State University, Bakersfield | Bakersfield | Kern | 10,493 | 1965 | 
| California State University Channel Islands | Camarillo | Ventura | 7,095 | 2002 | 
| California State University, Chico | Chico | Butte | 17,488 | 1887 | 
| California State University, Dominguez Hills | Carson | Los Angeles | 15,741 | 1960 | 
| California State University, East Bay | Hayward | Alameda | 14,525 | 1959 | 
| California State University, Fresno | Fresno | Fresno | 24,995 | 1911 | 
| California State University, Fullerton | Fullerton | Orange | 39,774 | 1957 | 
| California State University, Long Beach | Long Beach | Los Angeles | 36,846 | 1949 | 
| California State University, Los Angeles | Los Angeles | Los Angeles | 27,685 | 1947 | 
| California State University, Monterey Bay | Seaside-Marina | Monterey | 7,079 | 1994 | 
| California State University, Northridge | Northridge | Los Angeles | 38,716 | 1958 | 
| California State University, Sacramento | Sacramento | Sacramento | 31,131 | 1947 | 
| California State University, San Bernardino | San Bernardino | San Bernardino | 19,973 | 1965 | 
| California State University San Marcos | San Marcos | San Diego | 14,511 | 1988 | 
| California State University, Stanislaus | Turlock | Stanislaus | 10,214 | 1957 | 
| Humboldt State University | Arcata | Humboldt | 7,774 | 1913 | 
| San Diego State University | San Diego | San Diego | 34,881 | 1897 | 
| San Francisco State University | San Francisco | San Francisco | 29,586 | 1899 | 
| San Jose State University | San Jose | Santa Clara | 32,828 | 1857 | 
| Sonoma State University | Rohnert Park | Sonoma | 9,201 | 1960 | 
## labels
schools.show(5)
| Name | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
... (27 rows omitted)
# The result is a "tuple" – think of it as a list that cannot be modified
schools.labels
('Name', 'City', 'County', 'Enrollment', 'Founded')
schools.relabeled('Name', 'University').show(5)
| University | City | County | Enrollment | Founded | 
|---|---|---|---|---|
| University of California, Berkeley | Berkeley | Alameda | 42,519 | 1869 | 
| University of California, Davis | Davis | Yolo | 39,152 | 1905 | 
| University of California, Irvine | Irvine | Orange | 35,220 | 1965 | 
| University of California, Los Angeles | Los Angeles | Los Angeles | 45,428 | 1882 | 
| University of California, Merced | Merced | Merced | 8,544 | 2005 | 
... (27 rows omitted)
wnba = Table.read_table('data/wnba-2020.csv')
wnba
| Player | Tm | Pos | G | GS | MP | FG | FGA | FG% | 3P | 3PA | 3P% | 2P | 2PA | 2P% | eFG% | FT | FTA | FT% | ORB | TRB | AST | STL | BLK | TOV | PF | PTS | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Natalie Achonwa | IND | F | 18 | 11 | 364 | 57 | 115 | 0.496 | 0 | 10 | 0 | 57 | 105 | 0.543 | 0.496 | 26 | 32 | 0.813 | 26 | 99 | 31 | 9 | 11 | 33 | 46 | 140 | 
| Jaylyn Agnew | ATL | F | 12 | 0 | 71 | 4 | 15 | 0.267 | 3 | 13 | 0.231 | 1 | 2 | 0.5 | 0.367 | 4 | 5 | 0.8 | 0 | 5 | 3 | 0 | 0 | 5 | 7 | 15 | 
| Bella Alarie | DAL | C-F | 22 | 3 | 309 | 24 | 66 | 0.364 | 1 | 13 | 0.077 | 23 | 53 | 0.434 | 0.371 | 10 | 12 | 0.833 | 23 | 63 | 11 | 14 | 18 | 11 | 35 | 59 | 
| Kayla Alexander | MIN | C | 16 | 0 | 89 | 16 | 30 | 0.533 | 0 | 0 | nan | 16 | 30 | 0.533 | 0.533 | 5 | 8 | 0.625 | 9 | 15 | 3 | 2 | 3 | 6 | 14 | 37 | 
| Julie Allemand | IND | G | 22 | 22 | 716 | 61 | 134 | 0.455 | 44 | 92 | 0.478 | 17 | 42 | 0.405 | 0.619 | 22 | 30 | 0.733 | 8 | 100 | 128 | 25 | 9 | 57 | 57 | 188 | 
| Lindsay Allen | LVA | G | 21 | 21 | 284 | 28 | 66 | 0.424 | 6 | 17 | 0.353 | 22 | 49 | 0.449 | 0.47 | 8 | 10 | 0.8 | 6 | 23 | 51 | 7 | 1 | 16 | 20 | 70 | 
| Kristine Anigwe | LAS | F-C | 17 | 1 | 197 | 32 | 53 | 0.604 | 0 | 0 | nan | 32 | 53 | 0.604 | 0.604 | 14 | 26 | 0.538 | 18 | 45 | 4 | 11 | 7 | 16 | 33 | 78 | 
| Ariel Atkins | WAS | G | 22 | 22 | 682 | 110 | 251 | 0.438 | 44 | 107 | 0.411 | 66 | 144 | 0.458 | 0.526 | 62 | 70 | 0.886 | 18 | 64 | 53 | 40 | 4 | 42 | 58 | 326 | 
| Seimone Augustus | LAS | G-F | 21 | 0 | 332 | 54 | 110 | 0.491 | 12 | 22 | 0.545 | 42 | 88 | 0.477 | 0.545 | 4 | 6 | 0.667 | 3 | 37 | 25 | 12 | 2 | 8 | 20 | 124 | 
| Rachel Banham | MIN | G | 20 | 1 | 339 | 48 | 104 | 0.462 | 25 | 53 | 0.472 | 23 | 51 | 0.451 | 0.582 | 16 | 20 | 0.8 | 2 | 25 | 47 | 9 | 2 | 20 | 40 | 137 | 
... (152 rows omitted)
wnba.num_rows, wnba.num_columns
(162, 27)
wnba.num_rows
162
wnba.num_columns
27
wnba_pts = wnba.select('Player', 'Tm', 'Pos', 'G', 'PTS')
wnba_pts
| Player | Tm | Pos | G | PTS | 
|---|---|---|---|---|
| Natalie Achonwa | IND | F | 18 | 140 | 
| Jaylyn Agnew | ATL | F | 12 | 15 | 
| Bella Alarie | DAL | C-F | 22 | 59 | 
| Kayla Alexander | MIN | C | 16 | 37 | 
| Julie Allemand | IND | G | 22 | 188 | 
| Lindsay Allen | LVA | G | 21 | 70 | 
| Kristine Anigwe | LAS | F-C | 17 | 78 | 
| Ariel Atkins | WAS | G | 22 | 326 | 
| Seimone Augustus | LAS | G-F | 21 | 124 | 
| Rachel Banham | MIN | G | 20 | 137 | 
... (152 rows omitted)
# Computes number of points scored per game
ppg = wnba_pts.column('PTS') / wnba_pts.column('G')
max(ppg)
22.772727272727273
min(ppg)
0.0
np.mean(ppg)
7.485120421862195
# Attach the per-game scoring average as a 'PPG' column, reusing the `ppg`
# array computed above (PTS / G) instead of repeating the division, so the
# column and the max/min/mean summaries come from the same values.
# Note: this rebinds wnba_pts to the new, wider table.
wnba_pts = wnba_pts.with_columns('PPG', ppg)
wnba_pts
| Player | Tm | Pos | G | PTS | PPG | 
|---|---|---|---|---|---|
| Natalie Achonwa | IND | F | 18 | 140 | 7.77778 | 
| Jaylyn Agnew | ATL | F | 12 | 15 | 1.25 | 
| Bella Alarie | DAL | C-F | 22 | 59 | 2.68182 | 
| Kayla Alexander | MIN | C | 16 | 37 | 2.3125 | 
| Julie Allemand | IND | G | 22 | 188 | 8.54545 | 
| Lindsay Allen | LVA | G | 21 | 70 | 3.33333 | 
| Kristine Anigwe | LAS | F-C | 17 | 78 | 4.58824 | 
| Ariel Atkins | WAS | G | 22 | 326 | 14.8182 | 
| Seimone Augustus | LAS | G-F | 21 | 124 | 5.90476 | 
| Rachel Banham | MIN | G | 20 | 137 | 6.85 | 
... (152 rows omitted)