from datascience import *
import numpy as np
import plotly.express as px
sat = Table.read_table('data/sat2014-lecture.csv')
sat
State | Participation Rate | Critical Reading | Math | Writing |
---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 |
Alaska | 54.2 | 507 | 503 | 475 |
Arizona | 36.4 | 522 | 525 | 500 |
Arkansas | 4.2 | 573 | 571 | 554 |
California | 60.3 | 498 | 510 | 496 |
Colorado | 14.3 | 582 | 586 | 567 |
Connecticut | 88.4 | 507 | 510 | 508 |
Delaware | 100 | 456 | 459 | 444 |
District of Columbia | 100 | 440 | 438 | 431 |
Florida | 72.2 | 491 | 485 | 472 |
... (41 rows omitted)
sat.num_rows, sat.num_columns
(51, 5)
It would be nice to have a combined score too.
sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing')
array([1617, 1485, 1547, 1698, 1504, 1735, 1525, 1359, 1309, 1448, 1445, 1460, 1364, 1802, 1474, 1794, 1753, 1746, 1667, 1387, 1468, 1556, 1784, 1786, 1714, 1771, 1637, 1745, 1458, 1566, 1526, 1617, 1468, 1483, 1816, 1652, 1697, 1544, 1481, 1480, 1443, 1792, 1714, 1432, 1690, 1554, 1530, 1519, 1522, 1782, 1762])
sat = sat.with_columns(
'Combined', sat.column('Critical Reading') + sat.column('Math') + sat.column('Writing')
)
sat
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
... (41 rows omitted)
sort
¶Which states had the highest combined scores? The lowest?
sat.sort('Combined')
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
Idaho | 100 | 458 | 456 | 450 | 1364 |
Maine | 95.6 | 467 | 471 | 449 | 1387 |
Texas | 62 | 476 | 495 | 461 | 1432 |
South Carolina | 64.9 | 488 | 490 | 465 | 1443 |
Georgia | 77.2 | 488 | 485 | 472 | 1445 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
Nevada | 54.2 | 495 | 494 | 469 | 1458 |
Hawaii | 62.6 | 484 | 504 | 472 | 1460 |
... (41 rows omitted)
# By default, descending is False
sat.sort('Combined', descending = True)
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
Illinois | 4.6 | 599 | 616 | 587 | 1802 |
Iowa | 3.1 | 605 | 611 | 578 | 1794 |
South Dakota | 2.9 | 604 | 609 | 579 | 1792 |
Minnesota | 5.9 | 598 | 610 | 578 | 1786 |
Michigan | 3.8 | 593 | 610 | 581 | 1784 |
Wisconsin | 3.9 | 596 | 608 | 578 | 1782 |
Missouri | 4.2 | 595 | 597 | 579 | 1771 |
Wyoming | 3.3 | 590 | 599 | 573 | 1762 |
Kansas | 5.3 | 591 | 596 | 566 | 1753 |
... (41 rows omitted)
We can sort by any column:
sat.sort('Participation Rate')
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
South Dakota | 2.9 | 604 | 609 | 579 | 1792 |
Iowa | 3.1 | 605 | 611 | 578 | 1794 |
Mississippi | 3.2 | 583 | 566 | 565 | 1714 |
Wyoming | 3.3 | 590 | 599 | 573 | 1762 |
Nebraska | 3.7 | 589 | 587 | 569 | 1745 |
Michigan | 3.8 | 593 | 610 | 581 | 1784 |
Wisconsin | 3.9 | 596 | 608 | 578 | 1782 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
Missouri | 4.2 | 595 | 597 | 579 | 1771 |
... (41 rows omitted)
sat.sort('Math', descending = True)
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
Illinois | 4.6 | 599 | 616 | 587 | 1802 |
Iowa | 3.1 | 605 | 611 | 578 | 1794 |
Michigan | 3.8 | 593 | 610 | 581 | 1784 |
Minnesota | 5.9 | 598 | 610 | 578 | 1786 |
South Dakota | 2.9 | 604 | 609 | 579 | 1792 |
Wisconsin | 3.9 | 596 | 608 | 578 | 1782 |
Wyoming | 3.3 | 590 | 599 | 573 | 1762 |
Missouri | 4.2 | 595 | 597 | 579 | 1771 |
Kansas | 5.3 | 591 | 596 | 566 | 1753 |
... (41 rows omitted)
# Can also sort alphabetical columns; in this case the table was sorted by state name by default
sat.sort('State')
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
... (41 rows omitted)
take
¶sat
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
... (41 rows omitted)
sat.take(2)
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
sat.take(np.array([1, 4, 3]))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
np.arange(5)
array([0, 1, 2, 3, 4])
sat.take(np.arange(7))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
When we combine sort
and take
, we can get some pretty powerful answers.
What are the top 5 states according to math scores?
sat.sort('Combined', descending = True).take(np.arange(5))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
Illinois | 4.6 | 599 | 616 | 587 | 1802 |
Iowa | 3.1 | 605 | 611 | 578 | 1794 |
South Dakota | 2.9 | 604 | 609 | 579 | 1792 |
Minnesota | 5.9 | 598 | 610 | 578 | 1786 |
What are the top 8 states in terms of participation?
sat.sort('Participation Rate', descending = True).take(np.arange(8))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Idaho | 100 | 458 | 456 | 450 | 1364 |
Maine | 95.6 | 467 | 471 | 449 | 1387 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Massachusetts | 84.1 | 516 | 531 | 509 | 1556 |
New Jersey | 79.3 | 501 | 523 | 502 | 1526 |
Maryland | 78.5 | 492 | 495 | 481 | 1468 |
Note: .take
works on arrays too, not just tables!
sat.column('State').take(np.arange(5))
array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California'], dtype='<U20')
sat.take(np.arange(5)).column('State')
array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California'], dtype='<U20')
animals = Table.read_table('data/animals.csv')
animals
brainwt | bodywt | animal |
---|---|---|
3.385 | 44.5 | Arctic_fox |
0.48 | 15.499 | Owl_monkey |
1.35 | 8.1 | Beaver |
464.983 | 423.012 | Cow |
36.328 | 119.498 | Gray_wolf |
27.66 | 114.996 | Goat |
14.831 | 98.199 | Roe_deer |
1.04 | 5.5 | Guinea_pig |
4.19 | 57.998 | Vervet |
0.425 | 6.4 | Chinchilla |
... (52 rows omitted)
animals.sort('bodywt').column('animal').take(np.arange(5))
array(['Lesser_short-tailed-shrew', 'Little_brown_bat', 'Big_brown_bat', 'Musk_shrew', 'Mouse'], dtype='<U25')
where
¶sat
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
... (41 rows omitted)
sat.where('Combined', are.above(1800))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Illinois | 4.6 | 599 | 616 | 587 | 1802 |
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
sat.where('State', are.equal_to('California'))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
California | 60.3 | 498 | 510 | 496 | 1504 |
sat.where('State', are.containing('Dakota'))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
North Dakota | 2.3 | 612 | 620 | 584 | 1816 |
South Dakota | 2.9 | 604 | 609 | 579 | 1792 |
sat.where('Math', are.between(580, 600))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Kansas | 5.3 | 591 | 596 | 566 | 1753 |
Kentucky | 4.6 | 589 | 585 | 572 | 1746 |
Missouri | 4.2 | 595 | 597 | 579 | 1771 |
Nebraska | 3.7 | 589 | 587 | 569 | 1745 |
Wyoming | 3.3 | 590 | 599 | 573 | 1762 |
are.equal_to
¶sat.where('State', are.equal_to('Pennsylvania'))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Pennsylvania | 71.4 | 497 | 504 | 480 | 1481 |
sat.where('State', 'New York')
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
New York | 76.3 | 488 | 502 | 478 | 1468 |
sat
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Alaska | 54.2 | 507 | 503 | 475 | 1485 |
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Colorado | 14.3 | 582 | 586 | 567 | 1735 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Delaware | 100 | 456 | 459 | 444 | 1359 |
District of Columbia | 100 | 440 | 438 | 431 | 1309 |
Florida | 72.2 | 491 | 485 | 472 | 1448 |
... (41 rows omitted)
sat.where('Participation Rate', are.above(20)).where('Combined', are.above(1500))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Arizona | 36.4 | 522 | 525 | 500 | 1547 |
California | 60.3 | 498 | 510 | 496 | 1504 |
Connecticut | 88.4 | 507 | 510 | 508 | 1525 |
Massachusetts | 84.1 | 516 | 531 | 509 | 1556 |
New Hampshire | 70.3 | 524 | 530 | 512 | 1566 |
New Jersey | 79.3 | 501 | 523 | 502 | 1526 |
Oregon | 47.9 | 523 | 522 | 499 | 1544 |
Vermont | 63.1 | 522 | 525 | 507 | 1554 |
Virginia | 73.1 | 518 | 515 | 497 | 1530 |
Washington | 63.1 | 510 | 518 | 491 | 1519 |
sat.where('Participation Rate', are.below(10)).where('Combined', are.above(1600))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Arkansas | 4.2 | 573 | 571 | 554 | 1698 |
Illinois | 4.6 | 599 | 616 | 587 | 1802 |
Iowa | 3.1 | 605 | 611 | 578 | 1794 |
Kansas | 5.3 | 591 | 596 | 566 | 1753 |
Kentucky | 4.6 | 589 | 585 | 572 | 1746 |
Louisiana | 4.6 | 561 | 556 | 550 | 1667 |
Michigan | 3.8 | 593 | 610 | 581 | 1784 |
Minnesota | 5.9 | 598 | 610 | 578 | 1786 |
Mississippi | 3.2 | 583 | 566 | 565 | 1714 |
... (9 rows omitted)
deep_south = np.array(['Alabama', 'Georgia', 'Louisiana', 'Mississippi', 'South Carolina'])
sat.where('State', are.contained_in(deep_south))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Georgia | 77.2 | 488 | 485 | 472 | 1445 |
Louisiana | 4.6 | 561 | 556 | 550 | 1667 |
Mississippi | 3.2 | 583 | 566 | 565 | 1714 |
South Carolina | 64.9 | 488 | 490 | 465 | 1443 |
sat.where('State', are.contained_in(deep_south)) \
.where('Participation Rate', are.below(10)) \
.where('Combined', are.above(1600))
State | Participation Rate | Critical Reading | Math | Writing | Combined |
---|---|---|---|---|---|
Alabama | 6.7 | 547 | 538 | 532 | 1617 |
Louisiana | 4.6 | 561 | 556 | 550 | 1667 |
Mississippi | 3.2 | 583 | 566 | 565 | 1714 |
px.scatter(data_frame = sat.to_df(),
x = 'Combined',
y = 'Participation Rate',
hover_data = {'State': True},
title = 'Participation Rate vs. Combined SAT Score for States in 2014')
wnba = Table.read_table('data/wnba-2020.csv').select('Player', 'Tm', 'Pos', 'G', 'PTS')
wnba
Player | Tm | Pos | G | PTS |
---|---|---|---|---|
Natalie Achonwa | IND | F | 18 | 140 |
Jaylyn Agnew | ATL | F | 12 | 15 |
Bella Alarie | DAL | C-F | 22 | 59 |
Kayla Alexander | MIN | C | 16 | 37 |
Julie Allemand | IND | G | 22 | 188 |
Lindsay Allen | LVA | G | 21 | 70 |
Kristine Anigwe | LAS | F-C | 17 | 78 |
Ariel Atkins | WAS | G | 22 | 326 |
Seimone Augustus | LAS | G-F | 21 | 124 |
Rachel Banham | MIN | G | 20 | 137 |
... (152 rows omitted)
wnba.where('Pos', 'F') \
.where('G', are.above_or_equal_to(15)) \
.column('PTS').mean()
182.44444444444446