from datascience import *
import numpy as np
join
ing¶phones = Table().with_columns(
'Model', np.array(['iPhone 12', 'iPhone 12 Pro Max', 'Samsung Galaxy S21', 'OnePlus 8']),
'Price', np.array([799, 1099, 799, 699]),
'Screen Size', np.array([6.1, 6.7, 6.2, 6.6])
)
inventory = Table().with_columns(
'Handset', np.array(['Samsung Galaxy S21', 'iPhone 12', 'iPhone 12', 'OnePlus 8', 'Pixel 5']),
'Units', np.array([50, 40, 10, 100, 25]),
'Store', np.array(['Berkeley', 'Berkeley', 'San Francisco', 'Oakland', 'Oakland'])
)
phones
Model | Price | Screen Size |
---|---|---|
iPhone 12 | 799 | 6.1 |
iPhone 12 Pro Max | 1099 | 6.7 |
Samsung Galaxy S21 | 799 | 6.2 |
OnePlus 8 | 699 | 6.6 |
inventory
Handset | Units | Store |
---|---|---|
Samsung Galaxy S21 | 50 | Berkeley |
iPhone 12 | 40 | Berkeley |
iPhone 12 | 10 | San Francisco |
OnePlus 8 | 100 | Oakland |
Pixel 5 | 25 | Oakland |
phones.join('Model', inventory, 'Handset')
Model | Price | Screen Size | Units | Store |
---|---|---|---|---|
OnePlus 8 | 699 | 6.6 | 100 | Oakland |
Samsung Galaxy S21 | 799 | 6.2 | 50 | Berkeley |
iPhone 12 | 799 | 6.1 | 40 | Berkeley |
iPhone 12 | 799 | 6.1 | 10 | San Francisco |
inventory.join('Handset', phones, 'Model')
Handset | Units | Store | Price | Screen Size |
---|---|---|---|---|
OnePlus 8 | 100 | Oakland | 699 | 6.6 |
Samsung Galaxy S21 | 50 | Berkeley | 799 | 6.2 |
iPhone 12 | 40 | Berkeley | 799 | 6.1 |
iPhone 12 | 10 | San Francisco | 799 | 6.1 |
store = phones.join('Model', inventory, 'Handset')
store
Model | Price | Screen Size | Units | Store |
---|---|---|---|---|
OnePlus 8 | 699 | 6.6 | 100 | Oakland |
Samsung Galaxy S21 | 799 | 6.2 | 50 | Berkeley |
iPhone 12 | 799 | 6.1 | 40 | Berkeley |
iPhone 12 | 799 | 6.1 | 10 | San Francisco |
store.column('Price') * store.column('Units')
array([69900, 39950, 31960, 7990])
# Total value of all of the phones in my inventory (that I know the price of)
np.sum(store.column('Price') * store.column('Units'))
149800
# Equivalent to the above
np.dot(store.column('Price'), store.column('Units'))
149800
contacts = Table().with_columns(
'Name', np.array(['Roxanne', 'Sandy', 'Stan', 'Tomas', 'Wilma']),
'Email', np.array(['roxanne@berkeley.edu', 'sandy@nyu.edu', 'stan.vg@gmail.com', 'tomastrain@umich.edu', 'wilma@columbia.edu']),
'Area Code', np.array([510, 212, 734, 734, 212]),
)
codes = Table().with_columns(
'Code', np.array([212, 310, 519, 734]),
'Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit'])
)
contacts
Name | Area Code | |
---|---|---|
Roxanne | roxanne@berkeley.edu | 510 |
Sandy | sandy@nyu.edu | 212 |
Stan | stan.vg@gmail.com | 734 |
Tomas | tomastrain@umich.edu | 734 |
Wilma | wilma@columbia.edu | 212 |
codes
Code | Region |
---|---|
212 | New York City |
310 | Los Angeles |
519 | Ontario, Canada |
734 | Metro Detroit |
contacts.join("Area Code", codes, "Code")
Area Code | Name | Region | |
---|---|---|---|
212 | Sandy | sandy@nyu.edu | New York City |
212 | Wilma | wilma@columbia.edu | New York City |
734 | Stan | stan.vg@gmail.com | Metro Detroit |
734 | Tomas | tomastrain@umich.edu | Metro Detroit |
extra_codes = Table().with_columns(
'Code', np.array([212, 212, 519, 734]),
'Region', np.array(['New York City', 'Los Angeles', 'Ontario, Canada', 'Metro Detroit'])
)
contacts
Name | Area Code | |
---|---|---|
Roxanne | roxanne@berkeley.edu | 510 |
Sandy | sandy@nyu.edu | 212 |
Stan | stan.vg@gmail.com | 734 |
Tomas | tomastrain@umich.edu | 734 |
Wilma | wilma@columbia.edu | 212 |
extra_codes
Code | Region |
---|---|
212 | New York City |
212 | Los Angeles |
519 | Ontario, Canada |
734 | Metro Detroit |
contacts.join('Area Code', extra_codes, 'Code')
Area Code | Name | Region | |
---|---|---|---|
212 | Sandy | sandy@nyu.edu | New York City |
212 | Sandy | sandy@nyu.edu | Los Angeles |
212 | Wilma | wilma@columbia.edu | New York City |
212 | Wilma | wilma@columbia.edu | Los Angeles |
734 | Stan | stan.vg@gmail.com | Metro Detroit |
734 | Tomas | tomastrain@umich.edu | Metro Detroit |
# No output – because there are no matches between
# the 'Name' column in contacts and the 'Code' column in codes
contacts.join('Name', codes, 'Code')
roster = Table.read_table('data/roster.csv')
grades = Table.read_table('data/grades.csv')
roster.show()
Name | SID | Grading Option | |
---|---|---|---|
Montrezl Harrell | trez15@lakers.com | 11259 | PNP |
LeBron James | kingjames@berkeley.edu | 29314 | GRD |
Kyle Kuzma | kuz@utah.edu | 29319 | GRD |
Marc Gasol | gasol@spain.co.es | 34892 | GRD |
Dennis Schroder | dennis@okc.com | 40561 | PNP |
Kentavious Caldwell-Pope | kc3p@pistons.ca | 40915 | PNP |
Talen Horton-Tucker | tht@stanford.edu | 44003 | GRD |
Markieff Morris | kieff@clippers.co.uk | 77143 | GRD |
Wesley Matthews | wesmath@mavs.dallas.com | 81920 | GRD |
Alex Caruso | carugoat@gov.on.ca | 83212 | GRD |
Anthony Davis | AD_brow@pels.edu | 85002 | PNP |
Jared Dudley | jdudz3@berkeley.edu | 85923 | GRD |
grades.show()
SID | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final |
---|---|---|---|---|---|
29314 | 24 | 18 | 98 | 63 | 88 |
44003 | 23 | 17 | 87 | 67 | 82 |
83212 | 23 | 9 | 88 | 58 | 19 |
29319 | 23 | 10 | 65 | 62.5 | 45 |
55510 | 23 | 15 | 78 | 63.5 | 56 |
98102 | 23 | 16 | 82 | 55 | 90 |
34892 | 22 | 17 | 83 | 21 | 90 |
40561 | 24 | 17 | 89 | 70 | 82.5 |
85923 | 21 | 18 | 92 | 70 | 78.25 |
85002 | 20 | 18 | 89.5 | 53 | 80 |
11259 | 18 | 17.5 | 88 | 64 | 78 |
81920 | 23 | 16 | 42 | 65 | 79 |
77143 | 24 | 15 | 90 | 61 | 90 |
roster.num_rows
12
grades.num_rows
13
grades_merged = roster.join('SID', grades)
grades_merged
SID | Name | Grading Option | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final | |
---|---|---|---|---|---|---|---|---|
11259 | Montrezl Harrell | trez15@lakers.com | PNP | 18 | 17.5 | 88 | 64 | 78 |
29314 | LeBron James | kingjames@berkeley.edu | GRD | 24 | 18 | 98 | 63 | 88 |
29319 | Kyle Kuzma | kuz@utah.edu | GRD | 23 | 10 | 65 | 62.5 | 45 |
34892 | Marc Gasol | gasol@spain.co.es | GRD | 22 | 17 | 83 | 21 | 90 |
40561 | Dennis Schroder | dennis@okc.com | PNP | 24 | 17 | 89 | 70 | 82.5 |
44003 | Talen Horton-Tucker | tht@stanford.edu | GRD | 23 | 17 | 87 | 67 | 82 |
77143 | Markieff Morris | kieff@clippers.co.uk | GRD | 24 | 15 | 90 | 61 | 90 |
81920 | Wesley Matthews | wesmath@mavs.dallas.com | GRD | 23 | 16 | 42 | 65 | 79 |
83212 | Alex Caruso | carugoat@gov.on.ca | GRD | 23 | 9 | 88 | 58 | 19 |
85002 | Anthony Davis | AD_brow@pels.edu | PNP | 20 | 18 | 89.5 | 53 | 80 |
... (1 rows omitted)
grades_merged.num_rows
11
Let's see if there are any students in the roster who we don't have grades for:
for sid in roster.column('SID'):
if sid not in grades.column('SID'):
display(roster.where('SID', sid))
Name | SID | Grading Option | |
---|---|---|---|
Kentavious Caldwell-Pope | kc3p@pistons.ca | 40915 | PNP |
And vice versa:
for sid in grades.column('SID'):
if sid not in roster.column('SID'):
display(grades.where('SID', sid))
SID | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final |
---|---|---|---|---|---|
55510 | 23 | 15 | 78 | 63.5 | 56 |
SID | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final |
---|---|---|---|---|---|
98102 | 23 | 16 | 82 | 55 | 90 |
Whoever the above students are, they're not in our roster. (This happens often when students drop a class.)
Now we can do some grade calculations:
grades_merged
SID | Name | Grading Option | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final | |
---|---|---|---|---|---|---|---|---|
11259 | Montrezl Harrell | trez15@lakers.com | PNP | 18 | 17.5 | 88 | 64 | 78 |
29314 | LeBron James | kingjames@berkeley.edu | GRD | 24 | 18 | 98 | 63 | 88 |
29319 | Kyle Kuzma | kuz@utah.edu | GRD | 23 | 10 | 65 | 62.5 | 45 |
34892 | Marc Gasol | gasol@spain.co.es | GRD | 22 | 17 | 83 | 21 | 90 |
40561 | Dennis Schroder | dennis@okc.com | PNP | 24 | 17 | 89 | 70 | 82.5 |
44003 | Talen Horton-Tucker | tht@stanford.edu | GRD | 23 | 17 | 87 | 67 | 82 |
77143 | Markieff Morris | kieff@clippers.co.uk | GRD | 24 | 15 | 90 | 61 | 90 |
81920 | Wesley Matthews | wesmath@mavs.dallas.com | GRD | 23 | 16 | 42 | 65 | 79 |
83212 | Alex Caruso | carugoat@gov.on.ca | GRD | 23 | 9 | 88 | 58 | 19 |
85002 | Anthony Davis | AD_brow@pels.edu | PNP | 20 | 18 | 89.5 | 53 | 80 |
... (1 rows omitted)
assignment_totals = {
'Homework 1': 24,
'Homework 2': 18,
'Quiz 1': 100,
'Quiz 2': 70,
'Final': 90
}
# Divides each assignment column by its denominator in assignment_totals
for assignment in assignment_totals.keys():
grades_merged = grades_merged.with_columns(
assignment + ' Percentage', grades_merged.column(assignment) / assignment_totals[assignment]
)
grades_merged
SID | Name | Grading Option | Homework 1 | Homework 2 | Quiz 1 | Quiz 2 | Final | Homework 1 Percentage | Homework 2 Percentage | Quiz 1 Percentage | Quiz 2 Percentage | Final Percentage | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11259 | Montrezl Harrell | trez15@lakers.com | PNP | 18 | 17.5 | 88 | 64 | 78 | 0.75 | 0.972222 | 0.88 | 0.914286 | 0.866667 |
29314 | LeBron James | kingjames@berkeley.edu | GRD | 24 | 18 | 98 | 63 | 88 | 1 | 1 | 0.98 | 0.9 | 0.977778 |
29319 | Kyle Kuzma | kuz@utah.edu | GRD | 23 | 10 | 65 | 62.5 | 45 | 0.958333 | 0.555556 | 0.65 | 0.892857 | 0.5 |
34892 | Marc Gasol | gasol@spain.co.es | GRD | 22 | 17 | 83 | 21 | 90 | 0.916667 | 0.944444 | 0.83 | 0.3 | 1 |
40561 | Dennis Schroder | dennis@okc.com | PNP | 24 | 17 | 89 | 70 | 82.5 | 1 | 0.944444 | 0.89 | 1 | 0.916667 |
44003 | Talen Horton-Tucker | tht@stanford.edu | GRD | 23 | 17 | 87 | 67 | 82 | 0.958333 | 0.944444 | 0.87 | 0.957143 | 0.911111 |
77143 | Markieff Morris | kieff@clippers.co.uk | GRD | 24 | 15 | 90 | 61 | 90 | 1 | 0.833333 | 0.9 | 0.871429 | 1 |
81920 | Wesley Matthews | wesmath@mavs.dallas.com | GRD | 23 | 16 | 42 | 65 | 79 | 0.958333 | 0.888889 | 0.42 | 0.928571 | 0.877778 |
83212 | Alex Caruso | carugoat@gov.on.ca | GRD | 23 | 9 | 88 | 58 | 19 | 0.958333 | 0.5 | 0.88 | 0.828571 | 0.211111 |
85002 | Anthony Davis | AD_brow@pels.edu | PNP | 20 | 18 | 89.5 | 53 | 80 | 0.833333 | 1 | 0.895 | 0.757143 | 0.888889 |
... (1 rows omitted)
.row
¶phones
Model | Price | Screen Size |
---|---|---|
iPhone 12 | 799 | 6.1 |
iPhone 12 Pro Max | 1099 | 6.7 |
Samsung Galaxy S21 | 799 | 6.2 |
OnePlus 8 | 699 | 6.6 |
phones.row(1)
Row(Model='iPhone 12 Pro Max', Price=1099, Screen Size=6.7)
type(phones.row(1))
datascience.tables.Row
phones.row(1).item(1)
1099
list(phones.row(-1))
['OnePlus 8', 699, 6.6]
.with_rows
¶phones
Model | Price | Screen Size |
---|---|---|
iPhone 12 | 799 | 6.1 |
iPhone 12 Pro Max | 1099 | 6.7 |
Samsung Galaxy S21 | 799 | 6.2 |
OnePlus 8 | 699 | 6.6 |
phones.with_row(['iPhone 12 Mini', 699, 5.8])
Model | Price | Screen Size |
---|---|---|
iPhone 12 | 799 | 6.1 |
iPhone 12 Pro Max | 1099 | 6.7 |
Samsung Galaxy S21 | 799 | 6.2 |
OnePlus 8 | 699 | 6.6 |
iPhone 12 Mini | 699 | 5.8 |
phones.with_rows([['iPhone 12 Mini', 699, 5.8],
['Moto RAZR', 459, 3.5]])
Model | Price | Screen Size |
---|---|---|
iPhone 12 | 799 | 6.1 |
iPhone 12 Pro Max | 1099 | 6.7 |
Samsung Galaxy S21 | 799 | 6.2 |
OnePlus 8 | 699 | 6.6 |
iPhone 12 Mini | 699 | 5.8 |
Moto RAZR | 459 | 3.5 |
codes.with_row([907, 'Alaska']).with_columns('Primary Network', np.array(['AT&T', 'T-Mobile', 'Bell', 'Verizon', 'T-Mobile']))
Code | Region | Primary Network |
---|---|---|
212 | New York City | AT&T |
310 | Los Angeles | T-Mobile |
519 | Ontario, Canada | Bell |
734 | Metro Detroit | Verizon |
907 | Alaska | T-Mobile |