from datascience import * # datascience has plotting features built in
import numpy as np
Table.interactive_plots()
schools = Table.read_table('data/r1_with_students.csv')
schools
University | Number_students | Score_Result | Control | City | State |
---|---|---|---|---|---|
Auburn University | 26641 | 33.4 | Public | Auburn | AL |
Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA |
Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA |
Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA |
Brown University | 9391 | 70 | Private (non-profit) | Providence | RI |
California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA |
Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA |
Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH |
Clemson University | 21436 | 30.7 | Public | Clemson | SC |
Columbia University | 26586 | 87 | Private (non-profit) | New York | NY |
... (86 rows omitted)
schools.group('Control')
Control | count |
---|---|
Private (non-profit) | 36 |
Public | 60 |
schools.group('Control').barh('Control')
streams = Table.read_table('data/regional-global-daily-latest.csv', header = 1)
top_10 = streams.select('Track Name', 'Streams').take(np.arange(10))
top_10
Track Name | Streams |
---|---|
Peaches (feat. Daniel Caesar & Giveon) | 7167120 |
Astronaut In The Ocean | 4525731 |
drivers license | 4156025 |
telepatía | 4105827 |
Save Your Tears | 3963053 |
Hold On | 3636991 |
Leave The Door Open | 3562490 |
Heartbreak Anniversary | 3253686 |
Blinding Lights | 3246141 |
DÁKITI | 3141723 |
top_10.barh('Track Name')
streams
Position | Track Name | Artist | Streams | URL |
---|---|---|---|---|
1 | Peaches (feat. Daniel Caesar & Giveon) | Justin Bieber | 7167120 | https://open.spotify.com/track/4iJyoBOLtHqaGxP12qzhQI |
2 | Astronaut In The Ocean | Masked Wolf | 4525731 | https://open.spotify.com/track/3VT8hOC5vuDXBsHrR53WFh |
3 | drivers license | Olivia Rodrigo | 4156025 | https://open.spotify.com/track/7lPN2DXiMsVn7XUKtOW1CS |
4 | telepatía | Kali Uchis | 4105827 | https://open.spotify.com/track/6tDDoYIxWvMLTdKpjFkc1B |
5 | Save Your Tears | The Weeknd | 3963053 | https://open.spotify.com/track/5QO79kh1waicV47BqGRL3g |
6 | Hold On | Justin Bieber | 3636991 | https://open.spotify.com/track/1nahzW3kfMuwReTka28tH5 |
7 | Leave The Door Open | Bruno Mars | 3562490 | https://open.spotify.com/track/7MAibcTli4IisCtbHKrGMh |
8 | Heartbreak Anniversary | Giveon | 3253686 | https://open.spotify.com/track/3FAJ6O0NOHQV8Mc5Ri6ENp |
9 | Blinding Lights | The Weeknd | 3246141 | https://open.spotify.com/track/0VjIjW4GlUZAMYd2vXMi3b |
10 | DÁKITI | Bad Bunny | 3141723 | https://open.spotify.com/track/4MzXwWMhyBbmu6hOcLVD49 |
... (190 rows omitted)
streams.group('Artist') \
.sort('count', descending = True) \
.where('count', are.above(2))
Artist | count |
---|---|
Justin Bieber | 16 |
Juice WRLD | 5 |
The Weeknd | 5 |
Billie Eilish | 4 |
Drake | 4 |
Dua Lipa | 4 |
Harry Styles | 4 |
KAROL G | 4 |
Pop Smoke | 4 |
Travis Scott | 4 |
... (5 rows omitted)
streams.group('Artist') \
.sort('count', descending = True) \
.where('count', are.above(2)) \
.barh('Artist')
schools
University | Number_students | Score_Result | Control | City | State |
---|---|---|---|---|---|
Auburn University | 26641 | 33.4 | Public | Auburn | AL |
Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA |
Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA |
Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA |
Brown University | 9391 | 70 | Private (non-profit) | Providence | RI |
California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA |
Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA |
Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH |
Clemson University | 21436 | 30.7 | Public | Clemson | SC |
Columbia University | 26586 | 87 | Private (non-profit) | New York | NY |
... (86 rows omitted)
schools.select('University', 'Number_students') \
.sort('Number_students', descending = True) \
.take(np.arange(15))
University | Number_students |
---|---|
University of Minnesota | 61120 |
Texas A&M University | 60818 |
University of Central Florida | 56228 |
University of Texas at Austin | 49165 |
University of Florida | 46642 |
University of Washington | 45692 |
Michigan State University | 44789 |
New York University | 44466 |
University of Houston | 44128 |
University of California, Berkeley | 41081 |
... (5 rows omitted)
schools.select('University', 'Number_students') \
.sort('Number_students', descending = True) \
.take(np.arange(15)) \
.barh('University')
schools
University | Number_students | Score_Result | Control | City | State |
---|---|---|---|---|---|
Auburn University | 26641 | 33.4 | Public | Auburn | AL |
Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA |
Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA |
Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA |
Brown University | 9391 | 70 | Private (non-profit) | Providence | RI |
California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA |
Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA |
Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH |
Clemson University | 21436 | 30.7 | Public | Clemson | SC |
Columbia University | 26586 | 87 | Private (non-profit) | New York | NY |
... (86 rows omitted)
schools.where('State', are.contained_in(['CA', 'TX', 'FL', 'NY', 'PA'])) \
.group('State', np.mean) \
.select('State', 'Number_students mean') \
.barh('State')
schools.take(np.arange(5)).barh('Control')
# Run this cell.
def remove_comma(s):
return int(s.replace(',', ''))
nominal = Table.read_table('data/gdp-nominal.csv')
ppp = Table.read_table('data/gdp-ppp.csv').drop(3)
gdp = nominal.join('Country/Territory', ppp) \
.drop(1, 3) \
.relabeled(['GDP(US$million)', 'GDP(millions of current Int$)'], ['GDP Nominal', 'GDP PPP'])
gdp = gdp.with_columns(
'GDP Nominal', gdp.apply(remove_comma, 'GDP Nominal'),
'GDP PPP', gdp.apply(remove_comma, 'GDP PPP')
)
gdp = gdp.sort('GDP Nominal', descending = True)
gdp
Country/Territory | GDP Nominal | GDP PPP |
---|---|---|
United States | 20807269 | 20807269 |
China | 14860775 | 24162435 |
Japan | 4910580 | 5236138 |
Germany | 3780553 | 4454498 |
United Kingdom | 2638296 | 2978564 |
India | 2592583 | 8681303 |
France | 2551451 | 2954196 |
Italy | 1848222 | 2415410 |
Canada | 1600264 | 1808995 |
South Korea | 1586786 | 2293475 |
... (171 rows omitted)
gdp.select('Country/Territory', 'GDP Nominal') \
.take(np.arange(15)) \
.barh('Country/Territory')
gdp.select('Country/Territory', 'GDP PPP') \
.take(np.arange(15)) \
.barh('Country/Territory')
gdp
Country/Territory | GDP Nominal | GDP PPP |
---|---|---|
United States | 20807269 | 20807269 |
China | 14860775 | 24162435 |
Japan | 4910580 | 5236138 |
Germany | 3780553 | 4454498 |
United Kingdom | 2638296 | 2978564 |
India | 2592583 | 8681303 |
France | 2551451 | 2954196 |
Italy | 1848222 | 2415410 |
Canada | 1600264 | 1808995 |
South Korea | 1586786 | 2293475 |
... (171 rows omitted)
gdp.take(np.arange(15)).barh('Country/Territory')
We can sort by GDP PPP, too:
gdp.sort('GDP PPP', descending = True).take(np.arange(15)).barh('Country/Territory')
Another example:
schools
University | Number_students | Score_Result | Control | City | State |
---|---|---|---|---|---|
Auburn University | 26641 | 33.4 | Public | Auburn | AL |
Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA |
Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA |
Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA |
Brown University | 9391 | 70 | Private (non-profit) | Providence | RI |
California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA |
Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA |
Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH |
Clemson University | 21436 | 30.7 | Public | Clemson | SC |
Columbia University | 26586 | 87 | Private (non-profit) | New York | NY |
... (86 rows omitted)
schools.pivot('Control', 'State')
/opt/conda/lib/python3.8/site-packages/datascience/tables.py:920: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
State | Private (non-profit) | Public |
---|---|---|
AL | 0 | 3 |
AR | 0 | 1 |
AZ | 0 | 1 |
CA | 3 | 8 |
CO | 0 | 1 |
CT | 1 | 1 |
DC | 2 | 0 |
DE | 0 | 1 |
FL | 1 | 4 |
GA | 1 | 3 |
... (29 rows omitted)
schools.pivot('Control', 'State') \
.where('Private (non-profit)', are.above(0)) \
.where('Public', are.above(0))
State | Private (non-profit) | Public |
---|---|---|
CA | 3 | 8 |
CT | 1 | 1 |
FL | 1 | 4 |
GA | 1 | 3 |
IL | 2 | 1 |
MD | 1 | 1 |
NC | 1 | 2 |
NJ | 1 | 1 |
NY | 6 | 2 |
OH | 1 | 1 |
... (2 rows omitted)
schools.pivot('Control', 'State') \
.where('Private (non-profit)', are.above(0)) \
.where('Public', are.above(0)) \
.barh('State')
schools.pivot('Control', 'State') \
.where('Private (non-profit)', are.above(0)) \
.where('Public', are.above(0)) \
.barh('State')
schools.pivot('Control', 'State') \
.where('Private (non-profit)', are.above(0)) \
.where('Public', are.above(0)) \
.barh('State', xaxis_title = 'Number of Universities',
title = 'Number of Private and Public R1 Universities in Each State',
width = 700,
height = 700)
top_gdp = gdp.take(np.arange(7))
top_gdp
Country/Territory | GDP Nominal | GDP PPP |
---|---|---|
United States | 20807269 | 20807269 |
China | 14860775 | 24162435 |
Japan | 4910580 | 5236138 |
Germany | 3780553 | 4454498 |
United Kingdom | 2638296 | 2978564 |
India | 2592583 | 8681303 |
France | 2551451 | 2954196 |
top_gdp.barh('Country/Territory', width = 800, height = 400,
xaxis_title = 'GDP (Millions of USD)', yaxis_title = 'Country',
title = 'The top 7 ranked countries by GDP (Nominal) in 2020')