from datascience import * # datascience has plotting features built in
import numpy as np
Table.interactive_plots() 
schools = Table.read_table('data/r1_with_students.csv')
schools
| University | Number_students | Score_Result | Control | City | State | 
|---|---|---|---|---|---|
| Auburn University | 26641 | 33.4 | Public | Auburn | AL | 
| Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA | 
| Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA | 
| Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA | 
| Brown University | 9391 | 70 | Private (non-profit) | Providence | RI | 
| California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA | 
| Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA | 
| Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH | 
| Clemson University | 21436 | 30.7 | Public | Clemson | SC | 
| Columbia University | 26586 | 87 | Private (non-profit) | New York | NY | 
... (86 rows omitted)
schools.group('Control')
| Control | count | 
|---|---|
| Private (non-profit) | 36 | 
| Public | 60 | 
schools.group('Control').barh('Control')
streams = Table.read_table('data/regional-global-daily-latest.csv', header = 1)
top_10 = streams.select('Track Name', 'Streams').take(np.arange(10))
top_10
| Track Name | Streams | 
|---|---|
| Peaches (feat. Daniel Caesar & Giveon) | 7167120 | 
| Astronaut In The Ocean | 4525731 | 
| drivers license | 4156025 | 
| telepatía | 4105827 | 
| Save Your Tears | 3963053 | 
| Hold On | 3636991 | 
| Leave The Door Open | 3562490 | 
| Heartbreak Anniversary | 3253686 | 
| Blinding Lights | 3246141 | 
| DÁKITI | 3141723 | 
top_10.barh('Track Name')
streams
| Position | Track Name | Artist | Streams | URL | 
|---|---|---|---|---|
| 1 | Peaches (feat. Daniel Caesar & Giveon) | Justin Bieber | 7167120 | https://open.spotify.com/track/4iJyoBOLtHqaGxP12qzhQI | 
| 2 | Astronaut In The Ocean | Masked Wolf | 4525731 | https://open.spotify.com/track/3VT8hOC5vuDXBsHrR53WFh | 
| 3 | drivers license | Olivia Rodrigo | 4156025 | https://open.spotify.com/track/7lPN2DXiMsVn7XUKtOW1CS | 
| 4 | telepatía | Kali Uchis | 4105827 | https://open.spotify.com/track/6tDDoYIxWvMLTdKpjFkc1B | 
| 5 | Save Your Tears | The Weeknd | 3963053 | https://open.spotify.com/track/5QO79kh1waicV47BqGRL3g | 
| 6 | Hold On | Justin Bieber | 3636991 | https://open.spotify.com/track/1nahzW3kfMuwReTka28tH5 | 
| 7 | Leave The Door Open | Bruno Mars | 3562490 | https://open.spotify.com/track/7MAibcTli4IisCtbHKrGMh | 
| 8 | Heartbreak Anniversary | Giveon | 3253686 | https://open.spotify.com/track/3FAJ6O0NOHQV8Mc5Ri6ENp | 
| 9 | Blinding Lights | The Weeknd | 3246141 | https://open.spotify.com/track/0VjIjW4GlUZAMYd2vXMi3b | 
| 10 | DÁKITI | Bad Bunny | 3141723 | https://open.spotify.com/track/4MzXwWMhyBbmu6hOcLVD49 | 
... (190 rows omitted)
streams.group('Artist') \
       .sort('count', descending = True) \
       .where('count', are.above(2))
| Artist | count | 
|---|---|
| Justin Bieber | 16 | 
| Juice WRLD | 5 | 
| The Weeknd | 5 | 
| Billie Eilish | 4 | 
| Drake | 4 | 
| Dua Lipa | 4 | 
| Harry Styles | 4 | 
| KAROL G | 4 | 
| Pop Smoke | 4 | 
| Travis Scott | 4 | 
... (5 rows omitted)
streams.group('Artist') \
       .sort('count', descending = True) \
       .where('count', are.above(2)) \
       .barh('Artist')
schools
| University | Number_students | Score_Result | Control | City | State | 
|---|---|---|---|---|---|
| Auburn University | 26641 | 33.4 | Public | Auburn | AL | 
| Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA | 
| Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA | 
| Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA | 
| Brown University | 9391 | 70 | Private (non-profit) | Providence | RI | 
| California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA | 
| Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA | 
| Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH | 
| Clemson University | 21436 | 30.7 | Public | Clemson | SC | 
| Columbia University | 26586 | 87 | Private (non-profit) | New York | NY | 
... (86 rows omitted)
schools.select('University', 'Number_students') \
       .sort('Number_students', descending = True) \
       .take(np.arange(15))
| University | Number_students | 
|---|---|
| University of Minnesota | 61120 | 
| Texas A&M University | 60818 | 
| University of Central Florida | 56228 | 
| University of Texas at Austin | 49165 | 
| University of Florida | 46642 | 
| University of Washington | 45692 | 
| Michigan State University | 44789 | 
| New York University | 44466 | 
| University of Houston | 44128 | 
| University of California, Berkeley | 41081 | 
... (5 rows omitted)
schools.select('University', 'Number_students') \
       .sort('Number_students', descending = True) \
       .take(np.arange(15)) \
       .barh('University')
schools
| University | Number_students | Score_Result | Control | City | State | 
|---|---|---|---|---|---|
| Auburn University | 26641 | 33.4 | Public | Auburn | AL | 
| Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA | 
| Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA | 
| Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA | 
| Brown University | 9391 | 70 | Private (non-profit) | Providence | RI | 
| California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA | 
| Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA | 
| Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH | 
| Clemson University | 21436 | 30.7 | Public | Clemson | SC | 
| Columbia University | 26586 | 87 | Private (non-profit) | New York | NY | 
... (86 rows omitted)
schools.where('State', are.contained_in(['CA', 'TX', 'FL', 'NY', 'PA'])) \
       .group('State', np.mean) \
       .select('State', 'Number_students mean') \
       .barh('State')
schools.take(np.arange(5)).barh('Control')
# Run this cell.
def remove_comma(s):
    return int(s.replace(',', ''))
nominal = Table.read_table('data/gdp-nominal.csv')
ppp = Table.read_table('data/gdp-ppp.csv').drop(3)
gdp = nominal.join('Country/Territory', ppp) \
       .drop(1, 3) \
       .relabeled(['GDP(US$million)', 'GDP(millions of current Int$)'], ['GDP Nominal', 'GDP PPP'])
gdp = gdp.with_columns(
    'GDP Nominal', gdp.apply(remove_comma, 'GDP Nominal'),
    'GDP PPP', gdp.apply(remove_comma, 'GDP PPP')
)
gdp = gdp.sort('GDP Nominal', descending = True)
gdp
| Country/Territory | GDP Nominal | GDP PPP | 
|---|---|---|
| United States | 20807269 | 20807269 | 
| China | 14860775 | 24162435 | 
| Japan | 4910580 | 5236138 | 
| Germany | 3780553 | 4454498 | 
| United Kingdom | 2638296 | 2978564 | 
| India | 2592583 | 8681303 | 
| France | 2551451 | 2954196 | 
| Italy | 1848222 | 2415410 | 
| Canada | 1600264 | 1808995 | 
| South Korea | 1586786 | 2293475 | 
... (171 rows omitted)
gdp.select('Country/Territory', 'GDP Nominal') \
   .take(np.arange(15)) \
   .barh('Country/Territory')
gdp.select('Country/Territory', 'GDP PPP') \
   .take(np.arange(15)) \
   .barh('Country/Territory')
gdp
| Country/Territory | GDP Nominal | GDP PPP | 
|---|---|---|
| United States | 20807269 | 20807269 | 
| China | 14860775 | 24162435 | 
| Japan | 4910580 | 5236138 | 
| Germany | 3780553 | 4454498 | 
| United Kingdom | 2638296 | 2978564 | 
| India | 2592583 | 8681303 | 
| France | 2551451 | 2954196 | 
| Italy | 1848222 | 2415410 | 
| Canada | 1600264 | 1808995 | 
| South Korea | 1586786 | 2293475 | 
... (171 rows omitted)
gdp.take(np.arange(15)).barh('Country/Territory')
We can sort by GDP PPP, too:
gdp.sort('GDP PPP', descending = True).take(np.arange(15)).barh('Country/Territory')
Another example:
schools
| University | Number_students | Score_Result | Control | City | State | 
|---|---|---|---|---|---|
| Auburn University | 26641 | 33.4 | Public | Auburn | AL | 
| Boston College | 12904 | 45.9 | Private (non-profit) | Chestnut Hill | MA | 
| Boston University | 25662 | 68.4 | Private (non-profit) | Boston | MA | 
| Brandeis University | 5375 | 50.3 | Private (non-profit) | Waltham | MA | 
| Brown University | 9391 | 70 | Private (non-profit) | Providence | RI | 
| California Institute of Technology | 2240 | 94.5 | Private (non-profit) | Pasadena | CA | 
| Carnegie Mellon University | 13430 | 81.3 | Private (non-profit) | Pittsburgh | PA | 
| Case Western Reserve University | 10654 | 60 | Private (non-profit) | Cleveland | OH | 
| Clemson University | 21436 | 30.7 | Public | Clemson | SC | 
| Columbia University | 26586 | 87 | Private (non-profit) | New York | NY | 
... (86 rows omitted)
schools.pivot('Control', 'State')
/opt/conda/lib/python3.8/site-packages/datascience/tables.py:920: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
| State | Private (non-profit) | Public | 
|---|---|---|
| AL | 0 | 3 | 
| AR | 0 | 1 | 
| AZ | 0 | 1 | 
| CA | 3 | 8 | 
| CO | 0 | 1 | 
| CT | 1 | 1 | 
| DC | 2 | 0 | 
| DE | 0 | 1 | 
| FL | 1 | 4 | 
| GA | 1 | 3 | 
... (29 rows omitted)
schools.pivot('Control', 'State') \
       .where('Private (non-profit)', are.above(0)) \
       .where('Public', are.above(0))
| State | Private (non-profit) | Public | 
|---|---|---|
| CA | 3 | 8 | 
| CT | 1 | 1 | 
| FL | 1 | 4 | 
| GA | 1 | 3 | 
| IL | 2 | 1 | 
| MD | 1 | 1 | 
| NC | 1 | 2 | 
| NJ | 1 | 1 | 
| NY | 6 | 2 | 
| OH | 1 | 1 | 
... (2 rows omitted)
schools.pivot('Control', 'State') \
       .where('Private (non-profit)', are.above(0)) \
       .where('Public', are.above(0)) \
       .barh('State')
schools.pivot('Control', 'State') \
       .where('Private (non-profit)', are.above(0)) \
       .where('Public', are.above(0)) \
       .barh('State')
schools.pivot('Control', 'State') \
       .where('Private (non-profit)', are.above(0)) \
       .where('Public', are.above(0)) \
       .barh('State', xaxis_title = 'Number of Universities',
                      title = 'Number of Private and Public R1 Universities in Each State',
                      width = 700,
                      height = 700)
top_gdp = gdp.take(np.arange(7))
top_gdp
| Country/Territory | GDP Nominal | GDP PPP | 
|---|---|---|
| United States | 20807269 | 20807269 | 
| China | 14860775 | 24162435 | 
| Japan | 4910580 | 5236138 | 
| Germany | 3780553 | 4454498 | 
| United Kingdom | 2638296 | 2978564 | 
| India | 2592583 | 8681303 | 
| France | 2551451 | 2954196 | 
top_gdp.barh('Country/Territory', width = 800, height = 400,
             xaxis_title = 'GDP (Millions of USD)', yaxis_title = 'Country',
             title = 'The top 7 ranked countries by GDP (Nominal) in 2020')