from datascience import *
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px

Table.interactive_plots()


world = Table.read_table('data/World_University_Rank_2020.csv')


world


world.num_rows

1396


# Notice how we use apply here!
def remove_symbol(s):
    """Convert a formatted numeric string such as '20,664' or '41%' to an int.

    Every '%' and ',' character is dropped from ``s`` and the remaining text
    is handed to ``int``.
    """
    stripped = s.translate(str.maketrans('', '', '%,'))
    return int(stripped)


# Remember, the result of calling apply is an array
world.apply(remove_symbol, 'Number_students')

array([20664,  2240, 18978, ..., 15236, 17101,  9285])


# Convert the four text columns (values like '20,664' or '41%') to integers by
# applying remove_symbol to each one. The existing labels are reused, so each
# column is overwritten in place rather than added as a new column.
world = world.with_columns(
    'Number_students', world.apply(remove_symbol, 'Number_students'),
    'International_Students', world.apply(remove_symbol, 'International_Students'),
    'Percentage_Female', world.apply(remove_symbol, 'Percentage_Female'),
    'Percentage_Male', world.apply(remove_symbol, 'Percentage_Male')
)


world.sort('Percentage_Female')


world = world.relabeled('International_Students', '% International')


world


world.column('Number_students') * world.column('% International') / 100

array([8472.24,  672.  , 7021.86, ...,  457.08,    0.  ,  185.7 ])


# Estimated number of international students per university: enrollment times
# the international share (stored as a percentage), rounded to whole students.
num_international = np.round(
    world.column('Number_students') * world.column('% International') / 100,
    decimals=0,
)
num_international

array([8472.,  672., 7022., ...,  457.,    0.,  186.])


world = world.with_columns(
    '# International', num_international
)


# All universities, ordered by estimated number of international students
# (largest first).
(
    world
    .select('University', 'Country', 'Number_students', '% International', '# International')
    .sort('# International', descending=True)
)


# __(a)__ means blank a

# The 15 United States universities with the most international students.
(
    world
    .select('University', 'Country', 'Number_students', '% International', '# International')
    .where('Country', 'United States')
    .sort('# International', descending=True)
    .take(np.arange(15))
)


scores_only = world.select('Score_Rank', 'University', 'Teaching', 'Research', 'Citations', 'International_Outlook', 'Industry_Income', 'Score_Result')


scores_only


0.3 * 90.5 + \
0.3 * 99.6 + \
0.3 * 98.4 + \
0.075 * 96.4 + \
0.025 * 65.5

95.4175


# Recompute the overall score for every university as a weighted sum of the
# five component scores (weights 0.3 / 0.3 / 0.3 / 0.075 / 0.025).
score_result_manual_calculation = (
    0.3 * scores_only.column('Teaching')
    + 0.3 * scores_only.column('Research')
    + 0.3 * scores_only.column('Citations')
    + 0.075 * scores_only.column('International_Outlook')
    + 0.025 * scores_only.column('Industry_Income')
)


score_result_manual_calculation

array([95.4175, 94.5475, 94.3775, ..., 11.055 , 10.9625, 10.6875])


scores_only.with_columns(
    'Score Result Manual', score_result_manual_calculation
)


# Alternative methodology 1: 60% teaching, 30% international outlook,
# 10% industry income (research and citations are ignored).
breakdown_1 = (
    0.6 * scores_only.column('Teaching')
    + 0.3 * scores_only.column('International_Outlook')
    + 0.1 * scores_only.column('Industry_Income')
)

breakdown_1

array([89.77, 88.81, 89.27, ..., 18.9 , 17.09, 18.39])


scores_only = scores_only.with_columns(
    'Breakdown 1', breakdown_1
)


scores_only.sort('Breakdown 1', descending = True)


scores_only.sort('Breakdown 1', descending = True).take(23)


# Alternative methodology 2: teaching carries half the weight; research,
# citations and international outlook get 15% each; industry income gets 5%.
breakdown_2 = (
    0.5 * scores_only.column('Teaching')
    + 0.15 * scores_only.column('Research')
    + 0.15 * scores_only.column('Citations')
    + 0.15 * scores_only.column('International_Outlook')
    + 0.05 * scores_only.column('Industry_Income')
)


scores_only = scores_only.with_columns(
    'Breakdown 2', breakdown_2
)


scores_only.sort('Breakdown 2', descending = True)


world


world.group('Country')


world.group('Country').sort('count', descending = True)


world.group('Country').where('count', are.above_or_equal_to(25))


world.group('Country').where('count', are.above_or_equal_to(25)).sort('count').barh('Country')


world.group('Country', np.mean)


world.group('Country', np.mean).select('Country', 'Score_Result mean').sort('Score_Result mean', descending = True)


world.where('Country', 'Singapore')


def first(arr):
    """Return the element at position 0 of ``arr``.

    Used below as the aggregation function passed to ``group``. ``arr`` must
    provide an ``item`` method (presumably a NumPy array of one column's
    values for a group).
    """
    leading_value = arr.item(0)
    return leading_value


world.group('Country', first)


# One row per country holding the values of that country's first row in
# `world`, ordered by that row's overall score (highest first).
# NOTE(review): this yields each country's best university only if `world`
# is in rank order - confirm.
(
    world
    .group('Country', first)
    .sort('Score_Result first', descending=True)
    .select('Score_Rank first', 'Country', 'University first', 'Score_Result first')
)


# Restrict to United States universities and keep only the columns needed
# for the state-level analysis.
us_only = (
    world
    .where('Country', 'United States')
    .select('University', 'Number_students', 'Score_Result')
)


us_only


us_r1 = Table.read_table('data/us-r1-universities.csv')
us_r1


# Think about why these are the arguments to join!
# 'University' is the matching column in us_only and 'Institution' is the
# matching column in us_r1. The result keeps the 'University' label and gains
# us_r1's other columns (Control, City, State).
us_with_state = us_only.join('University', us_r1, 'Institution')
us_with_state


us_with_state.sort('Score_Result', descending = True)


us_with_state.group('State').sort('count', descending = True)


# sort(1) means sort by the column at index 1, which is 'Score_Result mean'
us_with_state.group('State', np.mean).select('State', 'Score_Result mean').sort(1, descending = True)


us_with_state.where('State', 'MD')


us_with_state.group('Control')


us_with_state.group('Control', np.mean)


us_with_state.group(['State', 'Control'])

/opt/conda/lib/python3.8/site-packages/datascience/tables.py:920: VisibleDeprecationWarning:

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


us_with_state.pivot('Control', 'State')


us_with_state.pivot('Control', 'State').sort('Public', descending = True)


us_with_state.pivot('Control', 'State', 'Score_Result', np.mean)


us_with_state.pivot('Control', 'State', 'Score_Result', np.mean).sort('Public', descending = True)


us_with_state.where('State', 'MN')


us_with_state.pivot('State', 'University')

Rank_Char	Score_Rank	University	Country	Number_students	Numb_students_per_Staff	International_Students	Percentage_Female	Percentage_Male	Teaching	Research	Citations	Industry_Income	International_Outlook	Score_Result	Overall_Ranking
1	1	University of Oxford	United Kingdom	20,664	11.2	41%	46%	54%	90.5	99.6	98.4	65.5	96.4	95.4	95.40
2	2	California Institute of Technology	United States	2,240	6.4	30%	34%	66%	92.1	97.2	97.9	88	82.5	94.5	94.50
3	3	University of Cambridge	United Kingdom	18,978	10.9	37%	47%	53%	91.4	98.7	95.8	59.3	95	94.4	94.40
4	4	Stanford University	United States	16,135	7.3	23%	43%	57%	92.8	96.4	99.9	66.2	79.5	94.3	94.30
5	5	Massachusetts Institute of Technology	United States	11,247	8.6	34%	39%	61%	90.5	92.4	99.5	86.9	89	93.6	93.60
6	6	Princeton University	United States	7,983	8.1	25%	45%	55%	90.3	96.3	98.8	58.6	81.1	93.2	93.20
7	7	Harvard University	United States	20,823	9.2	24%	49%	51%	89.2	98.6	99.1	47.3	76.3	93	93.00
8	8	Yale University	United States	12,402	5.4	20%	50%	50%	92	94.8	97.3	52.4	68.7	91.7	91.70
9	9	University of Chicago	United States	13,833	5.7	28%	46%	54%	89.1	91.4	96.7	52.7	76	90.2	90.20
10	10	Imperial College London	United Kingdom	16,760	11.7	56%	38%	62%	84.5	87.6	97	69.9	97.1	89.8	89.80

Rank_Char	Score_Rank	University	Country	Number_students	Numb_students_per_Staff	International_Students	Teaching	Research	Citations	Industry_Income	International_Outlook	Score_Result	Overall_Ranking
16	15	Columbia University	United States	26586	5.8	37	85.6	82.6	98.2	44.8	79.3	87	87.00
36	33	The University of Tokyo	Japan	25913	10.6	12	85.9	89.6	60.7	77.4	38.2	75.7	75.70
47	40	The Hong Kong University of Science and Technology	Hong Kong	10125	22.3	31	57.4	66.1	89.8	71.9	97.7	73.1	73.10
51	44	University of Wisconsin-Madison	United States	39154	10	13	68.8	70.3	85.3	46.3	47.4	72	72.00
52	45	Washington University in St Louis	United States	13401	7.5	20	64.2	57.5	98.8	40.7	57.1	71.5	71.50
53	46	Brown University	United States	9391	10.8	20	64	56.1	94.9	36.5	61.4	70	70.00
57	49	Chinese University of Hong Kong	Hong Kong	18340	18.6	32	55.8	62.8	84.5	55.2	97.8	69.6	69.60
64	54	Seoul National University	South Korea	26182	12.4	12	72.3	71.6	66.5	86.6	35.8	68	68.00
74	62	Humboldt University of Berlin	Germany	33463	56.1	17	61.8	66.8	67.9	40.2	67.8	65	65.00
80	67	University of Science and Technology of China	China	16245	7.7	4	64.6	59.5	74.7	79.6	31.3	64	64.00

Rank_Char	Score_Rank	University	Country	Number_students	Numb_students_per_Staff	% International	Percentage_Female	Percentage_Male	Teaching	Research	Citations	Industry_Income	International_Outlook	Score_Result	Overall_Ranking
1	1	University of Oxford	United Kingdom	20664	11.2	41	46	54	90.5	99.6	98.4	65.5	96.4	95.4	95.40
2	2	California Institute of Technology	United States	2240	6.4	30	34	66	92.1	97.2	97.9	88	82.5	94.5	94.50
3	3	University of Cambridge	United Kingdom	18978	10.9	37	47	53	91.4	98.7	95.8	59.3	95	94.4	94.40
4	4	Stanford University	United States	16135	7.3	23	43	57	92.8	96.4	99.9	66.2	79.5	94.3	94.30
5	5	Massachusetts Institute of Technology	United States	11247	8.6	34	39	61	90.5	92.4	99.5	86.9	89	93.6	93.60
6	6	Princeton University	United States	7983	8.1	25	45	55	90.3	96.3	98.8	58.6	81.1	93.2	93.20
7	7	Harvard University	United States	20823	9.2	24	49	51	89.2	98.6	99.1	47.3	76.3	93	93.00
8	8	Yale University	United States	12402	5.4	20	50	50	92	94.8	97.3	52.4	68.7	91.7	91.70
9	9	University of Chicago	United States	13833	5.7	28	46	54	89.1	91.4	96.7	52.7	76	90.2	90.20
10	10	Imperial College London	United Kingdom	16760	11.7	56	38	62	84.5	87.6	97	69.9	97.1	89.8	89.80

University	Country	Number_students	% International	# International
University of Melbourne	Australia	47385	46	21797
Monash University	Australia	52989	39	20666
Al-Azhar University	Egypt	342151	6	20529
UNSW Sydney	Australia	44336	41	18178
University of Sydney	Australia	45111	39	17593
UCL	United Kingdom	32665	52	16986
University of British Columbia	Canada	52108	32	16675
Eastern Mediterranean University	Northern Cyprus	18865	83	15658
University of Toronto	Canada	73370	21	15408
University of Manchester	United Kingdom	37038	40	14815

University	Country	Number_students	% International	# International
New York University	United States	44466	33	14674
University of Illinois at Urbana-Champaign	United States	44916	24	10780
Indiana University	United States	66872	15	10031
Columbia University	United States	26586	37	9837
Arizona State University (Tempe)	United States	46683	21	9803
Purdue University West Lafayette	United States	40451	23	9304
University of Southern California	United States	36929	25	9232
University of Massachusetts	United States	61204	15	9181
University of California, Irvine	United States	32706	26	8504
University of California, San Diego	United States	33579	23	7723

Lecture 21 – Case Study

Data 6, Summer 2021

World University Rankings 2020

Which universities have the most international students?

Quick Check 1

How do the rankings actually work?

What if we want to change the methodology?

Quick Check 2

Which countries have the most universities in the ranking?

What's the best university in each country?

In which states are the best US universities located?

Followup – how do public and private schools in the US compare?

A warning

Done!

Country	count
Algeria	8
Argentina	4
Australia	35
Austria	11
Bangladesh	1
Belarus	1
Belgium	8
Brazil	46
Brunei Darussalam	1
Bulgaria	1

Country	Score_Rank mean	Number_students mean	Numb_students_per_Staff mean	% International mean	Percentage_Female mean	Percentage_Male mean	Teaching mean	Research mean	Citations mean	Industry_Income mean	International_Outlook mean	Score_Result mean	# International mean
Algeria	465.875	35610.2	21.6125	0.75	59.75	40.25	18.325	7.5375	22.5125	34.7	36.9125	18.1625	246.75
Argentina	468.75	35442.2	12.95	2.5	63	37	17.875	8.125	21.625	35.075	34.125	17.75	867
Australia	178.514	23652.1	29.3314	28.3714	54.5143	42.6286	29.6943	35.6486	75	50.1229	84.86	49.7171	6970.23
Austria	225	10101.5	14.1	25	49	51	31.2545	23.6273	65.3	51.3909	81.6909	43.4727	2531.45
Bangladesh	483	34108	15.6	0	42	58	16	8.8	16.4	36.6	40.8	16.3	0
Belarus	459	27101	8.4	9	55	45	21.3	9.7	13.3	42.4	58.1	18.7	2439
Belgium	138.375	23646.5	33.075	19	53.125	46.875	37.4875	48.1875	71.075	72.675	72.8125	54.3125	4399.88
Brazil	447.609	27438.1	15.4935	0.934783	48.6087	47.0435	23.0587	13.2543	21.2739	39.0848	23.1239	19.987	299.043
Brunei Darussalam	234	3830	10.9	15	67	33	23	19.3	74.4	34.8	85.8	42.3	574
Bulgaria	471	21988	9.3	6	65	35	20.7	9.6	14.2	35.3	43.7	17.5	1319

Country	Score_Result mean
Singapore	77.4
Hong Kong	62.4167
Netherlands	61.4923
Switzerland	57.3909
Belgium	54.3125
Luxembourg	53.7
Denmark	51.6286
Germany	51.3188
Sweden	51.1167
United States	50.1081

Rank_Char	Score_Rank	University	Country	Number_students	Numb_students_per_Staff	% International	Percentage_Female	Percentage_Male	Teaching	Research	Citations	Industry_Income	International_Outlook	Score_Result	Overall_Ranking	# International
25	24	National University of Singapore	Singapore	30869	17.9	28	51	49	76.8	90.4	76.9	58.8	95.5	81.9	81.90	8643
48	42	Nanyang Technological University, Singapore	Singapore	25088	15.9	27	48	52	57.6	70.4	84.9	76.5	95.1	72.9	72.90	6774

Country	Rank_Char first	Score_Rank first	University first	Number_students first	Numb_students_per_Staff first	% International first	Percentage_Female first	Percentage_Male first	Teaching first	Research first	Citations first	Industry_Income first	International_Outlook first	Score_Result first	Overall_Ranking first	# International first
Algeria	601–800	318	Ferhat Abbas Sétif University 1	34002	22.7	1	65	35	17.7	7.7	73	34.4	41.8	33.5	28.3–35.2	340
Argentina	1001+	437	National University of San Martín	15236	15.8	5	65	35	16.5	8.8	31.9	35	39.1	21	10.7–22.1	762
Australia	32	30	University of Melbourne	47385	26.3	46	56	44	65.9	74.1	89.8	76.3	93.1	77.8	77.80	21797
Austria	134	101	University of Vienna	33547	40.2	27	66	34	48.1	55.3	64.6	37.1	94.2	58.4	58.40	9058
Bangladesh	1001+	483	University of Dhaka	34108	15.6	0	42	58	16	8.8	16.4	36.6	40.8	16.3	10.7–22.1	0
Belarus	1001+	459	Belarusian State University	27101	8.4	9	55	45	21.3	9.7	13.3	42.4	58.1	18.7	10.7–22.1	2439
Belgium	45	39	KU Leuven	45049	36.3	15	50	50	58.7	73.9	85.3	99.3	71.8	73.2	73.20	6757
Brazil	251–300	172	University of São Paulo	83214	15.9	4	48	52	56.4	54	40.6	39.9	33.9	48.8	46.9–50.0	3329
Brunei Darussalam	401–500	234	Universiti Brunei Darussalam	3830	10.9	15	67	33	23	19.3	74.4	34.8	85.8	42.3	38.8–42.3	574
Bulgaria	1001+	471	Sofia University	21988	9.3	6	65	35	20.7	9.6	14.2	35.3	43.7	17.5	10.7–22.1	1319

Institution	Control	City	State
University of Alabama	Public	Tuscaloosa	AL
University of Alabama at Birmingham	Public	Birmingham	AL
University at Albany	Public	Albany	NY
University of Arizona	Public	Tucson	AZ
Arizona State University	Public	Tempe	AZ
University of Arkansas	Public	Fayetteville	AR
Auburn University	Public	Auburn	AL
Binghamton University	Public	Vestal	NY
Boston College	Private (non-profit)	Chestnut Hill	MA
Boston University	Private (non-profit)	Boston	MA

University	Number_students	Score_Result	Control	City	State
Auburn University	26641	33.4	Public	Auburn	AL
Boston College	12904	45.9	Private (non-profit)	Chestnut Hill	MA
Boston University	25662	68.4	Private (non-profit)	Boston	MA
Brandeis University	5375	50.3	Private (non-profit)	Waltham	MA
Brown University	9391	70	Private (non-profit)	Providence	RI
California Institute of Technology	2240	94.5	Private (non-profit)	Pasadena	CA
Carnegie Mellon University	13430	81.3	Private (non-profit)	Pittsburgh	PA
Case Western Reserve University	10654	60	Private (non-profit)	Cleveland	OH
Clemson University	21436	30.7	Public	Clemson	SC
Columbia University	26586	87	Private (non-profit)	New York	NY

State	Score_Result mean
MD	76
IL	74.4333
CA	74.3364
RI	70
CT	67.2
NC	66.7667
MA	66.4
NJ	64.75
PA	64.575
MN	64.1

University	Number_students	Score_Result	Control	City	State
Johns Hopkins University	16171	89.2	Private (non-profit)	Baltimore	MD
University of Maryland, College Park	33108	62.8	Public	College Park	MD

State	Private (non-profit)	Public
AL	0	43.4
AR	0	29.4
AZ	0	61.8
CA	85.6333	70.1
CO	0	59.6
CT	91.7	42.7
DC	57.8	0
DE	0	47.3
FL	51.2	44.325
GA	64	52.9667