We use square brackets to create lists.
years = [1998, 2001, 2007]
years
[1998, 2001, 2007]
schools = ['cal', 1868, 'cal poly', 1901, 'columbia', 1754]
schools
['cal', 1868, 'cal poly', 1901, 'columbia', 1754]
nums = [2 + 2, 5, 5 - 1]
nums
[4, 5, 4]
type([3, 1, 2])
list
type([])
list
[3, 1, 2] == [3, 1, 2]
True
[3, 1, 2] == [3, 1, 2, -4]
False
len([9, 2.5, 7])
3
max([9, 2.5, 7])
9
# Earliest in dictionary (alphabetical) order
min(['hello', 'hi', 'abbey'])
'abbey'
# TypeError!
min(['hello', 2.5, 'abbey'])
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) /tmp/ipykernel_40/3620951238.py in <module> 1 # TypeError! ----> 2 min(['hello', 2.5, 'abbey']) TypeError: '<' not supported between instances of 'float' and 'str'
sum([9, 2.5, 7])
18.5
[1, 2] + [3, 4] * 2
[1, 2, 3, 4, 3, 4]
groceries = ['eggs', 'milk']
groceries
['eggs', 'milk']
groceries.append('bread')
groceries
['eggs', 'milk', 'bread']
3 in [3, 1, 'dog']
True
10 not in [3, 1, 'dog']
True
not 10 in [3, 1, 'dog']
True
[3, 1] in [3, 1, 'dog']
False
A = [1] + [2] == [1 + 2]
A
False
B = sum([4, 5, 9]) / len([4, 5, 9])
B
6.0
C = [4, 0, 2]
C = C.append(min(C))
print(C)
None
nums = [3, 1, 'dog', -9.5, 'berk']
nums[0]
3
nums[3]
-9.5
nums[5]
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) /tmp/ipykernel_40/3723135418.py in <module> ----> 1 nums[5] IndexError: list index out of range
nums = [3, 1, 'dog', -9.5, 'berk']
nums[1:3]
[1, 'dog']
nums[0:4]
[3, 1, 'dog', -9.5]
# If you don't include 'start',
# the slice starts at the
# beginning of the list
nums[:4]
[3, 1, 'dog', -9.5]
# If you don't include 'stop',
# the slice goes until the
# end of the list
nums[2:]
['dog', -9.5, 'berk']
nums = [3, 1, 'dog', -9.5, 'berk']
nums[len(nums) - 1]
'berk'
nums[-1]
'berk'
nums[-3]
'dog'
nums[-3:]
['dog', -9.5, 'berk']
threes = [3, 6, 9, 12, 15]
fours = threes[1:4]
five = fours[-1] + fours[1]
five
21
def square_all(vals):
    """Return a new list containing the square of every element of vals.

    Args:
        vals: An iterable of numbers (for example, a list of ints or floats).

    Returns:
        A list whose i-th element is the i-th element of vals raised to the
        power 2. The input is not modified; an empty input gives [].
    """
    # A comprehension builds the output in one pass, replacing the manual
    # index counter and repeated append calls.
    return [val ** 2 for val in vals]
square_all([1, 10, 3, 4])
[1, 100, 9, 16]
The `.index` method tells us the position of an element in a list, if it is in the list; if it is not, `.index` raises a `ValueError`.
[9, 8, 14, -1].index(14)
2
[9, 8, 14, -1].index(15)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_40/3856422112.py in <module> ----> 1 [9, 8, 14, -1].index(15) ValueError: 15 is not in list
# Two occurrences of 2
# Gives index of first one
[1, 2, 4, 2, 4].index(2)
1
[1, 2, 4, 2, 4].count(2)
2
def next_day(day):
    """Return the name of the day of the week that follows day.

    Args:
        day: A capitalized day name, e.g. 'Wednesday'.

    Returns:
        The name of the following day; 'Saturday' wraps around to 'Sunday'.

    Raises:
        ValueError: If day is not one of the seven names in week
            (raised by list.index).
    """
    week = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    curr = week.index(day)
    # Wrap past the end of the list using its actual length rather than a
    # hard-coded 7, so the modulus can never drift from the data.
    return week[(curr + 1) % len(week)]
next_day('Wednesday')
'Thursday'
next_day('Saturday')
'Sunday'
university = 'uc berkeley'
list(university)
['u', 'c', ' ', 'b', 'e', 'r', 'k', 'e', 'l', 'e', 'y']
university[3:7]
'berk'
university[1]
'c'
university[-8:]
'berkeley'
# A slice with a step of -1 walks backwards,
# which reverses a string or list
university[::-1]
'yelekreb cu'
'alfalfa'.find('f')
2
'alfalfa'.rfind('a')
6
'alfalfa'.find('b')
-1
'alfalfa'.index('b')
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /tmp/ipykernel_40/3743236407.py in <module> ----> 1 'alfalfa'.index('b') ValueError: substring not found
test_list = [8, 0, 2, 4]
test_string = 'zebra'
test_list[1] = 99
test_list
[8, 99, 2, 4]
test_string[1] = 'f'
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) /tmp/ipykernel_40/3748101269.py in <module> ----> 1 test_string[1] = 'f' TypeError: 'str' object does not support item assignment
test_string[:1] + 'f' + test_string[2:]
'zfbra'
Let's use data about passengers of the Titanic (including whether each one survived), downloaded from here.
from datascience import *
table = Table.read_table('data/titanic.csv').select(['Name', 'Age', 'Sex', 'Fare', 'Survived'])
table
Name | Age | Sex | Fare | Survived |
---|---|---|---|---|
Braund, Mr. Owen Harris | 22 | male | 7.25 | 0 |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 38 | female | 71.2833 | 1 |
Heikkinen, Miss. Laina | 26 | female | 7.925 | 1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 35 | female | 53.1 | 1 |
Allen, Mr. William Henry | 35 | male | 8.05 | 0 |
Moran, Mr. James | nan | male | 8.4583 | 0 |
McCarthy, Mr. Timothy J | 54 | male | 51.8625 | 0 |
Palsson, Master. Gosta Leonard | 2 | male | 21.075 | 0 |
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | 27 | female | 11.1333 | 1 |
Nasser, Mrs. Nicholas (Adele Achem) | 14 | female | 30.0708 | 1 |
... (881 rows omitted)
Soon, we will learn how to load in data like the above and extract columns as lists. But for now, just run the following cell.
names = list(table.column('Name'))
ages = list(table.column('Age'))
survived = list(table.column('Survived'))
ages[:5]
[22.0, 38.0, 26.0, 35.0, 35.0]
names[:5]
['Braund, Mr. Owen Harris', 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)', 'Heikkinen, Miss. Laina', 'Futrelle, Mrs. Jacques Heath (Lily May Peel)', 'Allen, Mr. William Henry']
100 * sum(survived) / len(survived)
38.38383838383838