# Get several columns of dataselect_columns = ["First Name", "Last Name"]df[select_columns]
First Name
Last Name
0
Shohei
Ohtani
1
Mike
Trout
2
Aaron
Judge
3
Mookie
Betts
4
Fernando
Tatis
5
Jacob
deGrom
6
Bryce
Harper
7
Vladimir
Guerrero
8
Ronald
Acuna
9
Gerrit
Cole
# Get a row of datadf.loc[0]
First Name Shohei
Last Name Ohtani
Age 29
Position Pitcher/DH
Salary 30000000
Name: 0, dtype: object
# Get several rows of datadf.loc[[0, 1, 3]]
First Name
Last Name
Age
Position
Salary
0
Shohei
Ohtani
29
Pitcher/DH
30000000
1
Mike
Trout
32
Center Fielder
37116667
3
Mookie
Betts
30
Right Fielder
30000000
# Get a cell of datadf.iloc[0, 0]
'Shohei'
# Get any data using iloc# first rowdisplay(df.iloc[0,])print("----------------------------")# second columndisplay(df.iloc[:, 1])print("----------------------------")# any combination of rows and columnsdisplay(df.iloc[[0, 1, 3], 0:3])
First Name Shohei
Last Name Ohtani
Age 29
Position Pitcher/DH
Salary 30000000
Name: 0, dtype: object
# Exercise 4 # Show players who are younger or equal to 28# store the data in the variable young_players
Let’s sort the data based on our preferences
# Big payplayers20m.sort_values("Salary")
First Name
Last Name
Age
Position
Salary
6
Bryce
Harper
31
Right Fielder
27000000
0
Shohei
Ohtani
29
Pitcher/DH
30000000
3
Mookie
Betts
30
Right Fielder
30000000
5
Jacob
deGrom
35
Pitcher
30000000
9
Gerrit
Cole
33
Pitcher
36000000
1
Mike
Trout
32
Center Fielder
37116667
2
Aaron
Judge
31
Right Fielder
40000000
# Big pay, Biggest firstplayers20m.sort_values("Salary", ascending=False)
First Name
Last Name
Age
Position
Salary
2
Aaron
Judge
31
Right Fielder
40000000
1
Mike
Trout
32
Center Fielder
37116667
9
Gerrit
Cole
33
Pitcher
36000000
0
Shohei
Ohtani
29
Pitcher/DH
30000000
3
Mookie
Betts
30
Right Fielder
30000000
5
Jacob
deGrom
35
Pitcher
30000000
6
Bryce
Harper
31
Right Fielder
27000000
# Young player, Youngest firstplayers20m.sort_values("Age")
First Name
Last Name
Age
Position
Salary
0
Shohei
Ohtani
29
Pitcher/DH
30000000
3
Mookie
Betts
30
Right Fielder
30000000
2
Aaron
Judge
31
Right Fielder
40000000
6
Bryce
Harper
31
Right Fielder
27000000
1
Mike
Trout
32
Center Fielder
37116667
9
Gerrit
Cole
33
Pitcher
36000000
5
Jacob
deGrom
35
Pitcher
30000000
# Exercise 5# Find the salary for players who are younger or equal to 28# Order by the salary, highest first
# Show all 10 players, order by Last Name, then First Name, alphabeticallydf.sort_values(["Last Name", "First Name"])
First Name
Last Name
Age
Position
Salary
8
Ronald
Acuna
25
Center Fielder
17000000
3
Mookie
Betts
30
Right Fielder
30000000
9
Gerrit
Cole
33
Pitcher
36000000
7
Vladimir
Guerrero
24
First Baseman
7500000
6
Bryce
Harper
31
Right Fielder
27000000
2
Aaron
Judge
31
Right Fielder
40000000
0
Shohei
Ohtani
29
Pitcher/DH
30000000
4
Fernando
Tatis
25
Shortstop
5000000
1
Mike
Trout
32
Center Fielder
37116667
5
Jacob
deGrom
35
Pitcher
30000000
# Display Last Name firstdf_name_sort = df.sort_values(["Last Name", "First Name"])df_name_sort[["Last Name", "First Name", "Age", "Position", "Salary"]]
Last Name
First Name
Age
Position
Salary
8
Acuna
Ronald
25
Center Fielder
17000000
3
Betts
Mookie
30
Right Fielder
30000000
9
Cole
Gerrit
33
Pitcher
36000000
7
Guerrero
Vladimir
24
First Baseman
7500000
6
Harper
Bryce
31
Right Fielder
27000000
2
Judge
Aaron
31
Right Fielder
40000000
0
Ohtani
Shohei
29
Pitcher/DH
30000000
4
Tatis
Fernando
25
Shortstop
5000000
1
Trout
Mike
32
Center Fielder
37116667
5
deGrom
Jacob
35
Pitcher
30000000
4.4 Operations
You can calculate and modify data in the dataframe
# This data is from 2023. Let's update the age# We will create a new dataframe df24df24 = df.copy()df24["Age"] = df["Age"] +1
df24
First Name
Last Name
Age
Position
Salary
0
Shohei
Ohtani
30
Pitcher/DH
30000000
1
Mike
Trout
33
Center Fielder
37116667
2
Aaron
Judge
32
Right Fielder
40000000
3
Mookie
Betts
31
Right Fielder
30000000
4
Fernando
Tatis
26
Shortstop
5000000
5
Jacob
deGrom
36
Pitcher
30000000
6
Bryce
Harper
32
Right Fielder
27000000
7
Vladimir
Guerrero
25
First Baseman
7500000
8
Ronald
Acuna
26
Center Fielder
17000000
9
Gerrit
Cole
34
Pitcher
36000000
# We can also combine the First Name and Last Name into one column, Full Namedf24["Full Name"] = df24["Last Name"] +", "+ df24["First Name"]df24
First Name
Last Name
Age
Position
Salary
Full Name
0
Shohei
Ohtani
30
Pitcher/DH
30000000
Ohtani, Shohei
1
Mike
Trout
33
Center Fielder
37116667
Trout, Mike
2
Aaron
Judge
32
Right Fielder
40000000
Judge, Aaron
3
Mookie
Betts
31
Right Fielder
30000000
Betts, Mookie
4
Fernando
Tatis
26
Shortstop
5000000
Tatis, Fernando
5
Jacob
deGrom
36
Pitcher
30000000
deGrom, Jacob
6
Bryce
Harper
32
Right Fielder
27000000
Harper, Bryce
7
Vladimir
Guerrero
25
First Baseman
7500000
Guerrero, Vladimir
8
Ronald
Acuna
26
Center Fielder
17000000
Acuna, Ronald
9
Gerrit
Cole
34
Pitcher
36000000
Cole, Gerrit
# If you only wnat to keep Full Namedf24_fullname = df24[["Full Name", "Age", "Position", "Salary"]]df24_fullname
Full Name
Age
Position
Salary
0
Ohtani, Shohei
30
Pitcher/DH
30000000
1
Trout, Mike
33
Center Fielder
37116667
2
Judge, Aaron
32
Right Fielder
40000000
3
Betts, Mookie
31
Right Fielder
30000000
4
Tatis, Fernando
26
Shortstop
5000000
5
deGrom, Jacob
36
Pitcher
30000000
6
Harper, Bryce
32
Right Fielder
27000000
7
Guerrero, Vladimir
25
First Baseman
7500000
8
Acuna, Ronald
26
Center Fielder
17000000
9
Cole, Gerrit
34
Pitcher
36000000
# Exercise 6# We want to find the salary in Australian dollar# the USD to AUD exchange rate is 1.44# continue with df24_fullname, store data in df24_aud
4.5 Aggregate Operations
# Find the total salary of these 10 players# Find the average salary of these 10 playerssalary_total =np.sum(df["Salary"])salary_avg = np.mean(df["Salary"])salary_max = np.max(df["Salary"])salary_min = np.min(df["Salary"])salary_count =len(df)print(f"Player number is {salary_count}")print(f"Total Salary is {salary_total:,.0f}")print(f"Average Salary is {salary_avg:,.0f}")print(f"Highest Salary is {salary_max:,.0f}")print(f"Lowest Salary is {salary_min:,.0f}")
Player number is 10
Total Salary is 259,616,667
Average Salary is 25,961,667
Highest Salary is 40,000,000
Lowest Salary is 5,000,000
4.6 Data Investigation
Do you agree that MLB pitchers get paid more?
Create an equation to help you decide how to compare players.
Justify your answer.
What other data (not available here) can help you answer this question?