-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfifa_2.py
61 lines (50 loc) · 3.08 KB
/
fifa_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
'''
More information file.
Demonstrates:-
Find data type of a single column - df.col.dtype
Find the unique values that appear in a column - df.col.unique()
List data having null values in another attribute of theirs. - without using isnull()
Counts of data samples for every unique value in a column - df.col.value_counts()
Find number of data samples with a null value in an attribute - df.col.isnull().sum()
Find number of elements in a DataFrame or a Series object - df.size / df.col.size (size is an attribute, not a method)
'''
import pandas as pd
from fifa_0_splitting import split_fifa
pd.set_option('display.expand_frame_repr', False)
def main():
    """Print a series of exploratory summaries for the "more_info" miniset.

    Loads the miniset through ``split_fifa`` and prints, in order: the
    distinct preferred-foot values, a sample of players whose preferred foot
    is neither 'Right' nor 'Left', per-position player counts, the players in
    the least common position, International Reputation statistics after an
    integer cast, the count of missing 'Weak Foot' values for players older
    than 22, and squad-size figures for one club.
    """
    # Load miniset of "more information"
    miniset = "more_info"
    df_more = split_fifa(miniset)

    # ~~~~~~ MORE INFO ANALYSIS ~~~~~~~
    # Distinct values of preferred foot (Right/Left/Other) or (Right/Left)
    preferred_foot = df_more['Preferred Foot']
    print("TYPES of preferred foots:\n", preferred_foot.unique())

    # Rows whose preferred foot is neither 'Right' nor 'Left' (i.e. NaN),
    # selected by comparison rather than isnull(), restricted to Overall > 60.
    not_right = preferred_foot != 'Right'
    not_left = preferred_foot != 'Left'
    rated_above_60 = df_more.Overall > 60
    unknown_foot = df_more[not_right & not_left & rated_above_60]
    print(
        "Sample of players with 'nan' as preferred foot:\n",
        unknown_foot.sample(5),
        end="\n\n",
    )

    # Player count per position, rarest first (value_counts defaults to
    # ascending=False, so ascending order is requested explicitly).
    position_counts = df_more.Position.value_counts(ascending=True)
    print(
        "Positions with COUNTS of players in each:\n",
        position_counts,
        end="\n\n",
    )

    # With ascending counts the first index label is the rarest position.
    rarest_position = position_counts.index[0]
    print("Players playing the position which occurs least in the game: ")
    print(df_more[df_more.Position == rarest_position], end="\n\n")

    # International Reputation stats: missing values become 0 so the column
    # can be cast to int.
    print("CHANGING TYPE of International Reputation column (float64)...")
    reputation = df_more['International Reputation'].fillna(0)
    df_more['International Reputation'] = reputation.astype(int)
    print(
        "New dtype of International Reputation column = ",
        df_more['International Reputation'].dtype,
    )
    # describe() reports float output here because of the mean/std calculation.
    print(
        "Mean and spread of International Reputation:\n",
        df_more['International Reputation'].describe(),
        end="\n\n",
    )

    # Players above 22 with unknown quality of weaker foot.
    # isnull() marks missing entries True; sum() counts them (True == 1).
    older_than_22 = df_more.Age > 22
    missing_weak_foot = df_more.loc[older_than_22, 'Weak Foot'].isnull().sum()
    print(
        "Players above 22 years of age with unknown weaker foot stat = ",
        missing_weak_foot,
    )

    # TEAM SPECIFIC
    # .size is the number of elements: rows for a Series, rows * columns for
    # a DataFrame.
    team = 'Manchester United'
    in_team = df_more.Club == team
    print("Size of {0} squad: ".format(team), df_more.loc[in_team, 'Club'].size)
    print(
        "Number of elements in entire team DataFrame: ",
        df_more[in_team].size,
        end="\n\n",
    )
def test():
    """Scratch function for rough work and quick sanity checks.

    Change ``main()`` to ``test()`` in the ``if __name__ == "__main__"``
    statement at the bottom of the file to run only this function, so small
    snippets can be tried without disturbing ``main()``.

    Currently prints the number of elements in a small Series.
    """
    values = [0, 1, 2, 3]
    s = pd.Series(values)
    # Series.size is a property holding an int; writing s.size() would try to
    # call that int and raise TypeError, so access it without parentheses.
    print(s.size)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()