This is just an example EDA (exploratory data analysis) for absolute beginners. For more detailed EDA and proper documentation, refer to other EDA projects.
https://www.kaggle.com/c/titanic/data Use this page to get the dataset and to acquire domain knowledge about it.
Link to another example EDA: https://www.kaggle.com/code/gunesevitan/titanic-advanced-feature-engineering-tutorial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Titanic passenger data, using PassengerId as the row index.
df = pd.read_csv(filepath_or_buffer="titanic.csv", index_col="PassengerId")
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
# Count the missing values in each column.
df.isna().sum()
Survived 0 Pclass 0 Name 0 Sex 0 Age 177 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 dtype: int64
# Show the passengers whose Embarked value is missing.
df.loc[df["Embarked"].isna()]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
# Embarked counts among passengers comparable to the two with a missing port:
# first class, female, survived, SibSp == 0.
df.loc[
    df["Pclass"].eq(1)
    & df["Sex"].eq("female")
    & df["Survived"].eq(1)
    & df["SibSp"].eq(0),
    "Embarked",
].value_counts()
S 24 C 22 Name: Embarked, dtype: int64
# First character of each cabin code (NaN where Cabin is missing).
df["Cabin"].str.get(0)
PassengerId 1 NaN 2 C 3 NaN 4 C 5 NaN ... 887 NaN 888 B 889 NaN 890 C 891 NaN Name: Cabin, Length: 891, dtype: object
# First character of the cabin code, only for passengers with a cabin recorded.
df["Cabin"].dropna().str.get(0)
PassengerId 2 C 4 C 7 E 11 G 12 C .. 872 D 873 B 880 C 888 B 890 C Name: Cabin, Length: 204, dtype: object
# Store the first character of the cabin code as a new 'Deck' column.
df["Deck"] = df["Cabin"].str.get(0)
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN |
891 rows × 12 columns
# Most frequent Embarked value among first-class surviving women on deck B,
# i.e. passengers matching the two rows that have no Embarked value.
df.loc[
    df["Pclass"].eq(1)
    & df["Sex"].eq("female")
    & df["Survived"].eq(1)
    & df["Deck"].eq("B"),
    "Embarked",
].mode()
0 S dtype: object
# Fill the missing Embarked values with 'S', the mode found for comparable
# passengers (first class, female, survived, deck B).
# Assign the filled column back instead of calling fillna(inplace=True) on the
# attribute-accessed column: that form operates on an intermediate object, emits
# a FutureWarning in pandas 2.x, and leaves df unchanged under Copy-on-Write.
df["Embarked"] = df["Embarked"].fillna("S")
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN |
891 rows × 12 columns
# Re-count the missing values per column after filling Embarked.
df.isna().sum()
Survived 0 Pclass 0 Name 0 Sex 0 Age 177 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 Deck 687 dtype: int64
# Look again at passengers 62 and 830, the two rows that had no Embarked value.
df.loc[[62, 830], :]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | S | B |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | S | B |
# Combine the SibSp and Parch counts into a single 'Family' column.
df["Family"] = df["SibSp"].add(df["Parch"])
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
# Pairwise (Pearson) correlation matrix of the numeric columns.
df.corr(method="pearson", numeric_only=True)
Survived | Pclass | Age | SibSp | Parch | Fare | Family | |
---|---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 | 0.016639 |
Pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 | 0.065997 |
Age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 | -0.301914 |
SibSp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 | 0.890712 |
Parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 | 0.783111 |
Fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 | 0.217138 |
Family | 0.016639 | 0.065997 | -0.301914 | 0.890712 | 0.783111 | 0.217138 | 1.000000 |
# Correlation matrix with the sign dropped, to compare strengths only.
abs(df.corr(numeric_only=True))
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | 0.338481 | 0.077221 | 0.035322 | 0.081629 | 0.257307 |
Pclass | 0.338481 | 1.000000 | 0.369226 | 0.083081 | 0.018443 | 0.549500 |
Age | 0.077221 | 0.369226 | 1.000000 | 0.308247 | 0.189119 | 0.096067 |
SibSp | 0.035322 | 0.083081 | 0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | 0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | 0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
# Flatten the correlation matrix into a Series indexed by pairs of column names.
df.corr(method="pearson", numeric_only=True).unstack()
Survived Survived 1.000000 Pclass -0.338481 Age -0.077221 SibSp -0.035322 Parch 0.081629 Fare 0.257307 Pclass Survived -0.338481 Pclass 1.000000 Age -0.369226 SibSp 0.083081 Parch 0.018443 Fare -0.549500 Age Survived -0.077221 Pclass -0.369226 Age 1.000000 SibSp -0.308247 Parch -0.189119 Fare 0.096067 SibSp Survived -0.035322 Pclass 0.083081 Age -0.308247 SibSp 1.000000 Parch 0.414838 Fare 0.159651 Parch Survived 0.081629 Pclass 0.018443 Age -0.189119 SibSp 0.414838 Parch 1.000000 Fare 0.216225 Fare Survived 0.257307 Pclass -0.549500 Age 0.096067 SibSp 0.159651 Parch 0.216225 Fare 1.000000 dtype: float64
# Median age for every (Pclass, Family) combination.
df.groupby(["Pclass", "Family"]).agg({"Age": "median"})
Age | ||
---|---|---|
Pclass | Family | |
1 | 0 | 38.5 |
1 | 37.0 | |
2 | 39.5 | |
3 | 14.0 | |
4 | 19.5 | |
5 | 23.5 | |
2 | 0 | 31.0 |
1 | 29.0 | |
2 | 22.0 | |
3 | 24.0 | |
4 | 54.0 | |
5 | 24.0 | |
3 | 0 | 26.0 |
1 | 24.5 | |
2 | 24.0 | |
3 | 5.0 | |
4 | 12.5 | |
5 | 8.0 | |
6 | 9.0 | |
7 | 12.5 | |
10 | NaN |
# Median age per (Pclass, Family), pivoted so each Family value becomes a column.
pfa = df.groupby(["Pclass", "Family"])[["Age"]].median().unstack(level="Family")
pfa
Age | |||||||||
---|---|---|---|---|---|---|---|---|---|
Family | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 |
Pclass | |||||||||
1 | 38.5 | 37.0 | 39.5 | 14.0 | 19.5 | 23.5 | NaN | NaN | NaN |
2 | 31.0 | 29.0 | 22.0 | 24.0 | 54.0 | 24.0 | NaN | NaN | NaN |
3 | 26.0 | 24.5 | 24.0 | 5.0 | 12.5 | 8.0 | 9.0 | 12.5 | NaN |
# Bar chart of the median ages: one group of bars per Pclass.
pfa.plot.bar()
<Axes: xlabel='Pclass'>
# Median age within each passenger class.
df.groupby("Pclass")[["Age"]].median()
Age | |
---|---|
Pclass | |
1 | 37.0 |
2 | 29.0 |
3 | 24.0 |
# Mean age within each passenger class.
df.groupby("Pclass")[["Age"]].mean()
Age | |
---|---|
Pclass | |
1 | 38.233441 |
2 | 29.877630 |
3 | 25.140620 |
# Midpoint between the mean and the median age of each passenger class.
(
    df.groupby("Pclass")[["Age"]].mean()
    .add(df.groupby("Pclass")[["Age"]].median())
    .div(2)
)
Age | |
---|---|
Pclass | |
1 | 37.616720 |
2 | 29.438815 |
3 | 24.570310 |
Analysis
- Univariate Analysis
- Bivariate Analysis
- Multivariate Analysis
# Univariate view: distribution of Age as a violin plot.
sns.violinplot(data=df, y="Age")
<Axes: ylabel='Age'>
# Bivariate view: Age distribution for each passenger class.
sns.violinplot(data=df, x="Pclass", y="Age")
<Axes: xlabel='Pclass', ylabel='Age'>
# Box plot of Age for each passenger class.
sns.boxplot(data=df, x="Pclass", y="Age")
<Axes: xlabel='Pclass', ylabel='Age'>
# Mean age (missing ages are skipped by default).
df["Age"].mean()
29.69911764705882
# Median age.
df["Age"].median()
28.0
# Lowest fare paid.
df["Fare"].min()
0.0
# Mean fare.
df["Fare"].mean()
32.204207968574636
# Median fare.
df["Fare"].median()
14.4542
# Highest fare paid.
df["Fare"].max()
512.3292
# Fares are right-skewed: most passengers paid a low fare, while the few who
# paid more paid very high amounts.
sns.displot(data=df, x="Fare", kde=True)
<seaborn.axisgrid.FacetGrid at 0x19f507423b0>
# Work on an independent copy so the numeric encodings below leave df untouched.
df1 = df.copy(deep=True)
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
# Encode Embarked as integers: S -> 1, C -> 2, Q -> 3.
# Assign the mapped column back instead of calling replace(inplace=True) on the
# attribute-accessed column: that form emits a FutureWarning in pandas 2.x and
# leaves df1 unchanged under Copy-on-Write. map() also yields a numeric column
# directly, without relying on the deprecated silent downcasting that replace()
# performs on object columns, so corr(numeric_only=True) keeps this column.
# NOTE: any value other than S/C/Q (including missing ones) becomes NaN.
df1["Embarked"] = df1["Embarked"].map({"S": 1, "C": 2, "Q": 3})
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | 1.0 | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | 2.0 | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | 1.0 | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | 1.0 | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | 1.0 | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | 1.0 | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | 1.0 | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | 1.0 | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | 2.0 | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | 3.0 | NaN | 0 |
891 rows × 13 columns
# Encode Sex as integers: male -> 1, female -> 2.
# Assign the mapped column back instead of calling replace(inplace=True) on the
# attribute-accessed column: that form emits a FutureWarning in pandas 2.x and
# leaves df1 unchanged under Copy-on-Write. map() also yields a numeric column
# directly, without relying on the deprecated silent downcasting that replace()
# performs on object columns, so corr(numeric_only=True) keeps this column.
# NOTE: any value other than 'male'/'female' becomes NaN.
df1["Sex"] = df1["Sex"].map({"male": 1, "female": 2})
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
# Reorder the columns so derived features sit next to their sources
# (Family after SibSp/Parch, Deck after Cabin) and Embarked comes last.
df1 = df1.loc[
    :,
    [
        "Survived",
        "Pclass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Family",
        "Ticket",
        "Fare",
        "Cabin",
        "Deck",
        "Embarked",
    ],
]
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Family | Ticket | Fare | Cabin | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | 1 | A/5 21171 | 7.2500 | NaN | NaN | 1.0 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | 1 | PC 17599 | 71.2833 | C85 | C | 2.0 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | NaN | 1.0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 1 | 113803 | 53.1000 | C123 | C | 1.0 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 0 | 373450 | 8.0500 | NaN | NaN | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 0 | 211536 | 13.0000 | NaN | NaN | 1.0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 0 | 112053 | 30.0000 | B42 | B | 1.0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | 3 | W./C. 6607 | 23.4500 | NaN | NaN | 1.0 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 0 | 111369 | 30.0000 | C148 | C | 2.0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 0 | 370376 | 7.7500 | NaN | NaN | 3.0 |
891 rows × 13 columns
# Correlation matrix of the encoded frame's numeric columns.
dfc = df1.corr(method="pearson", numeric_only=True)
dfc
Survived | Pclass | Sex | Age | SibSp | Parch | Family | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | 0.543351 | -0.077221 | -0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.108669 |
Pclass | -0.338481 | 1.000000 | -0.131900 | -0.369226 | 0.083081 | 0.018443 | 0.065997 | -0.549500 | 0.043835 |
Sex | 0.543351 | -0.131900 | 1.000000 | -0.093254 | 0.114631 | 0.245489 | 0.200988 | 0.182333 | 0.118593 |
Age | -0.077221 | -0.369226 | -0.093254 | 1.000000 | -0.308247 | -0.189119 | -0.301914 | 0.096067 | 0.012186 |
SibSp | -0.035322 | 0.083081 | 0.114631 | -0.308247 | 1.000000 | 0.414838 | 0.890712 | 0.159651 | -0.060606 |
Parch | 0.081629 | 0.018443 | 0.245489 | -0.189119 | 0.414838 | 1.000000 | 0.783111 | 0.216225 | -0.079320 |
Family | 0.016639 | 0.065997 | 0.200988 | -0.301914 | 0.890712 | 0.783111 | 1.000000 | 0.217138 | -0.081057 |
Fare | 0.257307 | -0.549500 | 0.182333 | 0.096067 | 0.159651 | 0.216225 | 0.217138 | 1.000000 | 0.063462 |
Embarked | 0.108669 | 0.043835 | 0.118593 | 0.012186 | -0.060606 | -0.079320 | -0.081057 | 0.063462 | 1.000000 |
# Keep only the first row of the correlation matrix (Survived, the first
# numeric column of df1) and drop its self-correlation column.
su = df1.corr(numeric_only=True).head(1).drop("Survived", axis=1)
su
Pclass | Sex | Age | SibSp | Parch | Family | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
Survived | -0.338481 | 0.543351 | -0.077221 | -0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.108669 |
# Bar chart of each feature's correlation with Survived.
su.plot.bar()
<Axes: >