This is an example EDA (exploratory data analysis) for absolute beginners. For more detailed EDA and proper documentation, refer to other EDA projects.
Dataset and domain knowledge (column descriptions): https://www.kaggle.com/c/titanic/data
Link to another example EDA: https://www.kaggle.com/code/gunesevitan/titanic-advanced-feature-engineering-tutorial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("titanic.csv", index_col = 'PassengerId')
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
# df[df.Price == df.Price.max()].Company
df.isnull().sum()
Survived 0 Pclass 0 Name 0 Sex 0 Age 177 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 dtype: int64
df[df.Embarked.isnull()]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
# Ports used by passengers who resemble the two rows with a missing Embarked:
# first class, female, survived, and travelling without siblings or a spouse.
similar = (df.Pclass == 1) & (df.Sex == 'female') & (df.Survived == 1) & (df.SibSp == 0)
df.loc[similar, 'Embarked'].value_counts()
S 24 C 22 Name: Embarked, dtype: int64
df['Cabin'].str[0]
PassengerId 1 NaN 2 C 3 NaN 4 C 5 NaN ... 887 NaN 888 B 889 NaN 890 C 891 NaN Name: Cabin, Length: 891, dtype: object
df[df.Cabin.notnull()]['Cabin'].str[0]
PassengerId 2 C 4 C 7 E 11 G 12 C .. 872 D 873 B 880 C 888 B 890 C Name: Cabin, Length: 204, dtype: object
# Deck is the first character of the cabin code (e.g. 'C85' -> 'C').
# Rows without a cabin keep NaN.
df['Deck'] = df['Cabin'].str.get(0)
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN |
891 rows × 12 columns
# Most common port among surviving first-class females on deck B, i.e. the
# group that the two passengers with a missing Embarked belong to.
deck_b_group = (df.Pclass == 1) & (df.Sex == 'female') & (df.Survived == 1) & (df.Deck == 'B')
df.loc[deck_b_group, 'Embarked'].mode()
0 S dtype: object
# Impute the two missing ports (PassengerId 62 and 830) with 'S', the mode of
# Embarked for comparable passengers computed above.
# Assign the result back instead of calling fillna(inplace=True) on the
# attribute: that form mutates an intermediate object, and under pandas
# Copy-on-Write (the default from pandas 3.0) it leaves df unchanged.
df['Embarked'] = df['Embarked'].fillna('S')
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN |
891 rows × 12 columns
df.isnull().sum()
Survived 0 Pclass 0 Name 0 Sex 0 Age 177 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 Deck 687 dtype: int64
df.loc[[62, 830]]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | S | B |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | S | B |
# Family = SibSp + Parch, one combined count per passenger.
# Presumably siblings/spouses plus parents/children aboard, per the Kaggle
# data dictionary linked at the top -- confirm there.
df['Family'] = df['SibSp'].add(df['Parch'])
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
df.corr(numeric_only = True) # Correlation Matrix
Survived | Pclass | Age | SibSp | Parch | Fare | Family | |
---|---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 | 0.016639 |
Pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 | 0.065997 |
Age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 | -0.301914 |
SibSp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 | 0.890712 |
Parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 | 0.783111 |
Fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 | 0.217138 |
Family | 0.016639 | 0.065997 | -0.301914 | 0.890712 | 0.783111 | 0.217138 | 1.000000 |
df.corr(numeric_only = True).abs() # Absolute correlation matrix: strength of each relationship with the sign dropped
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | 0.338481 | 0.077221 | 0.035322 | 0.081629 | 0.257307 |
Pclass | 0.338481 | 1.000000 | 0.369226 | 0.083081 | 0.018443 | 0.549500 |
Age | 0.077221 | 0.369226 | 1.000000 | 0.308247 | 0.189119 | 0.096067 |
SibSp | 0.035322 | 0.083081 | 0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | 0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | 0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
df.corr(numeric_only = True).unstack()
Survived Survived 1.000000 Pclass -0.338481 Age -0.077221 SibSp -0.035322 Parch 0.081629 Fare 0.257307 Pclass Survived -0.338481 Pclass 1.000000 Age -0.369226 SibSp 0.083081 Parch 0.018443 Fare -0.549500 Age Survived -0.077221 Pclass -0.369226 Age 1.000000 SibSp -0.308247 Parch -0.189119 Fare 0.096067 SibSp Survived -0.035322 Pclass 0.083081 Age -0.308247 SibSp 1.000000 Parch 0.414838 Fare 0.159651 Parch Survived 0.081629 Pclass 0.018443 Age -0.189119 SibSp 0.414838 Parch 1.000000 Fare 0.216225 Fare Survived 0.257307 Pclass -0.549500 Age 0.096067 SibSp 0.159651 Parch 0.216225 Fare 1.000000 dtype: float64
df.groupby(['Pclass', 'Family'])[['Age']].median()
Age | ||
---|---|---|
Pclass | Family | |
1 | 0 | 38.5 |
1 | 37.0 | |
2 | 39.5 | |
3 | 14.0 | |
4 | 19.5 | |
5 | 23.5 | |
2 | 0 | 31.0 |
1 | 29.0 | |
2 | 22.0 | |
3 | 24.0 | |
4 | 54.0 | |
5 | 24.0 | |
3 | 0 | 26.0 |
1 | 24.5 | |
2 | 24.0 | |
3 | 5.0 | |
4 | 12.5 | |
5 | 8.0 | |
6 | 9.0 | |
7 | 12.5 | |
10 | NaN |
pfa = df.groupby(['Pclass', 'Family'])[['Age']].median().unstack()
pfa
Age | |||||||||
---|---|---|---|---|---|---|---|---|---|
Family | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 |
Pclass | |||||||||
1 | 38.5 | 37.0 | 39.5 | 14.0 | 19.5 | 23.5 | NaN | NaN | NaN |
2 | 31.0 | 29.0 | 22.0 | 24.0 | 54.0 | 24.0 | NaN | NaN | NaN |
3 | 26.0 | 24.5 | 24.0 | 5.0 | 12.5 | 8.0 | 9.0 | 12.5 | NaN |
pfa.plot(kind = 'bar')
<Axes: xlabel='Pclass'>
df.groupby(['Pclass'])[['Age']].median()
Age | |
---|---|
Pclass | |
1 | 37.0 |
2 | 29.0 |
3 | 24.0 |
df.groupby(['Pclass'])[['Age']].mean()
Age | |
---|---|
Pclass | |
1 | 38.233441 |
2 | 29.877630 |
3 | 25.140620 |
# Midpoint between the mean and the median age of each passenger class.
# Build the groupby once and reuse it for both statistics.
age_by_class = df.groupby(['Pclass'])[['Age']]
(age_by_class.mean() + age_by_class.median()) / 2
Age | |
---|---|
Pclass | |
1 | 37.616720 |
2 | 29.438815 |
3 | 24.570310 |
Analysis
- Univariate Analysis
- Bivariate Analysis
- Multivariate Analysis
sns.violinplot(y = df['Age'])
<Axes: ylabel='Age'>
sns.violinplot(y = df['Age'], x = df.Pclass)
<Axes: xlabel='Pclass', ylabel='Age'>
sns.boxplot(y = df['Age'], x = df.Pclass)
<Axes: xlabel='Pclass', ylabel='Age'>
df.Age.mean()
29.69911764705882
df.Age.median()
28.0
df.Fare.min()
0.0
df.Fare.mean()
32.204207968574636
df.Fare.median()
14.4542
df.Fare.max()
512.3292
# Fare is strongly right-skewed: most passengers paid low fares (median 14.45,
# mean 32.20), while a small number paid very high ones (max 512.33).
sns.displot(x = df.Fare, kde = True)
<seaborn.axisgrid.FacetGrid at 0x19f507423b0>
df1 = df.copy()
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
# Encode the port of embarkation as a number so corr(numeric_only=True)
# includes it. The codes are arbitrary labels, not a meaningful order.
# map() with an explicit dict yields a numeric column directly (missing values
# stay NaN). Assign the result back rather than using replace(inplace=True) on
# the attribute, which mutates an intermediate object and has no effect on df1
# under pandas Copy-on-Write.
df1['Embarked'] = df1['Embarked'].map({'S': 1, 'C': 2, 'Q': 3})
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | 1.0 | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | 2.0 | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | 1.0 | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | 1.0 | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | 1.0 | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | 1.0 | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | 1.0 | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | 1.0 | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | 2.0 | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | 3.0 | NaN | 0 |
891 rows × 13 columns
# Encode Sex as a number (male=1, female=2) so corr(numeric_only=True)
# includes it.
# map() with an explicit dict yields a numeric column directly. Assign the
# result back rather than using replace(inplace=True) on the attribute, which
# mutates an intermediate object and has no effect on df1 under pandas
# Copy-on-Write.
df1['Sex'] = df1['Sex'].map({'male': 1, 'female': 2})
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S | NaN | 1 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S | NaN | 0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C | C | 0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q | NaN | 0 |
891 rows × 13 columns
# Reorder the columns so each derived feature sits next to its source columns
# (Family after SibSp/Parch, Deck after Cabin).
# .copy() makes df1 an independent frame rather than a selection of the old
# one, so later column assignments do not raise SettingWithCopyWarning.
df1 = df1[['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Family', 'Ticket',
           'Fare', 'Cabin', 'Deck', 'Embarked']].copy()
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Family | Ticket | Fare | Cabin | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | 1 | A/5 21171 | 7.2500 | NaN | NaN | 1.0 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | 1 | PC 17599 | 71.2833 | C85 | C | 2.0 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | NaN | 1.0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 1 | 113803 | 53.1000 | C123 | C | 1.0 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 0 | 373450 | 8.0500 | NaN | NaN | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 0 | 211536 | 13.0000 | NaN | NaN | 1.0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 0 | 112053 | 30.0000 | B42 | B | 1.0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | 3 | W./C. 6607 | 23.4500 | NaN | NaN | 1.0 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 0 | 111369 | 30.0000 | C148 | C | 2.0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 0 | 370376 | 7.7500 | NaN | NaN | 3.0 |
891 rows × 13 columns
dfc = df1.corr(numeric_only = True)
dfc
Survived | Pclass | Sex | Age | SibSp | Parch | Family | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | 0.543351 | -0.077221 | -0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.108669 |
Pclass | -0.338481 | 1.000000 | -0.131900 | -0.369226 | 0.083081 | 0.018443 | 0.065997 | -0.549500 | 0.043835 |
Sex | 0.543351 | -0.131900 | 1.000000 | -0.093254 | 0.114631 | 0.245489 | 0.200988 | 0.182333 | 0.118593 |
Age | -0.077221 | -0.369226 | -0.093254 | 1.000000 | -0.308247 | -0.189119 | -0.301914 | 0.096067 | 0.012186 |
SibSp | -0.035322 | 0.083081 | 0.114631 | -0.308247 | 1.000000 | 0.414838 | 0.890712 | 0.159651 | -0.060606 |
Parch | 0.081629 | 0.018443 | 0.245489 | -0.189119 | 0.414838 | 1.000000 | 0.783111 | 0.216225 | -0.079320 |
Family | 0.016639 | 0.065997 | 0.200988 | -0.301914 | 0.890712 | 0.783111 | 1.000000 | 0.217138 | -0.081057 |
Fare | 0.257307 | -0.549500 | 0.182333 | 0.096067 | 0.159651 | 0.216225 | 0.217138 | 1.000000 | 0.063462 |
Embarked | 0.108669 | 0.043835 | 0.118593 | 0.012186 | -0.060606 | -0.079320 | -0.081057 | 0.063462 | 1.000000 |
# Correlation of every numeric feature with Survived, kept as a one-row frame
# so it can be drawn as grouped bars. Select the row by label rather than by
# position, so the result does not depend on Survived being the first column.
su = df1.corr(numeric_only = True).loc[['Survived']].drop(columns = 'Survived')
su
Pclass | Sex | Age | SibSp | Parch | Family | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
Survived | -0.338481 | 0.543351 | -0.077221 | -0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.108669 |
su.plot(kind = 'bar')
<Axes: >
su = su.abs()
su
Pclass | Sex | Age | SibSp | Parch | Family | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
Survived | 0.338481 | 0.543351 | 0.077221 | 0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.108669 |
su.plot(kind = 'bar')
<Axes: >
# df1['Deck'] = df1['Cabin'].apply(lambda s: s[0] if pd.notnull(s) else np.nan)
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Family | Ticket | Fare | Cabin | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | 1 | A/5 21171 | 7.2500 | NaN | NaN | 1.0 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | 1 | PC 17599 | 71.2833 | C85 | C | 2.0 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | NaN | 1.0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 1 | 113803 | 53.1000 | C123 | C | 1.0 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 0 | 373450 | 8.0500 | NaN | NaN | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 0 | 211536 | 13.0000 | NaN | NaN | 1.0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 0 | 112053 | 30.0000 | B42 | B | 1.0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | 3 | W./C. 6607 | 23.4500 | NaN | NaN | 1.0 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 0 | 111369 | 30.0000 | C148 | C | 2.0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 0 | 370376 | 7.7500 | NaN | NaN | 3.0 |
891 rows × 13 columns
df1.Deck.unique()
array([nan, 'C', 'E', 'G', 'D', 'A', 'B', 'F', 'T'], dtype=object)
df1[df1.Deck == 'T']
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Family | Ticket | Fare | Cabin | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
340 | 0 | 1 | Blackwell, Mr. Stephen Weart | 1 | 45.0 | 0 | 0 | 0 | 113784 | 35.5 | T | T | 1.0 |
# Encode the deck letter as a number so corr(numeric_only=True) includes it.
# Missing decks stay NaN because map() returns NaN for values not in the dict.
# Assign the result back instead of calling replace(inplace=True) on the
# attribute: that form raised SettingWithCopyWarning here and has no effect on
# df1 under pandas Copy-on-Write.
# NOTE(review): 'E' and 'F' both map to 5 (and 'G' to 6), so two decks are
# merged into one code. This looks unintended (A..G -> 1..7 would keep them
# distinct); the original codes are kept so existing results do not change.
deck_codes = {'T': 0, 'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 5, 'G': 6}
df1['Deck'] = df1['Deck'].map(deck_codes)
C:\Users\harsh\AppData\Local\Temp\ipykernel_19292\3993522731.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df1.Deck.replace([np.nan, 'C', 'E', 'G', 'D', 'A', 'B', 'F', 'T'], [np.nan, 3, 5, 6, 4, 1, 2, 5, 0], inplace =True)
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Family | Ticket | Fare | Cabin | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | 1 | 22.0 | 1 | 0 | 1 | A/5 21171 | 7.2500 | NaN | NaN | 1.0 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 38.0 | 1 | 0 | 1 | PC 17599 | 71.2833 | C85 | 3.0 | 2.0 |
3 | 1 | 3 | Heikkinen, Miss. Laina | 2 | 26.0 | 0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | NaN | 1.0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 2 | 35.0 | 1 | 0 | 1 | 113803 | 53.1000 | C123 | 3.0 | 1.0 |
5 | 0 | 3 | Allen, Mr. William Henry | 1 | 35.0 | 0 | 0 | 0 | 373450 | 8.0500 | NaN | NaN | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | 1 | 27.0 | 0 | 0 | 0 | 211536 | 13.0000 | NaN | NaN | 1.0 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | 2 | 19.0 | 0 | 0 | 0 | 112053 | 30.0000 | B42 | 2.0 | 1.0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | 2 | NaN | 1 | 2 | 3 | W./C. 6607 | 23.4500 | NaN | NaN | 1.0 |
890 | 1 | 1 | Behr, Mr. Karl Howell | 1 | 26.0 | 0 | 0 | 0 | 111369 | 30.0000 | C148 | 3.0 | 2.0 |
891 | 0 | 3 | Dooley, Mr. Patrick | 1 | 32.0 | 0 | 0 | 0 | 370376 | 7.7500 | NaN | NaN | 3.0 |
891 rows × 13 columns
dfc = df1.corr(numeric_only = True)
dfc
Survived | Pclass | Sex | Age | SibSp | Parch | Family | Fare | Deck | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | 0.543351 | -0.077221 | -0.035322 | 0.081629 | 0.016639 | 0.257307 | 0.074078 | 0.108669 |
Pclass | -0.338481 | 1.000000 | -0.131900 | -0.369226 | 0.083081 | 0.018443 | 0.065997 | -0.549500 | 0.522110 | 0.043835 |
Sex | 0.543351 | -0.131900 | 1.000000 | -0.093254 | 0.114631 | 0.245489 | 0.200988 | 0.182333 | 0.111241 | 0.118593 |
Age | -0.077221 | -0.369226 | -0.093254 | 1.000000 | -0.308247 | -0.189119 | -0.301914 | 0.096067 | -0.150319 | 0.012186 |
SibSp | -0.035322 | 0.083081 | 0.114631 | -0.308247 | 1.000000 | 0.414838 | 0.890712 | 0.159651 | 0.052095 | -0.060606 |
Parch | 0.081629 | 0.018443 | 0.245489 | -0.189119 | 0.414838 | 1.000000 | 0.783111 | 0.216225 | 0.020578 | -0.079320 |
Family | 0.016639 | 0.065997 | 0.200988 | -0.301914 | 0.890712 | 0.783111 | 1.000000 | 0.217138 | 0.044137 | -0.081057 |
Fare | 0.257307 | -0.549500 | 0.182333 | 0.096067 | 0.159651 | 0.216225 | 0.217138 | 1.000000 | -0.268925 | 0.063462 |
Deck | 0.074078 | 0.522110 | 0.111241 | -0.150319 | 0.052095 | 0.020578 | 0.044137 | -0.268925 | 1.000000 | -0.177969 |
Embarked | 0.108669 | 0.043835 | 0.118593 | 0.012186 | -0.060606 | -0.079320 | -0.081057 | 0.063462 | -0.177969 | 1.000000 |
dfd = df1.Deck
# Show the documentation of the plotting accessor. help() is plain Python and
# works everywhere, whereas the `obj?` form is IPython-only syntax and is a
# SyntaxError when this code runs as a regular script.
help(dfd.plot)
df.Deck.count()/len(df)*100
22.895622895622896
sns.countplot(x = df.Deck.sort_values())
<Axes: xlabel='Deck', ylabel='count'>
sns.countplot(x = df1.Deck, hue = df1.Pclass)
<Axes: xlabel='Deck', ylabel='count'>
df1.Deck.value_counts()
3.0 59 2.0 47 5.0 45 4.0 33 1.0 15 6.0 4 0.0 1 Name: Deck, dtype: int64
df1[['Deck', 'Pclass']].value_counts().unstack()
Pclass | 1 | 2 | 3 |
---|---|---|---|
Deck | |||
0.0 | 1.0 | NaN | NaN |
1.0 | 15.0 | NaN | NaN |
2.0 | 47.0 | NaN | NaN |
3.0 | 59.0 | NaN | NaN |
4.0 | 29.0 | 4.0 | NaN |
5.0 | 25.0 | 12.0 | 8.0 |
6.0 | NaN | NaN | 4.0 |
df[['Sex', 'Survived','Pclass']].value_counts().unstack()
Pclass | 1 | 2 | 3 | |
---|---|---|---|---|
Sex | Survived | |||
female | 0 | 3 | 6 | 72 |
1 | 91 | 70 | 72 | |
male | 0 | 77 | 91 | 300 |
1 | 45 | 17 | 47 |
df[['Sex', 'Survived','Pclass']].value_counts().unstack().plot(kind = 'bar')
<Axes: xlabel='Sex,Survived'>
sns.heatmap(dfc, cmap = 'coolwarm')
<Axes: >
len(df[df.Sex == 'male'])
577
len(df[df.Sex == 'female'])
314
Conclusion: The dataset is imbalanced by sex: 577 of the 891 passengers (about 65%) are male and only 314 (about 35%) are female.
len(df)
891
round((len(df[df['Sex'] == 'male'])/len(df))*100)
65
round((len(df[df['Sex'] == 'female'])/len(df))*100)
35
df[df['Sex'] == 'female']
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | Deck | Family | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||||
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C | C | 1 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S | NaN | 0 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S | C | 1 |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S | NaN | 2 |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C | NaN | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S | NaN | 1 |
883 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S | NaN | 0 |
886 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q | NaN | 5 |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S | B | 0 |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S | NaN | 3 |
314 rows × 13 columns
df[(df['Sex'] == 'female') & (df['Survived'] == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
875 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
876 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
233 rows × 11 columns
len(df[(df['Sex'] == 'female') & (df['Survived'] == 1)])
233
len(df[(df['Sex'] == 'female') & (df['Survived'] == 1)])/len(df[(df['Sex'] == 'female')])
0.7420382165605095
len(df[(df['Sex'] == 'male') & (df['Survived'] == 1)])
109
len(df[(df['Sex'] == 'male') & (df['Survived'] == 1)])/len(df[(df['Sex'] == 'male')])
0.18890814558058924
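Observation: More females survived than males (233 vs. 109), even though there were fewer females aboard.
Inference: In this dataset, a female passenger was far more likely to survive (about 74%) than a male passenger (about 19%).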
# Rows at positions 5 through 12 (PassengerId 6-13). The index holds integers,
# so use iloc to make it explicit that the slice is positional, not label-based.
df.iloc[5:13]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | NaN | S |
df.head(10)
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
df.tail(30)
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
862 | 0 | 2 | Giles, Mr. Frederick Edward | male | 21.0 | 1 | 0 | 28134 | 11.5000 | NaN | S |
863 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
864 | 0 | 3 | Sage, Miss. Dorothy Edith "Dolly" | female | NaN | 8 | 2 | CA. 2343 | 69.5500 | NaN | S |
865 | 0 | 2 | Gill, Mr. John William | male | 24.0 | 0 | 0 | 233866 | 13.0000 | NaN | S |
866 | 1 | 2 | Bystrom, Mrs. (Karolina) | female | 42.0 | 0 | 0 | 236852 | 13.0000 | NaN | S |
867 | 1 | 2 | Duran y More, Miss. Asuncion | female | 27.0 | 1 | 0 | SC/PARIS 2149 | 13.8583 | NaN | C |
868 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
869 | 0 | 3 | van Melkebeke, Mr. Philemon | male | NaN | 0 | 0 | 345777 | 9.5000 | NaN | S |
870 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
871 | 0 | 3 | Balkic, Mr. Cerin | male | 26.0 | 0 | 0 | 349248 | 7.8958 | NaN | S |
872 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
873 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
874 | 0 | 3 | Vander Cruyssen, Mr. Victor | male | 47.0 | 0 | 0 | 345765 | 9.0000 | NaN | S |
875 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
876 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
877 | 0 | 3 | Gustafsson, Mr. Alfred Ossian | male | 20.0 | 0 | 0 | 7534 | 9.8458 | NaN | S |
878 | 0 | 3 | Petroff, Mr. Nedelio | male | 19.0 | 0 | 0 | 349212 | 7.8958 | NaN | S |
879 | 0 | 3 | Laleff, Mr. Kristo | male | NaN | 0 | 0 | 349217 | 7.8958 | NaN | S |
880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
882 | 0 | 3 | Markun, Mr. Johann | male | 33.0 | 0 | 0 | 349257 | 7.8958 | NaN | S |
883 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S |
884 | 0 | 2 | Banfield, Mr. Frederick James | male | 28.0 | 0 | 0 | C.A./SOTON 34068 | 10.5000 | NaN | S |
885 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
886 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
df.sample(7)
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
387 | 0 | 3 | Goodwin, Master. Sidney Leonard | male | 1.0 | 5 | 2 | CA 2144 | 46.9000 | NaN | S |
161 | 0 | 3 | Cribb, Mr. John Hatfield | male | 44.0 | 0 | 1 | 371362 | 16.1000 | NaN | S |
870 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
407 | 0 | 3 | Widegren, Mr. Carl/Charles Peter | male | 51.0 | 0 | 0 | 347064 | 7.7500 | NaN | S |
530 | 0 | 2 | Hocking, Mr. Richard George | male | 23.0 | 2 | 1 | 29104 | 11.5000 | NaN | S |
615 | 0 | 3 | Brocklebank, Mr. William Alfred | male | 35.0 | 0 | 0 | 364512 | 8.0500 | NaN | S |
414 | 0 | 2 | Cunningham, Mr. Alfred Fleming | male | NaN | 0 | 0 | 239853 | 0.0000 | NaN | S |
# Print the number of passengers travelling in each ticket class (1 = upper, 3 = lower).
for pclass in (1, 2, 3):
    print(pclass, ':', len(df[df.Pclass == pclass]))
1 : 216 2 : 184 3 : 491
# Conclusion: There are more passengers in Pclass 3 than in Pclass 1 and 2 combined.
# There were more lower-class passengers on the ship.
# 100 Questions
# 80 Questions for Training
# 20 Questions for Testing
df.Fare.max()
512.3292
round((len(df[df.Fare == 0])/len(df))*100, 1)
1.7
# Observation: 1.7% of the sample population didn't pay for the ticket.
# Inference: Possible explanations: crew members or freeloading passengers.
df[df.Fare != 0].Fare.min()
4.0125
df.Fare.mean()
32.204207968574636
df.Fare.median()
14.4542
# Observation: The majority of the passengers paid a low fare.
# The mean value is higher than the median (the Fare distribution is right-skewed).
# Inference: Very few people paid a high fare, but for them the ticket was priced very high.
# Domain Inference: There is a large gap between upper-class and lower-class people in society.
# Conclusion: There is a lot of variation in the ticket prices for the ship.
n = df[df['Sex'] == 'male']
n
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
884 | 0 | 2 | Banfield, Mr. Frederick James | male | 28.0 | 0 | 0 | C.A./SOTON 34068 | 10.5000 | NaN | S |
885 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
577 rows × 11 columns
len(df[df['Sex'] == 'male'])/len(df)*100
64.75869809203144
[1] Observation: The dataset is biased towards male passengers (about 64.8% of the records).
# Percentage of missing values in each column (mean of the boolean null mask, scaled to percent).
df.isnull().mean() * 100
Survived 0.000000 Pclass 0.000000 Name 0.000000 Sex 0.000000 Age 19.865320 SibSp 0.000000 Parch 0.000000 Ticket 0.000000 Fare 0.000000 Cabin 77.104377 Embarked 0.224467 dtype: float64
# Pairwise Pearson correlation between the numeric columns.
# Select numeric columns explicitly: DataFrame.corr() raises on string columns
# (Name, Sex, Ticket, ...) in pandas >= 2.0, where numeric_only defaults to False.
df.select_dtypes(include='number').corr()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 |
Pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 |
Age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 |
SibSp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
# Observation: Age is negatively correlated to Pclass with a value of -0.369.
# Inference: There are more older people in Upper class
# Count passengers older than 45 in each ticket class to check the Age/Pclass correlation.
for label, pclass in (('Upper: ', 1), ('Middle: ', 2), ('Lower: ', 3)):
    print(label, len(df[(df.Age > 45) & (df.Pclass == pclass)]))
Upper: 62 Middle: 23 Lower: 18
# Conclusion: There are more older people in Upper class
# Correlation matrix of the numeric columns, shown again for reference.
# select_dtypes keeps this working on pandas >= 2.0, where DataFrame.corr()
# no longer silently drops string columns and raises instead.
df.select_dtypes(include='number').corr()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 |
Pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 |
Age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 |
SibSp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
# Absolute correlations: strength of each relationship regardless of its sign.
# Numeric columns are selected first because DataFrame.corr() raises on
# string columns in pandas >= 2.0.
df.select_dtypes(include='number').corr().abs()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | 0.338481 | 0.077221 | 0.035322 | 0.081629 | 0.257307 |
Pclass | 0.338481 | 1.000000 | 0.369226 | 0.083081 | 0.018443 | 0.549500 |
Age | 0.077221 | 0.369226 | 1.000000 | 0.308247 | 0.189119 | 0.096067 |
SibSp | 0.035322 | 0.083081 | 0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | 0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | 0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
# Flatten the correlation matrix into a Series indexed by (column, column) pairs.
# Numeric columns are selected first because DataFrame.corr() raises on
# string columns in pandas >= 2.0.
df.select_dtypes(include='number').corr().unstack()
Survived Survived 1.000000 Pclass -0.338481 Age -0.077221 SibSp -0.035322 Parch 0.081629 Fare 0.257307 Pclass Survived -0.338481 Pclass 1.000000 Age -0.369226 SibSp 0.083081 Parch 0.018443 Fare -0.549500 Age Survived -0.077221 Pclass -0.369226 Age 1.000000 SibSp -0.308247 Parch -0.189119 Fare 0.096067 SibSp Survived -0.035322 Pclass 0.083081 Age -0.308247 SibSp 1.000000 Parch 0.414838 Fare 0.159651 Parch Survived 0.081629 Pclass 0.018443 Age -0.189119 SibSp 0.414838 Parch 1.000000 Fare 0.216225 Fare Survived 0.257307 Pclass -0.549500 Age 0.096067 SibSp 0.159651 Parch 0.216225 Fare 1.000000 dtype: float64
# Rank every pair of numeric columns by correlation, strongest positive first.
# select_dtypes avoids the error DataFrame.corr() raises on string columns in pandas >= 2.0.
# .loc[6:] skips the first six rows: the self-correlations of the six numeric
# columns, which are all exactly 1.0 and therefore sort to the top.
(
    df.select_dtypes(include='number')
    .corr()
    .unstack()
    .sort_values(ascending=False)
    .reset_index()
    .loc[6:]
)
level_0 | level_1 | 0 | |
---|---|---|---|
6 | SibSp | Parch | 0.414838 |
7 | Parch | SibSp | 0.414838 |
8 | Survived | Fare | 0.257307 |
9 | Fare | Survived | 0.257307 |
10 | Fare | Parch | 0.216225 |
11 | Parch | Fare | 0.216225 |
12 | Fare | SibSp | 0.159651 |
13 | SibSp | Fare | 0.159651 |
14 | Fare | Age | 0.096067 |
15 | Age | Fare | 0.096067 |
16 | SibSp | Pclass | 0.083081 |
17 | Pclass | SibSp | 0.083081 |
18 | Parch | Survived | 0.081629 |
19 | Survived | Parch | 0.081629 |
20 | Parch | Pclass | 0.018443 |
21 | Pclass | Parch | 0.018443 |
22 | Survived | SibSp | -0.035322 |
23 | SibSp | Survived | -0.035322 |
24 | Survived | Age | -0.077221 |
25 | Age | Survived | -0.077221 |
26 | Parch | Age | -0.189119 |
27 | Age | Parch | -0.189119 |
28 | SibSp | Age | -0.308247 |
29 | Age | SibSp | -0.308247 |
30 | Survived | Pclass | -0.338481 |
31 | Pclass | Survived | -0.338481 |
32 | Age | Pclass | -0.369226 |
33 | Pclass | Age | -0.369226 |
34 | Fare | Pclass | -0.549500 |
35 | Pclass | Fare | -0.549500 |
# Rank every pair of numeric columns by absolute correlation (strength, ignoring sign).
# select_dtypes avoids the error DataFrame.corr() raises on string columns in pandas >= 2.0.
# .loc[6:] skips the first six rows: the self-correlations of the six numeric
# columns, which are all exactly 1.0 and therefore sort to the top.
(
    df.select_dtypes(include='number')
    .corr()
    .abs()
    .unstack()
    .sort_values(ascending=False)
    .reset_index()
    .loc[6:]
)
level_0 | level_1 | 0 | |
---|---|---|---|
6 | Fare | Pclass | 0.549500 |
7 | Pclass | Fare | 0.549500 |
8 | Parch | SibSp | 0.414838 |
9 | SibSp | Parch | 0.414838 |
10 | Age | Pclass | 0.369226 |
11 | Pclass | Age | 0.369226 |
12 | Survived | Pclass | 0.338481 |
13 | Pclass | Survived | 0.338481 |
14 | SibSp | Age | 0.308247 |
15 | Age | SibSp | 0.308247 |
16 | Fare | Survived | 0.257307 |
17 | Survived | Fare | 0.257307 |
18 | Fare | Parch | 0.216225 |
19 | Parch | Fare | 0.216225 |
20 | Age | Parch | 0.189119 |
21 | Parch | Age | 0.189119 |
22 | SibSp | Fare | 0.159651 |
23 | Fare | SibSp | 0.159651 |
24 | Age | Fare | 0.096067 |
25 | Fare | Age | 0.096067 |
26 | Pclass | SibSp | 0.083081 |
27 | SibSp | Pclass | 0.083081 |
28 | Parch | Survived | 0.081629 |
29 | Survived | Parch | 0.081629 |
30 | Survived | Age | 0.077221 |
31 | Age | Survived | 0.077221 |
32 | Survived | SibSp | 0.035322 |
33 | SibSp | Survived | 0.035322 |
34 | Parch | Pclass | 0.018443 |
35 | Pclass | Parch | 0.018443 |
df[df.Embarked.isnull()]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
df[df['Sex'] == 'male'].count()
Survived 577 Pclass 577 Name 577 Sex 577 Age 453 SibSp 577 Parch 577 Ticket 577 Fare 577 Cabin 107 Embarked 577 dtype: int64
# Number of male passengers with a recorded (non-null) Age.
df.loc[df['Sex'] == 'male', 'Age'].count()
453
df.Name
PassengerId 1 Braund, Mr. Owen Harris 2 Cumings, Mrs. John Bradley (Florence Briggs Th... 3 Heikkinen, Miss. Laina 4 Futrelle, Mrs. Jacques Heath (Lily May Peel) 5 Allen, Mr. William Henry ... 887 Montvila, Rev. Juozas 888 Graham, Miss. Margaret Edith 889 Johnston, Miss. Catherine Helen "Carrie" 890 Behr, Mr. Karl Howell 891 Dooley, Mr. Patrick Name: Name, Length: 891, dtype: object
df[(df['Sex'] == 'male') & (df['Survived'] == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
18 | 1 | 2 | Williams, Mr. Charles Eugene | male | NaN | 0 | 0 | 244373 | 13.0000 | NaN | S |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
24 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
37 | 1 | 3 | Mamee, Mr. Hanna | male | NaN | 0 | 0 | 2677 | 7.2292 | NaN | C |
56 | 1 | 1 | Woolner, Mr. Hugh | male | NaN | 0 | 0 | 19947 | 35.5000 | C52 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
839 | 1 | 3 | Chip, Mr. Chang | male | 32.0 | 0 | 0 | 1601 | 56.4958 | NaN | S |
840 | 1 | 1 | Marechal, Mr. Pierre | male | NaN | 0 | 0 | 11774 | 29.7000 | C47 | C |
858 | 1 | 1 | Daly, Mr. Peter Denis | male | 51.0 | 0 | 0 | 113055 | 26.5500 | E17 | S |
870 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
109 rows × 11 columns
# Fraction of male passengers who survived.
males = df[df['Sex'] == 'male']
len(males[males['Survived'] == 1]) / len(males)
0.18890814558058924
# Fraction of female passengers who survived.
females = df[df['Sex'] == 'female']
len(females[females['Survived'] == 1]) / len(females)
0.7420382165605095
df.Embarked.unique()
array(['S', 'C', 'Q', nan], dtype=object)
len(df[df['Sex'] == 'male'])/len(df)
0.6475869809203143
len(df[df['Sex'] == 'female'])/len(df)
0.35241301907968575
len(df[df['Pclass']==3])/len(df)*100
55.106621773288445
len(df[df['Pclass']==2])/len(df)*100
20.65095398428732
len(df[df['Pclass']==1])/len(df)*100
24.242424242424242
df.index
Int64Index([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... 882, 883, 884, 885, 886, 887, 888, 889, 890, 891], dtype='int64', name='PassengerId', length=891)
tuple(df.columns)
('Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked')
# access column Age
df["Age"].head(15)
PassengerId 1 22.0 2 38.0 3 26.0 4 35.0 5 35.0 6 NaN 7 54.0 8 2.0 9 27.0 10 14.0 11 4.0 12 58.0 13 20.0 14 39.0 15 14.0 Name: Age, dtype: float64
# access column Age, Fare, SibSp
df[["Age", "Fare", 'SibSp']].head()
Age | Fare | SibSp | |
---|---|---|---|
PassengerId | |||
1 | 22.0 | 7.2500 | 1 |
2 | 38.0 | 71.2833 | 1 |
3 | 26.0 | 7.9250 | 0 |
4 | 35.0 | 53.1000 | 1 |
5 | 35.0 | 8.0500 | 0 |
df.Embarked.unique() # to see the unique values in a particular column
array(['S', 'C', 'Q', nan], dtype=object)
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
t = df['Embarked'].isnull()
t
PassengerId 1 False 2 False 3 False 4 False 5 False ... 887 False 888 False 889 False 890 False 891 False Name: Embarked, Length: 891, dtype: bool
df[df['Embarked'].isnull()]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
62 | 1 | 1 | Icard, Miss. Amelie | female | 38.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Evelyn) | female | 62.0 | 0 | 0 | 113572 | 80.0 | B28 | NaN |
df.Embarked
PassengerId 1 S 2 C 3 S 4 S 5 S .. 887 S 888 S 889 S 890 C 891 Q Name: Embarked, Length: 891, dtype: object
df.Parch.unique()
array([0, 1, 2, 5, 3, 4, 6], dtype=int64)
df['SibSp'].unique() #df.SibSp.unique()
array([1, 0, 3, 4, 2, 5, 8], dtype=int64)
df.head()
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# Rows at positions 3..16 (positional slice, end exclusive), i.e. PassengerId 4..17.
df.iloc[3:17]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | NaN | S |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.0 | 1 | 5 | 347082 | 31.2750 | NaN | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0 | 0 | 0 | 350406 | 7.8542 | NaN | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.0 | 0 | 0 | 248706 | 16.0000 | NaN | S |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.0 | 4 | 1 | 382652 | 29.1250 | NaN | Q |
df.loc[4:32, ["Fare"]].head(10) # df[['Fare']].loc[4:32].head(10) # df.loc[4:32][['Fare']].head(10)
Fare | |
---|---|
PassengerId | |
4 | 53.1000 |
5 | 8.0500 |
6 | 8.4583 |
7 | 51.8625 |
8 | 21.0750 |
9 | 11.1333 |
10 | 30.0708 |
11 | 16.7000 |
12 | 26.5500 |
13 | 8.0500 |
df.loc[: ,"Fare"].head(2) # df['Fare'].head(2)
PassengerId 1 7.2500 2 71.2833 Name: Fare, dtype: float64
# Fare and Name for PassengerId 10 through 20 (label slice, both ends inclusive).
df.loc[10:20, ["Fare", "Name"]] # df[["Fare", "Name"]].loc[10:20]
Fare | Name | |
---|---|---|
PassengerId | ||
10 | 30.0708 | Nasser, Mrs. Nicholas (Adele Achem) |
11 | 16.7000 | Sandstrom, Miss. Marguerite Rut |
12 | 26.5500 | Bonnell, Miss. Elizabeth |
13 | 8.0500 | Saundercock, Mr. William Henry |
14 | 31.2750 | Andersson, Mr. Anders Johan |
15 | 7.8542 | Vestrom, Miss. Hulda Amanda Adolfina |
16 | 16.0000 | Hewlett, Mrs. (Mary D Kingcome) |
17 | 29.1250 | Rice, Master. Eugene |
18 | 13.0000 | Williams, Mr. Charles Eugene |
19 | 18.0000 | Vander Planke, Mrs. Julius (Emelia Maria Vande... |
20 | 7.2250 | Masselmani, Mrs. Fatima |
df.loc[3 : 5 ,["Fare", "Name"]].head(2)
Fare | Name | |
---|---|---|
PassengerId | ||
3 | 7.925 | Heikkinen, Miss. Laina |
4 | 53.100 | Futrelle, Mrs. Jacques Heath (Lily May Peel) |
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
r = df[(df['Embarked'] == 'Q') & (df['Pclass'] == 3)]
r
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.0 | 4 | 1 | 382652 | 29.1250 | NaN | Q |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.0 | 0 | 0 | 330923 | 8.0292 | NaN | Q |
29 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | NaN | 0 | 0 | 330959 | 7.8792 | NaN | Q |
33 | 1 | 3 | Glynn, Miss. Mary Agatha | female | NaN | 0 | 0 | 335677 | 7.7500 | NaN | Q |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
791 | 0 | 3 | Keane, Mr. Andrew "Andy" | male | NaN | 0 | 0 | 12460 | 7.7500 | NaN | Q |
826 | 0 | 3 | Flynn, Mr. John | male | NaN | 0 | 0 | 368323 | 6.9500 | NaN | Q |
829 | 1 | 3 | McCormack, Mr. Thomas Joseph | male | NaN | 0 | 0 | 367228 | 7.7500 | NaN | Q |
886 | 0 | 3 | Rice, Mrs. William (Margaret Norton) | female | 39.0 | 0 | 5 | 382652 | 29.1250 | NaN | Q |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
72 rows × 11 columns
df[(df['Embarked'] == 'Q') & (df['Pclass'] == 2)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
304 | 1 | 2 | Keane, Miss. Nora A | female | NaN | 0 | 0 | 226593 | 12.35 | E101 | Q |
323 | 1 | 2 | Slayter, Miss. Hilda Mary | female | 30.0 | 0 | 0 | 234818 | 12.35 | NaN | Q |
627 | 0 | 2 | Kirkland, Rev. Charles Leonard | male | 57.0 | 0 | 0 | 219533 | 12.35 | NaN | Q |
df[(df['Embarked'] == 'Q') & (df['Pclass'] == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
246 | 0 | 1 | Minahan, Dr. William Edward | male | 44.0 | 2 | 0 | 19928 | 90.0 | C78 | Q |
413 | 1 | 1 | Minahan, Miss. Daisy E | female | 33.0 | 1 | 0 | 19928 | 90.0 | C78 | Q |
df[(df['Embarked'] == 'C') & (df['Pclass'] == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
31 | 0 | 1 | Uruchurtu, Don. Manuel E | male | 40.0 | 0 | 0 | PC 17601 | 27.7208 | NaN | C |
32 | 1 | 1 | Spencer, Mrs. William Augustus (Marie Eugenie) | female | NaN | 1 | 0 | PC 17569 | 146.5208 | B78 | C |
35 | 0 | 1 | Meyer, Mr. Edgar Joseph | male | 28.0 | 1 | 0 | PC 17604 | 82.1708 | NaN | C |
53 | 1 | 1 | Harper, Mrs. Henry Sleeper (Myna Haxtun) | female | 49.0 | 1 | 0 | PC 17572 | 76.7292 | D33 | C |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
840 | 1 | 1 | Marechal, Mr. Pierre | male | NaN | 0 | 0 | 11774 | 29.7000 | C47 | C |
843 | 1 | 1 | Serepeca, Miss. Augusta | female | 30.0 | 0 | 0 | 113798 | 31.0000 | NaN | C |
850 | 1 | 1 | Goldenberg, Mrs. Samuel L (Edwiga Grabowska) | female | NaN | 1 | 0 | 17453 | 89.1042 | C92 | C |
880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
85 rows × 11 columns
df[(df['Embarked'] == 'S') & (df['Pclass'] == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
24 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
28 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
863 | 1 | 1 | Swift, Mrs. Frederick Joel (Margaret Welles Ba... | female | 48.0 | 0 | 0 | 17466 | 25.9292 | D17 | S |
868 | 0 | 1 | Roebling, Mr. Washington Augustus II | male | 31.0 | 0 | 0 | PC 17590 | 50.4958 | A24 | S |
872 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
873 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
127 rows × 11 columns
C1 C2 C3, S1 S2 S3, Q1 Q2 Q3
df1 = df[(df['Survived'] == 1) & (df['Parch'] == 1) & (df['Pclass'] == 1)]
df1
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
98 | 1 | 1 | Greenfield, Mr. William Bertram | male | 23.0 | 0 | 1 | PC 17759 | 63.3583 | D10 D12 | C |
167 | 1 | 1 | Chibnall, Mrs. (Edith Martha Bowerman) | female | NaN | 0 | 1 | 113505 | 55.0000 | E33 | S |
249 | 1 | 1 | Beckwith, Mr. Richard Leonard | male | 37.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
269 | 1 | 1 | Graham, Mrs. William Thompson (Edith Junkins) | female | 58.0 | 0 | 1 | PC 17582 | 153.4625 | C125 | S |
300 | 1 | 1 | Baxter, Mrs. James (Helene DeLaudeniere Chaput) | female | 50.0 | 0 | 1 | PC 17558 | 247.5208 | B58 B60 | C |
320 | 1 | 1 | Spedden, Mrs. Frederic Oakley (Margaretta Corn... | female | 40.0 | 1 | 1 | 16966 | 134.5000 | E34 | C |
330 | 1 | 1 | Hippach, Miss. Jean Gertrude | female | 16.0 | 0 | 1 | 111361 | 57.9792 | B18 | C |
357 | 1 | 1 | Bowerman, Miss. Elsie Edith | female | 22.0 | 0 | 1 | 113505 | 55.0000 | E33 | S |
524 | 1 | 1 | Hippach, Mrs. Louis Albert (Ida Sophia Fischer) | female | 44.0 | 0 | 1 | 111361 | 57.9792 | B18 | C |
559 | 1 | 1 | Taussig, Mrs. Emil (Tillie Mandelbaum) | female | 39.0 | 1 | 1 | 110413 | 79.6500 | E67 | S |
582 | 1 | 1 | Thayer, Mrs. John Borland (Marian Longstreth M... | female | 39.0 | 1 | 1 | 17421 | 110.8833 | C68 | C |
588 | 1 | 1 | Frolicher-Stehli, Mr. Maxmillian | male | 60.0 | 1 | 1 | 13567 | 79.2000 | B41 | C |
680 | 1 | 1 | Cardeza, Mr. Thomas Drake Martinez | male | 36.0 | 0 | 1 | PC 17755 | 512.3292 | B51 B53 B55 | C |
690 | 1 | 1 | Madill, Miss. Georgette Alexandra | female | 15.0 | 0 | 1 | 24160 | 211.3375 | B5 | S |
780 | 1 | 1 | Robert, Mrs. Edward Scott (Elisabeth Walton Mc... | female | 43.0 | 0 | 1 | 24160 | 211.3375 | B3 | S |
821 | 1 | 1 | Hays, Mrs. Charles Melville (Clara Jennings Gr... | female | 52.0 | 1 | 1 | 12749 | 93.5000 | B69 | S |
836 | 1 | 1 | Compton, Miss. Sara Rebecca | female | 39.0 | 1 | 1 | PC 17756 | 83.1583 | E49 | C |
854 | 1 | 1 | Lines, Miss. Mary Conover | female | 16.0 | 0 | 1 | PC 17592 | 39.4000 | D28 | S |
857 | 1 | 1 | Wick, Mrs. George Dennick (Mary Hitchcock) | female | 45.0 | 1 | 1 | 36928 | 164.8667 | NaN | S |
872 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
pd.set_option('display.max_rows', 10)
df['SibSp'].unique()
array([1, 0, 3, 4, 2, 5, 8], dtype=int64)
len(df.columns)
11
df['Fare'].median()
14.4542
df['Fare'].mean()
32.204207968574636
len(df[df['Fare'] > df['Fare'].median()])/len(df)
0.3950617283950617
df.describe()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 714.000000 | 891.000000 | 891.000000 | 891.000000 |
mean | 0.383838 | 2.308642 | 29.699118 | 0.523008 | 0.381594 | 32.204208 |
std | 0.486592 | 0.836071 | 14.526497 | 1.102743 | 0.806057 | 49.693429 |
min | 0.000000 | 1.000000 | 0.420000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.000000 | 2.000000 | 20.125000 | 0.000000 | 0.000000 | 7.910400 |
50% | 0.000000 | 3.000000 | 28.000000 | 0.000000 | 0.000000 | 14.454200 |
75% | 1.000000 | 3.000000 | 38.000000 | 1.000000 | 0.000000 | 31.000000 |
max | 1.000000 | 3.000000 | 80.000000 | 8.000000 | 6.000000 | 512.329200 |
Age:
A: 0 - 10 Children
B: 11 - 19 Teenagers
C: 20 - 30 Youngsters
D: 31 - 45 Adults
E: 46 - 60 Middle Age
F: > 60 Old
# query for people whose age is strictly between 18 and 28
df[(df.Age > 18) & (df.Age < 28)] # Apply sample() to analyse random data records
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.0 | 0 | 0 | A/5. 2151 | 8.0500 | NaN | S |
28 | 0 | 1 | Fortune, Mr. Charles Alexander | male | 19.0 | 3 | 2 | 19950 | 263.0000 | C23 C25 C27 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
883 | 0 | 3 | Dahlberg, Miss. Gerda Ulrika | female | 22.0 | 0 | 0 | 7552 | 10.5167 | NaN | S |
885 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
198 rows × 11 columns
df[~(df.Age > 30)] # Age <= 30 or Age missing (NaN > 30 is False, so the negated mask keeps NaN rows)
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
885 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
586 rows × 11 columns
df[(df.Sex == 'male') & (df.Survived == 1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
18 | 1 | 2 | Williams, Mr. Charles Eugene | male | NaN | 0 | 0 | 244373 | 13.0000 | NaN | S |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.0 | 0 | 0 | 248698 | 13.0000 | D56 | S |
24 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
37 | 1 | 3 | Mamee, Mr. Hanna | male | NaN | 0 | 0 | 2677 | 7.2292 | NaN | C |
56 | 1 | 1 | Woolner, Mr. Hugh | male | NaN | 0 | 0 | 19947 | 35.5000 | C52 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
839 | 1 | 3 | Chip, Mr. Chang | male | 32.0 | 0 | 0 | 1601 | 56.4958 | NaN | S |
840 | 1 | 1 | Marechal, Mr. Pierre | male | NaN | 0 | 0 | 11774 | 29.7000 | C47 | C |
858 | 1 | 1 | Daly, Mr. Peter Denis | male | 51.0 | 0 | 0 | 113055 | 26.5500 | E17 | S |
870 | 1 | 3 | Johnson, Master. Harold Theodor | male | 4.0 | 1 | 1 | 347742 | 11.1333 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
109 rows × 11 columns
# Rows for female passengers who survived.
df.loc[df['Sex'].eq('female') & df['Survived'].eq(1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
875 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
876 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
233 rows × 11 columns
# Non-null count of every column, restricted to male passengers.
df.loc[df['Sex'] == 'male'].count()
Survived 577 Pclass 577 Name 577 Sex 577 Age 453 SibSp 577 Parch 577 Ticket 577 Fare 577 Cabin 107 Embarked 577 dtype: int64
# Non-null count of every column, restricted to female passengers.
df.loc[df['Sex'] == 'female'].count()
Survived 314 Pclass 314 Name 314 Sex 314 Age 261 SibSp 314 Parch 314 Ticket 314 Fare 314 Cabin 97 Embarked 312 dtype: int64
# Same male-only column counts, with the filter written as a boolean mask
# built through attribute access.
df[df.Sex.eq('male')].count()
Survived 577 Pclass 577 Name 577 Sex 577 Age 453 SibSp 577 Parch 577 Ticket 577 Fare 577 Cabin 107 Embarked 577 dtype: int64
# Non-null count of every column for male passengers who survived.
df.loc[df['Sex'].eq('male') & df['Survived'].eq(1)].count()
Survived 109 Pclass 109 Name 109 Sex 109 Age 93 SibSp 109 Parch 109 Ticket 109 Fare 109 Cabin 45 Embarked 109 dtype: int64
# Non-null count of every column for female passengers who survived.
df.loc[df['Sex'].eq('female') & df['Survived'].eq(1)].count()
Survived 233 Pclass 233 Name 233 Sex 233 Age 197 SibSp 233 Parch 233 Ticket 233 Fare 233 Cabin 91 Embarked 231 dtype: int64
# Fraction of male passengers who survived: surviving males / all males.
(
    len(df[df['Sex'].eq('male') & df['Survived'].eq(1)])
    / len(df[df['Sex'].eq('male')])
)
0.18890814558058924
# Fraction of female passengers who survived: surviving females / all females.
(
    len(df[df['Sex'].eq('female') & df['Survived'].eq(1)])
    / len(df[df['Sex'].eq('female')])
)
0.7420382165605095
# Dimensions of df as a (number of rows, number of columns) tuple.
df.shape
(891, 11)
# Highest fare paid by any passenger.
df['Fare'].max()
512.3292
# Most frequent fare value(s); mode() returns a Series because ties are possible.
df['Fare'].mode()
0 8.05 dtype: float64
# Median fare.
df['Fare'].median()
14.4542
# Mean fare; used below as the threshold for splitting passengers into fare bands.
df['Fare'].mean()
32.204207968574636
# Passengers whose fare is strictly below the mean fare.
# Alternative kept for reference (rows priced exactly at the median):
#   df[df.Fare == df.Fare.median()]
df.loc[df['Fare'] < df['Fare'].mean()]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
680 rows × 11 columns
# Survivors among the passengers whose fare is strictly below the mean fare.
# Alternative kept for reference (rows priced exactly at the median):
#   df[df.Fare == df.Fare.median()]
df.loc[df['Fare'].lt(df['Fare'].mean()) & df['Survived'].eq(1)]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
875 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
876 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
216 rows × 11 columns
# Observation/Assumption
# Survival rate among passengers who paid less than the mean fare.
(
    len(df[df['Fare'].lt(df['Fare'].mean()) & df['Survived'].eq(1)])
    / len(df[df['Fare'].lt(df['Fare'].mean())])
)
0.3176470588235294
# Share of passengers who did not survive among those who paid less than the mean fare.
(
    len(df[df['Fare'].lt(df['Fare'].mean()) & df['Survived'].eq(0)])
    / len(df[df['Fare'].lt(df['Fare'].mean())])
)
0.6823529411764706
# Survival rate among passengers who paid at least the mean fare.
(
    len(df[df['Fare'].ge(df['Fare'].mean()) & df['Survived'].eq(1)])
    / len(df[df['Fare'].ge(df['Fare'].mean())])
)
0.5971563981042654
# Share of passengers who did not survive among those who paid at least the mean fare.
(
    len(df[df['Fare'].ge(df['Fare'].mean()) & df['Survived'].eq(0)])
    / len(df[df['Fare'].ge(df['Fare'].mean())])
)
0.4028436018957346
# Number of passengers who paid less than the mean fare.
df.loc[df['Fare'] < df['Fare'].mean()].shape[0]
680
# Number of passengers who paid at least the mean fare.
df.loc[df['Fare'] >= df['Fare'].mean()].shape[0]
211
#Conclusion/Inference
# Survivors within the frame `r`.
# NOTE(review): `r` is not assigned in any cell visible here; the recorded
# 216-row output matches the below-mean-fare survivors, so presumably
# r = df[df.Fare < df.Fare.mean()] — confirm where `r` is defined.
r[r['Survived'] == 1]
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.0 | 1 | 1 | PP 9549 | 16.7000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
875 | 1 | 2 | Abelson, Mrs. Samuel (Hannah Wizosky) | female | 28.0 | 1 | 0 | P/PP 3381 | 24.0000 | NaN | C |
876 | 1 | 3 | Najib, Miss. Adele Kiamie "Jane" | female | 15.0 | 0 | 0 | 2667 | 7.2250 | NaN | C |
881 | 1 | 2 | Shelley, Mrs. William (Imanita Parrish Hall) | female | 25.0 | 0 | 1 | 230433 | 26.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
216 rows × 11 columns
# The Fare column as a Series indexed by PassengerId.
df['Fare']
PassengerId 1 7.2500 2 71.2833 3 7.9250 4 53.1000 5 8.0500 ... 887 13.0000 888 30.0000 889 23.4500 890 30.0000 891 7.7500 Name: Fare, Length: 891, dtype: float64
# Histogram of fares with matplotlib's default 10 bins; the call returns
# (bin counts, bin edges, bar container).
plt.hist(df['Fare'])
(array([732., 106., 31., 2., 11., 6., 0., 0., 0., 3.]), array([ 0. , 51.23292, 102.46584, 153.69876, 204.93168, 256.1646 , 307.39752, 358.63044, 409.86336, 461.09628, 512.3292 ]), <BarContainer object of 10 artists>)
# IPython-only help lookup: the trailing `?` asks IPython to show the
# documentation of sns.pairplot. This line is not valid plain-Python syntax.
sns.pairplot?
Object `sns.pairplot` not found.
# Grid of pairwise plots across the columns of df.
sns.pairplot(data=df)
<seaborn.axisgrid.PairGrid at 0x20c463b9a90>
# Labels of all columns in df.
df.columns
Index(['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'], dtype='object')
# Distinct values in Embarked (NaN is included because some rows are missing it).
df['Embarked'].unique()
array(['S', 'C', 'Q', nan], dtype=object)
# Distinct values in Sex.
df.Sex.unique()
array(['male', 'female'], dtype=object)
# Distinct values in Pclass.
df['Pclass'].unique()
array([3, 1, 2], dtype=int64)
# Print the maximum value of every column that supports ordering.
# Object columns that mix strings with NaN (e.g. Cabin, Embarked) cannot be
# ordered, so .max() raises TypeError for them; those columns are skipped on
# purpose. Only TypeError is caught so that any other failure (or a
# KeyboardInterrupt) is not silently swallowed.
for col in df.columns:
    try:
        col_max = df[col].max()
    except TypeError:
        continue
    print(col, " = \n", col_max)
Survived = 1 Pclass = 3 Name = van Melkebeke, Mr. Philemon Sex = male Age = 80.0 SibSp = 8 Parch = 6 Ticket = WE/P 5735 Fare = 512.3292
# Data type of each column (nothing else: no counts, no memory usage).
df.dtypes
Survived int64 Pclass int64 Name object Sex object Age float64 SibSp int64 Parch int64 Ticket object Fare float64 Cabin object Embarked object dtype: object
# Full summary: index range, number of rows and columns, per-column
# non-null counts and dtypes, and approximate memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 891 entries, 1 to 891 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Survived 891 non-null int64 1 Pclass 891 non-null int64 2 Name 891 non-null object 3 Sex 891 non-null object 4 Age 714 non-null float64 5 SibSp 891 non-null int64 6 Parch 891 non-null int64 7 Ticket 891 non-null object 8 Fare 891 non-null float64 9 Cabin 204 non-null object 10 Embarked 889 non-null object dtypes: float64(2), int64(4), object(5) memory usage: 115.8+ KB
# Number of missing values in each column.
df.isna().sum()
Survived 0 Pclass 0 Name 0 Sex 0 Age 177 SibSp 0 Parch 0 Ticket 0 Fare 0 Cabin 687 Embarked 2 dtype: int64
# Percentage of missing values in each column.
df.isna().sum().div(len(df)).mul(100)
Survived 0.000000 Pclass 0.000000 Name 0.000000 Sex 0.000000 Age 19.865320 SibSp 0.000000 Parch 0.000000 Ticket 0.000000 Fare 0.000000 Cabin 77.104377 Embarked 0.224467 dtype: float64
# Number of non-null values in each column.
df.count()
Survived 891 Pclass 891 Name 891 Sex 891 Age 714 SibSp 891 Parch 891 Ticket 891 Fare 891 Cabin 204 Embarked 889 dtype: int64
# Column dtypes again, as the starting point for picking out the
# object-typed columns in the next cells.
df.dtypes
Survived int64 Pclass int64 Name object Sex object Age float64 SibSp int64 Parch int64 Ticket object Fare float64 Cabin object Embarked object dtype: object
# Boolean mask over the columns: True where the column dtype is object.
df.dtypes.eq("object")
Survived False Pclass False Name True Sex True Age False SibSp False Parch False Ticket True Fare False Cabin True Embarked True dtype: bool
# Keep only the dtype entries of the object-typed columns.
df.dtypes.loc[df.dtypes == 'object']
Name object Sex object Ticket object Cabin object Embarked object dtype: object
# Names of the object-typed columns.
df.dtypes.loc[df.dtypes == 'object'].index
Index(['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], dtype='object')
# Sub-frame holding only the object-dtype columns (Name, Sex, Ticket, Cabin,
# Embarked), selected with pandas' built-in dtype filter instead of a
# hand-built mask over df.dtypes.
df_cat = df.select_dtypes(include='object')
df_cat.head()
Name | Sex | Ticket | Cabin | Embarked | |
---|---|---|---|---|---|
PassengerId | |||||
1 | Braund, Mr. Owen Harris | male | A/5 21171 | NaN | S |
2 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | PC 17599 | C85 | C |
3 | Heikkinen, Miss. Laina | female | STON/O2. 3101282 | NaN | S |
4 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 113803 | C123 | S |
5 | Allen, Mr. William Henry | male | 373450 | NaN | S |
# Show the distinct values of every object-typed column.
for col in df_cat.columns:
    distinct_values = df[col].unique()
    print(col, " = \n", distinct_values)
Name = ['Braund, Mr. Owen Harris' 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)' 'Heikkinen, Miss. Laina' 'Futrelle, Mrs. Jacques Heath (Lily May Peel)' 'Allen, Mr. William Henry' 'Moran, Mr. James' 'McCarthy, Mr. Timothy J' 'Palsson, Master. Gosta Leonard' 'Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)' 'Nasser, Mrs. Nicholas (Adele Achem)' 'Sandstrom, Miss. Marguerite Rut' 'Bonnell, Miss. Elizabeth' 'Saundercock, Mr. William Henry' 'Andersson, Mr. Anders Johan' 'Vestrom, Miss. Hulda Amanda Adolfina' 'Hewlett, Mrs. (Mary D Kingcome) ' 'Rice, Master. Eugene' 'Williams, Mr. Charles Eugene' 'Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)' 'Masselmani, Mrs. Fatima' 'Fynney, Mr. Joseph J' 'Beesley, Mr. Lawrence' 'McGowan, Miss. Anna "Annie"' 'Sloper, Mr. William Thompson' 'Palsson, Miss. Torborg Danira' 'Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)' 'Emir, Mr. Farred Chehab' 'Fortune, Mr. Charles Alexander' 'O\'Dwyer, Miss. Ellen "Nellie"' 'Todoroff, Mr. Lalio' 'Uruchurtu, Don. Manuel E' 'Spencer, Mrs. William Augustus (Marie Eugenie)' 'Glynn, Miss. Mary Agatha' 'Wheadon, Mr. Edward H' 'Meyer, Mr. Edgar Joseph' 'Holverson, Mr. Alexander Oskar' 'Mamee, Mr. Hanna' 'Cann, Mr. Ernest Charles' 'Vander Planke, Miss. Augusta Maria' 'Nicola-Yarred, Miss. Jamila' 'Ahlin, Mrs. Johan (Johanna Persdotter Larsson)' 'Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)' 'Kraeff, Mr. Theodor' 'Laroche, Miss. Simonne Marie Anne Andree' 'Devaney, Miss. Margaret Delia' 'Rogers, Mr. William John' 'Lennon, Mr. Denis' "O'Driscoll, Miss. Bridget" 'Samaan, Mr. Youssef' 'Arnold-Franchi, Mrs. Josef (Josefine Franchi)' 'Panula, Master. Juha Niilo' 'Nosworthy, Mr. Richard Cater' 'Harper, Mrs. Henry Sleeper (Myna Haxtun)' 'Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)' 'Ostby, Mr. Engelhart Cornelius' 'Woolner, Mr. Hugh' 'Rugg, Miss. Emily' 'Novel, Mr. Mansouer' 'West, Miss. Constance Mirium' 'Goodwin, Master. William Frederick' 'Sirayanian, Mr. 
Orsen' 'Icard, Miss. Amelie' 'Harris, Mr. Henry Birkhardt' 'Skoog, Master. Harald' 'Stewart, Mr. Albert A' 'Moubarek, Master. Gerios' 'Nye, Mrs. (Elizabeth Ramell)' 'Crease, Mr. Ernest James' 'Andersson, Miss. Erna Alexandra' 'Kink, Mr. Vincenz' 'Jenkin, Mr. Stephen Curnow' 'Goodwin, Miss. Lillian Amy' 'Hood, Mr. Ambrose Jr' 'Chronopoulos, Mr. Apostolos' 'Bing, Mr. Lee' 'Moen, Mr. Sigurd Hansen' 'Staneff, Mr. Ivan' 'Moutal, Mr. Rahamin Haim' 'Caldwell, Master. Alden Gates' 'Dowdell, Miss. Elizabeth' 'Waelens, Mr. Achille' 'Sheerlinck, Mr. Jan Baptist' 'McDermott, Miss. Brigdet Delia' 'Carrau, Mr. Francisco M' 'Ilett, Miss. Bertha' 'Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)' 'Ford, Mr. William Neal' 'Slocovski, Mr. Selman Francis' 'Fortune, Miss. Mabel Helen' 'Celotti, Mr. Francesco' 'Christmann, Mr. Emil' 'Andreasson, Mr. Paul Edvin' 'Chaffee, Mr. Herbert Fuller' 'Dean, Mr. Bertram Frank' 'Coxon, Mr. Daniel' 'Shorney, Mr. Charles Joseph' 'Goldschmidt, Mr. George B' 'Greenfield, Mr. William Bertram' 'Doling, Mrs. John T (Ada Julia Bone)' 'Kantor, Mr. Sinai' 'Petranec, Miss. Matilda' 'Petroff, Mr. Pastcho ("Pentcho")' 'White, Mr. Richard Frasar' 'Johansson, Mr. Gustaf Joel' 'Gustafsson, Mr. Anders Vilhelm' 'Mionoff, Mr. Stoytcho' 'Salkjelsvik, Miss. Anna Kristine' 'Moss, Mr. Albert Johan' 'Rekic, Mr. Tido' 'Moran, Miss. Bertha' 'Porter, Mr. Walter Chamberlain' 'Zabour, Miss. Hileni' 'Barton, Mr. David John' 'Jussila, Miss. Katriina' 'Attalah, Miss. Malake' 'Pekoniemi, Mr. Edvard' 'Connors, Mr. Patrick' 'Turpin, Mr. William John Robert' 'Baxter, Mr. Quigg Edmond' 'Andersson, Miss. Ellis Anna Maria' 'Hickman, Mr. Stanley George' 'Moore, Mr. Leonard Charles' 'Nasser, Mr. Nicholas' 'Webber, Miss. Susan' 'White, Mr. Percival Wayland' 'Nicola-Yarred, Master. Elias' 'McMahon, Mr. Martin' 'Madsen, Mr. Fridtjof Arne' 'Peter, Miss. Anna' 'Ekstrom, Mr. Johan' 'Drazenoic, Mr. Jozef' 'Coelho, Mr. Domingos Fernandeo' 'Robins, Mrs. 
Alexander A (Grace Charity Laury)' 'Weisz, Mrs. Leopold (Mathilde Francoise Pede)' 'Sobey, Mr. Samuel James Hayden' 'Richard, Mr. Emile' 'Newsom, Miss. Helen Monypeny' 'Futrelle, Mr. Jacques Heath' 'Osen, Mr. Olaf Elon' 'Giglio, Mr. Victor' 'Boulos, Mrs. Joseph (Sultana)' 'Nysten, Miss. Anna Sofia' 'Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)' 'Burke, Mr. Jeremiah' 'Andrew, Mr. Edgardo Samuel' 'Nicholls, Mr. Joseph Charles' 'Andersson, Mr. August Edvard ("Wennerstrom")' 'Ford, Miss. Robina Maggie "Ruby"' 'Navratil, Mr. Michel ("Louis M Hoffman")' 'Byles, Rev. Thomas Roussel Davids' 'Bateman, Rev. Robert James' 'Pears, Mrs. Thomas (Edith Wearne)' 'Meo, Mr. Alfonzo' 'van Billiard, Mr. Austin Blyler' 'Olsen, Mr. Ole Martin' 'Williams, Mr. Charles Duane' 'Gilnagh, Miss. Katherine "Katie"' 'Corn, Mr. Harry' 'Smiljanic, Mr. Mile' 'Sage, Master. Thomas Henry' 'Cribb, Mr. John Hatfield' 'Watt, Mrs. James (Elizabeth "Bessie" Inglis Milne)' 'Bengtsson, Mr. John Viktor' 'Calic, Mr. Jovo' 'Panula, Master. Eino Viljami' 'Goldsmith, Master. Frank John William "Frankie"' 'Chibnall, Mrs. (Edith Martha Bowerman)' 'Skoog, Mrs. William (Anna Bernhardina Karlsson)' 'Baumann, Mr. John D' 'Ling, Mr. Lee' 'Van der hoef, Mr. Wyckoff' 'Rice, Master. Arthur' 'Johnson, Miss. Eleanor Ileen' 'Sivola, Mr. Antti Wilhelm' 'Smith, Mr. James Clinch' 'Klasen, Mr. Klas Albin' 'Lefebre, Master. Henry Forbes' 'Isham, Miss. Ann Elizabeth' 'Hale, Mr. Reginald' 'Leonard, Mr. Lionel' 'Sage, Miss. Constance Gladys' 'Pernot, Mr. Rene' 'Asplund, Master. Clarence Gustaf Hugo' 'Becker, Master. Richard F' 'Kink-Heilmann, Miss. Luise Gretchen' 'Rood, Mr. Hugh Roscoe' 'O\'Brien, Mrs. Thomas (Johanna "Hannah" Godfrey)' 'Romaine, Mr. Charles Hallace ("Mr C Rolmane")' 'Bourke, Mr. John' 'Turcin, Mr. Stjepan' 'Pinsky, Mrs. (Rosa)' 'Carbines, Mr. William' 'Andersen-Jensen, Miss. Carla Christine Nielsine' 'Navratil, Master. Michel M' 'Brown, Mrs. James Joseph (Margaret Tobin)' 'Lurette, Miss. 
Elise' 'Mernagh, Mr. Robert' 'Olsen, Mr. Karl Siegwart Andreas' 'Madigan, Miss. Margaret "Maggie"' 'Yrois, Miss. Henriette ("Mrs Harbeck")' 'Vande Walle, Mr. Nestor Cyriel' 'Sage, Mr. Frederick' 'Johanson, Mr. Jakob Alfred' 'Youseff, Mr. Gerious' 'Cohen, Mr. Gurshon "Gus"' 'Strom, Miss. Telma Matilda' 'Backstrom, Mr. Karl Alfred' 'Albimona, Mr. Nassef Cassem' 'Carr, Miss. Helen "Ellen"' 'Blank, Mr. Henry' 'Ali, Mr. Ahmed' 'Cameron, Miss. Clear Annie' 'Perkin, Mr. John Henry' 'Givard, Mr. Hans Kristensen' 'Kiernan, Mr. Philip' 'Newell, Miss. Madeleine' 'Honkanen, Miss. Eliina' 'Jacobsohn, Mr. Sidney Samuel' 'Bazzani, Miss. Albina' 'Harris, Mr. Walter' 'Sunderland, Mr. Victor Francis' 'Bracken, Mr. James H' 'Green, Mr. George Henry' 'Nenkoff, Mr. Christo' 'Hoyt, Mr. Frederick Maxfield' 'Berglund, Mr. Karl Ivar Sven' 'Mellors, Mr. William John' 'Lovell, Mr. John Hall ("Henry")' 'Fahlstrom, Mr. Arne Jonas' 'Lefebre, Miss. Mathilde' 'Harris, Mrs. Henry Birkhardt (Irene Wallach)' 'Larsson, Mr. Bengt Edvin' 'Sjostedt, Mr. Ernst Adolf' 'Asplund, Miss. Lillian Gertrud' 'Leyson, Mr. Robert William Norman' 'Harknett, Miss. Alice Phoebe' 'Hold, Mr. Stephen' 'Collyer, Miss. Marjorie "Lottie"' 'Pengelly, Mr. Frederick William' 'Hunt, Mr. George Henry' 'Zabour, Miss. Thamine' 'Murphy, Miss. Katherine "Kate"' 'Coleridge, Mr. Reginald Charles' 'Maenpaa, Mr. Matti Alexanteri' 'Attalah, Mr. Sleiman' 'Minahan, Dr. William Edward' 'Lindahl, Miss. Agda Thorilda Viktoria' 'Hamalainen, Mrs. William (Anna)' 'Beckwith, Mr. Richard Leonard' 'Carter, Rev. Ernest Courtenay' 'Reed, Mr. James George' 'Strom, Mrs. Wilhelm (Elna Matilda Persson)' 'Stead, Mr. William Thomas' 'Lobb, Mr. William Arthur' 'Rosblom, Mrs. Viktor (Helena Wilhelmina)' 'Touma, Mrs. Darwis (Hanne Youssef Razi)' 'Thorne, Mrs. Gertrude Maybelle' 'Cherry, Miss. Gladys' 'Ward, Miss. Anna' 'Parrish, Mrs. (Lutie Davis)' 'Smith, Mr. Thomas' 'Asplund, Master. Edvin Rojj Felix' 'Taussig, Mr. Emil' 'Harrison, Mr. 
William' 'Henry, Miss. Delia' 'Reeves, Mr. David' 'Panula, Mr. Ernesti Arvid' 'Persson, Mr. Ernst Ulrik' 'Graham, Mrs. William Thompson (Edith Junkins)' 'Bissette, Miss. Amelia' 'Cairns, Mr. Alexander' 'Tornquist, Mr. William Henry' 'Mellinger, Mrs. (Elizabeth Anne Maidment)' 'Natsch, Mr. Charles H' 'Healy, Miss. Hanora "Nora"' 'Andrews, Miss. Kornelia Theodosia' 'Lindblom, Miss. Augusta Charlotta' 'Parkes, Mr. Francis "Frank"' 'Rice, Master. Eric' 'Abbott, Mrs. Stanton (Rosa Hunt)' 'Duane, Mr. Frank' 'Olsson, Mr. Nils Johan Goransson' 'de Pelsmaeker, Mr. Alfons' 'Dorking, Mr. Edward Arthur' 'Smith, Mr. Richard William' 'Stankovic, Mr. Ivan' 'de Mulder, Mr. Theodore' 'Naidenoff, Mr. Penko' 'Hosono, Mr. Masabumi' 'Connolly, Miss. Kate' 'Barber, Miss. Ellen "Nellie"' 'Bishop, Mrs. Dickinson H (Helen Walton)' 'Levy, Mr. Rene Jacques' 'Haas, Miss. Aloisia' 'Mineff, Mr. Ivan' 'Lewy, Mr. Ervin G' 'Hanna, Mr. Mansour' 'Allison, Miss. Helen Loraine' 'Saalfeld, Mr. Adolphe' 'Baxter, Mrs. James (Helene DeLaudeniere Chaput)' 'Kelly, Miss. Anna Katherine "Annie Kate"' 'McCoy, Mr. Bernard' 'Johnson, Mr. William Cahoone Jr' 'Keane, Miss. Nora A' 'Williams, Mr. Howard Hugh "Harry"' 'Allison, Master. Hudson Trevor' 'Fleming, Miss. Margaret' 'Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)' 'Abelson, Mr. Samuel' 'Francatelli, Miss. Laura Mabel' 'Hays, Miss. Margaret Bechstein' 'Ryerson, Miss. Emily Borie' 'Lahtinen, Mrs. William (Anna Sylfven)' 'Hendekovic, Mr. Ignjac' 'Hart, Mr. Benjamin' 'Nilsson, Miss. Helmina Josefina' 'Kantor, Mrs. Sinai (Miriam Sternin)' 'Moraweck, Dr. Ernest' 'Wick, Miss. Mary Natalie' 'Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)' 'Dennis, Mr. Samuel' 'Danoff, Mr. Yoto' 'Slayter, Miss. Hilda Mary' 'Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)' 'Sage, Mr. George John Jr' 'Young, Miss. Marie Grice' 'Nysveen, Mr. Johan Hansen' 'Ball, Mrs. (Ada E Hall)' 'Goldsmith, Mrs. 
Frank John (Emily Alice Brown)' 'Hippach, Miss. Jean Gertrude' 'McCoy, Miss. Agnes' 'Partner, Mr. Austen' 'Graham, Mr. George Edward' 'Vander Planke, Mr. Leo Edmondus' 'Frauenthal, Mrs. Henry William (Clara Heinsheimer)' 'Denkoff, Mr. Mitto' 'Pears, Mr. Thomas Clinton' 'Burns, Miss. Elizabeth Margaret' 'Dahl, Mr. Karl Edwart' 'Blackwell, Mr. Stephen Weart' 'Navratil, Master. Edmond Roger' 'Fortune, Miss. Alice Elizabeth' 'Collander, Mr. Erik Gustaf' 'Sedgwick, Mr. Charles Frederick Waddington' 'Fox, Mr. Stanley Hubert' 'Brown, Miss. Amelia "Mildred"' 'Smith, Miss. Marion Elsie' 'Davison, Mrs. Thomas Henry (Mary E Finck)' 'Coutts, Master. William Loch "William"' 'Dimic, Mr. Jovan' 'Odahl, Mr. Nils Martin' 'Williams-Lambert, Mr. Fletcher Fellows' 'Elias, Mr. Tannous' 'Arnold-Franchi, Mr. Josef' 'Yousif, Mr. Wazli' 'Vanden Steen, Mr. Leo Peter' 'Bowerman, Miss. Elsie Edith' 'Funk, Miss. Annie Clemmer' 'McGovern, Miss. Mary' 'Mockler, Miss. Helen Mary "Ellie"' 'Skoog, Mr. Wilhelm' 'del Carlo, Mr. Sebastiano' 'Barbara, Mrs. (Catherine David)' 'Asim, Mr. Adola' "O'Brien, Mr. Thomas" 'Adahl, Mr. Mauritz Nils Martin' 'Warren, Mrs. Frank Manley (Anna Sophia Atkinson)' 'Moussa, Mrs. (Mantoura Boulos)' 'Jermyn, Miss. Annie' 'Aubart, Mme. Leontine Pauline' 'Harder, Mr. George Achilles' 'Wiklund, Mr. Jakob Alfred' 'Beavan, Mr. William Thomas' 'Ringhini, Mr. Sante' 'Palsson, Miss. Stina Viola' 'Meyer, Mrs. Edgar Joseph (Leila Saks)' 'Landergren, Miss. Aurora Adelia' 'Widener, Mr. Harry Elkins' 'Betros, Mr. Tannous' 'Gustafsson, Mr. Karl Gideon' 'Bidois, Miss. Rosalie' 'Nakid, Miss. Maria ("Mary")' 'Tikkanen, Mr. Juho' 'Holverson, Mrs. Alexander Oskar (Mary Aline Towner)' 'Plotcharsky, Mr. Vasil' 'Davies, Mr. Charles Henry' 'Goodwin, Master. Sidney Leonard' 'Buss, Miss. Kate' 'Sadlier, Mr. Matthew' 'Lehmann, Miss. Bertha' 'Carter, Mr. William Ernest' 'Jansson, Mr. Carl Olof' 'Gustafsson, Mr. Johan Birger' 'Newell, Miss. Marjorie' 'Sandstrom, Mrs. 
Hjalmar (Agnes Charlotta Bengtsson)' 'Johansson, Mr. Erik' 'Olsson, Miss. Elina' 'McKane, Mr. Peter David' 'Pain, Dr. Alfred' 'Trout, Mrs. William H (Jessie L)' 'Niskanen, Mr. Juha' 'Adams, Mr. John' 'Jussila, Miss. Mari Aina' 'Hakkarainen, Mr. Pekka Pietari' 'Oreskovic, Miss. Marija' 'Gale, Mr. Shadrach' 'Widegren, Mr. Carl/Charles Peter' 'Richards, Master. William Rowe' 'Birkeland, Mr. Hans Martin Monsen' 'Lefebre, Miss. Ida' 'Sdycoff, Mr. Todor' 'Hart, Mr. Henry' 'Minahan, Miss. Daisy E' 'Cunningham, Mr. Alfred Fleming' 'Sundman, Mr. Johan Julian' 'Meek, Mrs. Thomas (Annie Louise Rowley)' 'Drew, Mrs. James Vivian (Lulu Thorne Christian)' 'Silven, Miss. Lyyli Karoliina' 'Matthews, Mr. William John' 'Van Impe, Miss. Catharina' 'Gheorgheff, Mr. Stanio' 'Charters, Mr. David' 'Zimmerman, Mr. Leo' 'Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)' 'Rosblom, Mr. Viktor Richard' 'Wiseman, Mr. Phillippe' 'Clarke, Mrs. Charles V (Ada Maria Winfield)' 'Phillips, Miss. Kate Florence ("Mrs Kate Louise Phillips Marshall")' 'Flynn, Mr. James' 'Pickard, Mr. Berk (Berk Trembisky)' 'Bjornstrom-Steffansson, Mr. Mauritz Hakan' 'Thorneycroft, Mrs. Percival (Florence Kate White)' 'Louch, Mrs. Charles Alexander (Alice Adelaide Slow)' 'Kallio, Mr. Nikolai Erland' 'Silvey, Mr. William Baird' 'Carter, Miss. Lucile Polk' 'Ford, Miss. Doolina Margaret "Daisy"' 'Richards, Mrs. Sidney (Emily Hocking)' 'Fortune, Mr. Mark' 'Kvillner, Mr. Johan Henrik Johannesson' 'Hart, Mrs. Benjamin (Esther Ada Bloomfield)' 'Hampe, Mr. Leon' 'Petterson, Mr. Johan Emil' 'Reynaldo, Ms. Encarnacion' 'Johannesen-Bratthammer, Mr. Bernt' 'Dodge, Master. Washington' 'Mellinger, Miss. Madeleine Violet' 'Seward, Mr. Frederic Kimber' 'Baclini, Miss. Marie Catherine' 'Peuchen, Major. Arthur Godfrey' 'West, Mr. Edwy Arthur' 'Hagland, Mr. Ingvald Olai Olsen' 'Foreman, Mr. Benjamin Laventall' 'Goldenberg, Mr. Samuel L' 'Peduzzi, Mr. Joseph' 'Jalsevac, Mr. Ivan' 'Millet, Mr. Francis Davis' 'Kenyon, Mrs. 
Frederick R (Marion)' 'Toomey, Miss. Ellen' "O'Connor, Mr. Maurice" 'Anderson, Mr. Harry' 'Morley, Mr. William' 'Gee, Mr. Arthur H' 'Milling, Mr. Jacob Christian' 'Maisner, Mr. Simon' 'Goncalves, Mr. Manuel Estanslas' 'Campbell, Mr. William' 'Smart, Mr. John Montgomery' 'Scanlan, Mr. James' 'Baclini, Miss. Helene Barbara' 'Keefe, Mr. Arthur' 'Cacic, Mr. Luka' 'West, Mrs. Edwy Arthur (Ada Mary Worth)' 'Jerwan, Mrs. Amin S (Marie Marthe Thuillard)' 'Strandberg, Miss. Ida Sofia' 'Clifford, Mr. George Quincy' 'Renouf, Mr. Peter Henry' 'Braund, Mr. Lewis Richard' 'Karlsson, Mr. Nils August' 'Hirvonen, Miss. Hildur E' 'Goodwin, Master. Harold Victor' 'Frost, Mr. Anthony Wood "Archie"' 'Rouse, Mr. Richard Henry' 'Turkula, Mrs. (Hedwig)' 'Bishop, Mr. Dickinson H' 'Lefebre, Miss. Jeannie' 'Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)' 'Kent, Mr. Edward Austin' 'Somerton, Mr. Francis William' 'Coutts, Master. Eden Leslie "Neville"' 'Hagland, Mr. Konrad Mathias Reiersen' 'Windelov, Mr. Einar' 'Molson, Mr. Harry Markland' 'Artagaveytia, Mr. Ramon' 'Stanley, Mr. Edward Roland' 'Yousseff, Mr. Gerious' 'Eustis, Miss. Elizabeth Mussey' 'Shellard, Mr. Frederick William' 'Allison, Mrs. Hudson J C (Bessie Waldo Daniels)' 'Svensson, Mr. Olof' 'Calic, Mr. Petar' 'Canavan, Miss. Mary' "O'Sullivan, Miss. Bridget Mary" 'Laitinen, Miss. Kristina Sofia' 'Maioni, Miss. Roberta' 'Penasco y Castellana, Mr. Victor de Satode' 'Quick, Mrs. Frederick Charles (Jane Richards)' 'Bradley, Mr. George ("George Arthur Brayton")' 'Olsen, Mr. Henry Margido' 'Lang, Mr. Fang' 'Daly, Mr. Eugene Patrick' 'Webber, Mr. James' 'McGough, Mr. James Robert' 'Rothschild, Mrs. Martin (Elizabeth L. Barrett)' 'Coleff, Mr. Satio' 'Walker, Mr. William Anderson' 'Lemore, Mrs. (Amelia Milley)' 'Ryan, Mr. Patrick' 'Angle, Mrs. William A (Florence "Mary" Agnes Hughes)' 'Pavlovic, Mr. Stefo' 'Perreault, Miss. Anne' 'Vovk, Mr. Janko' 'Lahoud, Mr. Sarkis' 'Hippach, Mrs. Louis Albert (Ida Sophia Fischer)' 'Kassem, Mr. 
Fared' 'Farrell, Mr. James' 'Ridsdale, Miss. Lucy' 'Farthing, Mr. John' 'Salonen, Mr. Johan Werner' 'Hocking, Mr. Richard George' 'Quick, Miss. Phyllis May' 'Toufik, Mr. Nakli' 'Elias, Mr. Joseph Jr' 'Peter, Mrs. Catherine (Catherine Rizk)' 'Cacic, Miss. Marija' 'Hart, Miss. Eva Miriam' 'Butt, Major. Archibald Willingham' 'LeRoy, Miss. Bertha' 'Risien, Mr. Samuel Beard' 'Frolicher, Miss. Hedwig Margaritha' 'Crosby, Miss. Harriet R' 'Andersson, Miss. Ingeborg Constanzia' 'Andersson, Miss. Sigrid Elisabeth' 'Beane, Mr. Edward' 'Douglas, Mr. Walter Donald' 'Nicholson, Mr. Arthur Ernest' 'Beane, Mrs. Edward (Ethel Clarke)' 'Padro y Manent, Mr. Julian' 'Goldsmith, Mr. Frank John' 'Davies, Master. John Morgan Jr' 'Thayer, Mr. John Borland Jr' 'Sharp, Mr. Percival James R' "O'Brien, Mr. Timothy" 'Leeni, Mr. Fahim ("Philip Zenni")' 'Ohman, Miss. Velin' 'Wright, Mr. George' 'Duff Gordon, Lady. (Lucille Christiana Sutherland) ("Mrs Morgan")' 'Robbins, Mr. Victor' 'Taussig, Mrs. Emil (Tillie Mandelbaum)' 'de Messemaeker, Mrs. Guillaume Joseph (Emma)' 'Morrow, Mr. Thomas Rowan' 'Sivic, Mr. Husein' 'Norman, Mr. Robert Douglas' 'Simmons, Mr. John' 'Meanwell, Miss. (Marion Ogden)' 'Davies, Mr. Alfred J' 'Stoytcheff, Mr. Ilia' 'Palsson, Mrs. Nils (Alma Cornelia Berglund)' 'Doharr, Mr. Tannous' 'Jonsson, Mr. Carl' 'Harris, Mr. George' 'Appleton, Mrs. Edward Dale (Charlotte Lamson)' 'Flynn, Mr. John Irwin ("Irving")' 'Kelly, Miss. Mary' 'Rush, Mr. Alfred George John' 'Patchett, Mr. George' 'Garside, Miss. Ethel' 'Silvey, Mrs. William Baird (Alice Munger)' 'Caram, Mrs. Joseph (Maria Elias)' 'Jussila, Mr. Eiriik' 'Christy, Miss. Julie Rachel' 'Thayer, Mrs. John Borland (Marian Longstreth Morris)' 'Downton, Mr. William James' 'Ross, Mr. John Hugo' 'Paulner, Mr. Uscher' 'Taussig, Miss. Ruth' 'Jarvis, Mr. John Denzil' 'Frolicher-Stehli, Mr. Maxmillian' 'Gilinski, Mr. Eliezer' 'Murdlin, Mr. Joseph' 'Rintamaki, Mr. Matti' 'Stephenson, Mrs. Walter Bertram (Martha Eustis)' 'Elsbury, Mr. 
William James' 'Bourke, Miss. Mary' 'Chapman, Mr. John Henry' 'Van Impe, Mr. Jean Baptiste' 'Leitch, Miss. Jessie Wills' 'Johnson, Mr. Alfred' 'Boulos, Mr. Hanna' 'Duff Gordon, Sir. Cosmo Edmund ("Mr Morgan")' 'Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)' 'Slabenoff, Mr. Petco' 'Harrington, Mr. Charles H' 'Torber, Mr. Ernst William' 'Homer, Mr. Harry ("Mr E Haven")' 'Lindell, Mr. Edvard Bengtsson' 'Karaic, Mr. Milan' 'Daniel, Mr. Robert Williams' 'Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)' 'Shutes, Miss. Elizabeth W' 'Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)' 'Jardin, Mr. Jose Neto' 'Murphy, Miss. Margaret Jane' 'Horgan, Mr. John' 'Brocklebank, Mr. William Alfred' 'Herman, Miss. Alice' 'Danbom, Mr. Ernst Gilbert' 'Lobb, Mrs. William Arthur (Cordelia K Stanlick)' 'Becker, Miss. Marion Louise' 'Gavey, Mr. Lawrence' 'Yasbeck, Mr. Antoni' 'Kimball, Mr. Edwin Nelson Jr' 'Nakid, Mr. Sahid' 'Hansen, Mr. Henry Damsgaard' 'Bowen, Mr. David John "Dai"' 'Sutton, Mr. Frederick' 'Kirkland, Rev. Charles Leonard' 'Longley, Miss. Gretchen Fiske' 'Bostandyeff, Mr. Guentcho' "O'Connell, Mr. Patrick D" 'Barkworth, Mr. Algernon Henry Wilson' 'Lundahl, Mr. Johan Svensson' 'Stahelin-Maeglin, Dr. Max' 'Parr, Mr. William Henry Marsh' 'Skoog, Miss. Mabel' 'Davis, Miss. Mary' 'Leinonen, Mr. Antti Gustaf' 'Collyer, Mr. Harvey' 'Panula, Mrs. Juha (Maria Emilia Ojala)' 'Thorneycroft, Mr. Percival' 'Jensen, Mr. Hans Peder' 'Sagesser, Mlle. Emma' 'Skoog, Miss. Margit Elizabeth' 'Foo, Mr. Choong' 'Baclini, Miss. Eugenie' 'Harper, Mr. Henry Sleeper' 'Cor, Mr. Liudevit' 'Simonius-Blumer, Col. Oberst Alfons' 'Willey, Mr. Edward' 'Stanley, Miss. Amy Zillah Elsie' 'Mitkoff, Mr. Mito' 'Doling, Miss. Elsie' 'Kalvik, Mr. Johannes Halvorsen' 'O\'Leary, Miss. Hanora "Norah"' 'Hegarty, Miss. Hanora "Nora"' 'Hickman, Mr. Leonard Mark' 'Radeff, Mr. Alexander' 'Bourke, Mrs. John (Catherine)' 'Eitemiller, Mr. George Floyd' 'Newell, Mr. Arthur Webster' 'Frauenthal, Dr. 
Henry William' 'Badt, Mr. Mohamed' 'Colley, Mr. Edward Pomeroy' 'Coleff, Mr. Peju' 'Lindqvist, Mr. Eino William' 'Hickman, Mr. Lewis' 'Butler, Mr. Reginald Fenton' 'Rommetvedt, Mr. Knud Paust' 'Cook, Mr. Jacob' 'Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)' 'Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)' 'Davidson, Mr. Thornton' 'Mitchell, Mr. Henry Michael' 'Wilhelms, Mr. Charles' 'Watson, Mr. Ennis Hastings' 'Edvardsson, Mr. Gustaf Hjalmar' 'Sawyer, Mr. Frederick Charles' 'Turja, Miss. Anna Sofia' 'Goodwin, Mrs. Frederick (Augusta Tyler)' 'Cardeza, Mr. Thomas Drake Martinez' 'Peters, Miss. Katie' 'Hassab, Mr. Hammad' 'Olsvigen, Mr. Thor Anderson' 'Goodwin, Mr. Charles Edward' 'Brown, Mr. Thomas William Solomon' 'Laroche, Mr. Joseph Philippe Lemercier' 'Panula, Mr. Jaako Arnold' 'Dakic, Mr. Branko' 'Fischer, Mr. Eberhard Thelander' 'Madill, Miss. Georgette Alexandra' 'Dick, Mr. Albert Adrian' 'Karun, Miss. Manca' 'Lam, Mr. Ali' 'Saad, Mr. Khalil' 'Weir, Col. John' 'Chapman, Mr. Charles Henry' 'Kelly, Mr. James' 'Mullens, Miss. Katherine "Katie"' 'Thayer, Mr. John Borland' 'Humblen, Mr. Adolf Mathias Nicolai Olsen' 'Astor, Mrs. John Jacob (Madeleine Talmadge Force)' 'Silverthorne, Mr. Spencer Victor' 'Barbara, Miss. Saiide' 'Gallagher, Mr. Martin' 'Hansen, Mr. Henrik Juul' 'Morley, Mr. Henry Samuel ("Mr Henry Marshall")' 'Kelly, Mrs. Florence "Fannie"' 'Calderhead, Mr. Edward Pennington' 'Cleaver, Miss. Alice' 'Moubarek, Master. Halim Gonios ("William George")' 'Mayne, Mlle. Berthe Antonine ("Mrs de Villiers")' 'Klaber, Mr. Herman' 'Taylor, Mr. Elmer Zebley' 'Larsson, Mr. August Viktor' 'Greenberg, Mr. Samuel' 'Soholt, Mr. Peter Andreas Lauritz Andersen' 'Endres, Miss. Caroline Louise' 'Troutt, Miss. Edwina Celia "Winnie"' 'McEvoy, Mr. Michael' 'Johnson, Mr. Malkolm Joackim' 'Harper, Miss. Annie Jessie "Nina"' 'Jensen, Mr. Svend Lauritz' 'Gillespie, Mr. William Henry' 'Hodges, Mr. Henry Price' 'Chambers, Mr. Norman Campbell' 'Oreskovic, Mr. 
Luka' 'Renouf, Mrs. Peter Henry (Lillian Jefferys)' 'Mannion, Miss. Margareth' 'Bryhl, Mr. Kurt Arnold Gottfrid' 'Ilmakangas, Miss. Pieta Sofia' 'Allen, Miss. Elisabeth Walton' 'Hassan, Mr. Houssein G N' 'Knight, Mr. Robert J' 'Berriman, Mr. William John' 'Troupiansky, Mr. Moses Aaron' 'Williams, Mr. Leslie' 'Ford, Mrs. Edward (Margaret Ann Watson)' 'Lesurer, Mr. Gustave J' 'Ivanoff, Mr. Kanio' 'Nankoff, Mr. Minko' 'Hawksford, Mr. Walter James' 'Cavendish, Mr. Tyrell William' 'Ryerson, Miss. Susan Parker "Suzette"' 'McNamee, Mr. Neal' 'Stranden, Mr. Juho' 'Crosby, Capt. Edward Gifford' 'Abbott, Mr. Rossmore Edward' 'Sinkkonen, Miss. Anna' 'Marvin, Mr. Daniel Warner' 'Connaghton, Mr. Michael' 'Wells, Miss. Joan' 'Moor, Master. Meier' 'Vande Velde, Mr. Johannes Joseph' 'Jonkoff, Mr. Lalio' 'Herman, Mrs. Samuel (Jane Laver)' 'Hamalainen, Master. Viljo' 'Carlsson, Mr. August Sigfrid' 'Bailey, Mr. Percy Andrew' 'Theobald, Mr. Thomas Leonard' 'Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)' 'Garfirth, Mr. John' 'Nirva, Mr. Iisakki Antino Aijo' 'Barah, Mr. Hanna Assi' 'Carter, Mrs. William Ernest (Lucile Polk)' 'Eklund, Mr. Hans Linus' 'Hogeboom, Mrs. John C (Anna Andrews)' 'Brewe, Dr. Arthur Jackson' 'Mangan, Miss. Mary' 'Moran, Mr. Daniel J' 'Gronnestad, Mr. Daniel Danielsen' 'Lievens, Mr. Rene Aime' 'Jensen, Mr. Niels Peder' 'Mack, Mrs. (Mary)' 'Elias, Mr. Dibo' 'Hocking, Mrs. Elizabeth (Eliza Needs)' 'Myhrman, Mr. Pehr Fabian Oliver Malkolm' 'Tobin, Mr. Roger' 'Emanuel, Miss. Virginia Ethel' 'Kilgannon, Mr. Thomas J' 'Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)' 'Ayoub, Miss. Banoura' 'Dick, Mrs. Albert Adrian (Vera Gillespie)' 'Long, Mr. Milton Clyde' 'Johnston, Mr. Andrew G' 'Ali, Mr. William' 'Harmer, Mr. Abraham (David Lishin)' 'Sjoblom, Miss. Anna Sofia' 'Rice, Master. George Hugh' 'Dean, Master. Bertram Vere' 'Guggenheim, Mr. Benjamin' 'Keane, Mr. Andrew "Andy"' 'Gaskell, Mr. Alfred' 'Sage, Miss. Stella Anna' 'Hoyt, Mr. 
William Fisher' 'Dantcheff, Mr. Ristiu' 'Otter, Mr. Richard' 'Leader, Dr. Alice (Farnham)' 'Osman, Mrs. Mara' 'Ibrahim Shawah, Mr. Yousseff' 'Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)' 'Ponesell, Mr. Martin' 'Collyer, Mrs. Harvey (Charlotte Annie Tate)' 'Carter, Master. William Thornton II' 'Thomas, Master. Assad Alexander' 'Hedman, Mr. Oskar Arvid' 'Johansson, Mr. Karl Johan' 'Andrews, Mr. Thomas Jr' 'Pettersson, Miss. Ellen Natalia' 'Meyer, Mr. August' 'Chambers, Mrs. Norman Campbell (Bertha Griggs)' 'Alexander, Mr. William' 'Lester, Mr. James' 'Slemen, Mr. Richard James' 'Andersson, Miss. Ebba Iris Alfrida' 'Tomlin, Mr. Ernest Portage' 'Fry, Mr. Richard' 'Heininen, Miss. Wendla Maria' 'Mallet, Mr. Albert' 'Holm, Mr. John Fredrik Alexander' 'Skoog, Master. Karl Thorsten' 'Hays, Mrs. Charles Melville (Clara Jennings Gregg)' 'Lulic, Mr. Nikola' 'Reuchlin, Jonkheer. John George' 'Moor, Mrs. (Beila)' 'Panula, Master. Urho Abraham' 'Flynn, Mr. John' 'Lam, Mr. Len' 'Mallet, Master. Andre' 'McCormack, Mr. Thomas Joseph' 'Stone, Mrs. George Nelson (Martha Evelyn)' 'Yasbeck, Mrs. Antoni (Selini Alexander)' 'Richards, Master. George Sibley' 'Saad, Mr. Amin' 'Augustsson, Mr. Albert' 'Allum, Mr. Owen George' 'Compton, Miss. Sara Rebecca' 'Pasic, Mr. Jakob' 'Sirota, Mr. Maurice' 'Chip, Mr. Chang' 'Marechal, Mr. Pierre' 'Alhomaki, Mr. Ilmari Rudolf' 'Mudd, Mr. Thomas Charles' 'Serepeca, Miss. Augusta' 'Lemberopolous, Mr. Peter L' 'Culumovic, Mr. Jeso' 'Abbing, Mr. Anthony' 'Sage, Mr. Douglas Bullen' 'Markoff, Mr. Marin' 'Harper, Rev. John' 'Goldenberg, Mrs. Samuel L (Edwiga Grabowska)' 'Andersson, Master. Sigvard Harald Elias' 'Svensson, Mr. Johan' 'Boulos, Miss. Nourelain' 'Lines, Miss. Mary Conover' 'Carter, Mrs. Ernest Courtenay (Lilian Hughes)' 'Aks, Mrs. Sam (Leah Rosen)' 'Wick, Mrs. George Dennick (Mary Hitchcock)' 'Daly, Mr. Peter Denis ' 'Baclini, Mrs. Solomon (Latifa Qurban)' 'Razi, Mr. Raihed' 'Hansen, Mr. Claus Peter' 'Giles, Mr. 
Frederick Edward' 'Swift, Mrs. Frederick Joel (Margaret Welles Barron)' 'Sage, Miss. Dorothy Edith "Dolly"' 'Gill, Mr. John William' 'Bystrom, Mrs. (Karolina)' 'Duran y More, Miss. Asuncion' 'Roebling, Mr. Washington Augustus II' 'van Melkebeke, Mr. Philemon' 'Johnson, Master. Harold Theodor' 'Balkic, Mr. Cerin' 'Beckwith, Mrs. Richard Leonard (Sallie Monypeny)' 'Carlsson, Mr. Frans Olof' 'Vander Cruyssen, Mr. Victor' 'Abelson, Mrs. Samuel (Hannah Wizosky)' 'Najib, Miss. Adele Kiamie "Jane"' 'Gustafsson, Mr. Alfred Ossian' 'Petroff, Mr. Nedelio' 'Laleff, Mr. Kristo' 'Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)' 'Shelley, Mrs. William (Imanita Parrish Hall)' 'Markun, Mr. Johann' 'Dahlberg, Miss. Gerda Ulrika' 'Banfield, Mr. Frederick James' 'Sutehall, Mr. Henry Jr' 'Rice, Mrs. William (Margaret Norton)' 'Montvila, Rev. Juozas' 'Graham, Miss. Margaret Edith' 'Johnston, Miss. Catherine Helen "Carrie"' 'Behr, Mr. Karl Howell' 'Dooley, Mr. Patrick'] Sex = ['male' 'female'] Ticket = ['A/5 21171' 'PC 17599' 'STON/O2. 3101282' '113803' '373450' '330877' '17463' '349909' '347742' '237736' 'PP 9549' '113783' 'A/5. 2151' '347082' '350406' '248706' '382652' '244373' '345763' '2649' '239865' '248698' '330923' '113788' '347077' '2631' '19950' '330959' '349216' 'PC 17601' 'PC 17569' '335677' 'C.A. 24579' 'PC 17604' '113789' '2677' 'A./5. 2152' '345764' '2651' '7546' '11668' '349253' 'SC/Paris 2123' '330958' 'S.C./A.4. 23567' '370371' '14311' '2662' '349237' '3101295' 'A/4. 39886' 'PC 17572' '2926' '113509' '19947' 'C.A. 31026' '2697' 'C.A. 34651' 'CA 2144' '2669' '113572' '36973' '347088' 'PC 17605' '2661' 'C.A. 29395' 'S.P. 3464' '3101281' '315151' 'C.A. 33111' 'S.O.C. 14879' '2680' '1601' '348123' '349208' '374746' '248738' '364516' '345767' '345779' '330932' '113059' 'SO/C 14885' '3101278' 'W./C. 6608' 'SOTON/OQ 392086' '343275' '343276' '347466' 'W.E.P. 5734' 'C.A. 
2315' '364500' '374910' 'PC 17754' 'PC 17759' '231919' '244367' '349245' '349215' '35281' '7540' '3101276' '349207' '343120' '312991' '349249' '371110' '110465' '2665' '324669' '4136' '2627' 'STON/O 2. 3101294' '370369' 'PC 17558' 'A4. 54510' '27267' '370372' 'C 17369' '2668' '347061' '349241' 'SOTON/O.Q. 3101307' 'A/5. 3337' '228414' 'C.A. 29178' 'SC/PARIS 2133' '11752' '7534' 'PC 17593' '2678' '347081' 'STON/O2. 3101279' '365222' '231945' 'C.A. 33112' '350043' '230080' '244310' 'S.O.P. 1166' '113776' 'A.5. 11206' 'A/5. 851' 'Fa 265302' 'PC 17597' '35851' 'SOTON/OQ 392090' '315037' 'CA. 2343' '371362' 'C.A. 33595' '347068' '315093' '363291' '113505' 'PC 17318' '111240' 'STON/O 2. 3101280' '17764' '350404' '4133' 'PC 17595' '250653' 'LINE' 'SC/PARIS 2131' '230136' '315153' '113767' '370365' '111428' '364849' '349247' '234604' '28424' '350046' 'PC 17610' '368703' '4579' '370370' '248747' '345770' '3101264' '2628' 'A/5 3540' '347054' '2699' '367231' '112277' 'SOTON/O.Q. 3101311' 'F.C.C. 13528' 'A/5 21174' '250646' '367229' '35273' 'STON/O2. 3101283' '243847' '11813' 'W/C 14208' 'SOTON/OQ 392089' '220367' '21440' '349234' '19943' 'PP 4348' 'SW/PP 751' 'A/5 21173' '236171' '347067' '237442' 'C.A. 29566' 'W./C. 6609' '26707' 'C.A. 31921' '28665' 'SCO/W 1585' '367230' 'W./C. 14263' 'STON/O 2. 3101275' '2694' '19928' '347071' '250649' '11751' '244252' '362316' '113514' 'A/5. 3336' '370129' '2650' 'PC 17585' '110152' 'PC 17755' '230433' '384461' '110413' '112059' '382649' 'C.A. 17248' '347083' 'PC 17582' 'PC 17760' '113798' '250644' 'PC 17596' '370375' '13502' '347073' '239853' 'C.A. 2673' '336439' '347464' '345778' 'A/5. 10482' '113056' '349239' '345774' '349206' '237798' '370373' '19877' '11967' 'SC/Paris 2163' '349236' '349233' 'PC 17612' '2693' '113781' '19988' '9234' '367226' '226593' 'A/5 2466' '17421' 'PC 17758' 'P/PP 3381' 'PC 17485' '11767' 'PC 17608' '250651' '349243' 'F.C.C. 
13529' '347470' '29011' '36928' '16966' 'A/5 21172' '349219' '234818' '345364' '28551' '111361' '113043' 'PC 17611' '349225' '7598' '113784' '248740' '244361' '229236' '248733' '31418' '386525' 'C.A. 37671' '315088' '7267' '113510' '2695' '2647' '345783' '237671' '330931' '330980' 'SC/PARIS 2167' '2691' 'SOTON/O.Q. 3101310' 'C 7076' '110813' '2626' '14313' 'PC 17477' '11765' '3101267' '323951' 'C 7077' '113503' '2648' '347069' 'PC 17757' '2653' 'STON/O 2. 3101293' '349227' '27849' '367655' 'SC 1748' '113760' '350034' '3101277' '350052' '350407' '28403' '244278' '240929' 'STON/O 2. 3101289' '341826' '4137' '315096' '28664' '347064' '29106' '312992' '349222' '394140' 'STON/O 2. 3101269' '343095' '28220' '250652' '28228' '345773' '349254' 'A/5. 13032' '315082' '347080' 'A/4. 34244' '2003' '250655' '364851' 'SOTON/O.Q. 392078' '110564' '376564' 'SC/AH 3085' 'STON/O 2. 3101274' '13507' 'C.A. 18723' '345769' '347076' '230434' '65306' '33638' '113794' '2666' '113786' '65303' '113051' '17453' 'A/5 2817' '349240' '13509' '17464' 'F.C.C. 13531' '371060' '19952' '364506' '111320' '234360' 'A/S 2816' 'SOTON/O.Q. 3101306' '113792' '36209' '323592' '315089' 'SC/AH Basle 541' '7553' '31027' '3460' '350060' '3101298' '239854' 'A/5 3594' '4134' '11771' 'A.5. 18509' '65304' 'SOTON/OQ 3101317' '113787' 'PC 17609' 'A/4 45380' '36947' 'C.A. 6212' '350035' '315086' '364846' '330909' '4135' '26360' '111427' 'C 4001' '382651' 'SOTON/OQ 3101316' 'PC 17473' 'PC 17603' '349209' '36967' 'C.A. 34260' '226875' '349242' '12749' '349252' '2624' '2700' '367232' 'W./C. 14258' 'PC 17483' '3101296' '29104' '2641' '2690' '315084' '113050' 'PC 17761' '364498' '13568' 'WE/P 5735' '2908' '693' 'SC/PARIS 2146' '244358' '330979' '2620' '347085' '113807' '11755' '345572' '372622' '349251' '218629' 'SOTON/OQ 392082' 'SOTON/O.Q. 392087' 'A/4 48871' '349205' '2686' '350417' 'S.W./PP 752' '11769' 'PC 17474' '14312' 'A/4. 20589' '358585' '243880' '2689' 'STON/O 2. 
3101286' '237789' '13049' '3411' '237565' '13567' '14973' 'A./5. 3235' 'STON/O 2. 3101273' 'A/5 3902' '364848' 'SC/AH 29037' '248727' '2664' '349214' '113796' '364511' '111426' '349910' '349246' '113804' 'SOTON/O.Q. 3101305' '370377' '364512' '220845' '31028' '2659' '11753' '350029' '54636' '36963' '219533' '349224' '334912' '27042' '347743' '13214' '112052' '237668' 'STON/O 2. 3101292' '350050' '349231' '13213' 'S.O./P.P. 751' 'CA. 2314' '349221' '8475' '330919' '365226' '349223' '29751' '2623' '5727' '349210' 'STON/O 2. 3101285' '234686' '312993' 'A/5 3536' '19996' '29750' 'F.C. 12750' 'C.A. 24580' '244270' '239856' '349912' '342826' '4138' '330935' '6563' '349228' '350036' '24160' '17474' '349256' '2672' '113800' '248731' '363592' '35852' '348121' 'PC 17475' '36864' '350025' '223596' 'PC 17476' 'PC 17482' '113028' '7545' '250647' '348124' '34218' '36568' '347062' '350048' '12233' '250643' '113806' '315094' '36866' '236853' 'STON/O2. 3101271' '239855' '28425' '233639' '349201' '349218' '16988' '376566' 'STON/O 2. 3101288' '250648' '113773' '335097' '29103' '392096' '345780' '349204' '350042' '29108' '363294' 'SOTON/O2 3101272' '2663' '347074' '112379' '364850' '8471' '345781' '350047' 'S.O./P.P. 3' '2674' '29105' '347078' '383121' '36865' '2687' '113501' 'W./C. 6607' 'SOTON/O.Q. 3101312' '374887' '3101265' '12460' 'PC 17600' '349203' '28213' '17465' '349244' '2685' '2625' '347089' '347063' '112050' '347087' '248723' '3474' '28206' '364499' '112058' 'STON/O2. 3101290' 'S.C./PARIS 2079' 'C 7075' '315098' '19972' '368323' '367228' '2671' '347468' '2223' 'PC 17756' '315097' '392092' '11774' 'SOTON/O2 3101287' '2683' '315090' 'C.A. 
5547' '349213' '347060' 'PC 17592' '392091' '113055' '2629' '350026' '28134' '17466' '233866' '236852' 'SC/PARIS 2149' 'PC 17590' '345777' '349248' '695' '345765' '2667' '349212' '349217' '349257' '7552' 'C.A./SOTON 34068' 'SOTON/OQ 392076' '211536' '112053' '111369' '370376'] Cabin = [nan 'C85' 'C123' 'E46' 'G6' 'C103' 'D56' 'A6' 'C23 C25 C27' 'B78' 'D33' 'B30' 'C52' 'B28' 'C83' 'F33' 'F G73' 'E31' 'A5' 'D10 D12' 'D26' 'C110' 'B58 B60' 'E101' 'F E69' 'D47' 'B86' 'F2' 'C2' 'E33' 'B19' 'A7' 'C49' 'F4' 'A32' 'B4' 'B80' 'A31' 'D36' 'D15' 'C93' 'C78' 'D35' 'C87' 'B77' 'E67' 'B94' 'C125' 'C99' 'C118' 'D7' 'A19' 'B49' 'D' 'C22 C26' 'C106' 'C65' 'E36' 'C54' 'B57 B59 B63 B66' 'C7' 'E34' 'C32' 'B18' 'C124' 'C91' 'E40' 'T' 'C128' 'D37' 'B35' 'E50' 'C82' 'B96 B98' 'E10' 'E44' 'A34' 'C104' 'C111' 'C92' 'E38' 'D21' 'E12' 'E63' 'A14' 'B37' 'C30' 'D20' 'B79' 'E25' 'D46' 'B73' 'C95' 'B38' 'B39' 'B22' 'C86' 'C70' 'A16' 'C101' 'C68' 'A10' 'E68' 'B41' 'A20' 'D19' 'D50' 'D9' 'A23' 'B50' 'A26' 'D48' 'E58' 'C126' 'B71' 'B51 B53 B55' 'D49' 'B5' 'B20' 'F G63' 'C62 C64' 'E24' 'C90' 'C45' 'E8' 'B101' 'D45' 'C46' 'D30' 'E121' 'D11' 'E77' 'F38' 'B3' 'D6' 'B82 B84' 'D17' 'A36' 'B102' 'B69' 'E49' 'C47' 'D28' 'E17' 'A24' 'C50' 'B42' 'C148'] Embarked = ['S' 'C' 'Q' nan]
df.describe()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 714.000000 | 891.000000 | 891.000000 | 891.000000 |
mean | 0.383838 | 2.308642 | 29.699118 | 0.523008 | 0.381594 | 32.204208 |
std | 0.486592 | 0.836071 | 14.526497 | 1.102743 | 0.806057 | 49.693429 |
min | 0.000000 | 1.000000 | 0.420000 | 0.000000 | 0.000000 | 0.000000 |
25% | 0.000000 | 2.000000 | 20.125000 | 0.000000 | 0.000000 | 7.910400 |
50% | 0.000000 | 3.000000 | 28.000000 | 0.000000 | 0.000000 | 14.454200 |
75% | 1.000000 | 3.000000 | 38.000000 | 1.000000 | 0.000000 | 31.000000 |
max | 1.000000 | 3.000000 | 80.000000 | 8.000000 | 6.000000 | 512.329200 |
df.describe(include = object) # include: list of data types to summarise (default None = numeric only). Use the builtin `object`; `np.object` is only a deprecated alias for it.
<ipython-input-52-a823d8da510e>:1: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations df.describe(include = np.object)
Name | Sex | Ticket | Cabin | Embarked | |
---|---|---|---|---|---|
count | 891 | 891 | 891 | 204 | 889 |
unique | 891 | 2 | 681 | 147 | 3 |
top | Frost, Mr. Anthony Wood "Archie" | male | 1601 | C23 C25 C27 | S |
freq | 1 | 577 | 7 | 4 | 644 |
df.describe(include = 'all') #List of data types to be included while describing dataframe. Default is None
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 891 | 891 | 714.000000 | 891.000000 | 891.000000 | 891 | 891.000000 | 204 | 889 |
unique | NaN | NaN | 891 | 2 | NaN | NaN | NaN | 681 | NaN | 147 | 3 |
top | NaN | NaN | Wells, Miss. Joan | male | NaN | NaN | NaN | CA. 2343 | NaN | B96 B98 | S |
freq | NaN | NaN | 1 | 577 | NaN | NaN | NaN | 7 | NaN | 4 | 644 |
mean | 0.383838 | 2.308642 | NaN | NaN | 29.699118 | 0.523008 | 0.381594 | NaN | 32.204208 | NaN | NaN |
std | 0.486592 | 0.836071 | NaN | NaN | 14.526497 | 1.102743 | 0.806057 | NaN | 49.693429 | NaN | NaN |
min | 0.000000 | 1.000000 | NaN | NaN | 0.420000 | 0.000000 | 0.000000 | NaN | 0.000000 | NaN | NaN |
25% | 0.000000 | 2.000000 | NaN | NaN | 20.125000 | 0.000000 | 0.000000 | NaN | 7.910400 | NaN | NaN |
50% | 0.000000 | 3.000000 | NaN | NaN | 28.000000 | 0.000000 | 0.000000 | NaN | 14.454200 | NaN | NaN |
75% | 1.000000 | 3.000000 | NaN | NaN | 38.000000 | 1.000000 | 0.000000 | NaN | 31.000000 | NaN | NaN |
max | 1.000000 | 3.000000 | NaN | NaN | 80.000000 | 8.000000 | 6.000000 | NaN | 512.329200 | NaN | NaN |
df.describe(percentiles = [0.1, 0.6, 0.9])
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 714.000000 | 891.000000 | 891.000000 | 891.000000 |
mean | 0.383838 | 2.308642 | 29.699118 | 0.523008 | 0.381594 | 32.204208 |
std | 0.486592 | 0.836071 | 14.526497 | 1.102743 | 0.806057 | 49.693429 |
min | 0.000000 | 1.000000 | 0.420000 | 0.000000 | 0.000000 | 0.000000 |
10% | 0.000000 | 1.000000 | 14.000000 | 0.000000 | 0.000000 | 7.550000 |
50% | 0.000000 | 3.000000 | 28.000000 | 0.000000 | 0.000000 | 14.454200 |
60% | 0.000000 | 3.000000 | 31.800000 | 0.000000 | 0.000000 | 21.679200 |
90% | 1.000000 | 3.000000 | 50.000000 | 1.000000 | 2.000000 | 77.958300 |
max | 1.000000 | 3.000000 | 80.000000 | 8.000000 | 6.000000 | 512.329200 |
# percentiles: a list-like of numbers between 0 and 1; describe() returns the corresponding percentile for each value
df.describe(include = 'all', percentiles = [0.1, 0.9])
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
count | 891.000000 | 891.000000 | 891 | 891 | 714.000000 | 891.000000 | 891.000000 | 891 | 891.000000 | 204 | 889 |
unique | NaN | NaN | 891 | 2 | NaN | NaN | NaN | 681 | NaN | 147 | 3 |
top | NaN | NaN | Frost, Mr. Anthony Wood "Archie" | male | NaN | NaN | NaN | 1601 | NaN | C23 C25 C27 | S |
freq | NaN | NaN | 1 | 577 | NaN | NaN | NaN | 7 | NaN | 4 | 644 |
mean | 0.383838 | 2.308642 | NaN | NaN | 29.699118 | 0.523008 | 0.381594 | NaN | 32.204208 | NaN | NaN |
std | 0.486592 | 0.836071 | NaN | NaN | 14.526497 | 1.102743 | 0.806057 | NaN | 49.693429 | NaN | NaN |
min | 0.000000 | 1.000000 | NaN | NaN | 0.420000 | 0.000000 | 0.000000 | NaN | 0.000000 | NaN | NaN |
10% | 0.000000 | 1.000000 | NaN | NaN | 14.000000 | 0.000000 | 0.000000 | NaN | 7.550000 | NaN | NaN |
50% | 0.000000 | 3.000000 | NaN | NaN | 28.000000 | 0.000000 | 0.000000 | NaN | 14.454200 | NaN | NaN |
90% | 1.000000 | 3.000000 | NaN | NaN | 50.000000 | 1.000000 | 2.000000 | NaN | 77.958300 | NaN | NaN |
max | 1.000000 | 3.000000 | NaN | NaN | 80.000000 | 8.000000 | 6.000000 | NaN | 512.329200 | NaN | NaN |
df.drop(['Name', 'Ticket', "Cabin"], axis = 1, inplace = True)
df
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S |
888 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S |
889 | 0 | 3 | female | NaN | 1 | 2 | 23.4500 | S |
890 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C |
891 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q |
891 rows × 8 columns
df.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
df.fillna(value = 100, axis = 1).info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 891 entries, 1 to 891 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Survived 891 non-null int64 1 Pclass 891 non-null int64 2 Sex 891 non-null object 3 Age 891 non-null float64 4 SibSp 891 non-null int64 5 Parch 891 non-null int64 6 Fare 891 non-null float64 7 Embarked 891 non-null object dtypes: float64(2), int64(4), object(2) memory usage: 94.9+ KB
plt.hist(df.Age) # Histogram of Age. The optional `bins` argument (an integer, a sequence, or a string) is not passed here, so matplotlib's default is used.
plt.show()
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 891 entries, 1 to 891 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Survived 891 non-null int64 1 Pclass 891 non-null int64 2 Sex 891 non-null object 3 Age 714 non-null float64 4 SibSp 891 non-null int64 5 Parch 891 non-null int64 6 Fare 891 non-null float64 7 Embarked 889 non-null object dtypes: float64(2), int64(4), object(2) memory usage: 94.9+ KB
df1 = df.copy()
# DataFrame.copy(deep=True)
# When deep=True (default), a new object will be created with a copy of the calling object’s data and indices.
# Modifications to the data or indices of the copy will not be reflected in the original object (see the Notes section of the pandas DataFrame.copy documentation).
# When deep=False, a new object will be created without copying the calling object’s data or
# index (only references to the data and index are copied). Any changes to the data of the original will
# be reflected in the shallow copy (and vice versa).
df1
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S |
888 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S |
889 | 0 | 3 | female | NaN | 1 | 2 | 23.4500 | S |
890 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C |
891 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q |
891 rows × 8 columns
df1['Age'].median()
28.0
df1.fillna({"Age" : df1['Age'].median(), "Embarked" : "S"}, inplace = True)
# df1['Age'].fillna(df1['Age'].median(), inplace = True)
# df1['Embarked'].fillna('S', inplace= True)
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S |
888 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S |
889 | 0 | 3 | female | 28.0 | 1 | 2 | 23.4500 | S |
890 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C |
891 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q |
891 rows × 8 columns
df['Age'].median()
28.0
df1.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 891 entries, 1 to 891 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Survived 891 non-null int64 1 Pclass 891 non-null int64 2 Sex 891 non-null object 3 Age 891 non-null float64 4 SibSp 891 non-null int64 5 Parch 891 non-null int64 6 Fare 891 non-null float64 7 Embarked 891 non-null object dtypes: float64(2), int64(4), object(2) memory usage: 94.9+ KB
plt.hist(df1.Age, bins = 100)
plt.show()
sns.boxplot(x = df.Pclass, y = df.Age)
<AxesSubplot:xlabel='Pclass', ylabel='Age'>
sns.boxplot(x = df.Pclass, y = df.Age, hue = df.Sex)
<AxesSubplot:xlabel='Pclass', ylabel='Age'>
df
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
df2 = df.copy()
df2
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 11 columns
# Correlation is only defined for numeric data, so select the numeric columns
# explicitly instead of relying on pandas to drop the text columns.
df2.select_dtypes(include = "number").corr()
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 |
Pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 |
Age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 |
SibSp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 |
Parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 |
Fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 |
# Heatmap of pairwise correlations; numeric columns are selected explicitly
# because correlation is not defined for the text columns in df2.
sns.heatmap(df2.select_dtypes(include = "number").corr(), annot = True, cmap = 'coolwarm')
<AxesSubplot:>
sns.heatmap(df2.isnull(), yticklabels=False)
<AxesSubplot:ylabel='PassengerId'>
def fun(c):
    """Return a passenger's age, filling in a class-based value when it is missing.

    Parameters
    ----------
    c : sequence or pandas Series
        Holds the age first and the passenger class second, e.g. one row
        of ``df[["Age", "Pclass"]]``.

    Returns
    -------
    The age itself when it is not null; otherwise 39 for class 1,
    34 for class 2 and 27 for any other class.
    """
    # Unpack by position rather than with c[0] / c[1]: on a row Series whose
    # labels are "Age" and "Pclass", integer keys are a deprecated fallback.
    age, pclass, *_ = c

    if not pd.isnull(age):
        return age

    # Hard-coded fill values per passenger class.
    if pclass == 1:
        return 39
    if pclass == 2:
        return 34
    return 27
df2['Age'] = df2[["Age", 'Pclass']].apply(fun, axis = 1)
plt.hist(df2.Age, bins = 100)
plt.show()
df2.fillna({"Embarked" : "S"}, inplace = True)
df2.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 891 entries, 1 to 891 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Survived 891 non-null int64 1 Pclass 891 non-null int64 2 Sex 891 non-null object 3 Age 891 non-null float64 4 SibSp 891 non-null int64 5 Parch 891 non-null int64 6 Fare 891 non-null float64 7 Embarked 891 non-null object dtypes: float64(2), int64(4), object(2) memory usage: 94.9+ KB
df2.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
# univariate analysis : exploring one column at a time
# numerical
# sns.distplot(df.Age, kde = False, bins = 50 )
# histplot is the axes-level replacement for the deprecated distplot;
# kde = False keeps it a plain count histogram.
sns.histplot(df2.Age, kde = False, bins = 5)
d:\installed softwares\python\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='Age'>
# categorical :
sns.countplot(x = "Survived", data = df2)
plt.show()
# sns.countplot(x = df2["Survived"])
df1.Survived.value_counts()
0 549 1 342 Name: Survived, dtype: int64
#sns.countplot(x = "Survived", data = df2, hue = "Pclass")
#plt.show()
sns.countplot(x = "Survived", data = df2, hue = "Sex")
<AxesSubplot:xlabel='Survived', ylabel='count'>
# sns.countplot(x = "Survived", data = df2, hue = "Age")
# bivariate analysis - exploring 2 columns at a time
# num vs num, cat vs num, cat vs cat
df2.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
sns.jointplot(x = "Age", y = "Fare", data = df2)
plt.show()
sns.jointplot(x = "Age", y = "Fare", data = df2, color = 'red')
plt.show()
sns.jointplot(x = "Age", y = "Fare", data = df2, color = 'red', kind = "kde")
plt.show()
# cat vs num :
# boxplot, barplot, stripplot, swarmplot, violinplot
sns.boxplot(x = "Survived", y = "Age", data = df2)
plt.show()
sns.boxplot(x = "Survived", y = "Age", data = df2, hue = "Pclass")
plt.show()
sns.swarmplot(x = "Survived", y = "Age", data = df2, hue = "Pclass")
plt.show()
d:\installed softwares\python\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 26.4% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning) d:\installed softwares\python\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 7.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
sns.violinplot(x = "Survived", y = "Age", data = df2, hue = "Pclass", width = 0.5)
plt.show()
sns.stripplot(x = "Survived", y = "Age", data = df2, hue = "Pclass", jitter = False)
# sns.color_palette("crest")
plt.show()
sns.stripplot(x = "Survived", y = "Age", data = df2, hue = "Sex", palette = 'magma')
<AxesSubplot:xlabel='Survived', ylabel='Age'>
df2.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
# Sum only the numeric columns per port of embarkation; text columns such as
# Sex have no meaningful sum.
df2.groupby(['Embarked']).sum(numeric_only = True)
# Pie chart of the total Fare per port of embarkation.
df2.groupby(['Embarked']).sum(numeric_only = True).plot(kind = 'pie', y = "Fare")
<AxesSubplot:ylabel='Fare'>
# plt.pie()
# Per-port totals of the numeric columns only (text columns are excluded).
df2.groupby(['Embarked']).sum(numeric_only = True)
Survived | Pclass | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|---|
Embarked | ||||||
C | 93 | 317 | 5177.92 | 65 | 61 | 10072.2962 |
Q | 30 | 224 | 2116.50 | 33 | 13 | 1022.2543 |
S | 219 | 1516 | 19126.75 | 368 | 266 | 17599.3988 |
# Pie chart of the total Fare per passenger class; only numeric columns are summed.
df2.groupby(['Pclass']).sum(numeric_only = True).plot(kind = 'pie', y = "Fare")
<AxesSubplot:ylabel='Fare'>
# Per-class totals of the numeric columns only (text columns are excluded).
df2.groupby(['Pclass']).sum(numeric_only = True)
Survived | Age | SibSp | Parch | Fare | |
---|---|---|---|---|---|
Pclass | |||||
1 | 136 | 8281.42 | 90 | 77 | 18177.4125 |
2 | 87 | 5542.83 | 74 | 70 | 3801.8417 |
3 | 119 | 12596.92 | 302 | 193 | 6714.6951 |
# cat to num
# concat...
# joins, merge
df = df2.copy()
df.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
# labelencoder, onehotencoder
# pandas get dummies
# One indicator column for Sex (first level dropped); dtype = int pins the
# values to 0/1 regardless of the pandas version's default dummy dtype.
s_ = pd.get_dummies(df.Sex, drop_first=True, dtype=int)
s_.head()
male | |
---|---|
PassengerId | |
1 | 1 |
2 | 0 |
3 | 0 |
4 | 0 |
5 | 1 |
# Indicator columns for Embarked (first level dropped); dtype = int pins the
# values to 0/1 regardless of the pandas version's default dummy dtype.
e_ = pd.get_dummies(df.Embarked, drop_first=True, dtype=int)
e_.head()
Q | S | |
---|---|---|
PassengerId | ||
1 | 0 | 1 |
2 | 0 | 0 |
3 | 0 | 1 |
4 | 0 | 1 |
5 | 0 | 1 |
df = pd.concat([df, s_, e_], axis = 1)
df.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | male | Q | S | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | 1 | 0 | 1 |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | 0 | 0 | 0 |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | 0 | 0 | 1 |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | 0 | 0 | 1 |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | 1 | 0 | 1 |
# The male / Q / S indicator columns now carry this information,
# so remove the original text columns in place.
df.drop(columns=['Sex', 'Embarked'], inplace=True)
df.head()
Survived | Pclass | Age | SibSp | Parch | Fare | male | Q | S | |
---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||
1 | 0 | 3 | 22.0 | 1 | 0 | 7.2500 | 1 | 0 | 1 |
2 | 1 | 1 | 38.0 | 1 | 0 | 71.2833 | 0 | 0 | 0 |
3 | 1 | 3 | 26.0 | 0 | 0 | 7.9250 | 0 | 0 | 1 |
4 | 1 | 1 | 35.0 | 1 | 0 | 53.1000 | 0 | 0 | 1 |
5 | 0 | 3 | 35.0 | 0 | 0 | 8.0500 | 1 | 0 | 1 |
df2.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
# Alternative to get_dummies: encode Sex by hand with an explicit mapping.
# Series.map with a dict avoids the object -> int downcasting that
# Series.replace performs, which newer pandas versions warn about.
# NOTE: any value missing from the mapping becomes NaN; Sex only holds
# "male" / "female" in this dataset.
df2['S1'] = df2['Sex'].map({'male': 0, 'female': 1})
df2.head()
Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked | S1 | |
---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||
1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | 0 |
2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | 1 |
3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | 1 |
4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | 1 |
5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | 0 |
df.head()
Survived | Pclass | Age | SibSp | Parch | Fare | male | Q | S | |
---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||
1 | 0 | 3 | 22.0 | 1 | 0 | 7.2500 | 1 | 0 | 1 |
2 | 1 | 1 | 38.0 | 1 | 0 | 71.2833 | 0 | 0 | 0 |
3 | 1 | 3 | 26.0 | 0 | 0 | 7.9250 | 0 | 0 | 1 |
4 | 1 | 1 | 35.0 | 1 | 0 | 53.1000 | 0 | 0 | 1 |
5 | 0 | 3 | 35.0 | 0 | 0 | 8.0500 | 1 | 0 | 1 |
# Feature engineering: Family is the element-wise sum of the SibSp and Parch columns.
df['Family'] = df['SibSp'].add(df['Parch'])
df.head()
Survived | Pclass | Age | SibSp | Parch | Fare | male | Q | S | Family | |
---|---|---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||||
1 | 0 | 3 | 22.0 | 1 | 0 | 7.2500 | 1 | 0 | 1 | 1 |
2 | 1 | 1 | 38.0 | 1 | 0 | 71.2833 | 0 | 0 | 0 | 1 |
3 | 1 | 3 | 26.0 | 0 | 0 | 7.9250 | 0 | 0 | 1 | 0 |
4 | 1 | 1 | 35.0 | 1 | 0 | 53.1000 | 0 | 0 | 1 | 1 |
5 | 0 | 3 | 35.0 | 0 | 0 | 8.0500 | 1 | 0 | 1 | 0 |
# SibSp and Parch are now summarised by the Family column, so remove them in place.
df.drop(columns=['SibSp', 'Parch'], inplace=True)
df.head()
Survived | Pclass | Age | Fare | male | Q | S | Family | |
---|---|---|---|---|---|---|---|---|
PassengerId | ||||||||
1 | 0 | 3 | 22.0 | 7.2500 | 1 | 0 | 1 | 1 |
2 | 1 | 1 | 38.0 | 71.2833 | 0 | 0 | 0 | 1 |
3 | 1 | 3 | 26.0 | 7.9250 | 0 | 0 | 1 | 0 |
4 | 1 | 1 | 35.0 | 53.1000 | 0 | 0 | 1 | 1 |
5 | 0 | 3 | 35.0 | 8.0500 | 1 | 0 | 1 | 0 |
df.shape
(891, 8)
# NOTE(review): drop_duplicates() compares column values only and ignores the
# index. PassengerId is the index and the identifying columns (Name, Ticket,
# Cabin) are no longer in this frame, so rows that merely share the same
# feature values are treated as duplicates even though they are different
# passengers. The shape goes from (891, 8) to (776, 8), i.e. 115 rows are
# removed. Confirm this is intended; if the goal is to remove repeated
# records, check for duplicates on the raw data before dropping columns.
df.drop_duplicates(inplace = True) # Pandas drop_duplicates() method helps in removing duplicates from the data frame.
df.shape
(776, 8)
Office:- 660, Sector 14A, Vasundhara, Ghaziabad, Uttar Pradesh - 201012, India