import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


train_df = pd.read_excel('Data_Train.xlsx')


pd.set_option('display.max_columns', None)


train_df.head()


train_df.shape

(10683, 11)


type(train_df)

pandas.core.frame.DataFrame


train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Airline          10683 non-null  object
 1   Date_of_Journey  10683 non-null  object
 2   Source           10683 non-null  object
 3   Destination      10683 non-null  object
 4   Route            10682 non-null  object
 5   Dep_Time         10683 non-null  object
 6   Arrival_Time     10683 non-null  object
 7   Duration         10683 non-null  object
 8   Total_Stops      10682 non-null  object
 9   Additional_Info  10683 non-null  object
 10  Price            10683 non-null  int64 
dtypes: int64(1), object(10)
memory usage: 918.2+ KB


train_df.Duration.value_counts()

2h 50m     550
1h 30m     386
2h 45m     337
2h 55m     337
2h 35m     329
          ... 
31h 30m      1
30h 25m      1
42h 5m       1
4h 10m       1
47h 40m      1
Name: Duration, Length: 368, dtype: int64


train_df.isna().sum()

Airline            0
Date_of_Journey    0
Source             0
Destination        0
Route              1
Dep_Time           0
Arrival_Time       0
Duration           0
Total_Stops        1
Additional_Info    0
Price              0
dtype: int64


train_df.dropna(inplace=True)


train_df.isna().sum()

Airline            0
Date_of_Journey    0
Source             0
Destination        0
Route              0
Dep_Time           0
Arrival_Time       0
Duration           0
Total_Stops        0
Additional_Info    0
Price              0
dtype: int64


train_df['Journy_Day'] = pd.to_datetime(train_df.Date_of_Journey, format='%d/%m/%Y').dt.day


train_df['Journy_Month'] = pd.to_datetime(train_df.Date_of_Journey, format='%d/%m/%Y').dt.month


train_df.head()


train_df.drop(['Date_of_Journey'], axis=1, inplace=True)


train_df.head()


## Departure time is when the plane leaves the gate

train_df['Dep_hour'] = pd.to_datetime(train_df.Dep_Time).dt.hour
train_df['Dep_min'] = pd.to_datetime(train_df.Dep_Time).dt.minute

train_df.drop(['Dep_Time'], axis=1, inplace=True)


train_df.head()


## Arrival time is when the plane pulls up to the gate

train_df['Arrival_hour'] = pd.to_datetime(train_df.Arrival_Time).dt.hour
train_df['Arrival_min'] = pd.to_datetime(train_df.Arrival_Time).dt.minute

train_df.drop(['Arrival_Time'], axis=1, inplace=True)


train_df.head()


## Time taken to reach destination is called Duration
## It is difference between Departure Time and Arrival Time

## Assigning and converting Duration column into list
duration = list(train_df.Duration)

for i in range(len(duration)):
    if len(duration[i].split()) != 2:                       # Check if duration contains only hour or mins
        if 'h' in duration[i]:
            duration[i] = duration[i].strip() + " 0m"  # Adding 0 mins
        else:
            duration[i] = "0h " + duration[i]


duration_hours = []
duration_mins = []
for i in range(len(duration)):
    duration_hours.append(int(duration[i].split("h")[0]))    # Extract hours from Duration
    duration_mins.append(int(duration[i].split('m')[0].split()[-1]))  # Extract onlye minutes from Duration


train_df["Duration_hours"] = duration_hours
train_df["Duration_mins"] = duration_mins


train_df.tail()


train_df.drop(['Duration'], axis=1, inplace=True)


train_df.head()


train_df.Airline.value_counts()

Jet Airways                          3849
IndiGo                               2053
Air India                            1751
Multiple carriers                    1196
SpiceJet                              818
Vistara                               479
Air Asia                              319
GoAir                                 194
Multiple carriers Premium economy      13
Jet Airways Business                    6
Vistara Premium economy                 3
Trujet                                  1
Name: Airline, dtype: int64


sns.catplot(y="Price", x="Airline", data=train_df.sort_values("Price", ascending=False), kind='boxen', height=6, aspect=3)

<seaborn.axisgrid.FacetGrid at 0x137168d30>


len(train_df['Airline'].unique())

12


Airline = train_df[['Airline']]
Airline = pd.get_dummies(Airline, drop_first=True)
Airline.head()


train_df.Source.value_counts()

Delhi       4536
Kolkata     2871
Banglore    2197
Mumbai       697
Chennai      381
Name: Source, dtype: int64


sns.catplot(y='Price', x='Source', data=train_df.sort_values('Price', ascending=False), kind='boxen', height=6, aspect=3)

<seaborn.axisgrid.FacetGrid at 0x1370b8be0>


Source = train_df[['Source']]
Source = pd.get_dummies(Source, drop_first=True)
Source.head()


Destination = train_df[['Destination']]
Destination = pd.get_dummies(Destination, drop_first=True)
Destination.head()


train_df.Route

0                    BLR → DEL
1        CCU → IXR → BBI → BLR
2        DEL → LKO → BOM → COK
3              CCU → NAG → BLR
4              BLR → NAG → DEL
                 ...          
10678                CCU → BLR
10679                CCU → BLR
10680                BLR → DEL
10681                BLR → DEL
10682    DEL → GOI → BOM → COK
Name: Route, Length: 10682, dtype: object


a = train_df.Additional_Info=='No info'


a.mean()

0.781127129750983


## Additional_Info conatins almost 80% no_info
# Route and Total_Stops are related to each other

train_df.drop(['Route', 'Additional_Info'], axis=1, inplace=True)


train_df.head()


train_df.Total_Stops.value_counts()

1 stop      5625
non-stop    3491
2 stops     1520
3 stops       45
4 stops        1
Name: Total_Stops, dtype: int64


## As this is case of Ordinal Categorical type we perform LabelEncoder
## Here vlaues are assigned with corresponding keys
train_df.replace({"non-stop": 0, "1 stop": 1, "2 stops": 2, "3 stops": 3, "4 stops": 4}, inplace=True)


# train_df.head()


data_train = pd.concat([train_df, Airline, Source, Destination], axis=1)


data_train.head()


data_train.drop(['Destination', 'Source', 'Airline'], axis=1, inplace=True)


data_train.head()


data_train.shape

(10682, 30)


test_data = pd.read_excel('Test_set.xlsx')


test_data.head()


test_data.shape

(2671, 10)


print("Test data Info")
print("-"*75)
print(test_data.info())

print()
print()

print("Null values :")
print("-"*75)
test_data.dropna(inplace = True)
print(test_data.isnull().sum())

# EDA

# Date_of_Journey
test_data["Journey_day"] = pd.to_datetime(test_data.Date_of_Journey, format="%d/%m/%Y").dt.day
test_data["Journey_month"] = pd.to_datetime(test_data["Date_of_Journey"], format = "%d/%m/%Y").dt.month
test_data.drop(["Date_of_Journey"], axis = 1, inplace = True)

# Dep_Time
test_data["Dep_hour"] = pd.to_datetime(test_data["Dep_Time"]).dt.hour
test_data["Dep_min"] = pd.to_datetime(test_data["Dep_Time"]).dt.minute
test_data.drop(["Dep_Time"], axis = 1, inplace = True)

# Arrival_Time
test_data["Arrival_hour"] = pd.to_datetime(test_data.Arrival_Time).dt.hour
test_data["Arrival_min"] = pd.to_datetime(test_data.Arrival_Time).dt.minute
test_data.drop(["Arrival_Time"], axis = 1, inplace = True)

# Duration
duration = list(test_data["Duration"])

for i in range(len(duration)):
    if len(duration[i].split()) != 2:    # Check if duration contains only hour or mins
        if "h" in duration[i]:
            duration[i] = duration[i].strip() + " 0m"   # Adds 0 minute
        else:
            duration[i] = "0h " + duration[i]           # Adds 0 hour

duration_hours = []
duration_mins = []
for i in range(len(duration)):
    duration_hours.append(int(duration[i].split(sep = "h")[0]))    # Extract hours from duration
    duration_mins.append(int(duration[i].split(sep = "m")[0].split()[-1]))   # Extracts only minutes from duration

# Adding Duration column to test set
test_data["Duration_hours"] = duration_hours
test_data["Duration_mins"] = duration_mins
test_data.drop(["Duration"], axis = 1, inplace = True)


# Categorical data

print("Airline")
print("-"*75)
print(test_data["Airline"].value_counts())
Airline = pd.get_dummies(test_data["Airline"], drop_first= True)

print()

print("Source")
print("-"*75)
print(test_data["Source"].value_counts())
Source = pd.get_dummies(test_data["Source"], drop_first= True)

print()

print("Destination")
print("-"*75)
print(test_data["Destination"].value_counts())
Destination = pd.get_dummies(test_data["Destination"], drop_first = True)

# Additional_Info contains almost 80% no_info
# Route and Total_Stops are related to each other
test_data.drop(["Route", "Additional_Info"], axis = 1, inplace = True)

# Replacing Total_Stops
test_data.replace({"non-stop": 0, "1 stop": 1, "2 stops": 2, "3 stops": 3, "4 stops": 4}, inplace = True)

# Concatenate dataframe --> test_data + Airline + Source + Destination
data_test = pd.concat([test_data, Airline, Source, Destination], axis = 1)

data_test.drop(["Airline", "Source", "Destination"], axis = 1, inplace = True)

print()
print()

print("Shape of test data : ", data_test.shape)

Test data Info
---------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2671 entries, 0 to 2670
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Airline          2671 non-null   object
 1   Date_of_Journey  2671 non-null   object
 2   Source           2671 non-null   object
 3   Destination      2671 non-null   object
 4   Route            2671 non-null   object
 5   Dep_Time         2671 non-null   object
 6   Arrival_Time     2671 non-null   object
 7   Duration         2671 non-null   object
 8   Total_Stops      2671 non-null   object
 9   Additional_Info  2671 non-null   object
dtypes: object(10)
memory usage: 208.8+ KB
None


Null values :
---------------------------------------------------------------------------
Airline            0
Date_of_Journey    0
Source             0
Destination        0
Route              0
Dep_Time           0
Arrival_Time       0
Duration           0
Total_Stops        0
Additional_Info    0
dtype: int64
Airline
---------------------------------------------------------------------------
Jet Airways                          897
IndiGo                               511
Air India                            440
Multiple carriers                    347
SpiceJet                             208
Vistara                              129
Air Asia                              86
GoAir                                 46
Multiple carriers Premium economy      3
Vistara Premium economy                2
Jet Airways Business                   2
Name: Airline, dtype: int64

Source
---------------------------------------------------------------------------
Delhi       1145
Kolkata      710
Banglore     555
Mumbai       186
Chennai       75
Name: Source, dtype: int64

Destination
---------------------------------------------------------------------------
Cochin       1145
Banglore      710
Delhi         317
New Delhi     238
Hyderabad     186
Kolkata        75
Name: Destination, dtype: int64


Shape of test data :  (2671, 28)


data_test.shape

(2671, 28)


data_test.head()


data_train.head()


data_train.columns

Index(['Total_Stops', 'Price', 'Journy_Day', 'Journy_Month', 'Dep_hour',
       'Dep_min', 'Arrival_hour', 'Arrival_min', 'Duration_hours',
       'Duration_mins', 'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
       'Airline_Jet Airways', 'Airline_Jet Airways Business',
       'Airline_Multiple carriers',
       'Airline_Multiple carriers Premium economy', 'Airline_SpiceJet',
       'Airline_Trujet', 'Airline_Vistara', 'Airline_Vistara Premium economy',
       'Source_Chennai', 'Source_Delhi', 'Source_Kolkata', 'Source_Mumbai',
       'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad',
       'Destination_Kolkata', 'Destination_New Delhi'],
      dtype='object')


X = data_train.loc[:, ['Total_Stops', 'Journy_Day', 'Journy_Month', 'Dep_hour', 'Dep_min', 'Arrival_hour', 
                      'Duration_hours', 'Duration_mins', 'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
                      'Airline_Jet Airways', 'Airline_Jet Airways Business', 'Airline_Multiple carriers',
                      'Airline_Multiple carriers Premium economy', 'Airline_SpiceJet', 'Airline_Trujet',
                      'Airline_Vistara', 'Airline_Vistara Premium economy', 'Source_Chennai', 'Source_Delhi', 'Source_Kolkata',
                      'Source_Mumbai', 'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad', 'Destination_Kolkata',
                      'Destination_New Delhi']]


X.head()


y = data_train.iloc[:, 1]
y.head()

0     3897
1     7662
2    13882
3     6218
4    13302
Name: Price, dtype: int64


plt.figure(figsize=(18, 18))
sns.heatmap(train_df.corr(), annot=True, cmap='RdYlGn')
plt.show()

/var/folders/16/3468kndx5l1_zj5r84tsybgc0000gn/T/ipykernel_196/432270364.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  sns.heatmap(train_df.corr(), annot=True, cmap='RdYlGn')


# Important feature using ExtraTreesRegressor
from sklearn.ensemble import ExtraTreesRegressor
selection = ExtraTreesRegressor()
selection.fit(X, y)

ExtraTreesRegressor()

ExtraTreesRegressor()


print(selection.feature_importances_)

[2.16281287e-01 1.43561114e-01 5.53275544e-02 2.68252534e-02
 2.46186722e-02 3.23170611e-02 1.29350477e-01 2.16917730e-02
 1.14972302e-02 1.97152005e-03 1.85747928e-02 1.38611825e-01
 6.79090519e-02 1.81502325e-02 8.97371777e-04 3.24599248e-03
 1.12508503e-04 4.97202809e-03 8.22143694e-05 6.20273501e-04
 1.48915815e-02 3.36118903e-03 6.09060533e-03 9.23108677e-03
 1.63018973e-02 8.03502905e-03 5.39686688e-04 2.49306908e-02]


plt.figure(figsize=(12, 8))
feature_imp = pd.Series(selection.feature_importances_, index=X.columns)
feature_imp.nlargest(20).plot(kind='barh')
plt.show()


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


from sklearn.ensemble import RandomForestRegressor


rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)

RandomForestRegressor()

RandomForestRegressor()


y_pred = rf_model.predict(X_test)


rf_model.score(X_train, y_train)

0.9524462409365866


rf_model.score(X_test, y_test)

0.7975464372137973


sns.distplot(y_test - y_pred)

/var/folders/16/3468kndx5l1_zj5r84tsybgc0000gn/T/ipykernel_196/2332411778.py:1: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(y_test - y_pred)

<AxesSubplot:xlabel='Price', ylabel='Density'>


plt.scatter(y_test, y_pred, alpha=0.5)
plt.xlabel('y_test')
plt.ylabel('y_label')
plt.show()


from sklearn import metrics


print("MAE: ", metrics.mean_absolute_error(y_test, y_pred))
print("MSE: ", metrics.mean_squared_error(y_test, y_pred))
print("RMSE: ", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

MAE:  1179.374780326833
MSE:  4365315.123823327
RMSE:  2089.3336554565253


metrics.r2_score(y_test, y_pred)

0.7975464372137973


from sklearn.model_selection import RandomizedSearchCV


# Randomized Search CV

## Number of trees in ramdom forest
n_estimators = [int(x) for x in np.linspace(start=100, stop=1200, num=12)]
## Number of features to consider at every split
max_features = ['auto', 'sqrt']
## Maximum number of level in tree
max_depth = [int(x) for x in np.linspace(5, 30, num=6)]
## Minimum number of samples required to split a node
min_samples_split = [2, 5, 10, 15, 100]
## Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 5, 10]


## create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}
print(random_grid)

{'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200], 'max_features': ['auto', 'sqrt'], 'max_depth': [5, 10, 15, 20, 25, 30], 'min_samples_split': [2, 5, 10, 15, 100], 'min_samples_leaf': [1, 2, 5, 10]}


rf_random = RandomizedSearchCV(estimator=rf_model, param_distributions=random_grid, scoring='neg_mean_squared_error',
                               n_iter=10, cv=5, verbose=2, random_state=42, n_jobs=1)


rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=5, min_samples_split=5, n_estimators=900; total time=   2.0s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=5, min_samples_split=5, n_estimators=900; total time=   2.0s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=5, min_samples_split=5, n_estimators=900; total time=   2.0s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=5, min_samples_split=5, n_estimators=900; total time=   2.0s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=5, min_samples_split=5, n_estimators=900; total time=   2.0s
[CV] END max_depth=15, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1100; total time=   3.0s
[CV] END max_depth=15, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1100; total time=   3.0s
[CV] END max_depth=15, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1100; total time=   3.0s
[CV] END max_depth=15, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1100; total time=   3.1s
[CV] END max_depth=15, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=1100; total time=   3.1s

/Users/thomas/miniforge3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:414: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.
  warn(

[CV] END max_depth=15, max_features=auto, min_samples_leaf=5, min_samples_split=100, n_estimators=300; total time=   2.0s

/Users/thomas/miniforge3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:414: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.
  warn(

[CV] END max_depth=15, max_features=auto, min_samples_leaf=5, min_samples_split=100, n_estimators=300; total time=   2.0s

/Users/thomas/miniforge3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:414: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.
  warn(

[CV] END max_depth=15, max_features=auto, min_samples_leaf=5, min_samples_split=100, n_estimators=300; total time=   2.0s

/Users/thomas/miniforge3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:414: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.
  warn(

[CV] END max_depth=15, max_features=auto, min_samples_leaf=5, min_samples_split=100, n_estimators=300; total time=   2.0s

/Users/thomas/miniforge3/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:414: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features=1.0` or remove this parameter as it is also the default value for RandomForestRegressors and ExtraTreesRegressors.
  warn(


rf_random.best_params_

{'n_estimators': 700,
 'min_samples_split': 15,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'max_depth': 20}


y_pred = rf_random.predict(X_test)


plt.figure(figsize=(8, 8))
sns.displot(y_test-y_pred)
plt.show()

<Figure size 800x800 with 0 Axes>


plt.scatter(y_test, y_pred, alpha=0.5)
plt.xlabel('y_test')
plt.ylabel('y_label')
plt.show()


print("MAE: ", metrics.mean_absolute_error(y_test, y_pred))
print("MSE: ", metrics.mean_squared_error(y_test, y_pred))
print("RMSE: ", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

MAE:  1168.0038153543371
MSE:  4038348.193369491
RMSE:  2009.5641799578063


metrics.r2_score(y_test, y_pred)

0.812710432963472


import pickle


file = open('model.pkl', 'wb')

pickle.dump(rf_random, file)


model = pickle.load(open('model.pkl', 'rb'))


y_pred = model.predict(X_test)


metrics.r2_score(y_test, y_pred)

0.812710432963472


data_test.head()

EDA¶

Handling Categorical Data¶

Test Set¶

Feature Selection¶

Training part¶

Hyperparameter Tuning¶

	Airline	Date_of_Journey	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info	Price
0	IndiGo	24/03/2019	Banglore	New Delhi	BLR → DEL	22:20	01:10 22 Mar	2h 50m	non-stop	No info	3897
1	Air India	1/05/2019	Kolkata	Banglore	CCU → IXR → BBI → BLR	05:50	13:15	7h 25m	2 stops	No info	7662
2	Jet Airways	9/06/2019	Delhi	Cochin	DEL → LKO → BOM → COK	09:25	04:25 10 Jun	19h	2 stops	No info	13882
3	IndiGo	12/05/2019	Kolkata	Banglore	CCU → NAG → BLR	18:05	23:30	5h 25m	1 stop	No info	6218
4	IndiGo	01/03/2019	Banglore	New Delhi	BLR → NAG → DEL	16:50	21:35	4h 45m	1 stop	No info	13302

	Airline	Source	Destination	Route	Duration	Total_Stops	Additional_Info	Price	Journy_Day	Journy_Month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins
10678	Air Asia	Kolkata	Banglore	CCU → BLR	2h 30m	non-stop	No info	4107	9	4	19	55	22	25	2	30
10679	Air India	Kolkata	Banglore	CCU → BLR	2h 35m	non-stop	No info	4145	27	4	20	45	23	20	2	35
10680	Jet Airways	Banglore	Delhi	BLR → DEL	3h	non-stop	No info	7229	27	4	8	20	11	20	3	0
10681	Vistara	Banglore	New Delhi	BLR → DEL	2h 40m	non-stop	No info	12648	1	3	11	30	14	10	2	40
10682	Air India	Delhi	Cochin	DEL → GOI → BOM → COK	8h 20m	2 stops	No info	11753	9	5	10	55	19	15	8	20

	Airline_Air India	Airline_IndiGo	Airline_Jet Airways
0	0	1	0
1	1	0	0
2	0	0	1
3	0	1	0
4	0	1	0

	Airline	Date_of_Journey	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info
0	Jet Airways	6/06/2019	Delhi	Cochin	DEL → BOM → COK	17:30	04:25 07 Jun	10h 55m	1 stop	No info
1	IndiGo	12/05/2019	Kolkata	Banglore	CCU → MAA → BLR	06:20	10:20	4h	1 stop	No info
2	Jet Airways	21/05/2019	Delhi	Cochin	DEL → BOM → COK	19:15	19:00 22 May	23h 45m	1 stop	In-flight meal not included
3	Multiple carriers	21/05/2019	Delhi	Cochin	DEL → BOM → COK	08:00	21:00	13h	1 stop	No info
4	Air Asia	24/06/2019	Banglore	Delhi	BLR → DEL	23:55	02:45 25 Jun	2h 50m	non-stop	No info

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1

	Total_Stops	Journy_Day	Journy_Month	Dep_hour	Dep_min	Arrival_hour	Duration_hours	Duration_mins	Airline_Air India	Airline_IndiGo	Airline_Jet Airways	Source_Delhi	Source_Kolkata	Destination_Cochin	Destination_New Delhi
0	0	24	3	22	20	1	2	50	0	1	0	0	0	0	1
1	2	1	5	5	50	13	7	25	1	0	0	0	1	0	0
2	2	9	6	9	25	4	19	0	0	0	1	1	0	1	0
3	1	12	5	18	5	23	5	25	0	1	0	0	1	0	0
4	1	1	3	16	50	21	4	45	0	1	0	0	0	0	1

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1

	Airline_Air India	Airline_IndiGo	Airline_Jet Airways
0	0	1	0
1	1	0	0
2	0	0	1
3	0	1	0
4	0	1	0

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1

	Total_Stops	Journy_Day	Journy_Month	Dep_hour	Dep_min	Arrival_hour	Duration_hours	Duration_mins	Airline_Air India	Airline_IndiGo	Airline_Jet Airways	Source_Delhi	Source_Kolkata	Destination_Cochin	Destination_New Delhi
0	0	24	3	22	20	1	2	50	0	1	0	0	0	0	1
1	2	1	5	5	50	13	7	25	1	0	0	0	1	0	0
2	2	9	6	9	25	4	19	0	0	0	1	1	0	1	0
3	1	12	5	18	5	23	5	25	0	1	0	0	1	0	0
4	1	1	3	16	50	21	4	45	0	1	0	0	0	0	1

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1

	Airline_Air India	Airline_IndiGo	Airline_Jet Airways
0	0	1	0
1	1	0	0
2	0	0	1
3	0	1	0
4	0	1	0

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1

	Total_Stops	Journy_Day	Journy_Month	Dep_hour	Dep_min	Arrival_hour	Duration_hours	Duration_mins	Airline_Air India	Airline_IndiGo	Airline_Jet Airways	Source_Delhi	Source_Kolkata	Destination_Cochin	Destination_New Delhi
0	0	24	3	22	20	1	2	50	0	1	0	0	0	0	1
1	2	1	5	5	50	13	7	25	1	0	0	0	1	0	0
2	2	9	6	9	25	4	19	0	0	0	1	1	0	1	0
3	1	12	5	18	5	23	5	25	0	1	0	0	1	0	0
4	1	1	3	16	50	21	4	45	0	1	0	0	0	0	1

	Total_Stops	Journey_day	Journey_month	Dep_hour	Dep_min	Arrival_hour	Arrival_min	Duration_hours	Duration_mins	IndiGo	Jet Airways	Multiple carriers	Delhi	Kolkata	Cochin	Delhi
0	1	6	6	17	30	4	25	10	55	0	1	0	1	0	1	0
1	1	12	5	6	20	10	20	4	0	1	0	0	0	1	0	0
2	1	21	5	19	15	19	0	23	45	0	1	0	1	0	1	0
3	1	21	5	8	0	21	0	13	0	0	0	1	1	0	1	0
4	0	24	6	23	55	2	45	2	50	0	0	0	0	0	0	1