import os
import urllib.request

import numpy
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Download the Titanic dataset if it is not already present locally.
url = 'http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls'
filepath = 'titanic3.xls'
if not os.path.isfile(filepath):
    result = urllib.request.urlretrieve(url, filepath)
    print('download:', result)

all_df = pd.read_excel(filepath)
print(all_df[:2])


def PreprocessData(raw_df):
    """Convert the raw passenger DataFrame into scaled feature and label arrays."""
    raw_df = raw_df.drop(['name'], axis=1)             # the name column is not a useful feature
    age_mean = raw_df['age'].mean()
    raw_df['age'] = raw_df['age'].fillna(age_mean)      # fill missing ages with the mean age
    fare_mean = raw_df['fare'].mean()
    raw_df['fare'] = raw_df['fare'].fillna(fare_mean)   # fill missing fares with the mean fare
    raw_df['sex'] = raw_df['sex'].map({'female': 0, 'male': 1}).astype(int)
    x_OneHot_df = pd.get_dummies(data=raw_df, columns=['embarked'])  # one-hot encode port of embarkation

    ndarray = x_OneHot_df.values
    Label = ndarray[:, 0]       # first column is 'survived'
    Features = ndarray[:, 1:]   # remaining columns are the input features

    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = minmax_scale.fit_transform(Features)
    return scaledFeatures, Label


# Keep only the columns the model uses, with 'survived' first so it can be split off as the label.
cols = ['survived', 'name', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
all_df = all_df[cols]
print(all_df[:2])

# Random 80/20 split into training and test sets.
msk = numpy.random.rand(len(all_df)) < 0.8
train_df = all_df[msk]
test_df = all_df[~msk]
print('total:', len(all_df), 'train:', len(train_df), 'test:', len(test_df))

train_Features, train_Label = PreprocessData(train_df)
test_Features, test_Label = PreprocessData(test_df)

# Multilayer perceptron: 9 inputs -> 40 -> 30 -> 1, sigmoid output for binary classification.
model = Sequential()
model.add(Dense(units=40, input_dim=9, kernel_initializer='uniform', activation='relu'))
model.add(Dense(units=30, kernel_initializer='uniform', activation='relu'))
model.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
train_history = model.fit(x=train_Features, y=train_Label,
                          validation_split=0.1, epochs=30, batch_size=30, verbose=2)


def show_train_history(train_history, train, validation):
    """Plot a training metric against its validation counterpart over the epochs."""
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# Note: recent Keras versions record the keys as 'accuracy'/'val_accuracy' instead of 'acc'/'val_acc'.
show_train_history(train_history, 'acc', 'val_acc')
show_train_history(train_history, 'loss', 'val_loss')

scores = model.evaluate(x=test_Features, y=test_Label)
print(scores[1])  # test-set accuracy

# Add two fictional passengers, Jack and Rose, and predict their survival probabilities
# together with the rest of the dataset.
Jack = pd.Series([0, 'Jack', 3, 'male', 23, 1, 0, 5.0000, 'S'])
Rose = pd.Series([1, 'Rose', 1, 'female', 20, 1, 0, 100.0000, 'S'])
JR_df = pd.DataFrame([list(Jack), list(Rose)],
                     columns=['survived', 'name', 'pclass', 'sex', 'age',
                              'sibsp', 'parch', 'fare', 'embarked'])
JR_df = pd.concat([all_df, JR_df], ignore_index=True)

JR_Features, JR_Label = PreprocessData(JR_df)
JR_probability = model.predict(JR_Features)
print(JR_probability)

# Attach the predicted probabilities as a new column (flatten the (n, 1) prediction array to 1-D).
JR_df.insert(len(JR_df.columns), 'probability', JR_probability.flatten())
print(JR_df)