from sklearn.metrics import make_scorer

# Scorers passed to cross_validate. The four MSE-style metrics are wrapped
# with greater_is_better=False, which makes scikit-learn report them with the
# sign flipped (i.e. as negative numbers); q90 keeps the default orientation.
# The metric callables themselves (mse_low, mse_mid, mse_hi, mse, q90) are
# defined elsewhere.
scoring = {
    label: make_scorer(metric, greater_is_better=False)
    for label, metric in (
        ('mse_low', mse_low),
        ('mse_mid', mse_mid),
        ('mse_hi', mse_hi),
        ('mse', mse),
    )
}
scoring['q90'] = make_scorer(q90)

# Features are the three magnitude columns; the target is the single
# 'mag_aca_obs' column taken as a 1-D column rather than a one-column frame.
x_train = data[['g_mag', 'bp_mag', 'rp_mag']]
y_train = data['mag_aca_obs']

# Cross-validate every named pipeline with 20 folds and keep the raw result
# of each run keyed by pipeline name. `pipelines[name]['pipeline']` is called
# to obtain a fresh pipeline object for each run.
# NOTE(review): `name` and `pipeline` stay bound to the last entry after the
# loop finishes — later code may rely on that.
scores = {}
for name in [
    'simple',
    'rf',
    'ad_hoc',
    'simple_10_3',
    'simple_9',
    'linear_pca_2',
    'linear_pca_3',
    'ad_hoc_2',
]:
    print(name)
    pipeline = pipelines[name]['pipeline']()
    scores[name] = cross_validate(
        pipeline,
        x_train,
        y_train,
        scoring=scoring,
        cv=20,
        return_train_score=True,
    )
# NOTE(review): from here on `scores` is indexed by a list of columns and
# grouped by 'name', so it is presumably a DataFrame with one row per
# (pipeline, fold) by this point — the conversion is not visible here; confirm.

# Per-row test scores for every pipeline.
scores[
    ['name', 'test_mse', 'test_mse_low', 'test_mse_mid', 'test_mse_hi', 'test_q90']
]

# Square root of the negated per-pipeline mean of the MSE columns. The
# negation undoes the sign flip applied to scorers registered with
# greater_is_better=False.
np.sqrt(
    -scores.groupby('name')[
        ['test_mse', 'test_mse_low', 'test_mse_mid', 'test_mse_hi']
    ].mean()
)

# Per-pipeline mean of every test score, exactly as reported (MSE columns
# still carry their flipped sign here).
scores.groupby('name')[
    ['test_mse', 'test_mse_low', 'test_mse_mid', 'test_mse_hi', 'test_q90']
].mean()
# Pull the first three step objects out of `pipeline` (element [1] of each
# entry in `pipeline.steps`).
# NOTE(review): `pipeline` is whatever that name currently refers to — it is
# assumed to be an already-fitted pipeline whose first two steps expose
# `.transform`; confirm which pipeline is intended.
imputer = pipeline.steps[0][1]
params = pipeline.steps[1][1]
linear = pipeline.steps[2][1]

# Split the data with `x_val` (presumably a boolean mask selecting the
# validation rows — verify): rows where it is False form the training set,
# rows where it is True form the test set.
train = data[~x_val].reset_index()
test = data[x_val].reset_index()

# Raw features and observed magnitudes for each split.
x_train = train[['g_mag', 'bp_mag', 'rp_mag']]
y_train = train[['mag_aca_obs']]
x_test = test[['g_mag', 'bp_mag', 'rp_mag']]
y_test = test[['mag_aca_obs']]

# Run both splits through the first two pipeline steps (imputer, then
# `params`) to get the inputs the model is fitted on.
X_train = params.transform(imputer.transform(x_train))
X_test = params.transform(imputer.transform(x_test))

# Fit the AdHoc model on the transformed training features; the target is
# flattened to 1-D because y_train is a one-column frame.
ad_hoc = AdHoc(low_threshold=9., method=0)
ad_hoc.fit(X_train, y_train.values.ravel())

# Predictions and residuals (predicted minus observed) for both splits.
train['y_pred'] = ad_hoc.predict(X_train)
train['y_res'] = train['y_pred'] - train['mag_aca_obs']
test['y_pred'] = ad_hoc.predict(X_test)
test['y_res'] = test['y_pred'] - test['mag_aca_obs']

# Summarise the residuals with the project helper and plot training against
# test, then make the first axes current for any plotting that follows.
residuals = binned_residuals(train)
t_residuals = binned_residuals(test)
fig, axes = plot_res_v_mag(residuals, t_residuals)
plt.sca(axes[0])