{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Compare Regression Losses with GridSearchCV\n", "\n", "Due to its compatibility with the scikit-learn API, `GridSearchCV` can be used to compare `Huber`, `MAE`, and `MSE` losses under a unified pipeline, identify the best loss, and select its optimal hyperparameter." ], "metadata": { "id": "_NvZXRY3oIW6" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "O_SFIipUcSyD" }, "outputs": [], "source": [ "## install rehline\n", "%pip install rehline -q" ] }, { "cell_type": "code", "source": [ "## simulate data\n", "from sklearn.datasets import make_regression\n", "\n", "X, y = make_regression(n_samples=10000, n_features=5, noise=30.0, random_state=42)\n", "y = y / y.std()\n", "y[-40:] += 20" ], "metadata": { "id": "mLPT_rZskyrr" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "## compare Huber, MAE and MSE via GridSearchCV\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from rehline import plq_Ridge_Regressor\n", "from sklearn.model_selection import GridSearchCV\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "pipe = Pipeline([\n", " ('scaler', StandardScaler()),\n", " ('reg', plq_Ridge_Regressor(loss={'name': 'mse'}))\n", "])\n", "\n", "# Define the parameter grid to search\n", "param_grid = {\n", " 'reg__C': [0.1, 1.0, 10.0],\n", " 'reg__loss': [{'name': 'huber', 'tau': 1.0}, {'name': 'mae'}, {'name': 'mse'}]\n", "}\n", "\n", "# Create the GridSearchCV object\n", "grid_search = GridSearchCV(pipe, param_grid, cv=5)\n", "grid_search.fit(X, y)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 188 }, "id": "cd-YVK8UpDD0", "outputId": "d3d158df-1329-42a3-e402-1ab30ce693e9" }, "execution_count": 3, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "GridSearchCV(cv=5,\n", " estimator=Pipeline(steps=[('scaler', StandardScaler()),\n", " ('reg',\n", " plq_Ridge_Regressor(loss={'name': 'mse'}))]),\n", " param_grid={'reg__C': [0.1, 1.0, 10.0],\n", " 'reg__loss': [{'name': 'huber', 'tau': 1.0},\n", " {'name': 'mae'}, {'name': 'mse'}]})" ], "text/html": [ "
GridSearchCV(cv=5,\n",
" estimator=Pipeline(steps=[('scaler', StandardScaler()),\n",
" ('reg',\n",
" plq_Ridge_Regressor(loss={'name': 'mse'}))]),\n",
" param_grid={'reg__C': [0.1, 1.0, 10.0],\n",
" 'reg__loss': [{'name': 'huber', 'tau': 1.0},\n",
" {'name': 'mae'}, {'name': 'mse'}]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=5,\n",
" estimator=Pipeline(steps=[('scaler', StandardScaler()),\n",
" ('reg',\n",
" plq_Ridge_Regressor(loss={'name': 'mse'}))]),\n",
" param_grid={'reg__C': [0.1, 1.0, 10.0],\n",
" 'reg__loss': [{'name': 'huber', 'tau': 1.0},\n",
" {'name': 'mae'}, {'name': 'mse'}]})Pipeline(steps=[('scaler', StandardScaler()),\n",
" ('reg', plq_Ridge_Regressor(loss={'name': 'mae'}))])StandardScaler()
plq_Ridge_Regressor(loss={'name': 'mae'})