{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n=================================================\nSVM: Separating hyperplane for unbalanced classes\n=================================================\n\nFind the optimal separating hyperplane using an SVC for classes that\nare unbalanced.\n\nWe first find the separating plane with a plain SVC and then plot\n(dashed) the separating hyperplane with automatically correction for\nunbalanced classes.\n\n.. currentmodule:: sklearn.linear_model\n\n

#### Note

This example will also work by replacing ``SVC(kernel=\"linear\")``\n with ``SGDClassifier(loss=\"hinge\")``. Setting the ``loss`` parameter\n of the :class:`SGDClassifier` equal to ``hinge`` will yield behaviour\n such as that of a SVC with a linear kernel.\n\n For example try instead of the ``SVC``::\n\n clf = SGDClassifier(n_iter=100, alpha=0.01)

\n\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import svm\n\n# we create clusters with 1000 and 100 points\nrng = np.random.RandomState(0)\nn_samples_1 = 1000\nn_samples_2 = 100\nX = np.r_[1.5 * rng.randn(n_samples_1, 2),\n 0.5 * rng.randn(n_samples_2, 2) + [2, 2]]\ny =  * (n_samples_1) +  * (n_samples_2)\n\n# fit the model and get the separating hyperplane\nclf = svm.SVC(kernel='linear', C=1.0)\nclf.fit(X, y)\n\n# fit the model and get the separating hyperplane using weighted classes\nwclf = svm.SVC(kernel='linear', class_weight={1: 10})\nwclf.fit(X, y)\n\n# plot separating hyperplanes and samples\nplt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')\nplt.legend()\n\n# plot the decision functions for both classifiers\nax = plt.gca()\nxlim = ax.get_xlim()\nylim = ax.get_ylim()\n\n# create grid to evaluate model\nxx = np.linspace(xlim, xlim, 30)\nyy = np.linspace(ylim, ylim, 30)\nYY, XX = np.meshgrid(yy, xx)\nxy = np.vstack([XX.ravel(), YY.ravel()]).T\n\n# get the separating hyperplane\nZ = clf.decision_function(xy).reshape(XX.shape)\n\n# plot decision boundary and margins\na = ax.contour(XX, YY, Z, colors='k', levels=, alpha=0.5, linestyles=['-'])\n\n# get the separating hyperplane for weighted classes\nZ = wclf.decision_function(xy).reshape(XX.shape)\n\n# plot decision boundary and margins for weighted classes\nb = ax.contour(XX, YY, Z, colors='r', levels=, alpha=0.5, linestyles=['-'])\n\nplt.legend([a.collections, b.collections], [\"non weighted\", \"weighted\"],\n loc=\"upper right\")\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 0 }