Ex. 6.12

Write a computer program to perform a local linear discriminant analysis. At each query point \(x_0\), the training data receive weights \(K_\lambda(x_0, x_i)\) from a weighting kernel, and the ingredients for the linear decision boundaries (see Section 4.3) are computed by weighted averages. Try out your program on the zipcode data, and show the training and test errors for a series of five pre-chosen values of \(\lambda\). The zipcode data are available from the book website.

Soln. 6.12

We choose the RBF kernel

\[
K_\lambda(x, y) = \exp\left(-\lambda\|x-y\|^2\right),
\]

with five different choices of \(\lambda \in\{ 0.001, 0.01, 0.05, 0.1, 1\}\).
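The implementation below computes this kernel with scikit-learn's `rbf_kernel`, whose `gamma` parameter plays the role of \(\lambda\) here; a quick sanity check of that correspondence:

```python
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

x = np.array([[0.0, 1.0]])
y = np.array([[1.0, 3.0]])
lam = 0.01
# rbf_kernel computes exp(-gamma * ||x - y||^2), so gamma == lambda here
assert np.isclose(rbf_kernel(x, y, gamma=lam)[0, 0],
                  np.exp(-lam * np.sum((x - y) ** 2)))
```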

The linear discriminant function is defined as (see (4.10) in the text)

\[
\delta_k(x_0) = x_0^T\Sigma^{-1}(x_0)\mu_k(x_0) - \frac{1}{2}\mu_k^T(x_0)\Sigma^{-1}(x_0)\mu_k(x_0) + \log(\pi_k),
\]

in which we estimate the unknown parameters by

\[\begin{aligned}
\hat{\pi}_k &= N_k/N, \quad \text{where } N_k \text{ is the number of class-}k\text{ observations},\\
\hat{\mu}_k(x_0) &= \sum_{g_i=k} K_\lambda(x_0, x_i)\,x_i\Big/N_k,\\
\hat{\Sigma}(x_0) &= \sum_{k=1}^K\sum_{g_i=k}K_\lambda(x_0, x_i)\bigl(x_i-\hat{\mu}_k(x_0)\bigr)\bigl(x_i-\hat{\mu}_k(x_0)\bigr)^T\Big/(N-K).
\end{aligned}\]
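The predicted class at \(x_0\) is then \(\hat{g}(x_0) = \arg\max_k \delta_k(x_0)\). As a sanity check on these formulas, here is a minimal NumPy sketch on made-up two-class data (illustrative only, not the zipcode set):

```python
import numpy as np

rng = np.random.default_rng(0)
# made-up two-class data in two dimensions
X = np.vstack([rng.normal(0, 1, (5, 2)), rng.normal(3, 1, (5, 2))])
y = np.repeat([0, 1], 5)
x0, lam = np.zeros(2), 0.1

w = np.exp(-lam * np.sum((X - x0) ** 2, axis=1))      # K_lambda(x0, x_i)
N, K = len(y), 2
# kernel-weighted class means, divided by the class counts N_k
mu = np.array([(w[y == k, None] * X[y == k]).sum(axis=0) / (y == k).sum()
               for k in range(K)])
# kernel-weighted pooled within-class covariance, divided by N - K
Sigma = sum(w[i] * np.outer(X[i] - mu[y[i]], X[i] - mu[y[i]])
            for i in range(N)) / (N - K)

# discriminant scores delta_k(x0) and the predicted class
Sigma_inv = np.linalg.inv(Sigma)
pi = np.bincount(y) / N
delta = (mu @ Sigma_inv @ x0
         - 0.5 * np.einsum('ij,jk,ik->i', mu, Sigma_inv, mu) + np.log(pi))
print('predicted class:', delta.argmax())
```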

We summarize training and test error rates in Table 1 below.

TODO: rerun the script to update the numbers in Table 1 (the row for \(\lambda = 0.05\) is still missing).

| Model | Train Error Rate | Test Error Rate |
|-------|------------------|-----------------|
| LDA | 6.20% | 11.45% |
| QDA | 7.71% | 18.19% |
| Local LDA (\(\lambda = 0.001\)) | 0.0% | 11.11% |
| Local LDA (\(\lambda = 0.01\)) | 0.5% | 3.0% |
| Local LDA (\(\lambda = 0.1\)) | 0.65% | 3.30% |
| Local LDA (\(\lambda = 1\)) | 0.94% | 3.85% |

Table 1: Misclassification Rates for Local LDA Models

Code: 6.12
```python
import pathlib

import numpy as np
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
                                           QuadraticDiscriminantAnalysis)

from LocalLDA import LocalLinearDiscriminantAnalysis

# locate the data folder relative to this script
PATH = pathlib.Path(__file__).resolve().parents[1]
DATA_PATH = PATH.joinpath("data").resolve()

# load the zipcode data; the first column holds the digit label
train = np.genfromtxt(DATA_PATH.joinpath("zip_train.csv"), dtype=float,
                      delimiter=',', skip_header=True)
test = np.genfromtxt(DATA_PATH.joinpath("zip_test.csv"), dtype=float,
                     delimiter=',', skip_header=True)

x_train, y_train = train[:, 1:], train[:, 0]
x_test, y_test = test[:, 1:], test[:, 0]

# baselines: ordinary (global) LDA and QDA from scikit-learn
for baseline in (LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis()):
    baseline.fit(x_train, y_train)
    print('{}: train error {:.4f}, test error {:.4f}'.format(
        type(baseline).__name__,
        1 - baseline.score(x_train, y_train),
        1 - baseline.score(x_test, y_test)))

# local LDA; gamma plays the role of lambda in K_lambda
clf = LocalLinearDiscriminantAnalysis(gamma=0.01)

err = 0
for i in range(x_test.shape[0]):
    pred = clf.predict(x_test[i], x_train, y_train)
    if pred != y_test[i]:
        err += 1
    print('point {}: running error rate {:.4f}'.format(i, err / (i + 1)))

print('test error rate: {:.4f}'.format(err / x_test.shape[0]))
```
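To fill in Table 1, the same evaluation loop can be wrapped in a sweep over the five pre-chosen values of \(\lambda\); a sketch:

```python
# sweep over the five pre-chosen values of lambda (gamma in the code)
for lam in (0.001, 0.01, 0.05, 0.1, 1):
    clf = LocalLinearDiscriminantAnalysis(gamma=lam)
    err = sum(clf.predict(x_test[i], x_train, y_train) != y_test[i]
              for i in range(x_test.shape[0]))
    print('lambda = {}: test error rate {:.4f}'.format(lam, err / x_test.shape[0]))
```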
Code: LocalLDA
```python
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.utils.multiclass import unique_labels


def _class_weighted_means(x0, X, y, gamma):
    """Compute the kernel-weighted class means at the query point x0.

    Parameters
    ----------
    x0 : array-like of shape (n_features,)
        Query point.

    X : array-like of shape (n_samples, n_features)
        Training data.

    y : array-like of shape (n_samples,)
        Class labels.

    gamma : float
        RBF kernel parameter: K(x, y) = exp(-gamma * ||x - y||^2).

    Returns
    -------
    means : ndarray of shape (n_classes, n_features)
        Kernel-weighted class means, one row per class.
    """
    classes, y = np.unique(y, return_inverse=True)
    cnt = np.bincount(y)
    weights = rbf_kernel(X, Y=x0.reshape(1, -1), gamma=gamma)
    # scale each row of X by K_lambda(x0, x_i); broadcasting avoids
    # building the (n_samples, n_samples) diagonal weight matrix
    X_weighted = weights * X
    means = np.zeros((len(classes), X.shape[1]))
    np.add.at(means, y, X_weighted)
    means /= cnt[:, None]
    return means


def _class_cov(x0, X, y, gamma):
    """Compute the kernel-weighted pooled within-class covariance at x0.

    Each outer product of deviations from the class mean is weighted by
    K_lambda(x0, x_i), pooled over classes, and divided by (N - K), as in
    the estimate of Sigma(x0) above.

    Parameters
    ----------
    x0, X, y, gamma : as in _class_weighted_means.

    Returns
    -------
    cov : ndarray of shape (n_features, n_features)
        Kernel-weighted pooled within-class covariance matrix.
    """
    classes = np.unique(y)
    N, K = X.shape[0], len(classes)
    if N <= K:
        raise ValueError('number of samples must be greater than number of classes')
    # compute the weighted means and kernel weights once, outside the loop
    means = _class_weighted_means(x0, X, y, gamma=gamma)
    weights = rbf_kernel(X, Y=x0.reshape(1, -1), gamma=gamma).ravel()
    cov = np.zeros((X.shape[1], X.shape[1]))
    for idx, group in enumerate(classes):
        Xg = X[y == group] - means[idx]
        wg = weights[y == group]
        cov += np.dot(Xg.T, wg[:, None] * Xg)
    return cov / (N - K)


class LocalLinearDiscriminantAnalysis(BaseEstimator):
    """Local LDA: the discriminant ingredients are refit at each query point."""

    def __init__(self, gamma=0.5):
        """
        Parameters
        ----------
        gamma : float
            RBF kernel parameter; plays the role of lambda in K_lambda.
        """
        self.gamma = gamma

    def predict(self, x0, X, y):
        """Predict the class of a single query point x0.

        Parameters
        ----------
        x0 : array-like of shape (n_features,)
            Query point.

        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Class labels.

        Returns
        -------
        y_pred : the predicted class label for x0.
        """
        X, y = self._validate_data(X, y, ensure_min_samples=2, estimator=self,
                                   dtype=[np.float64, np.float32])
        self.classes_ = unique_labels(y)
        n_samples, _ = X.shape
        n_classes = len(self.classes_)

        if n_samples <= n_classes:
            raise ValueError("The number of samples must be more "
                             "than the number of classes.")

        cov = _class_cov(x0, X, y, self.gamma)
        # np.linalg.pinv is a safer alternative here if the local
        # covariance happens to be singular
        cov_inv = np.linalg.inv(cov)
        means = _class_weighted_means(x0, X, y, self.gamma)

        # class priors pi_k = N_k / N, computed once outside the loop
        _, y_idx = np.unique(y, return_inverse=True)
        priors = np.bincount(y_idx) / n_samples

        # delta_k(x0) = x0' S^-1 mu_k - 0.5 mu_k' S^-1 mu_k + log pi_k
        deltas = np.empty(n_classes)
        for i in range(n_classes):
            deltas[i] = (np.dot(x0, np.dot(cov_inv, means[i]))
                         - 0.5 * np.dot(means[i], np.dot(cov_inv, means[i]))
                         + np.log(priors[i]))

        return self.classes_.take(deltas.argmax())
```