(a)
By definition of the kernel \(K\), we have
\[\begin{equation}
K(x, y) = \sum_{m=1}^Mh_m(x)h_m(y) = \sum_{i=1}^\infty \gamma_i\phi_i(x)\phi_i(y).\non
\end{equation}\]
Multiply each summand above by \(\phi_k(x)\) and calculate \(\langle K(x,y), \phi_k(x) \rl\),
\[\begin{equation}
\label{eq:5-16a}
\sum_{m=1}^M\langle h_m(x), \phi_k(x)\rl h_m(y) = \sum_{i=1}^\infty \langle \phi_i(x), \phi_k(x)\rl \phi_i(y).
\end{equation}\]
Since \(\{\phi_i, i=1,...,\infty\}\) are orthogonal, we have
\[\begin{equation}
\langle \phi_i(x), \phi_k(x) \rl = \begin{cases}
1 & \text{ if } i=k,\\
0 & \text{ otherwise.}
\end{cases}\non
\end{equation}\]
Thus, \(\eqref{eq:5-16a}\) becomes
\[\begin{equation}
\sum_{m=1}^M\langle h_m(x), \phi_k(x)\rl h_m(y) = \gamma_k\phi_k(y).\non
\end{equation}\]
Let \(g_{km} = \langle h_m(x), \phi_k(x) \rl\) and calculate \(\langle K(x,y), \phi_l(y) \rl\), we get
\[\begin{eqnarray}
\sum_{m=1}^M g_{km}h_m(y) &=& \gamma_k\phi_k(y),\non\\
\sum_{m=1}^M g_{km} \langle h_m(y), \phi_l(y) \rl &=& \gamma_k \langle \phi_k(y), \phi_l(y) \rl ,\non\\
\sum_{m=1}^M g_{km}g_{lm} &=& \gamma_k\delta_{k,l}\non
\end{eqnarray}\]
where \(\delta_{k,l} = 1\) if \(k=l\) and 0 otherwise.
Let \(\bb{G}_M = \{g_{nm}\} \in \mathbb{R}^{M\times N}\), we have
\[\begin{equation}
\bb{G}_M\bb{G}_M^T = \text{diag}\{\gamma_1,\gamma_2,...,\gamma_M\} = \bb{D}_\gamma.\non
\end{equation}\]
Let \(\bb{V}^T = \bb{D}_\gamma^{-\frac{1}{2}}\bb{G}_M\), we have
\[\begin{equation}
\bb{V}\bb{V}^T\bb{G}_M^T = \bb{G}_M^T\bb{D}_\gamma^{-1}\bb{G}_M = \bb{I}_N.\non
\end{equation}\]
Let \(h(x) = (h_1(x), h_2(x), ..., h_M(x))^T\) and \(\phi(x) = (\phi_1(x), \phi_2(x), ..., \phi_M(x))^T\), then the three equations above can be rewritten as
\[\begin{eqnarray}
\bb{G}_Mh(x) &=& \bb{D}_\gamma\phi(x)\non\\
\bb{V}\bb{D}^{-\frac{1}{2}}_\gamma\bb{G}_Mh(x) &=& \bb{V}\bb{D}^{-\frac{1}{2}}_\gamma\bb{D}_\gamma\phi(x)\non\\
h(x) &=&\bb{V}\bb{D}^{\frac{1}{2}}_\gamma.\non
\end{eqnarray}\]
To show that (5.63) is equivalent to (5.53) in the text, we start with (5.63). Let \(\beta = (\beta_1, \beta_2,...,\beta_m)^T\) and \(c=\bb{D}_\gamma^{\frac{1}{2}}\bb{V}^T\beta\),
\[\begin{eqnarray}
&& \min_{\{\beta_m\}_1^M}\sum_{i=1}^N\left(y_i-\sum_{m=1}^M\beta_mh_m(x_i)\right)^2 + \lambda \sum_{m=1}^M\beta_m^2\non\\
&=&\min_{\beta}\sum_{i=1}^N(y_i-\beta^Th(x_i))^2 + \lambda\beta^T\beta\non\\
&=&\min_{\beta} \sum_{i=1}^N(y_i-\beta^T\bb{V}\bb{D}^{\frac{1}{2}}_\gamma\phi(x_i))^2 + \lambda\beta^T\beta\non\\
&=&\min_{c}\sum_{i=1}^N(y_i-c^T\phi(x_i))^2 + \lambda (\bb{V}\bb{D}^{\frac{1}{2}}_\gamma c)^T \bb{V}\bb{D}^{\frac{1}{2}}_\gamma c\non\\
&=&\min_{c}\sum_{i=1}^N(y_i-c^T\phi(x_i))^2 + \lambda c^Tc\bb{D}_\gamma^{-1}\non\\
&=&\min_{\{c_j\}_1^\infty}\sum_{i=1}^N\left(y_i-\sum_{j=1}^\infty c_j\phi_j(x_i)\right)^2 + \lambda \sum_{j=1}^\infty \frac{c_j^2}{\gamma_j},\non
\end{eqnarray}\]
which is (5.53) in the text.
(b)
Recall that in (a) we have
\[\begin{equation}
\min_{\beta}\sum_{i=1}^N(y_i-\beta^Th(x_i))^2 + \lambda\beta^T\beta.\non
\end{equation}\]
Taking derivative w.r.t \(\beta\) and setting it to be zero yields
\[\begin{equation}
-\bb{H}^T(\by-\bb{H}\hat\beta) + \lambda \hat\beta = 0.\non
\end{equation}\]
Thus we have
\[\begin{equation}
\hat\beta = (\bb{H}^T\bb{H} + \lambda \bb{I})^{-1}\bb{H}^T\by\non
\end{equation}\]
and
\[\begin{equation}
\hat{\mathbf{f}} = \bb{H}(\bb{H}^T\bb{H} + \lambda \bb{I})^{-1}\bb{H}^T\by.\non
\end{equation}\]
By Woodbury matrix identity, we have
\[\begin{eqnarray}
(\bb{H}^T\bb{H} + \lambda \bb{I})^{-1} &=& \frac{1}{\lambda}\bb{I} - \frac{1}{\lambda}\bb{I}\bb{H}^T\left(\bb{I} + \frac{1}{\lambda}\bb{H}\bb{H}^T\right)^{-1}\bb{H}\cdot\frac{1}{\lambda}\bb{I}\non.
\end{eqnarray}\]
Therefore, we have
\[\begin{eqnarray}
\hat{\mathbf{f}} &=& \frac{1}{\lambda}\bb{H}\bb{H}^T\by - \frac{1}{\lambda}\bb{H}\bb{H}^T\left(\lambda\bb{I}+\bb{H}\bb{H}^T\right)^{-1}\bb{H}\bb{H}^T\by\non\\
&=&\frac{1}{\lambda}\bb{H}\bb{H}^T\left[\bb{I}-(\lambda\bb{I}+\bb{H}\bb{H}^T)^{-1}\bb{H}\bb{H}^T\right]\by\non\\
&=&\frac{1}{\lambda}\bb{H}\bb{H}^T\left[(\lambda\bb{I}+\bb{H}\bb{H}^T)^{-1}(\lambda\bb{I}+\bb{H}\bb{H}^T)-(\lambda\bb{I}+\bb{H}\bb{H}^T)^{-1}\bb{H}\bb{H}^T\right]\by\non\\
&=&\frac{1}{\lambda}\bb{H}\bb{H}^T\left[(\lambda\bb{I}+\bb{H}\bb{H}^T)^{-1}\lambda\bb{I}\right]\by\non\\
&=&\bb{H}\bb{H}^T(\lambda\bb{I}+\bb{H}\bb{H}^T)^{-1}\by\non\\
&=&\bb{K}(\bb{K}+\lambda\bb{I})^{-1}\by.\non
\end{eqnarray}\]