@inproceedings{barber1998ensemble,
  author    = {Barber, David and Bishop, Christopher},
  title     = {Ensemble learning for multi-layer networks},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {10},
  pages     = {395--401},
  year      = {1998},
  month     = jan,
  abstract  = {Bayesian treatments of learning in neural networks are typically based either on local Gaussian approximations to a mode of the posterior weight distribution, or on Markov chain Monte Carlo simulations. A third approach, called ensemble learning, was introduced by Hinton (1993). It aims to approximate the posterior distribution by minimizing the Kullback-Leibler divergence between the true posterior and a parametric approximating distribution. However, the derivation of a deterministic algorithm relied on the use of a Gaussian approximating distribution with a diagonal covariance matrix and so was unable to capture the posterior correlations between parameters. In this paper, we show how the ensemble learning approach can be extended to full-covariance Gaussian distributions while remaining computationally tractable. We also extend the framework to deal with hyperparameters, leading to a simple re-estimation procedure. Initial results from a standard benchmark problem are encouraging.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/ensemble-learning-for-multi-layer-networks/},
}