@inproceedings{allen-zhu2018natasha,
  author    = {Allen-Zhu, Zeyuan},
  title     = {Natasha 2: Faster Non-Convex Optimization Than {SGD}},
  booktitle = {Advances in Neural Information Processing Systems 31 (NeurIPS 2018)},
  year      = {2018},
  month     = {December},
  abstract  = {We design a stochastic algorithm to train any smooth neural network to $\varepsilon$-approximate local minima, using $O(\varepsilon^{-3.25})$ backpropagations. The previously best known result was essentially $O(\varepsilon^{-4})$, achieved by SGD. More broadly, the algorithm finds $\varepsilon$-approximate local minima of any smooth nonconvex function at a rate of $O(\varepsilon^{-3.25})$, with only oracle access to stochastic gradients.},
  url       = {https://www.microsoft.com/en-us/research/publication/natasha-2-faster-non-convex-optimization-than-sgd/},
}