Citation

BibTeX format

@inproceedings{Dai:2021:10.1007/978-3-030-89370-5_3,
  author        = {Dai, T. and Liu, H. and Arulkumaran, K. and Ren, G. and Bharath, A. A.},
  title         = {Diversity-Based Trajectory and Goal Selection with Hindsight Experience Replay},
  booktitle     = {{PRICAI} 2021: Trends in Artificial Intelligence},
  series        = {Lecture Notes in Computer Science},
  volume        = {13033},
  pages         = {32--45},
  publisher     = {Springer},
  year          = {2021},
  doi           = {10.1007/978-3-030-89370-5_3},
  url           = {https://doi.org/10.1007/978-3-030-89370-5_3},
  eprint        = {2108.07887},
  archiveprefix = {arXiv},
  internal-note = {NOTE(review): booktitle, series and volume were absent from the original export and were filled in by hand -- confirm against the DOI landing page. The eprint id is taken from the arXiv URL in the accompanying RIS record.},
}

RIS format (EndNote, RefMan)

TY  - CPAPER
AB  - Hindsight experience replay (HER) is a goal relabelling technique typically used with off-policy deep reinforcement learning algorithms to solve goal-oriented tasks; it is well suited to robotic manipulation tasks that deliver only sparse rewards. In HER, both trajectories and transitions are sampled uniformly for training. However, not all of the agent's experiences contribute equally to training, and so naive uniform sampling may lead to inefficient learning. In this paper, we propose diversity-based trajectory and goal selection with HER (DTGSH). Firstly, trajectories are sampled according to the diversity of the goal states as modelled by determinantal point processes (DPPs). Secondly, transitions with diverse goal states are selected from the trajectories by using k-DPPs. We evaluate DTGSH on five challenging robotic manipulation tasks in simulated robot environments, where we show that our method can learn more quickly and reach higher performance than other state-of-the-art approaches on all tasks.
AU  - Dai,T
AU  - Liu,H
AU  - Arulkumaran,K
AU  - Ren,G
AU  - Bharath,AA
DO  - 10.1007/978-3-030-89370-5_3
EP  - 45
PB  - Springer
PY  - 2021///
SP  - 32
TI  - Diversity-based trajectory and goal selection with hindsight experience replay
UR  - http://dx.doi.org/10.1007/978-3-030-89370-5_3
UR  - http://arxiv.org/abs/2108.07887v1
UR  - https://link.springer.com/chapter/10.1007/978-3-030-89370-5_3
UR  - http://hdl.handle.net/10044/1/91238
ER  - 