Hacker News on Nostr: Training Language Models to Self-Correct via Reinforcement Learning ...
Published at
2024-09-20 15:12:03
Event JSON
{
"id": "3fa021f5ead1904ce52a1965b9919b8ed16d116025be7b628bd9df0ce3407624",
"pubkey": "9467fa78b5c1d5316c7d0b5ce9e67476055aac3546c7f440f090f3f126084411",
"created_at": 1726845123,
"kind": 1,
"tags": [
[
"t",
"hackernews"
],
[
"t",
"tech"
],
[
"proxy",
"https://qoto.org/users/HN/statuses/113170522037133224",
"activitypub"
]
],
"content": "Training Language Models to Self-Correct via Reinforcement Learning\nhttps://news.ycombinator.com/item?id=41600179\n#hackernews #tech",
"sig": "63f9ec9b3b165db0a8bdc7a541828a9c63a35d151623c6ac6aacc6b5fdbb7ef2a11fa5dd5f65e71e167e0f887a519f6e5e06351fef6c1b21c437de52c4be1f64"
}