devopscats on Nostr: Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet ...
Published at
2024-08-29 22:22:38
Event JSON
{
"id": "f5d98011a10cec1f3680765a6f01be0d4a7bad3abd8507c1f192c9f5d1f0b77e",
"pubkey": "e8e1ec0aa8819c6b43a17ea1307127db1991ebb4eaabb1f104b95df1d20dc012",
"created_at": 1724970158,
"kind": 1,
"tags": [
[
"proxy",
"https://toot.cat/users/devopscats/statuses/113047644296221105",
"activitypub"
]
],
"content": "Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet\n\nhttps://transformer-circuits.pub/2024/scaling-monosemanticity/index.html",
"sig": "70fef8d8f3cf3bcc92814aecb486abbb4fe76fa325e05242c44469ab13321823e5db5ca5ec9f3aaa2e30769973e34bb1f64f83a4c71a625f401a17e1db17bc1f"
}