{ "activation_function": "gelu_new", "architectures": [ "TinyRecursiveModel" ], "attn_pdrop": 0.1, "dtype": "float32", "embd_pdrop": 0.1, "hidden_size": 640, "layer_norm_epsilon": 1e-05, "max_position_embeddings": 1024, "model_type": "recursive_gpt", "n_embd": 640, "n_head": 8, "n_inner": null, "n_loops": 8, "n_physical_layers": 3, "n_positions": 1024, "num_attention_heads": 8, "num_hidden_layers": 3, "reorder_and_upcast_attn": false, "resid_pdrop": 0.1, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "transformers_version": "4.56.0", "vocab_size": 50257 }