convert : use 1e6 rope_freq_base for mixtral
slaren committed Dec 11, 2023
1 parent 296c945 commit 7dc75e3
Showing 1 changed file with 4 additions and 1 deletion.
convert.py: 5 changes (4 additions & 1 deletion)
@@ -259,6 +259,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
 
         n_experts = None
         n_experts_used = None
+        f_rope_freq_base = None
 
         # hack to determine LLaMA v1 vs v2 vs CodeLlama
         if config.get("moe"):
@@ -281,6 +282,8 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             n_ff = model["layers.0.feed_forward.experts.0.w1.weight"].shape[0]
             n_experts = config["moe"]["num_experts"]
             n_experts_used = config["moe"]["num_experts_per_tok"]
+            f_rope_freq_base = 1e6
 
+
         return Params(
             n_vocab          = model["tok_embeddings.weight"].shape[0],
@@ -293,7 +296,7 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             n_experts        = n_experts,
             n_experts_used   = n_experts_used,
             f_norm_eps       = config["norm_eps"],
-            f_rope_freq_base = config.get("rope_theta"),
+            f_rope_freq_base = config.get("rope_theta", f_rope_freq_base),
         )
 
     @staticmethod
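For reference, a minimal standalone sketch (not part of convert.py; the helper name pick_rope_freq_base is hypothetical) of the fallback behavior this commit introduces: when params.json has a "moe" section (Mixtral) but no explicit "rope_theta", the RoPE frequency base defaults to 1e6, while an explicit "rope_theta" still takes precedence.

    # Sketch of the fallback logic added by this commit; pick_rope_freq_base
    # is a hypothetical helper, not a function in convert.py.
    def pick_rope_freq_base(config: dict):
        f_rope_freq_base = None
        if config.get("moe"):
            # Mixtral: assume a RoPE frequency base of 1e6 when params.json
            # does not specify one.
            f_rope_freq_base = 1e6
        # An explicit "rope_theta" in params.json always wins.
        return config.get("rope_theta", f_rope_freq_base)

    print(pick_rope_freq_base({"moe": {"num_experts": 8}}))                     # 1000000.0
    print(pick_rope_freq_base({"moe": {"num_experts": 8}, "rope_theta": 1e5}))  # 100000.0
    print(pick_rope_freq_base({"norm_eps": 1e-5}))                              # None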
