Skip to content

Commit

Permalink
Fix the float number & Add SelfMoA_gemma-2-9b-it-SimPO, SelfMoA_gemma-2-9b-it-WPO-HB to AlpacaEval (#411)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenzhe-li authored Sep 25, 2024
1 parent f8a7bf9 commit b759c8d
Show file tree
Hide file tree
Showing 8 changed files with 172,171 additions and 1 deletion.
4,832 changes: 4,832 additions & 0 deletions results/SelfMoA_gemma-2-9b-it-SimPO/model_outputs.json

Large diffs are not rendered by default.

81,330 changes: 81,330 additions & 0 deletions results/SelfMoA_gemma-2-9b-it-SimPO/weighted_alpaca_eval_gpt4_turbo/annotations.json

Large diffs are not rendered by default.

4,832 changes: 4,832 additions & 0 deletions results/SelfMoA_gemma-2-9b-it-WPO-HB/model_outputs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length,length_controlled_winrate,lc_standard_error
SelfMoA_gemma-2-9b-it-WPO-HB,77.58955217385297,1.231940914887347,640,165,0,805,79.5031055900621,community,3261,78.53928111481099,0.3042788133382446
Shopee-SlimMoA-v1,75.61428659805350,1.2706274059194700,621,184,0,805,77.14285714285720,community,1994,77.4515432873834,0.43017522149239600
blendaxai-gm-l6-vo31,69.11033492869565,1.3280735654354863,562,242,1,805,69.87577639751554,community,1809,76.91981221023656,0.5725365663132986
gemma-2-9b-it-WPO-HB,77.82503168985093,1.2355857177790277,640,163,2,805,79.62732919254658,community,2285,76.72506842726064,0.4242603928637889
SelfMoA_gemma-2-9b-it-SimPO,71.9958856144492,1.3495341826849294,597,208,0,805,74.16149068322981,community,1930,75.04950944068965,0.44287068760098436
blendaxai-gm-l3-v35,73.41035740244067,1.254951147343878,607,196,2,805,75.527950310559,community,2186,73.37270365010379,0.6163911450738288
gemma-2-9b-it-SimPO,65.86422561532919,1.423459922555078,540,264,1,805,67.14285714285714,community,1833,72.3508446939842,0.5167873784867067
openpipe-moa-gpt-4-turbo-v1,63.15493451236265,1.422980098799326,515,283,7,805,64.40993788819875,community,1856,68.37866250336802,0.7309418614587613
Expand All @@ -28,7 +30,7 @@ Llama-3-Instruct-8B-SimPO-ExPO,40.63285400856655,1.4439449942168028,325,479,1,80
Llama-3-Instruct-8B-SimPO,40.52977498461182,1.422574464675002,319,485,1,805,39.68944099378882,community,1825,44.65131348921881,0.8800655791760451
Nanbeige-Plus-Chat-v0.1,56.70300973017392,1.482841874951873,456,347,2,805,56.77018633540373,community,2587,44.45966240337981,
Qwen1.5-110B-Chat,33.77709527565118,1.3776163153661627,255,545,5,805,31.98757763975156,community,1631,43.90555221078692,
Llama-3-8B-Instruct-SkillMix,44.62754272981248,1.4912297729002553,361,444,0,805.0,44.84472049689441,community,3141,42.76194893575222,
Llama-3-8B-Instruct-SkillMix,44.62754272981248,1.4912297729002553,361,444,0,805,44.84472049689441,community,3141,42.76194893575222,
gpt4_1106_preview_concise,22.92019444047205,1.232517714329424,172,622,11,805,22.049689440993788,dev,1136,41.896601591245386,
aligner-2b_claude-3-opus-20240229,34.46337362321739,1.314666526302454,225,475,105,805,34.47204968944099,community,1669,41.823071715247664,
Nanbeige2-16B-Chat,37.03608605005168,1.4340261272580377,288,514,3,805,35.962732919254655,community,1867,40.591286349562864,0.8504106275373426
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,5 @@ Shopee-SlimMoA-v1,-0.6930943742294789,0.5778443790027642,1.4506276222723822
blendaxai-gm-l6-vo31,-1.4827230167114802,0.8256378421072179,1.5942312525409852
Llama-3-8B-Instruct-SkillMix,-0.3007600604906024,0.4853486472189554,-0.2808727525336005
REBEL-Llama-3-8B-Instruct-Armo,-1.0427168605260002,0.6464073051877255,0.0395191056877229
SelfMoA_gemma-2-9b-it-SimPO,-0.8425253084188749,0.5482697859900880,1.2874783673834935
SelfMoA_gemma-2-9b-it-WPO-HB,0.2523363342614252,0.3970191588440620,1.4137351138484051
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
SelfMoA_gemma-2-9b-it-SimPO:
  prompt_template: "gemma-2-9b-it-DPO/prompt.txt"
  fn_completions: null
  completions_kwargs:
    max_new_tokens: 2048
    model_name: "SelfMoA_gemma-2-9b-it-SimPO"
    temperature: 0.7
  pretty_name: "SelfMoA + gemma-2-9b-it-SimPO"
  link: "https://github.com/wenzhe-li/Self-MoA/"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
SelfMoA_gemma-2-9b-it-WPO-HB:
  prompt_template: "gemma-2-9b-it-DPO/prompt.txt"
  fn_completions: null
  completions_kwargs:
    max_new_tokens: 2048
    model_name: "SelfMoA_gemma-2-9b-it-WPO-HB"
    temperature: 0.7
  pretty_name: "SelfMoA + gemma-2-9b-it-WPO-HB"
  link: "https://github.com/wenzhe-li/Self-MoA/"

0 comments on commit b759c8d

Please sign in to comment.