Skip to content

Commit

Permalink
CLDR-17535 Ensure testData is good for likelySubtags (#3977)
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati authored Aug 21, 2024
1 parent 56a2de1 commit 67afecd
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 26 deletions.
2 changes: 0 additions & 2 deletions common/supplemental/likelySubtags.xml
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="zza" to="zza_Latn_TR"/> <!--Zaza‧?‧? ➡ Zaza‧Latin‧Türkiye-->
<!-- Data to find likely language; some implementations may omit -->
<likelySubtag from="und" to="en_Latn_US"/> <!--?‧?‧? ➡ English‧Latin‧United States-->
<likelySubtag from="und_001" to="en_Latn_US"/> <!--?‧?‧world ➡ English‧Latin‧United States-->
<likelySubtag from="und_419" to="es_Latn_419"/> <!--?‧?‧Latin America ➡ Spanish‧Latin‧Latin America-->
<likelySubtag from="und_AD" to="ca_Latn_AD"/> <!--?‧?‧Andorra ➡ Catalan‧Latin‧Andorra-->
<likelySubtag from="und_AE" to="ar_Arab_AE"/> <!--?‧?‧United Arab Emirates ➡ Arabic‧Arabic‧United Arab Emirates-->
Expand Down Expand Up @@ -1162,7 +1161,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="und_Kthi" to="bho_Kthi_IN"/> <!--?‧Kaithi‧? ➡ Bhojpuri‧Kaithi‧India-->
<likelySubtag from="und_Lana" to="nod_Lana_TH"/> <!--?‧Lanna‧? ➡ Northern Thai‧Lanna‧Thailand-->
<likelySubtag from="und_Laoo" to="lo_Laoo_LA"/> <!--?‧Lao‧? ➡ Lao‧Lao‧Laos-->
<likelySubtag from="und_Latn_001" to="en_Latn_US"/> <!--?‧Latin‧world ➡ English‧Latin‧United States-->
<likelySubtag from="und_Latn_AE" to="en_Latn_AE"/> <!--?‧Latin‧United Arab Emirates ➡ English‧Latin‧United Arab Emirates-->
<likelySubtag from="und_Latn_AF" to="tk_Latn_AF"/> <!--?‧Latin‧Afghanistan ➡ Turkmen‧Latin‧Afghanistan-->
<likelySubtag from="und_Latn_AM" to="ku_Latn_AM"/> <!--?‧Latin‧Armenia ➡ Kurdish‧Latin‧Armenia-->
Expand Down
75 changes: 68 additions & 7 deletions common/testData/localeIdentifiers/likelySubtags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ af-Egyp ; af-Egyp-ZA ; af-Egyp ;
af-Latn ; af-Latn-ZA ; af ;
af-NA ; af-Latn-NA ; af-NA ;
af-ZA ; af-Latn-ZA ; af ;
ak ; ak-Latn-GH ; ak ;
ak-AQ ; ak-Latn-AQ ; ak-AQ ;
ak-Egyp ; ak-Egyp-GH ; ak-Egyp ;
ak-GH ; ak-Latn-GH ; ak ;
ak-Latn ; ak-Latn-GH ; ak ;
am ; am-Ethi-ET ; am ;
am-AQ ; am-Ethi-AQ ; am-AQ ;
am-ET ; am-Ethi-ET ; am ;
Expand Down Expand Up @@ -77,6 +82,11 @@ az-AQ ; az-Latn-AQ ; az-AQ ;
az-AZ ; az-Latn-AZ ; az ;
az-Egyp ; az-Egyp-AZ ; az-Egyp ;
az-Latn ; az-Latn-AZ ; az ;
bal ; bal-Arab-PK ; bal ;
bal-AQ ; bal-Arab-AQ ; bal-AQ ;
bal-Egyp ; bal-Egyp-PK ; bal-Egyp ;
bal-Latn ; bal-Latn-PK ; bal-Latn ;
bal-PK ; bal-Arab-PK ; bal ;
be ; be-Cyrl-BY ; be ;
be-AQ ; be-Cyrl-AQ ; be-AQ ;
be-BY ; be-Cyrl-BY ; be ;
Expand Down Expand Up @@ -189,6 +199,12 @@ dsb-AQ ; dsb-Latn-AQ ; dsb-AQ ;
dsb-DE ; dsb-Latn-DE ; dsb ;
dsb-Egyp ; dsb-Egyp-DE ; dsb-Egyp ;
dsb-Latn ; dsb-Latn-DE ; dsb ;
ee ; ee-Latn-GH ; ee ;
ee-AQ ; ee-Latn-AQ ; ee-AQ ;
ee-Egyp ; ee-Egyp-GH ; ee-Egyp ;
ee-GH ; ee-Latn-GH ; ee ;
ee-Latn ; ee-Latn-GH ; ee ;
ee-TG ; ee-Latn-TG ; ee-TG ;
el ; el-Grek-GR ; el ;
el-AQ ; el-Grek-AQ ; el-AQ ;
el-CY ; el-Grek-CY ; el-CY ;
Expand Down Expand Up @@ -447,6 +463,11 @@ ga-Egyp ; ga-Egyp-IE ; ga-Egyp ;
ga-GB ; ga-Latn-GB ; ga-GB ;
ga-IE ; ga-Latn-IE ; ga ;
ga-Latn ; ga-Latn-IE ; ga ;
gaa ; gaa-Latn-GH ; gaa ;
gaa-AQ ; gaa-Latn-AQ ; gaa-AQ ;
gaa-Egyp ; gaa-Egyp-GH ; gaa-Egyp ;
gaa-GH ; gaa-Latn-GH ; gaa ;
gaa-Latn ; gaa-Latn-GH ; gaa ;
gd ; gd-Latn-GB ; gd ;
gd-AQ ; gd-Latn-AQ ; gd-AQ ;
gd-Egyp ; gd-Egyp-GB ; gd-Egyp ;
Expand Down Expand Up @@ -516,6 +537,11 @@ ig-AQ ; ig-Latn-AQ ; ig-AQ ;
ig-Egyp ; ig-Egyp-NG ; ig-Egyp ;
ig-Latn ; ig-Latn-NG ; ig ;
ig-NG ; ig-Latn-NG ; ig ;
ii ; ii-Yiii-CN ; ii ;
ii-AQ ; ii-Yiii-AQ ; ii-AQ ;
ii-CN ; ii-Yiii-CN ; ii ;
ii-Egyp ; ii-Egyp-CN ; ii-Egyp ;
ii-Yiii ; ii-Yiii-CN ; ii ;
is ; is-Latn-IS ; is ;
is-AQ ; is-Latn-AQ ; is-AQ ;
is-Egyp ; is-Egyp-IS ; is-Egyp ;
Expand Down Expand Up @@ -581,6 +607,7 @@ kok-AQ ; kok-Deva-AQ ; kok-AQ ;
kok-Deva ; kok-Deva-IN ; kok ;
kok-Egyp ; kok-Egyp-IN ; kok-Egyp ;
kok-IN ; kok-Deva-IN ; kok ;
kok-Latn ; kok-Latn-IN ; kok-Latn ;
ks ; ks-Arab-IN ; ks ;
ks-AQ ; ks-Arab-AQ ; ks-AQ ;
ks-Arab ; ks-Arab-IN ; ks ;
Expand Down Expand Up @@ -732,12 +759,23 @@ nqo-AQ ; nqo-Nkoo-AQ ; nqo-AQ ;
nqo-Egyp ; nqo-Egyp-GN ; nqo-Egyp ;
nqo-GN ; nqo-Nkoo-GN ; nqo ;
nqo-Nkoo ; nqo-Nkoo-GN ; nqo ;
nso ; nso-Latn-ZA ; nso ;
nso-AQ ; nso-Latn-AQ ; nso-AQ ;
nso-Egyp ; nso-Egyp-ZA ; nso-Egyp ;
nso-Latn ; nso-Latn-ZA ; nso ;
nso-ZA ; nso-Latn-ZA ; nso ;
oc ; oc-Latn-FR ; oc ;
oc-AQ ; oc-Latn-AQ ; oc-AQ ;
oc-ES ; oc-Latn-ES ; oc-ES ;
oc-Egyp ; oc-Egyp-FR ; oc-Egyp ;
oc-FR ; oc-Latn-FR ; oc ;
oc-Latn ; oc-Latn-FR ; oc ;
om ; om-Latn-ET ; om ;
om-AQ ; om-Latn-AQ ; om-AQ ;
om-ET ; om-Latn-ET ; om ;
om-Egyp ; om-Egyp-ET ; om-Egyp ;
om-KE ; om-Latn-KE ; om-KE ;
om-Latn ; om-Latn-ET ; om ;
or ; or-Orya-IN ; or ;
or-AQ ; or-Orya-AQ ; or-AQ ;
or-Egyp ; or-Egyp-IN ; or-Egyp ;
Expand Down Expand Up @@ -822,6 +860,11 @@ ru-KZ ; ru-Cyrl-KZ ; ru-KZ ;
ru-MD ; ru-Cyrl-MD ; ru-MD ;
ru-RU ; ru-Cyrl-RU ; ru ;
ru-UA ; ru-Cyrl-UA ; ru-UA ;
rw ; rw-Latn-RW ; rw ;
rw-AQ ; rw-Latn-AQ ; rw-AQ ;
rw-Egyp ; rw-Egyp-RW ; rw-Egyp ;
rw-Latn ; rw-Latn-RW ; rw ;
rw-RW ; rw-Latn-RW ; rw ;
sa ; sa-Deva-IN ; sa ;
sa-AQ ; sa-Deva-AQ ; sa-AQ ;
sa-Deva ; sa-Deva-IN ; sa ;
Expand Down Expand Up @@ -888,6 +931,12 @@ sr-Latn ; sr-Latn-RS ; sr-Latn ;
sr-ME ; sr-Latn-ME ; sr-ME ;
sr-RS ; sr-Cyrl-RS ; sr ;
sr-XK ; sr-Cyrl-XK ; sr-XK ;
st ; st-Latn-ZA ; st ;
st-AQ ; st-Latn-AQ ; st-AQ ;
st-Egyp ; st-Egyp-ZA ; st-Egyp ;
st-LS ; st-Latn-LS ; st-LS ;
st-Latn ; st-Latn-ZA ; st ;
st-ZA ; st-Latn-ZA ; st ;
su ; su-Latn-ID ; su ;
su-AQ ; su-Latn-AQ ; su-AQ ;
su-Egyp ; su-Egyp-ID ; su-Egyp ;
Expand Down Expand Up @@ -953,6 +1002,12 @@ tk-AQ ; tk-Latn-AQ ; tk-AQ ;
tk-Egyp ; tk-Egyp-TM ; tk-Egyp ;
tk-Latn ; tk-Latn-TM ; tk ;
tk-TM ; tk-Latn-TM ; tk ;
tn ; tn-Latn-ZA ; tn ;
tn-AQ ; tn-Latn-AQ ; tn-AQ ;
tn-BW ; tn-Latn-BW ; tn-BW ;
tn-Egyp ; tn-Egyp-ZA ; tn-Egyp ;
tn-Latn ; tn-Latn-ZA ; tn ;
tn-ZA ; tn-Latn-ZA ; tn ;
to ; to-Latn-TO ; to ;
to-AQ ; to-Latn-AQ ; to-AQ ;
to-Egyp ; to-Egyp-TO ; to-Egyp ;
Expand Down Expand Up @@ -1107,14 +1162,14 @@ und-Cyrl-BY ; be-Cyrl-BY ; be ;
und-Cyrl-KG ; ky-Cyrl-KG ; ky ;
und-Cyrl-KZ ; ru-Cyrl-KZ ; ru-KZ ;
und-Cyrl-MD ; uk-Cyrl-MD ; uk-MD ;
und-Cyrl-ME ; ru-Cyrl-ME ; ru-ME ;
und-Cyrl-ME ; sr-Cyrl-ME ; ;
und-Cyrl-MK ; mk-Cyrl-MK ; mk ;
und-Cyrl-MN ; mn-Cyrl-MN ; mn ;
und-Cyrl-RS ; sr-Cyrl-RS ; sr ;
und-Cyrl-RU ; ru-Cyrl-RU ; ru ;
und-Cyrl-TJ ; tg-Cyrl-TJ ; tg ;
und-Cyrl-UA ; uk-Cyrl-UA ; uk ;
und-Cyrl-UZ ; ru-Cyrl-UZ ; ru-UZ ;
und-Cyrl-UZ ; uz-Cyrl-UZ ; uz-Cyrl ;
und-Cyrl-XK ; sr-Cyrl-XK ; sr-XK ;
und-DE ; de-Latn-DE ; de ;
und-DG ; en-Latn-DG ; en-DG ;
Expand Down Expand Up @@ -1188,13 +1243,15 @@ und-Hans-AQ ; zh-Hans-AQ ; zh-AQ ;
und-Hans-CN ; zh-Hans-CN ; zh ;
und-Hans-HK ; zh-Hans-HK ; ;
und-Hans-MO ; zh-Hans-MO ; ;
und-Hans-MY ; zh-Hans-MY ; zh-MY ;
und-Hans-SG ; zh-Hans-SG ; zh-SG ;
und-Hans-TW ; zh-Hans-TW ; ;
und-Hant ; zh-Hant-TW ; zh-Hant ; zh-TW
und-Hant-AQ ; zh-Hant-AQ ; ;
und-Hant-CN ; zh-Hant-CN ; ;
und-Hant-CN ; yue-Hant-CN ; ;
und-Hant-HK ; zh-Hant-HK ; zh-HK ;
und-Hant-MO ; zh-Hant-MO ; zh-MO ;
und-Hant-MY ; zh-Hant-MY ; ;
und-Hant-SG ; zh-Hant-SG ; ;
und-Hant-TW ; zh-Hant-TW ; zh-Hant ; zh-TW
und-Hebr ; he-Hebr-IL ; he ;
Expand Down Expand Up @@ -1384,7 +1441,7 @@ und-Latn-MQ ; fr-Latn-MQ ; fr-MQ ;
und-Latn-MR ; fr-Latn-MR ; fr-MR ;
und-Latn-MS ; en-Latn-MS ; en-MS ;
und-Latn-MT ; mt-Latn-MT ; mt ;
und-Latn-MU ; mfe-Latn-MU ; mfe ;
und-Latn-MU ; en-Latn-MU ; en-MU ;
und-Latn-MV ; en-Latn-MV ; en-MV ;
und-Latn-MW ; en-Latn-MW ; en-MW ;
und-Latn-MX ; es-Latn-MX ; es-MX ;
Expand Down Expand Up @@ -1427,7 +1484,7 @@ und-Latn-SH ; en-Latn-SH ; en-SH ;
und-Latn-SI ; sl-Latn-SI ; sl ;
und-Latn-SJ ; nb-Latn-SJ ; nb-SJ ;
und-Latn-SK ; sk-Latn-SK ; sk ;
und-Latn-SL ; kri-Latn-SL ; kri ;
und-Latn-SL ; en-Latn-SL ; en-SL ;
und-Latn-SM ; it-Latn-SM ; it-SM ;
und-Latn-SN ; fr-Latn-SN ; fr-SN ;
und-Latn-SO ; so-Latn-SO ; so ;
Expand All @@ -1441,7 +1498,7 @@ und-Latn-SZ ; en-Latn-SZ ; en-SZ ;
und-Latn-TC ; en-Latn-TC ; en-TC ;
und-Latn-TD ; fr-Latn-TD ; fr-TD ;
und-Latn-TG ; fr-Latn-TG ; fr-TG ;
und-Latn-TK ; tkl-Latn-TK ; tkl ;
und-Latn-TK ; en-Latn-TK ; en-TK ;
und-Latn-TL ; pt-Latn-TL ; pt-TL ;
und-Latn-TM ; tk-Latn-TM ; tk ;
und-Latn-TN ; fr-Latn-TN ; fr-TN ;
Expand All @@ -1467,7 +1524,7 @@ und-Latn-WS ; sm-Latn-WS ; sm ;
und-Latn-XK ; sq-Latn-XK ; sq-XK ;
und-Latn-YT ; fr-Latn-YT ; fr-YT ;
und-Latn-ZA ; en-Latn-ZA ; en-ZA ;
und-Latn-ZM ; bem-Latn-ZM ; bem ;
und-Latn-ZM ; en-Latn-ZM ; en-ZM ;
und-Latn-ZW ; sn-Latn-ZW ; sn ;
und-MA ; ar-Arab-MA ; ar-MA ;
und-MC ; fr-Latn-MC ; fr-MC ;
Expand Down Expand Up @@ -1613,6 +1670,9 @@ und-WS ; sm-Latn-WS ; sm ;
und-XK ; sq-Latn-XK ; sq-XK ;
und-YE ; ar-Arab-YE ; ar-YE ;
und-YT ; fr-Latn-YT ; fr-YT ;
und-Yiii ; ii-Yiii-CN ; ii ;
und-Yiii-AQ ; ii-Yiii-AQ ; ii-AQ ;
und-Yiii-CN ; ii-Yiii-CN ; ii ;
und-ZA ; en-Latn-ZA ; en-ZA ;
und-ZM ; bem-Latn-ZM ; bem ;
und-ZW ; sn-Latn-ZW ; sn ;
Expand Down Expand Up @@ -1691,6 +1751,7 @@ zh-HK ; zh-Hant-HK ; zh-HK ;
zh-Hans ; zh-Hans-CN ; zh ;
zh-Hant ; zh-Hant-TW ; zh-Hant ; zh-TW
zh-MO ; zh-Hant-MO ; zh-MO ;
zh-MY ; zh-Hans-MY ; zh-MY ;
zh-SG ; zh-Hans-SG ; zh-SG ;
zh-TW ; zh-Hant-TW ; zh-Hant ; zh-TW
zu ; zu-Latn-ZA ; zu ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.SupplementalDataInfo;

@Deprecated
public class GenerateLikelySubtagTests {
private static final String SEPARATOR = CldrUtility.LINE_SEPARATOR;
private static final OutputStyle OUTPUT_STYLE = OutputStyle.XML;
Expand All @@ -27,7 +28,7 @@ public class GenerateLikelySubtagTests {
public static void main(String[] args) throws IOException {
if (true) {
throw new IllegalArgumentException(
"This tool should not be used in its current state.");
"Deprecated — it appears that we don't need this, but keeping until we are sure.");
}
out =
FileUtilities.openUTF8Writer(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,9 +421,9 @@ public static void main(String[] args) throws IOException {
{"und_Kana", "ja_Kana_JP"},
{"und_Kana_JP", "ja_Kana_JP"},
{"und_Latn", "en_Latn_US"},
{"und_001", "en_Latn_US"}, // to not be overridden by tok_Latn_001
{"und_001", "en_Latn_001"}, // to not be overridden by tok_Latn_001
{
"und_Latn_001", "en_Latn_US"
"und_Latn_001", "en_Latn_001"
}, // to not be overridden by tok_Latn_001
{"und_Latn_ET", "en_Latn_ET"},
{"und_Latn_NE", "ha_Latn_NE"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ public static void main(String[] args) {
Set<String> testCases = getTestCases(data);

for (String testRaw : testCases) {
if (testRaw.startsWith("qaa")) {
int debug = 0;
}
final CLDRLocale source = CLDRLocale.getInstance(testRaw);
final String test = source.toLanguageTag();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -890,4 +890,24 @@ public void testSuperfluous() {
}
}
}

/**
 * Checks every entry of {@code likely} for internal contradictions: any subtag that is spelled
 * out explicitly in the source locale must appear unchanged in the target (maximized) locale.
 *
 * <p>A source language of {@code und} and an empty script or region are treated as "not
 * specified" and are therefore not compared.
 */
public void testConflicts() {
    for (Entry<String, String> mapping : likely.entrySet()) {
        final CLDRLocale from = CLDRLocale.getInstance(mapping.getKey());
        final CLDRLocale to = CLDRLocale.getInstance(mapping.getValue());
        // Shared suffix for the failure messages, e.g. "und_AD ➡︎ ca_Latn_AD".
        final String description = mapping.getKey() + " ➡︎ " + mapping.getValue();

        // Language: "und" means the source leaves the language open.
        final String fromLanguage = from.getLanguage();
        final boolean languageIsExplicit = !fromLanguage.equals("und");
        if (languageIsExplicit) {
            assertEquals("Language: " + description, fromLanguage, to.getLanguage());
        }

        // Script: an empty string means the source leaves the script open.
        final String fromScript = from.getScript();
        final boolean scriptIsExplicit = !fromScript.isEmpty();
        if (scriptIsExplicit) {
            assertEquals("Script: " + description, fromScript, to.getScript());
        }

        // Region: an empty string means the source leaves the region open.
        final String fromRegion = from.getRegion();
        final boolean regionIsExplicit = !fromRegion.isEmpty();
        if (regionIsExplicit) {
            assertEquals("Region: " + description, fromRegion, to.getRegion());
        }
    }
}
}
Loading

0 comments on commit 67afecd

Please sign in to comment.