Skip to content

Commit

Permalink
New \p{Letter} Unicode property escape
Browse files Browse the repository at this point in the history
  • Loading branch information
bhamiltoncx committed Mar 1, 2017
1 parent d11968d commit ca03e6a
Show file tree
Hide file tree
Showing 4 changed files with 341 additions and 30 deletions.
123 changes: 123 additions & 0 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestATNConstruction.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,129 @@ public void testA() throws Exception {
"s4->RuleStop_A_2\n";
checkTokensRule(g, null, expecting);
}
/**
 * A lexer char set that lists individual characters ({@code [abc]}) is
 * expected to show up as the single set transition {@code {97..99}}
 * in the text handed to {@code checkTokensRule}.
 */
@Test public void testCharSet() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [abc] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // 'a'..'c' are code points 97..99.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A lexer char set written as a range ({@code [a-c]}) is expected to yield
 * the same set transition, {@code {97..99}}, as listing the characters
 * one by one.
 */
@Test public void testCharSetRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // The range a-c spans code points 97..99.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set holding one four-hex-digit Unicode escape (code point 0xABCD)
 * is expected to produce a single-value transition labelled 43981.
 */
@Test public void testCharSetUnicodeBMPEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\uABCD] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // 0xABCD == 43981; a lone code point is printed without set braces.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-43981->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set mixing a plain range ({@code a-c}) with a range whose bounds
 * are four-hex-digit Unicode escapes (0xABCD through 0xABFF) is expected
 * to produce one set transition containing both intervals.
 */
@Test public void testCharSetUnicodeBMPEscapeRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c\\uABCD-\\uABFF] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // 0xABCD == 43981 and 0xABFF == 44031.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99, 43981..44031}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set holding one braced (extended) Unicode escape for a code point
 * above 0xFFFF (0x10ABCD) is expected to produce a single-value transition
 * labelled 1092557.
 */
@Test public void testCharSetUnicodeSMPEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\u{10ABCD}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // 0x10ABCD == 1092557.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-1092557->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set mixing a plain range ({@code a-c}) with a range whose bounds
 * are braced (extended) Unicode escapes (0x10ABCD through 0x10ABFF) is
 * expected to produce one set transition containing both intervals.
 */
@Test public void testCharSetUnicodeSMPEscapeRange() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [a-c\\u{10ABCD}-\\u{10ABFF}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // 0x10ABCD == 1092557 and 0x10ABFF == 1092607.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{97..99, 1092557..1092607}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set consisting of the property escape {@code \p{Gothic}} is
 * expected to expand to the code point interval {@code 66352..66378}.
 */
@Test public void testCharSetUnicodePropertyEscape() throws Exception {
    // Gothic was picked because the script is long dead: its code point
    // assignments are unlikely to change, and a change would make the
    // hard-coded interval below fail.
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{Gothic}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{66352..66378}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set consisting of the inverted property escape {@code \P{Gothic}}
 * is expected to expand to every code point from 0 through 1114111 except
 * the interval {@code 66352..66378}.
 */
@Test public void testCharSetUnicodePropertyInvertEscape() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\P{Gothic}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // Two intervals remain: everything below 66352 and everything above 66378.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{0..66351, 66379..1114111}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set with two property escapes side by side ({@code \p{Gothic}}
 * and {@code \p{Mahajani}}) is expected to expand to one set transition
 * holding both scripts' intervals.
 */
@Test public void testCharSetUnicodeMultiplePropertyEscape() throws Exception {
    // Like Gothic, the Mahajani script is hopefully not going to change
    // soon, so the hard-coded intervals should stay valid.
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{Gothic}\\p{Mahajani}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{66352..66378, 69968..70006}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
/**
 * A char set combining {@code \p{ASCII_Hex_Digit}} with
 * {@code \p{Hex_Digit}} is expected to expand to one set transition in
 * which each interval appears only once, even though the ASCII hex-digit
 * intervals are named by both escapes.
 */
@Test public void testCharSetUnicodePropertyOverlap() throws Exception {
    final String grammarText =
        "lexer grammar P;\n"+
        "A : [\\p{ASCII_Hex_Digit}\\p{Hex_Digit}] ;";
    final LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

    // First three intervals are 0-9, A-F, a-f; the last three sit at 65296 and above.
    final String expectedTransitions =
        "s0->RuleStart_A_1\n" +
        "RuleStart_A_1->s3\n" +
        "s3-{48..57, 65..70, 97..102, 65296..65305, 65313..65318, 65345..65350}->s4\n" +
        "s4->RuleStop_A_2\n";

    checkTokensRule(lexerGrammar, null, expectedTransitions);
}
@Test public void testRangeOrRange() throws Exception {
LexerGrammar g = new LexerGrammar(
"lexer grammar P;\n"+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,44 @@ public void testSetUp() throws Exception {
super.testErrors(pair, true);
}

/**
 * Feeds a lexer grammar whose rules each contain a malformed or misplaced
 * escape inside a char set, and expects one diagnostic per rule:
 * {@link ErrorType#INVALID_ESCAPE_SEQUENCE} for empty, unterminated,
 * out-of-range or unknown escapes, and
 * {@link ErrorType#UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE} when a property
 * escape is used as a range endpoint.
 */
@Test public void testInvalidUnicodeEscapesInCharSet() {
    // Each rule sits on its own grammar line (2 through 13), which is the
    // line number the matching expected diagnostic refers to.
    final String grammarText =
        "lexer grammar Test;\n" +
        "INVALID_EXTENDED_UNICODE_EMPTY: [\\u{}];\n" +
        "INVALID_EXTENDED_UNICODE_NOT_TERMINATED: [\\u{];\n" +
        "INVALID_EXTENDED_UNICODE_TOO_LONG: [\\u{110000}];\n" +
        "INVALID_UNICODE_PROPERTY_EMPTY: [\\p{}];\n" +
        "INVALID_UNICODE_PROPERTY_NOT_TERMINATED: [\\p{];\n" +
        "INVALID_INVERTED_UNICODE_PROPERTY_EMPTY: [\\P{}];\n" +
        "INVALID_UNICODE_PROPERTY_UNKNOWN: [\\p{NotAProperty}];\n" +
        "INVALID_INVERTED_UNICODE_PROPERTY_UNKNOWN: [\\P{NotAProperty}];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" +
        "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" +
        "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";

    // Common "error(<code>" openers for the two diagnostic kinds.
    final String invalidEscape = "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code;
    final String propertyInRange = "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code;

    // The reported column is where the char set's opening bracket sits.
    final String expectedErrors =
        invalidEscape + "): Test.g4:2:32: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:3:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:4:35: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:5:32: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:6:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:7:41: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:8:34: invalid escape sequence\n" +
        invalidEscape + "): Test.g4:9:43: invalid escape sequence\n" +
        propertyInRange + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
        propertyInRange + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
        propertyInRange + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
        propertyInRange + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n";

    // testErrors takes the grammar and the expected output as a two-element array.
    super.testErrors(new String[] { grammarText, expectedErrors }, true);
}

/**
* This test ensures the {@link ErrorType#UNRECOGNIZED_ASSOC_OPTION} warning
* is produced as described in the documentation.
Expand Down
Loading

0 comments on commit ca03e6a

Please sign in to comment.