Skip to content

Commit

Permalink
Merge branch 'master_upstream' into optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-lischke committed Mar 4, 2017
2 parents c874ab6 + d882636 commit 1cf2885
Show file tree
Hide file tree
Showing 563 changed files with 15,458 additions and 12,400 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ __pycache__/

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
*.user
.vs/
project.lock.json

# Build results
[Dd]ebug/
Expand Down
10 changes: 10 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,22 @@ matrix:
- os: linux
jdk: oraclejdk7
env: TARGET=csharp
- os: linux
jdk: oraclejdk7
dist: trusty
env: TARGET=dotnet
- os: linux
jdk: oraclejdk7
env: TARGET=python2
- os: linux
jdk: oraclejdk7
env: TARGET=python3
addons:
apt:
sources:
- deadsnakes # source required so it finds the package definition below
packages:
- python3.5
- os: linux
jdk: oraclejdk7
env: TARGET=javascript
Expand Down
2 changes: 0 additions & 2 deletions .travis/before-install-linux-cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
2 changes: 0 additions & 2 deletions .travis/before-install-linux-csharp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
echo "deb http://download.mono-project.com/repo/debian wheezy/snapshots/3.12.1 main" | sudo tee /etc/apt/sources.list.d/mono-xamarin.list
sudo apt-get install -qq mono-complete
22 changes: 22 additions & 0 deletions .travis/before-install-linux-dotnet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

set -euo pipefail

# install dotnet
sudo sh -c 'echo "deb [arch=amd64] https://apt-mo.trafficmanager.net/repos/dotnet-release/ trusty main" > /etc/apt/sources.list.d/dotnetdev.list'
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 417A0893
sudo apt-get update

sudo apt-get install dotnet-dev-1.0.0-preview2.1-003177

# install mvn
wget http://apache.mirrors.lucidnetworks.net/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
wget https://www.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz.md5 && \
echo "$(cat apache-maven-3.3.9-bin.tar.gz.md5) apache-maven-3.3.9-bin.tar.gz" > apache-maven-3.3.9-bin.tar.gz.md5 && \
md5sum -c *.md5

sudo rm -rf /usr/local/maven/ && sudo mkdir -p /usr/local/maven && \
sudo tar xzvf apache-maven-3.3.9-bin.tar.gz -C /usr/local/maven --strip-components=1

mvn -v

2 changes: 0 additions & 2 deletions .travis/before-install-linux-go.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
eval "$(sudo gimme 1.7.3)"
( go version ; go env ) || true
2 changes: 0 additions & 2 deletions .travis/before-install-linux-java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
4 changes: 2 additions & 2 deletions .travis/before-install-linux-javascript.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
curl -sL https://deb.nodesource.com/setup_0.12 | sudo -E bash -
sudo apt-get install -qq nodejs
node --version
2 changes: 0 additions & 2 deletions .travis/before-install-linux-python2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,5 @@
set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
python --version
5 changes: 0 additions & 5 deletions .travis/before-install-linux-python3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,4 @@

set -euo pipefail

sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
sudo add-apt-repository ppa:fkrull/deadsnakes -y
sudo add-apt-repository ppa:rwky/nodejs -y
sudo apt-get update -qq
sudo apt-get install -qq python3.5
python3 --version
4 changes: 4 additions & 0 deletions .travis/run-tests-dotnet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

mvn -q -Dparallel=methods -DthreadCount=4 -Dtest=csharp.* -Dantlr-csharp-netstandard=true test

10 changes: 10 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Contributing to ANTLR 4

1. [Fork](https://help.github.com/articles/fork-a-repo) the [antlr/antlr4 repo](https://github.com/antlr/antlr4)
2. Install and configure [EditorConfig](http://editorconfig.org/) so your text editor or IDE uses the ANTLR 4 coding style
3. [Build ANTLR 4](doc/building-antlr.md)
4. [Run the ANTLR project unit tests](doc/antlr-project-testing.md)
5. Create a [pull request](https://help.github.com/articles/using-pull-requests/) including your change


**Note:** You must sign the `contributors.txt` certificate of origin with your pull request if you've not done so before.
26 changes: 26 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,29 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=====

MIT License for codepointat.js from https://git.io/codepointat
MIT License for fromcodepoint.js from https://git.io/vDW1m

Copyright Mathias Bynens <https://mathiasbynens.be/>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2 changes: 1 addition & 1 deletion antlr4-maven-plugin/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.6.1-SNAPSHOT</version>
<version>4.7-SNAPSHOT</version>
</parent>
<artifactId>antlr4-maven-plugin</artifactId>
<packaging>maven-plugin</packaging>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ public class Antlr4Mojo extends AbstractMojo {
@Parameter(property = "project", required = true, readonly = true)
protected MavenProject project;

/**
* Specifies whether sources are added to the {@code compile} or
* {@code test} scope.
*/
@Parameter(property = "antlr4.generateTestSources", defaultValue = "false")
private boolean generateTestSources;

/**
* The directory where the ANTLR grammar files ({@code *.g4}) are located.
*/
Expand Down Expand Up @@ -190,7 +197,12 @@ public File getLibDirectory() {
}

void addSourceRoot(File outputDir) {
project.addCompileSourceRoot(outputDir.getPath());
if (generateTestSources) {
project.addTestCompileSourceRoot(outputDir.getPath());
}
else {
project.addCompileSourceRoot(outputDir.getPath());
}
}

/**
Expand Down
9 changes: 9 additions & 0 deletions contributors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,12 @@ YYYY/MM/DD, github id, Full name, email
2016/12/11, Gaulouis, Gaulouis, gaulouis.com@gmail.com
2016/12/22, akosthekiss, Akos Kiss, akiss@inf.u-szeged.hu
2016/12/24, adrpo, Adrian Pop, adrian.pop@liu.se
2017/01/11, robertbrignull, Robert Brignull, robertbrignull@gmail.com
2017/01/13, marcelo-rocha, Marcelo Rocha, mcrocha@gmail.com
2017/01/23, bhamiltoncx, Ben Hamilton, bhamiltoncx+antlr@gmail.com
2017/01/18, mshockwave, Bekket McClane, yihshyng223@gmail.com
2017/02/10, lionelplessis, Lionel Plessis, lionelplessis@users.noreply.github.com
2017/02/14, lecode-official, David Neumann, david.neumann@lecode.de
2017/02/14, xied75, Dong Xie, xied75@gmail.com
2017/02/20, Thomasb81, Thomas Burg, thomasb81@gmail.com
2017/02/26, jvasileff, John Vasileff, john@vasileff.com
10 changes: 3 additions & 7 deletions doc/cpp-target.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,12 @@ For gcc and clang it is possible to use the `-fvisibility=hidden` setting to hid
Since C++ has no built-in memory management we need to take extra care. For that we rely mostly on smart pointers, which however might cause time penalties or memory side effects (like cyclic references) if not used with care. Currently however the memory household looks very stable. Generally, when you see a raw pointer in code consider this as being managed elsewehere. You should never try to manage such a pointer (delete, assign to smart pointer etc.).

### Unicode Support
Encoding is mostly an input issue, i.e. when the lexer converts text input into lexer tokens. The parser is completely encoding unaware. However, lexer input in the grammar is defined by character ranges with either a single member (e.g. 'a' or [a] or [abc]), an explicit range (e.g. 'a'..'z' or [a-z]), the full Unicode range (for a wildcard) and the full Unicode range minus a sub range (for negated ranges, e.g. ~[a]). The explicit ranges (including single member ranges) are encoded in the serialized ATN by 16bit numbers, hence cannot reach beyond 0xFFFF (the Unicode BMP), while the implicit ranges can include any value (and hence support the full Unicode set, up to 0x10FFFF).
Encoding is mostly an input issue, i.e. when the lexer converts text input into lexer tokens. The parser is completely encoding unaware.

> An interesting side note here is that the Java target fully supports Unicode as well, despite the inherent limitations from the serialized ATN. That's possible because the Java String class represents characters beyond the BMP as surrogate pairs (two 16bit values) and even reads them as 2 separate input characters. To make this work a character range for an identifier in a grammar must include the surrogate pairs area (for a Java parser).
The C++ target however always expects UTF-8 input (either in a string or via a wide stream) which is then converted to UTF-32 (a char32_t array) and fed to the lexer. ANTLR, when parsing your grammar, limits character ranges explicitly to the BMP currently. So, in order to allow specifying the full Unicode set the C++ target uses a little trick: whenever an explicit character range includes the (unused) codepoint 0xFFFF in a grammar it is silently extended to the full Unicode range. It's clear that this is an all-or-nothing solution. You cannot define a subset of Unicode codepoints > 0xFFFF that way. This can only be solved if ANTLR supports larger character intervals.

The differences in handling characters beyond the BMP leads to a difference between Java and C++ lexers: the character offsets may not concur. This is because Java reads two 16bit values per Unicode char (if that falls into the surrogate area) while a C++ parser only reads one 32bit value. That usually doesn't have practical consequences, but might confuse people when comparing token positions.
The C++ target always expects UTF-8 input (either in a string or stream) which is then converted to UTF-32 (a char32_t array) and fed to the lexer.

### Named Actions
In order to help customizing the generated files there are a number of additional socalled **named actions**. These actions are tight to specific areas in the generated code and allow to add custom (target specific) code. All targets support these actions
In order to help customizing the generated files there are a number of additional socalled **named actions**. These actions are tight to specific areas in the generated code and allow to add custom (target specific) code. All targets support these actions

* @parser::header
* @parser::members
Expand Down
6 changes: 4 additions & 2 deletions doc/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Links in the documentation refer to various sections of the book but have been r
<a href=""><img src=images/tpantlr2.png width=120></a>
<a href=""><img src=images/tpdsl.png width=120></a>
<a href="https://www.youtube.com/watch?v=OAoA3E-cyug"><img src=images/teronbook.png width=250></a>

This documentation is a reference and summarizes grammar syntax and the key semantics of ANTLR grammars. The source code for all examples in the book, not just this chapter, are free at the publisher's website. The following video is a general tour of ANTLR 4 and includes a description of how to use parse tree listeners to process Java files easily:

<a href="https://vimeo.com/59285751"><img src=images/tertalk.png width=200></a>
Expand Down Expand Up @@ -52,7 +52,7 @@ This documentation is a reference and summarizes grammar syntax and the key sema
* [Runtime Libraries and Code Generation Targets](targets.md)

* [Parsing binary streams](parsing-binary-files.md)

* [Parser and lexer interpreters](interpreters.md)

* [Resources](resources.md)
Expand All @@ -61,6 +61,8 @@ This documentation is a reference and summarizes grammar syntax and the key sema

* [Building ANTLR itself](building-antlr.md)

* [Contributing to ANTLR](/CONTRIBUTING.md)

* [Cutting an ANTLR Release](releasing-antlr.md)

* [ANTLR project unit tests](antlr-project-testing.md)
Expand Down
22 changes: 19 additions & 3 deletions doc/lexer-rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,29 @@ Match that character or sequence of characters. E.g., ’while’ or ’=’.</t

<tr>
<td>[char set]</td><td>
Match one of the characters specified in the character set. Interpret x-y as set of characters between range x and y, inclusively. The following escaped characters are interpreted as single special characters: \n, \r, \b, \t, and \f. To get ], \, or - you must escape them with \. You can also use Unicode character specifications: \uXXXX. Here are a few examples:
<p>Match one of the characters specified in the character set. Interpret <tt>x-y</tt> as the set of characters between range <tt>x</tt> and <tt>y</tt>, inclusively. The following escaped characters are interpreted as single special characters: <tt>\n</tt>, <tt>\r</tt>, <tt>\b</tt>, <tt>\t</tt>, <tt>\f</tt>, <tt>\uXXXX</tt>, and <tt>\u{XXXXXX}</tt>. To get <tt>]</tt>, <tt>\</tt>, or <tt>-</tt> you must escape them with <tt>\</tt>.</p>

<p>You can also include all characters matching Unicode properties (general category, boolean, script, or block) with <tt>\p{PropertyName}</tt>. (You can invert the test with <tt>\P{PropertyName}</tt>).</p>

<p>For a list of valid Unicode property names, see <a href="http://unicode.org/reports/tr44/#Properties">Unicode Standard Annex #44</a>. (ANTLR also supports <a href="http://unicode.org/reports/tr44/#General_Category_Values">short and long Unicode general category names</a> like <tt>\p{Lu}</tt>, <tt>\p{Z}</tt>, and <tt>\p{Symbol}</tt>.)</p>

<p>Property names include <a href="http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt">Unicode block names</a> prefixed with <tt>In</tt> (they overlap with script names) and with spaces changed to <tt>_</tt>. For example: <tt>\p{InLatin_1_Supplement}</tt>, <tt>\p{InYijing_Hexagram_Symbols}</tt>, and <tt>\p{InAncient_Greek_Numbers}</tt>.</p>

<p>Property names are <b>case-insensitive</b>, and <tt>_</tt> and <tt>-</tt> are treated identically</p>

<p>Here are a few examples:</p>

<pre>
WS : [ \n\u000D] -> skip ; // same as [ \n\r]


UNICODE_WS : [\p{White_Space}] -> skip; // match all Unicode whitespace

ID : [a-zA-Z] [a-zA-Z0-9]* ; // match usual identifier spec


UNICODE_ID : [\p{Alpha}] [\p{Alnum}]* ; // match full Unicode alphabetic ids

EMOJI : [\u{1F4A9}\u{1F926}] ; // note Unicode code points > U+FFFF

DASHBRACK : [\-\]]+ ; // match - or ] one or more times
</pre>
</td>
Expand Down
6 changes: 5 additions & 1 deletion doc/lexicon.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ These more or less correspond to `isJavaIdentifierPart` and `isJavaIdentifierSta

ANTLR does not distinguish between character and string literals as most languages do. All literal strings one or more characters in length are enclosed in single quotes such as `’;’`, `’if’`, `’>=’`, and `’\’'` (refers to the one-character string containing the single quote character). Literals never contain regular expressions.

Literals can contain Unicode escape sequences of the form `\uXXXX`, where XXXX is the hexadecimal Unicode character value. For example, `’\u00E8’` is the French letter with a grave accent: `’è’`. ANTLR also understands the usual special escape sequences: `’\n’` (newline), `’\r’` (carriage return), `’\t’` (tab), `’\b’` (backspace), and `’\f’` (form feed). You can use Unicode characters directly within literals or use the Unicode escape sequences:
Literals can contain Unicode escape sequences of the form `’\uXXXX’` (for Unicode code points up to `’U+FFFF’`) or `’\u{XXXXXX}’` (for all Unicode code points), where `’XXXX’` is the hexadecimal Unicode code point value.

For example, `’\u00E8’` is the French letter with a grave accent: `’è’`, and `’\u{1F4A9}’` is the famous emoji: `’💩’`.

ANTLR also understands the usual special escape sequences: `’\n’` (newline), `’\r’` (carriage return), `’\t’` (tab), `’\b’` (backspace), and `’\f’` (form feed). You can use Unicode code points directly within literals or use the Unicode escape sequences:

```
grammar Foreign;
Expand Down
3 changes: 2 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
</parent>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.6.1-SNAPSHOT</version>
<version>4.7-SNAPSHOT</version>
<packaging>pom</packaging>

<name>ANTLR 4</name>
Expand Down Expand Up @@ -78,6 +78,7 @@

<modules>
<module>runtime/Java</module>
<module>tool-codegen</module>
<module>tool</module>
<module>antlr4-maven-plugin</module>
<module>tool-testsuite</module>
Expand Down
2 changes: 1 addition & 1 deletion runtime-testsuite/annotations/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<parent>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.6.1-SNAPSHOT</version>
<version>4.7-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>antlr4-runtime-test-annotations</artifactId>
Expand Down
22 changes: 21 additions & 1 deletion runtime-testsuite/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<parent>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.6.1-SNAPSHOT</version>
<version>4.7-SNAPSHOT</version>
</parent>
<artifactId>antlr4-runtime-testsuite</artifactId>
<name>ANTLR 4 Runtime Tests (2nd generation)</name>
Expand Down Expand Up @@ -95,6 +95,8 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<!-- SUREFIRE-951: file.encoding cannot be set via systemPropertyVariables -->
<argLine>-Dfile.encoding=UTF-8</argLine>
<includes>
<include>**/csharp/Test*.java</include>
<include>**/java/Test*.java</include>
Expand All @@ -118,6 +120,24 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>${project.version}</version>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
<configuration>
<sourceDirectory>${basedir}/test</sourceDirectory>
<outputDirectory>${project.build.directory}/generated-test-sources/antlr4</outputDirectory>
<visitor>true</visitor>
<generateTestSources>true</generateTestSources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down
2 changes: 1 addition & 1 deletion runtime-testsuite/processors/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<parent>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.6.1-SNAPSHOT</version>
<version>4.7-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>antlr4-runtime-test-annotation-processors</artifactId>
Expand Down
Loading

0 comments on commit 1cf2885

Please sign in to comment.