diff --git a/.gitignore b/.gitignore index 00040c195d..f517fa3f88 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,23 @@ runtime/PHP # Swift binaries .build/ + +# Cpp generated build files +runtime/Cpp/CMakeCache.txt +runtime/Cpp/CMakeFiles/ +runtime/Cpp/CPackConfig.cmake +runtime/Cpp/CPackSourceConfig.cmake +runtime/Cpp/CTestTestfile.cmake +runtime/Cpp/Makefile +runtime/Cpp/_deps/ +runtime/Cpp/cmake_install.cmake +runtime/Cpp/runtime/CMakeFiles/ +runtime/Cpp/runtime/CTestTestfile.cmake +runtime/Cpp/runtime/Makefile +runtime/Cpp/runtime/antlr4_tests +runtime/Cpp/runtime/antlr4_tests\[1]_include.cmake +runtime/Cpp/runtime/antlr4_tests\[1]_tests.cmake +runtime/Cpp/runtime/cmake_install.cmake +runtime/Cpp/runtime/libantlr4-runtime.4.10.1.dylib +runtime/Cpp/runtime/libantlr4-runtime.a +runtime/Cpp/runtime/libantlr4-runtime.dylib diff --git a/doc/go-target.md b/doc/go-target.md index 6bdac133b3..b85c39cc65 100644 --- a/doc/go-target.md +++ b/doc/go-target.md @@ -8,32 +8,102 @@ #### 2. Get the Go ANTLR runtime -Each target language for ANTLR has a runtime package for running parser generated by ANTLR4. The runtime provides a common set of tools for using your parser. +Each target language for ANTLR has a runtime package for running parser generated by ANTLR4. +The runtime provides a common set of tools for using your parser. Note that if you have existing projects and have +yet to replace the `v1.x.x` modules with the `v4` modules, then you can skip ahead to the section *Upgrading to v4 +from earlier versions* -Get the runtime and install it on your GOPATH: +The Go runtime uses modules and has a version path of `/v4` to stay in sync with the runtime versions of all the other runtimes. +Setup is the same as any other module based project: ```bash -go get github.com/antlr/antlr4/runtime/Go/antlr +$ cd mymodproject +$ go mod init mymodproject ``` -#### 3. Set the release tag (optional) +After which, you can use go get, to get the latest release version of the ANTLR v4 runtime using: -`go get` has no native way to specify a branch or commit. So, when you run it, you'll download the latest commits. This may or may not be your preference. +```bash +go get github.com/antlr/antlr4/runtime/Go/antlr/v4 +``` -You'll need to use git to set the release. For example, to set the release tag for release 4.9.3: +If your project is already using the v4 runtime, then you can upgrade to the latest release using the usual: ```bash -cd $GOPATH/src/github.com/antlr/antlr4 # enter the antlr4 source directory -git checkout tags/4.9.3 # the go runtime was added in release 4.9.3 +go get -u github.com/antlr/antlr4/runtime/Go/antlr/v4 +``` +If you have not yet upgraded existing projects to the `/v4` module path, consult the section *Upgrading to v4 +from earlier versions* + +The ANTLR runtime has only one external dependency, and that is part of the go system itself: + +``` +golang.org/x/exp +``` + +A complete list of releases can be found on [the release page](https://github.com/antlr/antlr4/releases). The Go +runtime will be tagged using standard Go tags, so release 4.11.0 will be tagged with `v4.11.0` and go get will pick +that up from the ANTLR repo. + +#### 3. Configuring `go generate` in your project + +In order to promote the use of repeatable builds, it is often useful to add the latest tool jar to your project's +repo and configure a `generate.sh` and `generate.go` file. You can of course globally alias the java command required to run the +tool. Your own CI and dev environment will guide you. + +Here is how you can configure `go generate` for your project, assuming that you follow the general recommendation to +place the ANTLR grammar files in their own package in your project structure. Here is a general template as a starting point: + +``` + . + ├── myproject + ├── parser + │ ├── mygrammar.g4 + │ ├── antlr-4.11.0-complete.jar + │ ├── error_listeners.go + │ ├── generate.go + │ ├── generate.sh + ├── go.mod + ├── go.sum + ├── main.go + └── main_test.go +``` + +Make sure that the package statement in your grammar file(s) reflects the go package they exist in. +The `generate.go` file then looks like this: + +```golang + package parser + + //go:generate ./generate.sh +``` + +And the `generate.sh` file will look similar to this: + +```shell + + #!/bin/sh + + alias antlr4='java -Xmx500M -cp "./antlr4-4.11.0-complete.jar:$CLASSPATH" org.antlr.v4.Tool' + antlr4 -Dlanguage=Go -no-visitor -package parser *.g4 +``` + +From the command line at the root of your package “myproject” you can then simply issue the command: + +```shell + go generate ./... ``` -A complete list of releases can be found on [the release page](https://github.com/antlr/antlr4/releases). +If you have not yet run a `go get`, you can now run `go mod tidy` and update your -#### 4. Generate your parser +#### 4. Generate your parser manually You use the ANTLR4 "tool" to generate a parser. These will reference the ANTLR runtime, installed above. -Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool as described in [the getting started guide](getting-started.md). To generate your go parser, you'll need to invoke: +Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool as described in +[the getting started guide](getting-started.md). + +To generate your go parser, you'll need to invoke: ```bash antlr4 -Dlanguage=Go MyGrammar.g4 @@ -41,17 +111,59 @@ antlr4 -Dlanguage=Go MyGrammar.g4 For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). +### Upgrading to `/v4` from the default path + +*NB: While switch to new module path would normally imply that the public interface for the runtime has changed, this is +not actually the case - you will not need to change your existing code to upgrade. The main point of the path change is so +that git tagging works with the ANTLR Go runtime.* + +Prior to release v4.11.0 the Go runtime shipped with a module but the module had no version path. This meant that +the tags in the ANTLR repo did not work, as any tag above `v1` must refer to a matching module path. +So the command `go get github.com/antlr/antlr4/runtime/Go/antlr` would just bring in +whatever was the `HEAD` of the master branch. While this *kind of* worked, it is obviously subject to problems and does +not fit properly with the idiomatic ways of Go. + +As of v4.11.0 the module path for the Go runtime is properly in sync with the repo tags. However, this means you need to +perform a few simple actions in order to upgrade to the `/v4` path. + + - Firstly, make sure that you are using an ANTLR tool jar with a version number of 4.11.0 or greater. + - Next you replace any mention of the old (default) path to ANTLR in your go source files. Don't worry that this will +modify your generated files as... + - Now regenerate your grammar files either manually or using `go generate ./...` (see above) + +A quick way to replace original module path references is to use this script from your module's base directory: + +```shell +find . -type f \ + -name '*.go' \ + -exec sed -i -e 's,github.com/antlr/antlr4/runtime/Go/antlr,github.com/antlr/antlr4/runtime/Go/antlr/v4,g' {} \; +``` +After performing the steps above, issuing: + +```shell +go mod tidy +``` +Should fix up your `go.mod` file to reference only the `v4` version of the ANTLR Go runtime: + +```shell +require github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.11.0-xxxxxx-xxxxxxxxx +``` + +From this point on, your go mod commands will work correctly with the ANTLR repo and upgrades and downgrades will work +as you expect. As will branch version such as @dev + ### Referencing the Go ANTLR runtime You can reference the go ANTLR runtime package like this: -```go -import "github.com/antlr/antlr4/runtime/Go/antlr" +```golang +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" ``` ### Complete example -Suppose you're using the JSON grammar from https://github.com/antlr/grammars-v4/tree/master/json. +Suppose you're using the JSON grammar from https://github.com/antlr/grammars-v4/tree/master/json placed in the parser +directory and have initialized your `go mod` file. Then, invoke `antlr4 -Dlanguage=Go JSON.g4`. The result of this is a collection of .go files in the `parser` directory including: ``` @@ -61,15 +173,17 @@ json_lexer.go json_listener.go ``` -Another common option to the ANTLR tool is `-visitor`, which generates a parse tree visitor, but we won't be doing that here. For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). +Another common option to the ANTLR tool is `-visitor`, which generates a parse tree visitor, but we won't be doing that here. +For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). -We'll write a small main func to call the generated parser/lexer (assuming they are separate). This one writes out the encountered `ParseTreeContext`'s. Suppose the gen'ed parser code is in the `parser` directory relative to this code: +We'll write a small main func to call the generated parser/lexer (assuming they are separate). This one writes out the +encountered `ParseTreeContext`'s. Assuming the generated parser code is in the `parser` directory relative to this code: ```golang package main import ( - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" "./parser" "os" "fmt" diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg index 184cc2b4f7..d8b479b977 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg @@ -1,7 +1,7 @@ package main import ( "test/parser" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" "fmt" "os" ) diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java index 3316debf40..d66df24ded 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java @@ -64,7 +64,7 @@ public String[] getExtraRunArgs() { return new String[]{"run"}; } - private static final String GoRuntimeImportPath = "github.com/antlr/antlr4/runtime/Go/antlr"; + private static final String GoRuntimeImportPath = "github.com/antlr/antlr4/runtime/Go/antlr/v4"; private final static Map environment; @@ -138,4 +138,4 @@ protected CompiledState compile(RunOptions runOptions, GeneratedState generatedS public Map getExecEnvironment() { return environment; } -} +} \ No newline at end of file diff --git a/runtime/Go/antlr/v4/LICENSE b/runtime/Go/antlr/v4/LICENSE new file mode 100644 index 0000000000..52cf18e425 --- /dev/null +++ b/runtime/Go/antlr/v4/LICENSE @@ -0,0 +1,26 @@ +Copyright 2021 The ANTLR Project + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/runtime/Go/antlr/v4/antlrdoc.go b/runtime/Go/antlr/v4/antlrdoc.go new file mode 100644 index 0000000000..aa3a5b363d --- /dev/null +++ b/runtime/Go/antlr/v4/antlrdoc.go @@ -0,0 +1,68 @@ +/* +Package antlr implements the Go version of the ANTLR 4 runtime. + +# The ANTLR Tool + +ANTLR (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, +or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. +From a grammar, ANTLR generates a parser that can build parse trees and also generates a listener interface +(or visitor) that makes it easy to respond to the recognition of phrases of interest. + +# Code Generation + +ANTLR supports the generation of code in a number of [target languages], and the generated code is supported by a +runtime library, written specifically to support the generated code in the target language. This library is the +runtime for the Go target. + +To generate code for the go target, it is generally recommended to place the source grammar files in a package of +their own, and use the `.sh` script method of generating code, using the go generate directive. In that same directory +it is usual, though not required, to place the antlr tool that should be used to generate the code. That does mean +that the antlr tool JAR file will be checked in to your source code control though, so you are free to use any other +way of specifying the version of the ANTLR tool to use, such as aliasing in `.zshrc` or equivalent, or a profile in +your IDE, or configuration in your CI system. + +Here is a general template for an ANTLR based recognizer in Go: + + . + ├── myproject + ├── parser + │ ├── mygrammar.g4 + │ ├── antlr-4.11.0-complete.jar + │ ├── error_listeners.go + │ ├── generate.go + │ ├── generate.sh + ├── go.mod + ├── go.sum + ├── main.go + └── main_test.go + +Make sure that the package statement in your grammar file(s) reflects the go package they exist in. +The generate.go file then looks like this: + + package parser + + //go:generate ./generate.sh + +And the generate.sh file will look similar to this: + + #!/bin/sh + + alias antlr4='java -Xmx500M -cp "./antlr4-4.11.0-complete.jar:$CLASSPATH" org.antlr.v4.Tool' + antlr4 -Dlanguage=Go -no-visitor -package parser *.g4 + +depending on whether you want visitors or listeners or any other ANTLR options. + +From the command line at the root of your package “myproject” you can then simply issue the command: + + go generate ./... + +# Copyright Notice + +Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + +Use of this file is governed by the BSD 3-clause license, which can be found in the [LICENSE.txt] file in the project root. + +[target languages]: https://github.com/antlr/antlr4/tree/master/runtime +[LICENSE.txt]: https://github.com/antlr/antlr4/blob/master/LICENSE.txt +*/ +package antlr diff --git a/runtime/Go/antlr/v4/atn.go b/runtime/Go/antlr/v4/atn.go new file mode 100644 index 0000000000..98010d2e6e --- /dev/null +++ b/runtime/Go/antlr/v4/atn.go @@ -0,0 +1,176 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "sync" + +// ATNInvalidAltNumber is used to represent an ALT number that has yet to be calculated or +// which is invalid for a particular struct such as [*antlr.BaseRuleContext] +var ATNInvalidAltNumber int + +// ATN represents an “[Augmented Transition Network]”, though general in ANTLR the term +// “Augmented Recursive Transition Network” though there are some descriptions of “[Recursive Transition Network]” +// in existence. +// +// ATNs represent the main networks in the system and are serialized by the code generator and support [ALL(*)]. +// +// [Augmented Transition Network]: https://en.wikipedia.org/wiki/Augmented_transition_network +// [ALL(*)]: https://www.antlr.org/papers/allstar-techreport.pdf +// [Recursive Transition Network]: https://en.wikipedia.org/wiki/Recursive_transition_network +type ATN struct { + // DecisionToState is the decision points for all rules, subrules, optional + // blocks, ()+, ()*, etc. Each subrule/rule is a decision point, and we must track them so we + // can go back later and build DFA predictors for them. This includes + // all the rules, subrules, optional blocks, ()+, ()* etc... + DecisionToState []DecisionState + + // grammarType is the ATN type and is used for deserializing ATNs from strings. + grammarType int + + // lexerActions is referenced by action transitions in the ATN for lexer ATNs. + lexerActions []LexerAction + + // maxTokenType is the maximum value for any symbol recognized by a transition in the ATN. + maxTokenType int + + modeNameToStartState map[string]*TokensStartState + + modeToStartState []*TokensStartState + + // ruleToStartState maps from rule index to starting state number. + ruleToStartState []*RuleStartState + + // ruleToStopState maps from rule index to stop state number. + ruleToStopState []*RuleStopState + + // ruleToTokenType maps the rule index to the resulting token type for lexer + // ATNs. For parser ATNs, it maps the rule index to the generated bypass token + // type if ATNDeserializationOptions.isGenerateRuleBypassTransitions was + // specified, and otherwise is nil. + ruleToTokenType []int + + states []ATNState + + mu sync.Mutex + stateMu sync.RWMutex + edgeMu sync.RWMutex +} + +// NewATN returns a new ATN struct representing the given grammarType and is used +// for runtime deserialization of ATNs from the code generated by the ANTLR tool +func NewATN(grammarType int, maxTokenType int) *ATN { + return &ATN{ + grammarType: grammarType, + maxTokenType: maxTokenType, + modeNameToStartState: make(map[string]*TokensStartState), + } +} + +// NextTokensInContext computes and returns the set of valid tokens that can occur starting +// in state s. If ctx is nil, the set of tokens will not include what can follow +// the rule surrounding s. In other words, the set will be restricted to tokens +// reachable staying within the rule of s. +func (a *ATN) NextTokensInContext(s ATNState, ctx RuleContext) *IntervalSet { + return NewLL1Analyzer(a).Look(s, nil, ctx) +} + +// NextTokensNoContext computes and returns the set of valid tokens that can occur starting +// in state s and staying in same rule. [antlr.Token.EPSILON] is in set if we reach end of +// rule. +func (a *ATN) NextTokensNoContext(s ATNState) *IntervalSet { + a.mu.Lock() + defer a.mu.Unlock() + iset := s.GetNextTokenWithinRule() + if iset == nil { + iset = a.NextTokensInContext(s, nil) + iset.readOnly = true + s.SetNextTokenWithinRule(iset) + } + return iset +} + +// NextTokens computes and returns the set of valid tokens starting in state s, by +// calling either [NextTokensNoContext] (ctx == nil) or [NextTokensInContext] (ctx != nil). +func (a *ATN) NextTokens(s ATNState, ctx RuleContext) *IntervalSet { + if ctx == nil { + return a.NextTokensNoContext(s) + } + + return a.NextTokensInContext(s, ctx) +} + +func (a *ATN) addState(state ATNState) { + if state != nil { + state.SetATN(a) + state.SetStateNumber(len(a.states)) + } + + a.states = append(a.states, state) +} + +func (a *ATN) removeState(state ATNState) { + a.states[state.GetStateNumber()] = nil // Just free the memory; don't shift states in the slice +} + +func (a *ATN) defineDecisionState(s DecisionState) int { + a.DecisionToState = append(a.DecisionToState, s) + s.setDecision(len(a.DecisionToState) - 1) + + return s.getDecision() +} + +func (a *ATN) getDecisionState(decision int) DecisionState { + if len(a.DecisionToState) == 0 { + return nil + } + + return a.DecisionToState[decision] +} + +// getExpectedTokens computes the set of input symbols which could follow ATN +// state number stateNumber in the specified full parse context ctx and returns +// the set of potentially valid input symbols which could follow the specified +// state in the specified context. This method considers the complete parser +// context, but does not evaluate semantic predicates (i.e. all predicates +// encountered during the calculation are assumed true). If a path in the ATN +// exists from the starting state to the RuleStopState of the outermost context +// without Matching any symbols, Token.EOF is added to the returned set. +// +// A nil ctx defaults to ParserRuleContext.EMPTY. +// +// It panics if the ATN does not contain state stateNumber. +func (a *ATN) getExpectedTokens(stateNumber int, ctx RuleContext) *IntervalSet { + if stateNumber < 0 || stateNumber >= len(a.states) { + panic("Invalid state number.") + } + + s := a.states[stateNumber] + following := a.NextTokens(s, nil) + + if !following.contains(TokenEpsilon) { + return following + } + + expected := NewIntervalSet() + + expected.addSet(following) + expected.removeOne(TokenEpsilon) + + for ctx != nil && ctx.GetInvokingState() >= 0 && following.contains(TokenEpsilon) { + invokingState := a.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + + following = a.NextTokens(rt.(*RuleTransition).followState, nil) + expected.addSet(following) + expected.removeOne(TokenEpsilon) + ctx = ctx.GetParent().(RuleContext) + } + + if following.contains(TokenEpsilon) { + expected.addOne(TokenEOF) + } + + return expected +} diff --git a/runtime/Go/antlr/v4/atn_config.go b/runtime/Go/antlr/v4/atn_config.go new file mode 100644 index 0000000000..7619fa172e --- /dev/null +++ b/runtime/Go/antlr/v4/atn_config.go @@ -0,0 +1,303 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" +) + +// ATNConfig is a tuple: (ATN state, predicted alt, syntactic, semantic +// context). The syntactic context is a graph-structured stack node whose +// path(s) to the root is the rule invocation(s) chain used to arrive at the +// state. The semantic context is the tree of semantic predicates encountered +// before reaching an ATN state. +type ATNConfig interface { + Equals(o Collectable[ATNConfig]) bool + Hash() int + + GetState() ATNState + GetAlt() int + GetSemanticContext() SemanticContext + + GetContext() PredictionContext + SetContext(PredictionContext) + + GetReachesIntoOuterContext() int + SetReachesIntoOuterContext(int) + + String() string + + getPrecedenceFilterSuppressed() bool + setPrecedenceFilterSuppressed(bool) +} + +type BaseATNConfig struct { + precedenceFilterSuppressed bool + state ATNState + alt int + context PredictionContext + semanticContext SemanticContext + reachesIntoOuterContext int +} + +func NewBaseATNConfig7(old *BaseATNConfig) ATNConfig { // TODO: Dup + return &BaseATNConfig{ + state: old.state, + alt: old.alt, + context: old.context, + semanticContext: old.semanticContext, + reachesIntoOuterContext: old.reachesIntoOuterContext, + } +} + +func NewBaseATNConfig6(state ATNState, alt int, context PredictionContext) *BaseATNConfig { + return NewBaseATNConfig5(state, alt, context, SemanticContextNone) +} + +func NewBaseATNConfig5(state ATNState, alt int, context PredictionContext, semanticContext SemanticContext) *BaseATNConfig { + if semanticContext == nil { + panic("semanticContext cannot be nil") // TODO: Necessary? + } + + return &BaseATNConfig{state: state, alt: alt, context: context, semanticContext: semanticContext} +} + +func NewBaseATNConfig4(c ATNConfig, state ATNState) *BaseATNConfig { + return NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()) +} + +func NewBaseATNConfig3(c ATNConfig, state ATNState, semanticContext SemanticContext) *BaseATNConfig { + return NewBaseATNConfig(c, state, c.GetContext(), semanticContext) +} + +func NewBaseATNConfig2(c ATNConfig, semanticContext SemanticContext) *BaseATNConfig { + return NewBaseATNConfig(c, c.GetState(), c.GetContext(), semanticContext) +} + +func NewBaseATNConfig1(c ATNConfig, state ATNState, context PredictionContext) *BaseATNConfig { + return NewBaseATNConfig(c, state, context, c.GetSemanticContext()) +} + +func NewBaseATNConfig(c ATNConfig, state ATNState, context PredictionContext, semanticContext SemanticContext) *BaseATNConfig { + if semanticContext == nil { + panic("semanticContext cannot be nil") + } + + return &BaseATNConfig{ + state: state, + alt: c.GetAlt(), + context: context, + semanticContext: semanticContext, + reachesIntoOuterContext: c.GetReachesIntoOuterContext(), + precedenceFilterSuppressed: c.getPrecedenceFilterSuppressed(), + } +} + +func (b *BaseATNConfig) getPrecedenceFilterSuppressed() bool { + return b.precedenceFilterSuppressed +} + +func (b *BaseATNConfig) setPrecedenceFilterSuppressed(v bool) { + b.precedenceFilterSuppressed = v +} + +func (b *BaseATNConfig) GetState() ATNState { + return b.state +} + +func (b *BaseATNConfig) GetAlt() int { + return b.alt +} + +func (b *BaseATNConfig) SetContext(v PredictionContext) { + b.context = v +} +func (b *BaseATNConfig) GetContext() PredictionContext { + return b.context +} + +func (b *BaseATNConfig) GetSemanticContext() SemanticContext { + return b.semanticContext +} + +func (b *BaseATNConfig) GetReachesIntoOuterContext() int { + return b.reachesIntoOuterContext +} + +func (b *BaseATNConfig) SetReachesIntoOuterContext(v int) { + b.reachesIntoOuterContext = v +} + +// Equals is the default comparison function for an ATNConfig when no specialist implementation is required +// for a collection. +// +// An ATN configuration is equal to another if both have the same state, they +// predict the same alternative, and syntactic/semantic contexts are the same. +func (b *BaseATNConfig) Equals(o Collectable[ATNConfig]) bool { + if b == o { + return true + } else if o == nil { + return false + } + + var other, ok = o.(*BaseATNConfig) + + if !ok { + return false + } + + var equal bool + + if b.context == nil { + equal = other.context == nil + } else { + equal = b.context.Equals(other.context) + } + + var ( + nums = b.state.GetStateNumber() == other.state.GetStateNumber() + alts = b.alt == other.alt + cons = b.semanticContext.Equals(other.semanticContext) + sups = b.precedenceFilterSuppressed == other.precedenceFilterSuppressed + ) + + return nums && alts && cons && sups && equal +} + +// Hash is the default hash function for BaseATNConfig, when no specialist hash function +// is required for a collection +func (b *BaseATNConfig) Hash() int { + var c int + if b.context != nil { + c = b.context.Hash() + } + + h := murmurInit(7) + h = murmurUpdate(h, b.state.GetStateNumber()) + h = murmurUpdate(h, b.alt) + h = murmurUpdate(h, c) + h = murmurUpdate(h, b.semanticContext.Hash()) + return murmurFinish(h, 4) +} + +func (b *BaseATNConfig) String() string { + var s1, s2, s3 string + + if b.context != nil { + s1 = ",[" + fmt.Sprint(b.context) + "]" + } + + if b.semanticContext != SemanticContextNone { + s2 = "," + fmt.Sprint(b.semanticContext) + } + + if b.reachesIntoOuterContext > 0 { + s3 = ",up=" + fmt.Sprint(b.reachesIntoOuterContext) + } + + return fmt.Sprintf("(%v,%v%v%v%v)", b.state, b.alt, s1, s2, s3) +} + +type LexerATNConfig struct { + *BaseATNConfig + lexerActionExecutor *LexerActionExecutor + passedThroughNonGreedyDecision bool +} + +func NewLexerATNConfig6(state ATNState, alt int, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone)} +} + +func NewLexerATNConfig5(state ATNState, alt int, context PredictionContext, lexerActionExecutor *LexerActionExecutor) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone), + lexerActionExecutor: lexerActionExecutor, + } +} + +func NewLexerATNConfig4(c *LexerATNConfig, state ATNState) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()), + lexerActionExecutor: c.lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig3(c *LexerATNConfig, state ATNState, lexerActionExecutor *LexerActionExecutor) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()), + lexerActionExecutor: lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig2(c *LexerATNConfig, state ATNState, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, context, c.GetSemanticContext()), + lexerActionExecutor: c.lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig1(state ATNState, alt int, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone)} +} + +// Hash is the default hash function for LexerATNConfig objects, it can be used directly or via +// the default comparator [ObjEqComparator]. +func (l *LexerATNConfig) Hash() int { + var f int + if l.passedThroughNonGreedyDecision { + f = 1 + } else { + f = 0 + } + h := murmurInit(7) + h = murmurUpdate(h, l.state.GetStateNumber()) + h = murmurUpdate(h, l.alt) + h = murmurUpdate(h, l.context.Hash()) + h = murmurUpdate(h, l.semanticContext.Hash()) + h = murmurUpdate(h, f) + h = murmurUpdate(h, l.lexerActionExecutor.Hash()) + h = murmurFinish(h, 6) + return h +} + +// Equals is the default comparison function for LexerATNConfig objects, it can be used directly or via +// the default comparator [ObjEqComparator]. +func (l *LexerATNConfig) Equals(other Collectable[ATNConfig]) bool { + if l == other { + return true + } + var othert, ok = other.(*LexerATNConfig) + + if l == other { + return true + } else if !ok { + return false + } else if l.passedThroughNonGreedyDecision != othert.passedThroughNonGreedyDecision { + return false + } + + var b bool + + if l.lexerActionExecutor != nil { + b = !l.lexerActionExecutor.Equals(othert.lexerActionExecutor) + } else { + b = othert.lexerActionExecutor != nil + } + + if b { + return false + } + + return l.BaseATNConfig.Equals(othert.BaseATNConfig) +} + +func checkNonGreedyDecision(source *LexerATNConfig, target ATNState) bool { + var ds, ok = target.(DecisionState) + + return source.passedThroughNonGreedyDecision || (ok && ds.getNonGreedy()) +} diff --git a/runtime/Go/antlr/v4/atn_config_set.go b/runtime/Go/antlr/v4/atn_config_set.go new file mode 100644 index 0000000000..582f5fa4b5 --- /dev/null +++ b/runtime/Go/antlr/v4/atn_config_set.go @@ -0,0 +1,439 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +type ATNConfigSet interface { + Hash() int + Equals(o Collectable[ATNConfig]) bool + Add(ATNConfig, *DoubleDict) bool + AddAll([]ATNConfig) bool + + GetStates() *JStore[ATNState, Comparator[ATNState]] + GetPredicates() []SemanticContext + GetItems() []ATNConfig + + OptimizeConfigs(interpreter *BaseATNSimulator) + + Length() int + IsEmpty() bool + Contains(ATNConfig) bool + ContainsFast(ATNConfig) bool + Clear() + String() string + + HasSemanticContext() bool + SetHasSemanticContext(v bool) + + ReadOnly() bool + SetReadOnly(bool) + + GetConflictingAlts() *BitSet + SetConflictingAlts(*BitSet) + + Alts() *BitSet + + FullContext() bool + + GetUniqueAlt() int + SetUniqueAlt(int) + + GetDipsIntoOuterContext() bool + SetDipsIntoOuterContext(bool) +} + +// BaseATNConfigSet is a specialized set of ATNConfig that tracks information +// about its elements and can combine similar configurations using a +// graph-structured stack. +type BaseATNConfigSet struct { + cachedHash int + + // configLookup is used to determine whether two BaseATNConfigSets are equal. We + // need all configurations with the same (s, i, _, semctx) to be equal. A key + // effectively doubles the number of objects associated with ATNConfigs. All + // keys are hashed by (s, i, _, pi), not including the context. Wiped out when + // read-only because a set becomes a DFA state. + configLookup *JStore[ATNConfig, Comparator[ATNConfig]] + + // configs is the added elements. + configs []ATNConfig + + // TODO: These fields make me pretty uncomfortable, but it is nice to pack up + // info together because it saves recomputation. Can we track conflicts as they + // are added to save scanning configs later? + conflictingAlts *BitSet + + // dipsIntoOuterContext is used by parsers and lexers. In a lexer, it indicates + // we hit a pred while computing a closure operation. Do not make a DFA state + // from the BaseATNConfigSet in this case. TODO: How is this used by parsers? + dipsIntoOuterContext bool + + // fullCtx is whether it is part of a full context LL prediction. Used to + // determine how to merge $. It is a wildcard with SLL, but not for an LL + // context merge. + fullCtx bool + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from a. + hasSemanticContext bool + + // readOnly is whether it is read-only. Do not + // allow any code to manipulate the set if true because DFA states will point at + // sets and those must not change. It not, protect other fields; conflictingAlts + // in particular, which is assigned after readOnly. + readOnly bool + + // TODO: These fields make me pretty uncomfortable, but it is nice to pack up + // info together because it saves recomputation. Can we track conflicts as they + // are added to save scanning configs later? + uniqueAlt int +} + +func (b *BaseATNConfigSet) Alts() *BitSet { + alts := NewBitSet() + for _, it := range b.configs { + alts.add(it.GetAlt()) + } + return alts +} + +func NewBaseATNConfigSet(fullCtx bool) *BaseATNConfigSet { + return &BaseATNConfigSet{ + cachedHash: -1, + configLookup: NewJStore[ATNConfig, Comparator[ATNConfig]](&ATNConfigComparator[ATNConfig]{}), + fullCtx: fullCtx, + } +} + +// Add merges contexts with existing configs for (s, i, pi, _), where s is the +// ATNConfig.state, i is the ATNConfig.alt, and pi is the +// ATNConfig.semanticContext. We use (s,i,pi) as the key. Updates +// dipsIntoOuterContext and hasSemanticContext when necessary. +func (b *BaseATNConfigSet) Add(config ATNConfig, mergeCache *DoubleDict) bool { + if b.readOnly { + panic("set is read-only") + } + + if config.GetSemanticContext() != SemanticContextNone { + b.hasSemanticContext = true + } + + if config.GetReachesIntoOuterContext() > 0 { + b.dipsIntoOuterContext = true + } + + existing, present := b.configLookup.Put(config) + + // The config was not already in the set + // + if !present { + b.cachedHash = -1 + b.configs = append(b.configs, config) // Track order here + return true + } + + // Merge a previous (s, i, pi, _) with it and save the result + rootIsWildcard := !b.fullCtx + merged := merge(existing.GetContext(), config.GetContext(), rootIsWildcard, mergeCache) + + // No need to check for existing.context because config.context is in the cache, + // since the only way to create new graphs is the "call rule" and here. We cache + // at both places. + existing.SetReachesIntoOuterContext(intMax(existing.GetReachesIntoOuterContext(), config.GetReachesIntoOuterContext())) + + // Preserve the precedence filter suppression during the merge + if config.getPrecedenceFilterSuppressed() { + existing.setPrecedenceFilterSuppressed(true) + } + + // Replace the context because there is no need to do alt mapping + existing.SetContext(merged) + + return true +} + +func (b *BaseATNConfigSet) GetStates() *JStore[ATNState, Comparator[ATNState]] { + + // states uses the standard comparator provided by the ATNState instance + // + states := NewJStore[ATNState, Comparator[ATNState]](&ObjEqComparator[ATNState]{}) + + for i := 0; i < len(b.configs); i++ { + states.Put(b.configs[i].GetState()) + } + + return states +} + +func (b *BaseATNConfigSet) HasSemanticContext() bool { + return b.hasSemanticContext +} + +func (b *BaseATNConfigSet) SetHasSemanticContext(v bool) { + b.hasSemanticContext = v +} + +func (b *BaseATNConfigSet) GetPredicates() []SemanticContext { + preds := make([]SemanticContext, 0) + + for i := 0; i < len(b.configs); i++ { + c := b.configs[i].GetSemanticContext() + + if c != SemanticContextNone { + preds = append(preds, c) + } + } + + return preds +} + +func (b *BaseATNConfigSet) GetItems() []ATNConfig { + return b.configs +} + +func (b *BaseATNConfigSet) OptimizeConfigs(interpreter *BaseATNSimulator) { + if b.readOnly { + panic("set is read-only") + } + + if b.configLookup.Len() == 0 { + return + } + + for i := 0; i < len(b.configs); i++ { + config := b.configs[i] + + config.SetContext(interpreter.getCachedContext(config.GetContext())) + } +} + +func (b *BaseATNConfigSet) AddAll(coll []ATNConfig) bool { + for i := 0; i < len(coll); i++ { + b.Add(coll[i], nil) + } + + return false +} + +// Compare is a hack function just to verify that adding DFAstares to the known +// set works, so long as comparison of ATNConfigSet s works. For that to work, we +// need to make sure that the set of ATNConfigs in two sets are equivalent. We can't +// know the order, so we do this inefficient hack. If this proves the point, then +// we can change the config set to a better structure. +func (b *BaseATNConfigSet) Compare(bs *BaseATNConfigSet) bool { + if len(b.configs) != len(bs.configs) { + return false + } + + for _, c := range b.configs { + found := false + for _, c2 := range bs.configs { + if c.Equals(c2) { + found = true + break + } + } + + if !found { + return false + } + + } + return true +} + +func (b *BaseATNConfigSet) Equals(other Collectable[ATNConfig]) bool { + if b == other { + return true + } else if _, ok := other.(*BaseATNConfigSet); !ok { + return false + } + + other2 := other.(*BaseATNConfigSet) + + return b.configs != nil && + b.fullCtx == other2.fullCtx && + b.uniqueAlt == other2.uniqueAlt && + b.conflictingAlts == other2.conflictingAlts && + b.hasSemanticContext == other2.hasSemanticContext && + b.dipsIntoOuterContext == other2.dipsIntoOuterContext && + b.Compare(other2) +} + +func (b *BaseATNConfigSet) Hash() int { + if b.readOnly { + if b.cachedHash == -1 { + b.cachedHash = b.hashCodeConfigs() + } + + return b.cachedHash + } + + return b.hashCodeConfigs() +} + +func (b *BaseATNConfigSet) hashCodeConfigs() int { + h := 1 + for _, config := range b.configs { + h = 31*h + config.Hash() + } + return h +} + +func (b *BaseATNConfigSet) Length() int { + return len(b.configs) +} + +func (b *BaseATNConfigSet) IsEmpty() bool { + return len(b.configs) == 0 +} + +func (b *BaseATNConfigSet) Contains(item ATNConfig) bool { + if b.configLookup == nil { + panic("not implemented for read-only sets") + } + + return b.configLookup.Contains(item) +} + +func (b *BaseATNConfigSet) ContainsFast(item ATNConfig) bool { + if b.configLookup == nil { + panic("not implemented for read-only sets") + } + + return b.configLookup.Contains(item) // TODO: containsFast is not implemented for Set +} + +func (b *BaseATNConfigSet) Clear() { + if b.readOnly { + panic("set is read-only") + } + + b.configs = make([]ATNConfig, 0) + b.cachedHash = -1 + b.configLookup = NewJStore[ATNConfig, Comparator[ATNConfig]](&BaseATNConfigComparator[ATNConfig]{}) +} + +func (b *BaseATNConfigSet) FullContext() bool { + return b.fullCtx +} + +func (b *BaseATNConfigSet) GetDipsIntoOuterContext() bool { + return b.dipsIntoOuterContext +} + +func (b *BaseATNConfigSet) SetDipsIntoOuterContext(v bool) { + b.dipsIntoOuterContext = v +} + +func (b *BaseATNConfigSet) GetUniqueAlt() int { + return b.uniqueAlt +} + +func (b *BaseATNConfigSet) SetUniqueAlt(v int) { + b.uniqueAlt = v +} + +func (b *BaseATNConfigSet) GetConflictingAlts() *BitSet { + return b.conflictingAlts +} + +func (b *BaseATNConfigSet) SetConflictingAlts(v *BitSet) { + b.conflictingAlts = v +} + +func (b *BaseATNConfigSet) ReadOnly() bool { + return b.readOnly +} + +func (b *BaseATNConfigSet) SetReadOnly(readOnly bool) { + b.readOnly = readOnly + + if readOnly { + b.configLookup = nil // Read only, so no need for the lookup cache + } +} + +func (b *BaseATNConfigSet) String() string { + s := "[" + + for i, c := range b.configs { + s += c.String() + + if i != len(b.configs)-1 { + s += ", " + } + } + + s += "]" + + if b.hasSemanticContext { + s += ",hasSemanticContext=" + fmt.Sprint(b.hasSemanticContext) + } + + if b.uniqueAlt != ATNInvalidAltNumber { + s += ",uniqueAlt=" + fmt.Sprint(b.uniqueAlt) + } + + if b.conflictingAlts != nil { + s += ",conflictingAlts=" + b.conflictingAlts.String() + } + + if b.dipsIntoOuterContext { + s += ",dipsIntoOuterContext" + } + + return s +} + +type OrderedATNConfigSet struct { + *BaseATNConfigSet +} + +func NewOrderedATNConfigSet() *OrderedATNConfigSet { + b := NewBaseATNConfigSet(false) + + // This set uses the standard Hash() and Equals() from ATNConfig + b.configLookup = NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + + return &OrderedATNConfigSet{BaseATNConfigSet: b} +} + +func hashATNConfig(i interface{}) int { + o := i.(ATNConfig) + hash := 7 + hash = 31*hash + o.GetState().GetStateNumber() + hash = 31*hash + o.GetAlt() + hash = 31*hash + o.GetSemanticContext().Hash() + return hash +} + +func equalATNConfigs(a, b interface{}) bool { + if a == nil || b == nil { + return false + } + + if a == b { + return true + } + + var ai, ok = a.(ATNConfig) + var bi, ok1 = b.(ATNConfig) + + if !ok || !ok1 { + return false + } + + if ai.GetState().GetStateNumber() != bi.GetState().GetStateNumber() { + return false + } + + if ai.GetAlt() != bi.GetAlt() { + return false + } + + return ai.GetSemanticContext().Equals(bi.GetSemanticContext()) +} diff --git a/runtime/Go/antlr/v4/atn_deserialization_options.go b/runtime/Go/antlr/v4/atn_deserialization_options.go new file mode 100644 index 0000000000..3c975ec7bf --- /dev/null +++ b/runtime/Go/antlr/v4/atn_deserialization_options.go @@ -0,0 +1,61 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "errors" + +var defaultATNDeserializationOptions = ATNDeserializationOptions{true, true, false} + +type ATNDeserializationOptions struct { + readOnly bool + verifyATN bool + generateRuleBypassTransitions bool +} + +func (opts *ATNDeserializationOptions) ReadOnly() bool { + return opts.readOnly +} + +func (opts *ATNDeserializationOptions) SetReadOnly(readOnly bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.readOnly = readOnly +} + +func (opts *ATNDeserializationOptions) VerifyATN() bool { + return opts.verifyATN +} + +func (opts *ATNDeserializationOptions) SetVerifyATN(verifyATN bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.verifyATN = verifyATN +} + +func (opts *ATNDeserializationOptions) GenerateRuleBypassTransitions() bool { + return opts.generateRuleBypassTransitions +} + +func (opts *ATNDeserializationOptions) SetGenerateRuleBypassTransitions(generateRuleBypassTransitions bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.generateRuleBypassTransitions = generateRuleBypassTransitions +} + +func DefaultATNDeserializationOptions() *ATNDeserializationOptions { + return NewATNDeserializationOptions(&defaultATNDeserializationOptions) +} + +func NewATNDeserializationOptions(other *ATNDeserializationOptions) *ATNDeserializationOptions { + o := new(ATNDeserializationOptions) + if other != nil { + *o = *other + o.readOnly = false + } + return o +} diff --git a/runtime/Go/antlr/v4/atn_deserializer.go b/runtime/Go/antlr/v4/atn_deserializer.go new file mode 100644 index 0000000000..3888856b4b --- /dev/null +++ b/runtime/Go/antlr/v4/atn_deserializer.go @@ -0,0 +1,683 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +const serializedVersion = 4 + +type loopEndStateIntPair struct { + item0 *LoopEndState + item1 int +} + +type blockStartStateIntPair struct { + item0 BlockStartState + item1 int +} + +type ATNDeserializer struct { + options *ATNDeserializationOptions + data []int32 + pos int +} + +func NewATNDeserializer(options *ATNDeserializationOptions) *ATNDeserializer { + if options == nil { + options = &defaultATNDeserializationOptions + } + + return &ATNDeserializer{options: options} +} + +func stringInSlice(a string, list []string) int { + for i, b := range list { + if b == a { + return i + } + } + + return -1 +} + +func (a *ATNDeserializer) Deserialize(data []int32) *ATN { + a.data = data + a.pos = 0 + a.checkVersion() + + atn := a.readATN() + + a.readStates(atn) + a.readRules(atn) + a.readModes(atn) + + sets := a.readSets(atn, nil) + + a.readEdges(atn, sets) + a.readDecisions(atn) + a.readLexerActions(atn) + a.markPrecedenceDecisions(atn) + a.verifyATN(atn) + + if a.options.GenerateRuleBypassTransitions() && atn.grammarType == ATNTypeParser { + a.generateRuleBypassTransitions(atn) + // Re-verify after modification + a.verifyATN(atn) + } + + return atn + +} + +func (a *ATNDeserializer) checkVersion() { + version := a.readInt() + + if version != serializedVersion { + panic("Could not deserialize ATN with version " + strconv.Itoa(version) + " (expected " + strconv.Itoa(serializedVersion) + ").") + } +} + +func (a *ATNDeserializer) readATN() *ATN { + grammarType := a.readInt() + maxTokenType := a.readInt() + + return NewATN(grammarType, maxTokenType) +} + +func (a *ATNDeserializer) readStates(atn *ATN) { + nstates := a.readInt() + + // Allocate worst case size. + loopBackStateNumbers := make([]loopEndStateIntPair, 0, nstates) + endStateNumbers := make([]blockStartStateIntPair, 0, nstates) + + // Preallocate states slice. + atn.states = make([]ATNState, 0, nstates) + + for i := 0; i < nstates; i++ { + stype := a.readInt() + + // Ignore bad types of states + if stype == ATNStateInvalidType { + atn.addState(nil) + continue + } + + ruleIndex := a.readInt() + + s := a.stateFactory(stype, ruleIndex) + + if stype == ATNStateLoopEnd { + loopBackStateNumber := a.readInt() + + loopBackStateNumbers = append(loopBackStateNumbers, loopEndStateIntPair{s.(*LoopEndState), loopBackStateNumber}) + } else if s2, ok := s.(BlockStartState); ok { + endStateNumber := a.readInt() + + endStateNumbers = append(endStateNumbers, blockStartStateIntPair{s2, endStateNumber}) + } + + atn.addState(s) + } + + // Delay the assignment of loop back and end states until we know all the state + // instances have been initialized + for _, pair := range loopBackStateNumbers { + pair.item0.loopBackState = atn.states[pair.item1] + } + + for _, pair := range endStateNumbers { + pair.item0.setEndState(atn.states[pair.item1].(*BlockEndState)) + } + + numNonGreedyStates := a.readInt() + for j := 0; j < numNonGreedyStates; j++ { + stateNumber := a.readInt() + + atn.states[stateNumber].(DecisionState).setNonGreedy(true) + } + + numPrecedenceStates := a.readInt() + for j := 0; j < numPrecedenceStates; j++ { + stateNumber := a.readInt() + + atn.states[stateNumber].(*RuleStartState).isPrecedenceRule = true + } +} + +func (a *ATNDeserializer) readRules(atn *ATN) { + nrules := a.readInt() + + if atn.grammarType == ATNTypeLexer { + atn.ruleToTokenType = make([]int, nrules) + } + + atn.ruleToStartState = make([]*RuleStartState, nrules) + + for i := range atn.ruleToStartState { + s := a.readInt() + startState := atn.states[s].(*RuleStartState) + + atn.ruleToStartState[i] = startState + + if atn.grammarType == ATNTypeLexer { + tokenType := a.readInt() + + atn.ruleToTokenType[i] = tokenType + } + } + + atn.ruleToStopState = make([]*RuleStopState, nrules) + + for _, state := range atn.states { + if s2, ok := state.(*RuleStopState); ok { + atn.ruleToStopState[s2.ruleIndex] = s2 + atn.ruleToStartState[s2.ruleIndex].stopState = s2 + } + } +} + +func (a *ATNDeserializer) readModes(atn *ATN) { + nmodes := a.readInt() + atn.modeToStartState = make([]*TokensStartState, nmodes) + + for i := range atn.modeToStartState { + s := a.readInt() + + atn.modeToStartState[i] = atn.states[s].(*TokensStartState) + } +} + +func (a *ATNDeserializer) readSets(atn *ATN, sets []*IntervalSet) []*IntervalSet { + m := a.readInt() + + // Preallocate the needed capacity. + if cap(sets)-len(sets) < m { + isets := make([]*IntervalSet, len(sets), len(sets)+m) + copy(isets, sets) + sets = isets + } + + for i := 0; i < m; i++ { + iset := NewIntervalSet() + + sets = append(sets, iset) + + n := a.readInt() + containsEOF := a.readInt() + + if containsEOF != 0 { + iset.addOne(-1) + } + + for j := 0; j < n; j++ { + i1 := a.readInt() + i2 := a.readInt() + + iset.addRange(i1, i2) + } + } + + return sets +} + +func (a *ATNDeserializer) readEdges(atn *ATN, sets []*IntervalSet) { + nedges := a.readInt() + + for i := 0; i < nedges; i++ { + var ( + src = a.readInt() + trg = a.readInt() + ttype = a.readInt() + arg1 = a.readInt() + arg2 = a.readInt() + arg3 = a.readInt() + trans = a.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets) + srcState = atn.states[src] + ) + + srcState.AddTransition(trans, -1) + } + + // Edges for rule stop states can be derived, so they are not serialized + for _, state := range atn.states { + for _, t := range state.GetTransitions() { + var rt, ok = t.(*RuleTransition) + + if !ok { + continue + } + + outermostPrecedenceReturn := -1 + + if atn.ruleToStartState[rt.getTarget().GetRuleIndex()].isPrecedenceRule { + if rt.precedence == 0 { + outermostPrecedenceReturn = rt.getTarget().GetRuleIndex() + } + } + + trans := NewEpsilonTransition(rt.followState, outermostPrecedenceReturn) + + atn.ruleToStopState[rt.getTarget().GetRuleIndex()].AddTransition(trans, -1) + } + } + + for _, state := range atn.states { + if s2, ok := state.(BlockStartState); ok { + // We need to know the end state to set its start state + if s2.getEndState() == nil { + panic("IllegalState") + } + + // Block end states can only be associated to a single block start state + if s2.getEndState().startState != nil { + panic("IllegalState") + } + + s2.getEndState().startState = state + } + + if s2, ok := state.(*PlusLoopbackState); ok { + for _, t := range s2.GetTransitions() { + if t2, ok := t.getTarget().(*PlusBlockStartState); ok { + t2.loopBackState = state + } + } + } else if s2, ok := state.(*StarLoopbackState); ok { + for _, t := range s2.GetTransitions() { + if t2, ok := t.getTarget().(*StarLoopEntryState); ok { + t2.loopBackState = state + } + } + } + } +} + +func (a *ATNDeserializer) readDecisions(atn *ATN) { + ndecisions := a.readInt() + + for i := 0; i < ndecisions; i++ { + s := a.readInt() + decState := atn.states[s].(DecisionState) + + atn.DecisionToState = append(atn.DecisionToState, decState) + decState.setDecision(i) + } +} + +func (a *ATNDeserializer) readLexerActions(atn *ATN) { + if atn.grammarType == ATNTypeLexer { + count := a.readInt() + + atn.lexerActions = make([]LexerAction, count) + + for i := range atn.lexerActions { + actionType := a.readInt() + data1 := a.readInt() + data2 := a.readInt() + atn.lexerActions[i] = a.lexerActionFactory(actionType, data1, data2) + } + } +} + +func (a *ATNDeserializer) generateRuleBypassTransitions(atn *ATN) { + count := len(atn.ruleToStartState) + + for i := 0; i < count; i++ { + atn.ruleToTokenType[i] = atn.maxTokenType + i + 1 + } + + for i := 0; i < count; i++ { + a.generateRuleBypassTransition(atn, i) + } +} + +func (a *ATNDeserializer) generateRuleBypassTransition(atn *ATN, idx int) { + bypassStart := NewBasicBlockStartState() + + bypassStart.ruleIndex = idx + atn.addState(bypassStart) + + bypassStop := NewBlockEndState() + + bypassStop.ruleIndex = idx + atn.addState(bypassStop) + + bypassStart.endState = bypassStop + + atn.defineDecisionState(bypassStart.BaseDecisionState) + + bypassStop.startState = bypassStart + + var excludeTransition Transition + var endState ATNState + + if atn.ruleToStartState[idx].isPrecedenceRule { + // Wrap from the beginning of the rule to the StarLoopEntryState + endState = nil + + for i := 0; i < len(atn.states); i++ { + state := atn.states[i] + + if a.stateIsEndStateFor(state, idx) != nil { + endState = state + excludeTransition = state.(*StarLoopEntryState).loopBackState.GetTransitions()[0] + + break + } + } + + if excludeTransition == nil { + panic("Couldn't identify final state of the precedence rule prefix section.") + } + } else { + endState = atn.ruleToStopState[idx] + } + + // All non-excluded transitions that currently target end state need to target + // blockEnd instead + for i := 0; i < len(atn.states); i++ { + state := atn.states[i] + + for j := 0; j < len(state.GetTransitions()); j++ { + transition := state.GetTransitions()[j] + + if transition == excludeTransition { + continue + } + + if transition.getTarget() == endState { + transition.setTarget(bypassStop) + } + } + } + + // All transitions leaving the rule start state need to leave blockStart instead + ruleToStartState := atn.ruleToStartState[idx] + count := len(ruleToStartState.GetTransitions()) + + for count > 0 { + bypassStart.AddTransition(ruleToStartState.GetTransitions()[count-1], -1) + ruleToStartState.SetTransitions([]Transition{ruleToStartState.GetTransitions()[len(ruleToStartState.GetTransitions())-1]}) + } + + // Link the new states + atn.ruleToStartState[idx].AddTransition(NewEpsilonTransition(bypassStart, -1), -1) + bypassStop.AddTransition(NewEpsilonTransition(endState, -1), -1) + + MatchState := NewBasicState() + + atn.addState(MatchState) + MatchState.AddTransition(NewAtomTransition(bypassStop, atn.ruleToTokenType[idx]), -1) + bypassStart.AddTransition(NewEpsilonTransition(MatchState, -1), -1) +} + +func (a *ATNDeserializer) stateIsEndStateFor(state ATNState, idx int) ATNState { + if state.GetRuleIndex() != idx { + return nil + } + + if _, ok := state.(*StarLoopEntryState); !ok { + return nil + } + + maybeLoopEndState := state.GetTransitions()[len(state.GetTransitions())-1].getTarget() + + if _, ok := maybeLoopEndState.(*LoopEndState); !ok { + return nil + } + + var _, ok = maybeLoopEndState.GetTransitions()[0].getTarget().(*RuleStopState) + + if maybeLoopEndState.(*LoopEndState).epsilonOnlyTransitions && ok { + return state + } + + return nil +} + +// markPrecedenceDecisions analyzes the StarLoopEntryState states in the +// specified ATN to set the StarLoopEntryState.precedenceRuleDecision field to +// the correct value. +func (a *ATNDeserializer) markPrecedenceDecisions(atn *ATN) { + for _, state := range atn.states { + if _, ok := state.(*StarLoopEntryState); !ok { + continue + } + + // We analyze the ATN to determine if a ATN decision state is the + // decision for the closure block that determines whether a + // precedence rule should continue or complete. + if atn.ruleToStartState[state.GetRuleIndex()].isPrecedenceRule { + maybeLoopEndState := state.GetTransitions()[len(state.GetTransitions())-1].getTarget() + + if s3, ok := maybeLoopEndState.(*LoopEndState); ok { + var _, ok2 = maybeLoopEndState.GetTransitions()[0].getTarget().(*RuleStopState) + + if s3.epsilonOnlyTransitions && ok2 { + state.(*StarLoopEntryState).precedenceRuleDecision = true + } + } + } + } +} + +func (a *ATNDeserializer) verifyATN(atn *ATN) { + if !a.options.VerifyATN() { + return + } + + // Verify assumptions + for _, state := range atn.states { + if state == nil { + continue + } + + a.checkCondition(state.GetEpsilonOnlyTransitions() || len(state.GetTransitions()) <= 1, "") + + switch s2 := state.(type) { + case *PlusBlockStartState: + a.checkCondition(s2.loopBackState != nil, "") + + case *StarLoopEntryState: + a.checkCondition(s2.loopBackState != nil, "") + a.checkCondition(len(s2.GetTransitions()) == 2, "") + + switch s2.transitions[0].getTarget().(type) { + case *StarBlockStartState: + _, ok := s2.transitions[1].getTarget().(*LoopEndState) + + a.checkCondition(ok, "") + a.checkCondition(!s2.nonGreedy, "") + + case *LoopEndState: + var _, ok = s2.transitions[1].getTarget().(*StarBlockStartState) + + a.checkCondition(ok, "") + a.checkCondition(s2.nonGreedy, "") + + default: + panic("IllegalState") + } + + case *StarLoopbackState: + a.checkCondition(len(state.GetTransitions()) == 1, "") + + var _, ok = state.GetTransitions()[0].getTarget().(*StarLoopEntryState) + + a.checkCondition(ok, "") + + case *LoopEndState: + a.checkCondition(s2.loopBackState != nil, "") + + case *RuleStartState: + a.checkCondition(s2.stopState != nil, "") + + case BlockStartState: + a.checkCondition(s2.getEndState() != nil, "") + + case *BlockEndState: + a.checkCondition(s2.startState != nil, "") + + case DecisionState: + a.checkCondition(len(s2.GetTransitions()) <= 1 || s2.getDecision() >= 0, "") + + default: + var _, ok = s2.(*RuleStopState) + + a.checkCondition(len(s2.GetTransitions()) <= 1 || ok, "") + } + } +} + +func (a *ATNDeserializer) checkCondition(condition bool, message string) { + if !condition { + if message == "" { + message = "IllegalState" + } + + panic(message) + } +} + +func (a *ATNDeserializer) readInt() int { + v := a.data[a.pos] + + a.pos++ + + return int(v) // data is 32 bits but int is at least that big +} + +func (a *ATNDeserializer) edgeFactory(atn *ATN, typeIndex, src, trg, arg1, arg2, arg3 int, sets []*IntervalSet) Transition { + target := atn.states[trg] + + switch typeIndex { + case TransitionEPSILON: + return NewEpsilonTransition(target, -1) + + case TransitionRANGE: + if arg3 != 0 { + return NewRangeTransition(target, TokenEOF, arg2) + } + + return NewRangeTransition(target, arg1, arg2) + + case TransitionRULE: + return NewRuleTransition(atn.states[arg1], arg2, arg3, target) + + case TransitionPREDICATE: + return NewPredicateTransition(target, arg1, arg2, arg3 != 0) + + case TransitionPRECEDENCE: + return NewPrecedencePredicateTransition(target, arg1) + + case TransitionATOM: + if arg3 != 0 { + return NewAtomTransition(target, TokenEOF) + } + + return NewAtomTransition(target, arg1) + + case TransitionACTION: + return NewActionTransition(target, arg1, arg2, arg3 != 0) + + case TransitionSET: + return NewSetTransition(target, sets[arg1]) + + case TransitionNOTSET: + return NewNotSetTransition(target, sets[arg1]) + + case TransitionWILDCARD: + return NewWildcardTransition(target) + } + + panic("The specified transition type is not valid.") +} + +func (a *ATNDeserializer) stateFactory(typeIndex, ruleIndex int) ATNState { + var s ATNState + + switch typeIndex { + case ATNStateInvalidType: + return nil + + case ATNStateBasic: + s = NewBasicState() + + case ATNStateRuleStart: + s = NewRuleStartState() + + case ATNStateBlockStart: + s = NewBasicBlockStartState() + + case ATNStatePlusBlockStart: + s = NewPlusBlockStartState() + + case ATNStateStarBlockStart: + s = NewStarBlockStartState() + + case ATNStateTokenStart: + s = NewTokensStartState() + + case ATNStateRuleStop: + s = NewRuleStopState() + + case ATNStateBlockEnd: + s = NewBlockEndState() + + case ATNStateStarLoopBack: + s = NewStarLoopbackState() + + case ATNStateStarLoopEntry: + s = NewStarLoopEntryState() + + case ATNStatePlusLoopBack: + s = NewPlusLoopbackState() + + case ATNStateLoopEnd: + s = NewLoopEndState() + + default: + panic(fmt.Sprintf("state type %d is invalid", typeIndex)) + } + + s.SetRuleIndex(ruleIndex) + + return s +} + +func (a *ATNDeserializer) lexerActionFactory(typeIndex, data1, data2 int) LexerAction { + switch typeIndex { + case LexerActionTypeChannel: + return NewLexerChannelAction(data1) + + case LexerActionTypeCustom: + return NewLexerCustomAction(data1, data2) + + case LexerActionTypeMode: + return NewLexerModeAction(data1) + + case LexerActionTypeMore: + return LexerMoreActionINSTANCE + + case LexerActionTypePopMode: + return LexerPopModeActionINSTANCE + + case LexerActionTypePushMode: + return NewLexerPushModeAction(data1) + + case LexerActionTypeSkip: + return LexerSkipActionINSTANCE + + case LexerActionTypeType: + return NewLexerTypeAction(data1) + + default: + panic(fmt.Sprintf("lexer action %d is invalid", typeIndex)) + } +} diff --git a/runtime/Go/antlr/v4/atn_simulator.go b/runtime/Go/antlr/v4/atn_simulator.go new file mode 100644 index 0000000000..41529115fa --- /dev/null +++ b/runtime/Go/antlr/v4/atn_simulator.go @@ -0,0 +1,50 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +var ATNSimulatorError = NewDFAState(0x7FFFFFFF, NewBaseATNConfigSet(false)) + +type IATNSimulator interface { + SharedContextCache() *PredictionContextCache + ATN() *ATN + DecisionToDFA() []*DFA +} + +type BaseATNSimulator struct { + atn *ATN + sharedContextCache *PredictionContextCache + decisionToDFA []*DFA +} + +func NewBaseATNSimulator(atn *ATN, sharedContextCache *PredictionContextCache) *BaseATNSimulator { + b := new(BaseATNSimulator) + + b.atn = atn + b.sharedContextCache = sharedContextCache + + return b +} + +func (b *BaseATNSimulator) getCachedContext(context PredictionContext) PredictionContext { + if b.sharedContextCache == nil { + return context + } + + visited := make(map[PredictionContext]PredictionContext) + + return getCachedBasePredictionContext(context, b.sharedContextCache, visited) +} + +func (b *BaseATNSimulator) SharedContextCache() *PredictionContextCache { + return b.sharedContextCache +} + +func (b *BaseATNSimulator) ATN() *ATN { + return b.atn +} + +func (b *BaseATNSimulator) DecisionToDFA() []*DFA { + return b.decisionToDFA +} diff --git a/runtime/Go/antlr/v4/atn_state.go b/runtime/Go/antlr/v4/atn_state.go new file mode 100644 index 0000000000..1f2a56bc31 --- /dev/null +++ b/runtime/Go/antlr/v4/atn_state.go @@ -0,0 +1,393 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "strconv" + +// Constants for serialization. +const ( + ATNStateInvalidType = 0 + ATNStateBasic = 1 + ATNStateRuleStart = 2 + ATNStateBlockStart = 3 + ATNStatePlusBlockStart = 4 + ATNStateStarBlockStart = 5 + ATNStateTokenStart = 6 + ATNStateRuleStop = 7 + ATNStateBlockEnd = 8 + ATNStateStarLoopBack = 9 + ATNStateStarLoopEntry = 10 + ATNStatePlusLoopBack = 11 + ATNStateLoopEnd = 12 + + ATNStateInvalidStateNumber = -1 +) + +var ATNStateInitialNumTransitions = 4 + +type ATNState interface { + GetEpsilonOnlyTransitions() bool + + GetRuleIndex() int + SetRuleIndex(int) + + GetNextTokenWithinRule() *IntervalSet + SetNextTokenWithinRule(*IntervalSet) + + GetATN() *ATN + SetATN(*ATN) + + GetStateType() int + + GetStateNumber() int + SetStateNumber(int) + + GetTransitions() []Transition + SetTransitions([]Transition) + AddTransition(Transition, int) + + String() string + Hash() int + Equals(Collectable[ATNState]) bool +} + +type BaseATNState struct { + // NextTokenWithinRule caches lookahead during parsing. Not used during construction. + NextTokenWithinRule *IntervalSet + + // atn is the current ATN. + atn *ATN + + epsilonOnlyTransitions bool + + // ruleIndex tracks the Rule index because there are no Rule objects at runtime. + ruleIndex int + + stateNumber int + + stateType int + + // Track the transitions emanating from this ATN state. + transitions []Transition +} + +func NewBaseATNState() *BaseATNState { + return &BaseATNState{stateNumber: ATNStateInvalidStateNumber, stateType: ATNStateInvalidType} +} + +func (as *BaseATNState) GetRuleIndex() int { + return as.ruleIndex +} + +func (as *BaseATNState) SetRuleIndex(v int) { + as.ruleIndex = v +} +func (as *BaseATNState) GetEpsilonOnlyTransitions() bool { + return as.epsilonOnlyTransitions +} + +func (as *BaseATNState) GetATN() *ATN { + return as.atn +} + +func (as *BaseATNState) SetATN(atn *ATN) { + as.atn = atn +} + +func (as *BaseATNState) GetTransitions() []Transition { + return as.transitions +} + +func (as *BaseATNState) SetTransitions(t []Transition) { + as.transitions = t +} + +func (as *BaseATNState) GetStateType() int { + return as.stateType +} + +func (as *BaseATNState) GetStateNumber() int { + return as.stateNumber +} + +func (as *BaseATNState) SetStateNumber(stateNumber int) { + as.stateNumber = stateNumber +} + +func (as *BaseATNState) GetNextTokenWithinRule() *IntervalSet { + return as.NextTokenWithinRule +} + +func (as *BaseATNState) SetNextTokenWithinRule(v *IntervalSet) { + as.NextTokenWithinRule = v +} + +func (as *BaseATNState) Hash() int { + return as.stateNumber +} + +func (as *BaseATNState) String() string { + return strconv.Itoa(as.stateNumber) +} + +func (as *BaseATNState) Equals(other Collectable[ATNState]) bool { + if ot, ok := other.(ATNState); ok { + return as.stateNumber == ot.GetStateNumber() + } + + return false +} + +func (as *BaseATNState) isNonGreedyExitState() bool { + return false +} + +func (as *BaseATNState) AddTransition(trans Transition, index int) { + if len(as.transitions) == 0 { + as.epsilonOnlyTransitions = trans.getIsEpsilon() + } else if as.epsilonOnlyTransitions != trans.getIsEpsilon() { + as.epsilonOnlyTransitions = false + } + + if index == -1 { + as.transitions = append(as.transitions, trans) + } else { + as.transitions = append(as.transitions[:index], append([]Transition{trans}, as.transitions[index:]...)...) + // TODO: as.transitions.splice(index, 1, trans) + } +} + +type BasicState struct { + *BaseATNState +} + +func NewBasicState() *BasicState { + b := NewBaseATNState() + + b.stateType = ATNStateBasic + + return &BasicState{BaseATNState: b} +} + +type DecisionState interface { + ATNState + + getDecision() int + setDecision(int) + + getNonGreedy() bool + setNonGreedy(bool) +} + +type BaseDecisionState struct { + *BaseATNState + decision int + nonGreedy bool +} + +func NewBaseDecisionState() *BaseDecisionState { + return &BaseDecisionState{BaseATNState: NewBaseATNState(), decision: -1} +} + +func (s *BaseDecisionState) getDecision() int { + return s.decision +} + +func (s *BaseDecisionState) setDecision(b int) { + s.decision = b +} + +func (s *BaseDecisionState) getNonGreedy() bool { + return s.nonGreedy +} + +func (s *BaseDecisionState) setNonGreedy(b bool) { + s.nonGreedy = b +} + +type BlockStartState interface { + DecisionState + + getEndState() *BlockEndState + setEndState(*BlockEndState) +} + +// BaseBlockStartState is the start of a regular (...) block. +type BaseBlockStartState struct { + *BaseDecisionState + endState *BlockEndState +} + +func NewBlockStartState() *BaseBlockStartState { + return &BaseBlockStartState{BaseDecisionState: NewBaseDecisionState()} +} + +func (s *BaseBlockStartState) getEndState() *BlockEndState { + return s.endState +} + +func (s *BaseBlockStartState) setEndState(b *BlockEndState) { + s.endState = b +} + +type BasicBlockStartState struct { + *BaseBlockStartState +} + +func NewBasicBlockStartState() *BasicBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStateBlockStart + + return &BasicBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &BasicBlockStartState{} + +// BlockEndState is a terminal node of a simple (a|b|c) block. +type BlockEndState struct { + *BaseATNState + startState ATNState +} + +func NewBlockEndState() *BlockEndState { + b := NewBaseATNState() + + b.stateType = ATNStateBlockEnd + + return &BlockEndState{BaseATNState: b} +} + +// RuleStopState is the last node in the ATN for a rule, unless that rule is the +// start symbol. In that case, there is one transition to EOF. Later, we might +// encode references to all calls to this rule to compute FOLLOW sets for error +// handling. +type RuleStopState struct { + *BaseATNState +} + +func NewRuleStopState() *RuleStopState { + b := NewBaseATNState() + + b.stateType = ATNStateRuleStop + + return &RuleStopState{BaseATNState: b} +} + +type RuleStartState struct { + *BaseATNState + stopState ATNState + isPrecedenceRule bool +} + +func NewRuleStartState() *RuleStartState { + b := NewBaseATNState() + + b.stateType = ATNStateRuleStart + + return &RuleStartState{BaseATNState: b} +} + +// PlusLoopbackState is a decision state for A+ and (A|B)+. It has two +// transitions: one to the loop back to start of the block, and one to exit. +type PlusLoopbackState struct { + *BaseDecisionState +} + +func NewPlusLoopbackState() *PlusLoopbackState { + b := NewBaseDecisionState() + + b.stateType = ATNStatePlusLoopBack + + return &PlusLoopbackState{BaseDecisionState: b} +} + +// PlusBlockStartState is the start of a (A|B|...)+ loop. Technically it is a +// decision state; we don't use it for code generation. Somebody might need it, +// it is included for completeness. In reality, PlusLoopbackState is the real +// decision-making node for A+. +type PlusBlockStartState struct { + *BaseBlockStartState + loopBackState ATNState +} + +func NewPlusBlockStartState() *PlusBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStatePlusBlockStart + + return &PlusBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &PlusBlockStartState{} + +// StarBlockStartState is the block that begins a closure loop. +type StarBlockStartState struct { + *BaseBlockStartState +} + +func NewStarBlockStartState() *StarBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStateStarBlockStart + + return &StarBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &StarBlockStartState{} + +type StarLoopbackState struct { + *BaseATNState +} + +func NewStarLoopbackState() *StarLoopbackState { + b := NewBaseATNState() + + b.stateType = ATNStateStarLoopBack + + return &StarLoopbackState{BaseATNState: b} +} + +type StarLoopEntryState struct { + *BaseDecisionState + loopBackState ATNState + precedenceRuleDecision bool +} + +func NewStarLoopEntryState() *StarLoopEntryState { + b := NewBaseDecisionState() + + b.stateType = ATNStateStarLoopEntry + + // False precedenceRuleDecision indicates whether s state can benefit from a precedence DFA during SLL decision making. + return &StarLoopEntryState{BaseDecisionState: b} +} + +// LoopEndState marks the end of a * or + loop. +type LoopEndState struct { + *BaseATNState + loopBackState ATNState +} + +func NewLoopEndState() *LoopEndState { + b := NewBaseATNState() + + b.stateType = ATNStateLoopEnd + + return &LoopEndState{BaseATNState: b} +} + +// TokensStartState is the Tokens rule start state linking to each lexer rule start state. +type TokensStartState struct { + *BaseDecisionState +} + +func NewTokensStartState() *TokensStartState { + b := NewBaseDecisionState() + + b.stateType = ATNStateTokenStart + + return &TokensStartState{BaseDecisionState: b} +} diff --git a/runtime/Go/antlr/v4/atn_type.go b/runtime/Go/antlr/v4/atn_type.go new file mode 100644 index 0000000000..3a515a145f --- /dev/null +++ b/runtime/Go/antlr/v4/atn_type.go @@ -0,0 +1,11 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// Represent the type of recognizer an ATN applies to. +const ( + ATNTypeLexer = 0 + ATNTypeParser = 1 +) diff --git a/runtime/Go/antlr/v4/atnconfigset_test.go b/runtime/Go/antlr/v4/atnconfigset_test.go new file mode 100644 index 0000000000..3f1e9cc6cb --- /dev/null +++ b/runtime/Go/antlr/v4/atnconfigset_test.go @@ -0,0 +1,73 @@ +package antlr + +import ( + "testing" +) + +// Test for Issue # 3319 +// To run, "cd antlr4/runtime/Go/antlr/", then "go test". +// In the old runtime code, the test would crash because it would try +// to compare a *LexerActionExecutor with nil, causing a nil pointer dereference. +// It only happens if there were different states that had equal stateNumber mod 16, +// and you created that ATNConfig with a nil LexerActionExecutor. (That's why this +// code has a hardwired constant of 16. + +func TestCompare(t *testing.T) { + var set = NewOrderedATNConfigSet() + var s0 = NewBaseATNState() + var s1 = NewBaseATNState() + var s2 = NewBaseATNState() + var s3 = NewBaseATNState() + var s16 = NewBaseATNState() + s16.SetStateNumber(16) + var s17 = NewBaseATNState() + s17.SetStateNumber(17) + var s18 = NewBaseATNState() + s18.SetStateNumber(18) + var s19 = NewBaseATNState() + s19.SetStateNumber(19) + var la0 = NewBaseLexerAction(1) + var la1 = NewBaseLexerAction(2) + var laa = make([]LexerAction, 2) + laa[0] = la0 + laa[1] = la1 + var ae = NewLexerActionExecutor(laa) + set.Add(NewLexerATNConfig5(s0, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s0, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s0, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 2, BasePredictionContextEMPTY, ae), nil) + + set.Add(NewLexerATNConfig5(s0, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s0, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s0, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 2, BasePredictionContextEMPTY, nil), nil) + + set.Add(NewLexerATNConfig5(s16, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s16, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s16, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 2, BasePredictionContextEMPTY, nil), nil) +} diff --git a/runtime/Go/antlr/v4/char_stream.go b/runtime/Go/antlr/v4/char_stream.go new file mode 100644 index 0000000000..c33f0adb5e --- /dev/null +++ b/runtime/Go/antlr/v4/char_stream.go @@ -0,0 +1,12 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type CharStream interface { + IntStream + GetText(int, int) string + GetTextFromTokens(start, end Token) string + GetTextFromInterval(*Interval) string +} diff --git a/runtime/Go/antlr/v4/common_token_factory.go b/runtime/Go/antlr/v4/common_token_factory.go new file mode 100644 index 0000000000..1bb0314ea0 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_factory.go @@ -0,0 +1,56 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// TokenFactory creates CommonToken objects. +type TokenFactory interface { + Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token +} + +// CommonTokenFactory is the default TokenFactory implementation. +type CommonTokenFactory struct { + // copyText indicates whether CommonToken.setText should be called after + // constructing tokens to explicitly set the text. This is useful for cases + // where the input stream might not be able to provide arbitrary substrings of + // text from the input after the lexer creates a token (e.g. the + // implementation of CharStream.GetText in UnbufferedCharStream panics an + // UnsupportedOperationException). Explicitly setting the token text allows + // Token.GetText to be called at any time regardless of the input stream + // implementation. + // + // The default value is false to avoid the performance and memory overhead of + // copying text for every token unless explicitly requested. + copyText bool +} + +func NewCommonTokenFactory(copyText bool) *CommonTokenFactory { + return &CommonTokenFactory{copyText: copyText} +} + +// CommonTokenFactoryDEFAULT is the default CommonTokenFactory. It does not +// explicitly copy token text when constructing tokens. +var CommonTokenFactoryDEFAULT = NewCommonTokenFactory(false) + +func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token { + t := NewCommonToken(source, ttype, channel, start, stop) + + t.line = line + t.column = column + + if text != "" { + t.SetText(text) + } else if c.copyText && source.charStream != nil { + t.SetText(source.charStream.GetTextFromInterval(NewInterval(start, stop))) + } + + return t +} + +func (c *CommonTokenFactory) createThin(ttype int, text string) Token { + t := NewCommonToken(nil, ttype, TokenDefaultChannel, -1, -1) + t.SetText(text) + + return t +} diff --git a/runtime/Go/antlr/v4/common_token_stream.go b/runtime/Go/antlr/v4/common_token_stream.go new file mode 100644 index 0000000000..c6c9485a20 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_stream.go @@ -0,0 +1,449 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" +) + +// CommonTokenStream is an implementation of TokenStream that loads tokens from +// a TokenSource on-demand and places the tokens in a buffer to provide access +// to any previous token by index. This token stream ignores the value of +// Token.getChannel. If your parser requires the token stream filter tokens to +// only those on a particular channel, such as Token.DEFAULT_CHANNEL or +// Token.HIDDEN_CHANNEL, use a filtering token stream such a CommonTokenStream. +type CommonTokenStream struct { + channel int + + // fetchedEOF indicates whether the Token.EOF token has been fetched from + // tokenSource and added to tokens. This field improves performance for the + // following cases: + // + // consume: The lookahead check in consume to preven consuming the EOF symbol is + // optimized by checking the values of fetchedEOF and p instead of calling LA. + // + // fetch: The check to prevent adding multiple EOF symbols into tokens is + // trivial with bt field. + fetchedEOF bool + + // index indexs into tokens of the current token (next token to consume). + // tokens[p] should be LT(1). It is set to -1 when the stream is first + // constructed or when SetTokenSource is called, indicating that the first token + // has not yet been fetched from the token source. For additional information, + // see the documentation of IntStream for a description of initializing methods. + index int + + // tokenSource is the TokenSource from which tokens for the bt stream are + // fetched. + tokenSource TokenSource + + // tokens is all tokens fetched from the token source. The list is considered a + // complete view of the input once fetchedEOF is set to true. + tokens []Token +} + +func NewCommonTokenStream(lexer Lexer, channel int) *CommonTokenStream { + return &CommonTokenStream{ + channel: channel, + index: -1, + tokenSource: lexer, + tokens: make([]Token, 0), + } +} + +func (c *CommonTokenStream) GetAllTokens() []Token { + return c.tokens +} + +func (c *CommonTokenStream) Mark() int { + return 0 +} + +func (c *CommonTokenStream) Release(marker int) {} + +func (c *CommonTokenStream) reset() { + c.Seek(0) +} + +func (c *CommonTokenStream) Seek(index int) { + c.lazyInit() + c.index = c.adjustSeekIndex(index) +} + +func (c *CommonTokenStream) Get(index int) Token { + c.lazyInit() + + return c.tokens[index] +} + +func (c *CommonTokenStream) Consume() { + SkipEOFCheck := false + + if c.index >= 0 { + if c.fetchedEOF { + // The last token in tokens is EOF. Skip the check if p indexes any fetched. + // token except the last. + SkipEOFCheck = c.index < len(c.tokens)-1 + } else { + // No EOF token in tokens. Skip the check if p indexes a fetched token. + SkipEOFCheck = c.index < len(c.tokens) + } + } else { + // Not yet initialized + SkipEOFCheck = false + } + + if !SkipEOFCheck && c.LA(1) == TokenEOF { + panic("cannot consume EOF") + } + + if c.Sync(c.index + 1) { + c.index = c.adjustSeekIndex(c.index + 1) + } +} + +// Sync makes sure index i in tokens has a token and returns true if a token is +// located at index i and otherwise false. +func (c *CommonTokenStream) Sync(i int) bool { + n := i - len(c.tokens) + 1 // TODO: How many more elements do we need? + + if n > 0 { + fetched := c.fetch(n) + return fetched >= n + } + + return true +} + +// fetch adds n elements to buffer and returns the actual number of elements +// added to the buffer. +func (c *CommonTokenStream) fetch(n int) int { + if c.fetchedEOF { + return 0 + } + + for i := 0; i < n; i++ { + t := c.tokenSource.NextToken() + + t.SetTokenIndex(len(c.tokens)) + c.tokens = append(c.tokens, t) + + if t.GetTokenType() == TokenEOF { + c.fetchedEOF = true + + return i + 1 + } + } + + return n +} + +// GetTokens gets all tokens from start to stop inclusive. +func (c *CommonTokenStream) GetTokens(start int, stop int, types *IntervalSet) []Token { + if start < 0 || stop < 0 { + return nil + } + + c.lazyInit() + + subset := make([]Token, 0) + + if stop >= len(c.tokens) { + stop = len(c.tokens) - 1 + } + + for i := start; i < stop; i++ { + t := c.tokens[i] + + if t.GetTokenType() == TokenEOF { + break + } + + if types == nil || types.contains(t.GetTokenType()) { + subset = append(subset, t) + } + } + + return subset +} + +func (c *CommonTokenStream) LA(i int) int { + return c.LT(i).GetTokenType() +} + +func (c *CommonTokenStream) lazyInit() { + if c.index == -1 { + c.setup() + } +} + +func (c *CommonTokenStream) setup() { + c.Sync(0) + c.index = c.adjustSeekIndex(0) +} + +func (c *CommonTokenStream) GetTokenSource() TokenSource { + return c.tokenSource +} + +// SetTokenSource resets the c token stream by setting its token source. +func (c *CommonTokenStream) SetTokenSource(tokenSource TokenSource) { + c.tokenSource = tokenSource + c.tokens = make([]Token, 0) + c.index = -1 +} + +// NextTokenOnChannel returns the index of the next token on channel given a +// starting index. Returns i if tokens[i] is on channel. Returns -1 if there are +// no tokens on channel between i and EOF. +func (c *CommonTokenStream) NextTokenOnChannel(i, channel int) int { + c.Sync(i) + + if i >= len(c.tokens) { + return -1 + } + + token := c.tokens[i] + + for token.GetChannel() != c.channel { + if token.GetTokenType() == TokenEOF { + return -1 + } + + i++ + c.Sync(i) + token = c.tokens[i] + } + + return i +} + +// previousTokenOnChannel returns the index of the previous token on channel +// given a starting index. Returns i if tokens[i] is on channel. Returns -1 if +// there are no tokens on channel between i and 0. +func (c *CommonTokenStream) previousTokenOnChannel(i, channel int) int { + for i >= 0 && c.tokens[i].GetChannel() != channel { + i-- + } + + return i +} + +// GetHiddenTokensToRight collects all tokens on a specified channel to the +// right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL +// or EOF. If channel is -1, it finds any non-default channel token. +func (c *CommonTokenStream) GetHiddenTokensToRight(tokenIndex, channel int) []Token { + c.lazyInit() + + if tokenIndex < 0 || tokenIndex >= len(c.tokens) { + panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1)) + } + + nextOnChannel := c.NextTokenOnChannel(tokenIndex+1, LexerDefaultTokenChannel) + from := tokenIndex + 1 + + // If no onchannel to the right, then nextOnChannel == -1, so set to to last token + var to int + + if nextOnChannel == -1 { + to = len(c.tokens) - 1 + } else { + to = nextOnChannel + } + + return c.filterForChannel(from, to, channel) +} + +// GetHiddenTokensToLeft collects all tokens on channel to the left of the +// current token until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is +// -1, it finds any non default channel token. +func (c *CommonTokenStream) GetHiddenTokensToLeft(tokenIndex, channel int) []Token { + c.lazyInit() + + if tokenIndex < 0 || tokenIndex >= len(c.tokens) { + panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1)) + } + + prevOnChannel := c.previousTokenOnChannel(tokenIndex-1, LexerDefaultTokenChannel) + + if prevOnChannel == tokenIndex-1 { + return nil + } + + // If there are none on channel to the left and prevOnChannel == -1 then from = 0 + from := prevOnChannel + 1 + to := tokenIndex - 1 + + return c.filterForChannel(from, to, channel) +} + +func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token { + hidden := make([]Token, 0) + + for i := left; i < right+1; i++ { + t := c.tokens[i] + + if channel == -1 { + if t.GetChannel() != LexerDefaultTokenChannel { + hidden = append(hidden, t) + } + } else if t.GetChannel() == channel { + hidden = append(hidden, t) + } + } + + if len(hidden) == 0 { + return nil + } + + return hidden +} + +func (c *CommonTokenStream) GetSourceName() string { + return c.tokenSource.GetSourceName() +} + +func (c *CommonTokenStream) Size() int { + return len(c.tokens) +} + +func (c *CommonTokenStream) Index() int { + return c.index +} + +func (c *CommonTokenStream) GetAllText() string { + return c.GetTextFromInterval(nil) +} + +func (c *CommonTokenStream) GetTextFromTokens(start, end Token) string { + if start == nil || end == nil { + return "" + } + + return c.GetTextFromInterval(NewInterval(start.GetTokenIndex(), end.GetTokenIndex())) +} + +func (c *CommonTokenStream) GetTextFromRuleContext(interval RuleContext) string { + return c.GetTextFromInterval(interval.GetSourceInterval()) +} + +func (c *CommonTokenStream) GetTextFromInterval(interval *Interval) string { + c.lazyInit() + + if interval == nil { + c.Fill() + interval = NewInterval(0, len(c.tokens)-1) + } else { + c.Sync(interval.Stop) + } + + start := interval.Start + stop := interval.Stop + + if start < 0 || stop < 0 { + return "" + } + + if stop >= len(c.tokens) { + stop = len(c.tokens) - 1 + } + + s := "" + + for i := start; i < stop+1; i++ { + t := c.tokens[i] + + if t.GetTokenType() == TokenEOF { + break + } + + s += t.GetText() + } + + return s +} + +// Fill gets all tokens from the lexer until EOF. +func (c *CommonTokenStream) Fill() { + c.lazyInit() + + for c.fetch(1000) == 1000 { + continue + } +} + +func (c *CommonTokenStream) adjustSeekIndex(i int) int { + return c.NextTokenOnChannel(i, c.channel) +} + +func (c *CommonTokenStream) LB(k int) Token { + if k == 0 || c.index-k < 0 { + return nil + } + + i := c.index + n := 1 + + // Find k good tokens looking backward + for n <= k { + // Skip off-channel tokens + i = c.previousTokenOnChannel(i-1, c.channel) + n++ + } + + if i < 0 { + return nil + } + + return c.tokens[i] +} + +func (c *CommonTokenStream) LT(k int) Token { + c.lazyInit() + + if k == 0 { + return nil + } + + if k < 0 { + return c.LB(-k) + } + + i := c.index + n := 1 // We know tokens[n] is valid + + // Find k good tokens + for n < k { + // Skip off-channel tokens, but make sure to not look past EOF + if c.Sync(i + 1) { + i = c.NextTokenOnChannel(i+1, c.channel) + } + + n++ + } + + return c.tokens[i] +} + +// getNumberOfOnChannelTokens counts EOF once. +func (c *CommonTokenStream) getNumberOfOnChannelTokens() int { + var n int + + c.Fill() + + for i := 0; i < len(c.tokens); i++ { + t := c.tokens[i] + + if t.GetChannel() == c.channel { + n++ + } + + if t.GetTokenType() == TokenEOF { + break + } + } + + return n +} diff --git a/runtime/Go/antlr/v4/common_token_stream_test.go b/runtime/Go/antlr/v4/common_token_stream_test.go new file mode 100644 index 0000000000..e7c75d49b1 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_stream_test.go @@ -0,0 +1,178 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "testing" +) + +type commonTokenStreamTestLexer struct { + *BaseLexer + + tokens []Token + i int +} + +func (l *commonTokenStreamTestLexer) NextToken() Token { + tmp := l.tokens[l.i] + l.i++ + return tmp +} + +func TestCommonTokenStreamOffChannel(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + + assert.Equal("x", tokens.LT(1).GetText()) // must skip first off channel token + tokens.Consume() + assert.Equal("=", tokens.LT(1).GetText()) + assert.Equal("x", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal("34", tokens.LT(1).GetText()) + assert.Equal("=", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal(";", tokens.LT(1).GetText()) + assert.Equal("34", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal(TokenEOF, tokens.LT(1).GetTokenType()) + assert.Equal(";", tokens.LT(-1).GetText()) + + assert.Equal("34", tokens.LT(-2).GetText()) + assert.Equal("=", tokens.LT(-3).GetText()) + assert.Equal("x", tokens.LT(-4).GetText()) +} + +func TestCommonTokenStreamFetchOffChannel(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, " ", LexerHidden), // 8 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + + assert.Nil(tokens.GetHiddenTokensToLeft(0, -1)) + assert.Nil(tokens.GetHiddenTokensToRight(0, -1)) + + assert.Equal("[[@0,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToLeft(1, -1))) + assert.Equal("[[@2,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToRight(1, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(2, -1)) + assert.Nil(tokens.GetHiddenTokensToRight(2, -1)) + + assert.Equal("[[@2,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToLeft(3, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(3, -1)) + + assert.Nil(tokens.GetHiddenTokensToLeft(4, -1)) + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1], [@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(4, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(5, -1)) + assert.Equal("[[@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(5, -1))) + + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(6, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(6, -1)) + + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1], [@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(7, -1))) + assert.Equal("[[@8,0:0=' ',<1>,channel=1,0:-1], [@9,0:0='\\n',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(7, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(8, -1)) + assert.Equal("[[@9,0:0='\\n',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(8, -1))) + + assert.Equal("[[@8,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(9, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(9, -1)) + +} + +type commonTokenStreamTestLexerSingleEOF struct { + *BaseLexer + + tokens []Token + i int +} + +func (l *commonTokenStreamTestLexerSingleEOF) NextToken() Token { + return newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel) +} + +func TestCommonTokenStreamSingleEOF(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexerSingleEOF{} + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + + assert.Equal(TokenEOF, tokens.LA(1)) + assert.Equal(0, tokens.index) + assert.Equal(1, tokens.Size()) +} + +func TestCommonTokenStreamCannotConsumeEOF(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexerSingleEOF{} + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + assert.Equal(TokenEOF, tokens.LA(1)) + assert.Equal(0, tokens.index) + assert.Equal(1, tokens.Size()) + assert.Panics(tokens.Consume) +} + +func TestCommonTokenStreamGetTextFromInterval(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, " ", LexerHidden), // 8 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + assert.Equal("x", tokens.GetTextFromInterval(&Interval{Start: 1, Stop: 1})) + assert.Equal(len(tokens.tokens), 2) + assert.Equal(" x =34 ; \n", tokens.GetTextFromInterval(nil)) + assert.Equal(len(tokens.tokens), 11) +} diff --git a/runtime/Go/antlr/v4/comparators.go b/runtime/Go/antlr/v4/comparators.go new file mode 100644 index 0000000000..fbe76c33e0 --- /dev/null +++ b/runtime/Go/antlr/v4/comparators.go @@ -0,0 +1,137 @@ +package antlr + +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +// This file contains all the implementations of custom comparators used for generic collections when the +// Hash() and Equals() funcs supplied by the struct objects themselves need to be overridden. Normally, we would +// put the comparators in the source file for the struct themselves, but given the organization of this code is +// sorta kinda based upon the Java code, I found it confusing trying to find out which comparator was where and used by +// which instantiation of a collection. For instance, an Array2DHashSet in the Java source, when used with ATNConfig +// collections requires three different comparators depending on what the collection is being used for. Collecting - pun intended - +// all the comparators here, makes it much easier to see which implementation of hash and equals is used by which collection. +// It also makes it easy to verify that the Hash() and Equals() functions marry up with the Java implementations. + +// ObjEqComparator is the equivalent of the Java ObjectEqualityComparator, which is the default instance of +// Equality comparator. We do not have inheritance in Go, only interfaces, so we use generics to enforce some +// type safety and avoid having to implement this for every type that we want to perform comparison on. +// +// This comparator works by using the standard Hash() and Equals() methods of the type T that is being compared. Which +// allows us to use it in any collection instance that does nto require a special hash or equals implementation. +type ObjEqComparator[T Collectable[T]] struct{} + +// Equals2 delegates to the Equals() method of type T +func (c *ObjEqComparator[T]) Equals2(o1, o2 T) bool { + return o1.Equals(o2) +} + +// Hash1 delegates to the Hash() method of type T +func (c *ObjEqComparator[T]) Hash1(o T) int { + + return o.Hash() +} + +type SemCComparator[T Collectable[T]] struct{} + +// ATNConfigComparator is used as the compartor for the configLookup field of an ATNConfigSet +// and has a custom Equals() and Hash() implementation, because equality is not based on the +// standard Hash() and Equals() methods of the ATNConfig type. +type ATNConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for configLookup +func (c *ATNConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetAlt() == o2.GetAlt() && + o1.GetSemanticContext().Equals(o2.GetSemanticContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup +func (c *ATNConfigComparator[T]) Hash1(o ATNConfig) int { + hash := 7 + hash = 31*hash + o.GetState().GetStateNumber() + hash = 31*hash + o.GetAlt() + hash = 31*hash + o.GetSemanticContext().Hash() + return hash +} + +// ATNAltConfigComparator is used as the comparator for mapping configs to Alt Bitsets +type ATNAltConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for configLookup +func (c *ATNAltConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetContext().Equals(o2.GetContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup +func (c *ATNAltConfigComparator[T]) Hash1(o ATNConfig) int { + h := murmurInit(7) + h = murmurUpdate(h, o.GetState().GetStateNumber()) + h = murmurUpdate(h, o.GetContext().Hash()) + return murmurFinish(h, 2) +} + +// BaseATNConfigComparator is used as the comparator for the configLookup field of a BaseATNConfigSet +// and has a custom Equals() and Hash() implementation, because equality is not based on the +// standard Hash() and Equals() methods of the ATNConfig type. +type BaseATNConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for baseATNConfigSet +func (c *BaseATNConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetAlt() == o2.GetAlt() && + o1.GetSemanticContext().Equals(o2.GetSemanticContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup, but in fact just +// delegates to the standard Hash() method of the ATNConfig type. +func (c *BaseATNConfigComparator[T]) Hash1(o ATNConfig) int { + + return o.Hash() +} diff --git a/runtime/Go/antlr/v4/dfa.go b/runtime/Go/antlr/v4/dfa.go new file mode 100644 index 0000000000..5326baff95 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa.go @@ -0,0 +1,148 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type DFA struct { + // atnStartState is the ATN state in which this was created + atnStartState DecisionState + + decision int + + // states is all the DFA states. Use Map to get the old state back; Set can only + // indicate whether it is there. Go maps implement key hash collisions and so on and are very + // good, but the DFAState is an object and can't be used directly as the key as it can in say JAva + // amd C#, whereby if the hashcode is the same for two objects, then Equals() is called against them + // to see if they really are the same object. + // + // + states *JStore[*DFAState, *ObjEqComparator[*DFAState]] + + numstates int + + s0 *DFAState + + // precedenceDfa is the backing field for isPrecedenceDfa and setPrecedenceDfa. + // True if the DFA is for a precedence decision and false otherwise. + precedenceDfa bool +} + +func NewDFA(atnStartState DecisionState, decision int) *DFA { + dfa := &DFA{ + atnStartState: atnStartState, + decision: decision, + states: NewJStore[*DFAState, *ObjEqComparator[*DFAState]](&ObjEqComparator[*DFAState]{}), + } + if s, ok := atnStartState.(*StarLoopEntryState); ok && s.precedenceRuleDecision { + dfa.precedenceDfa = true + dfa.s0 = NewDFAState(-1, NewBaseATNConfigSet(false)) + dfa.s0.isAcceptState = false + dfa.s0.requiresFullContext = false + } + return dfa +} + +// getPrecedenceStartState gets the start state for the current precedence and +// returns the start state corresponding to the specified precedence if a start +// state exists for the specified precedence and nil otherwise. d must be a +// precedence DFA. See also isPrecedenceDfa. +func (d *DFA) getPrecedenceStartState(precedence int) *DFAState { + if !d.getPrecedenceDfa() { + panic("only precedence DFAs may contain a precedence start state") + } + + // s0.edges is never nil for a precedence DFA + if precedence < 0 || precedence >= len(d.getS0().getEdges()) { + return nil + } + + return d.getS0().getIthEdge(precedence) +} + +// setPrecedenceStartState sets the start state for the current precedence. d +// must be a precedence DFA. See also isPrecedenceDfa. +func (d *DFA) setPrecedenceStartState(precedence int, startState *DFAState) { + if !d.getPrecedenceDfa() { + panic("only precedence DFAs may contain a precedence start state") + } + + if precedence < 0 { + return + } + + // Synchronization on s0 here is ok. When the DFA is turned into a + // precedence DFA, s0 will be initialized once and not updated again. s0.edges + // is never nil for a precedence DFA. + s0 := d.getS0() + if precedence >= s0.numEdges() { + edges := append(s0.getEdges(), make([]*DFAState, precedence+1-s0.numEdges())...) + s0.setEdges(edges) + d.setS0(s0) + } + + s0.setIthEdge(precedence, startState) +} + +func (d *DFA) getPrecedenceDfa() bool { + return d.precedenceDfa +} + +// setPrecedenceDfa sets whether d is a precedence DFA. If precedenceDfa differs +// from the current DFA configuration, then d.states is cleared, the initial +// state s0 is set to a new DFAState with an empty outgoing DFAState.edges to +// store the start states for individual precedence values if precedenceDfa is +// true or nil otherwise, and d.precedenceDfa is updated. +func (d *DFA) setPrecedenceDfa(precedenceDfa bool) { + if d.getPrecedenceDfa() != precedenceDfa { + d.states = NewJStore[*DFAState, *ObjEqComparator[*DFAState]](&ObjEqComparator[*DFAState]{}) + d.numstates = 0 + + if precedenceDfa { + precedenceState := NewDFAState(-1, NewBaseATNConfigSet(false)) + + precedenceState.setEdges(make([]*DFAState, 0)) + precedenceState.isAcceptState = false + precedenceState.requiresFullContext = false + d.setS0(precedenceState) + } else { + d.setS0(nil) + } + + d.precedenceDfa = precedenceDfa + } +} + +func (d *DFA) getS0() *DFAState { + return d.s0 +} + +func (d *DFA) setS0(s *DFAState) { + d.s0 = s +} + +// sortedStates returns the states in d sorted by their state number. +func (d *DFA) sortedStates() []*DFAState { + + vs := d.states.SortedSlice(func(i, j *DFAState) bool { + return i.stateNumber < j.stateNumber + }) + + return vs +} + +func (d *DFA) String(literalNames []string, symbolicNames []string) string { + if d.getS0() == nil { + return "" + } + + return NewDFASerializer(d, literalNames, symbolicNames).String() +} + +func (d *DFA) ToLexerString() string { + if d.getS0() == nil { + return "" + } + + return NewLexerDFASerializer(d).String() +} diff --git a/runtime/Go/antlr/v4/dfa_serializer.go b/runtime/Go/antlr/v4/dfa_serializer.go new file mode 100644 index 0000000000..84d0a31e53 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa_serializer.go @@ -0,0 +1,158 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +// DFASerializer is a DFA walker that knows how to dump them to serialized +// strings. +type DFASerializer struct { + dfa *DFA + literalNames []string + symbolicNames []string +} + +func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerializer { + if literalNames == nil { + literalNames = make([]string, 0) + } + + if symbolicNames == nil { + symbolicNames = make([]string, 0) + } + + return &DFASerializer{ + dfa: dfa, + literalNames: literalNames, + symbolicNames: symbolicNames, + } +} + +func (d *DFASerializer) String() string { + if d.dfa.getS0() == nil { + return "" + } + + buf := "" + states := d.dfa.sortedStates() + + for _, s := range states { + if s.edges != nil { + n := len(s.edges) + + for j := 0; j < n; j++ { + t := s.edges[j] + + if t != nil && t.stateNumber != 0x7FFFFFFF { + buf += d.GetStateString(s) + buf += "-" + buf += d.getEdgeLabel(j) + buf += "->" + buf += d.GetStateString(t) + buf += "\n" + } + } + } + } + + if len(buf) == 0 { + return "" + } + + return buf +} + +func (d *DFASerializer) getEdgeLabel(i int) string { + if i == 0 { + return "EOF" + } else if d.literalNames != nil && i-1 < len(d.literalNames) { + return d.literalNames[i-1] + } else if d.symbolicNames != nil && i-1 < len(d.symbolicNames) { + return d.symbolicNames[i-1] + } + + return strconv.Itoa(i - 1) +} + +func (d *DFASerializer) GetStateString(s *DFAState) string { + var a, b string + + if s.isAcceptState { + a = ":" + } + + if s.requiresFullContext { + b = "^" + } + + baseStateStr := a + "s" + strconv.Itoa(s.stateNumber) + b + + if s.isAcceptState { + if s.predicates != nil { + return baseStateStr + "=>" + fmt.Sprint(s.predicates) + } + + return baseStateStr + "=>" + fmt.Sprint(s.prediction) + } + + return baseStateStr +} + +type LexerDFASerializer struct { + *DFASerializer +} + +func NewLexerDFASerializer(dfa *DFA) *LexerDFASerializer { + return &LexerDFASerializer{DFASerializer: NewDFASerializer(dfa, nil, nil)} +} + +func (l *LexerDFASerializer) getEdgeLabel(i int) string { + var sb strings.Builder + sb.Grow(6) + sb.WriteByte('\'') + sb.WriteRune(rune(i)) + sb.WriteByte('\'') + return sb.String() +} + +func (l *LexerDFASerializer) String() string { + if l.dfa.getS0() == nil { + return "" + } + + buf := "" + states := l.dfa.sortedStates() + + for i := 0; i < len(states); i++ { + s := states[i] + + if s.edges != nil { + n := len(s.edges) + + for j := 0; j < n; j++ { + t := s.edges[j] + + if t != nil && t.stateNumber != 0x7FFFFFFF { + buf += l.GetStateString(s) + buf += "-" + buf += l.getEdgeLabel(j) + buf += "->" + buf += l.GetStateString(t) + buf += "\n" + } + } + } + } + + if len(buf) == 0 { + return "" + } + + return buf +} diff --git a/runtime/Go/antlr/v4/dfa_state.go b/runtime/Go/antlr/v4/dfa_state.go new file mode 100644 index 0000000000..c90dec55c8 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa_state.go @@ -0,0 +1,169 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" +) + +// PredPrediction maps a predicate to a predicted alternative. +type PredPrediction struct { + alt int + pred SemanticContext +} + +func NewPredPrediction(pred SemanticContext, alt int) *PredPrediction { + return &PredPrediction{alt: alt, pred: pred} +} + +func (p *PredPrediction) String() string { + return "(" + fmt.Sprint(p.pred) + ", " + fmt.Sprint(p.alt) + ")" +} + +// DFAState represents a set of possible ATN configurations. As Aho, Sethi, +// Ullman p. 117 says: "The DFA uses its state to keep track of all possible +// states the ATN can be in after reading each input symbol. That is to say, +// after reading input a1a2..an, the DFA is in a state that represents the +// subset T of the states of the ATN that are reachable from the ATN's start +// state along some path labeled a1a2..an." In conventional NFA-to-DFA +// conversion, therefore, the subset T would be a bitset representing the set of +// states the ATN could be in. We need to track the alt predicted by each state +// as well, however. More importantly, we need to maintain a stack of states, +// tracking the closure operations as they jump from rule to rule, emulating +// rule invocations (method calls). I have to add a stack to simulate the proper +// lookahead sequences for the underlying LL grammar from which the ATN was +// derived. +// +// I use a set of ATNConfig objects, not simple states. An ATNConfig is both a +// state (ala normal conversion) and a RuleContext describing the chain of rules +// (if any) followed to arrive at that state. +// +// A DFAState may have multiple references to a particular state, but with +// different ATN contexts (with same or different alts) meaning that state was +// reached via a different set of rule invocations. +type DFAState struct { + stateNumber int + configs ATNConfigSet + + // edges elements point to the target of the symbol. Shift up by 1 so (-1) + // Token.EOF maps to the first element. + edges []*DFAState + + isAcceptState bool + + // prediction is the ttype we match or alt we predict if the state is accept. + // Set to ATN.INVALID_ALT_NUMBER when predicates != nil or + // requiresFullContext. + prediction int + + lexerActionExecutor *LexerActionExecutor + + // requiresFullContext indicates it was created during an SLL prediction that + // discovered a conflict between the configurations in the state. Future + // ParserATNSimulator.execATN invocations immediately jump doing + // full context prediction if true. + requiresFullContext bool + + // predicates is the predicates associated with the ATN configurations of the + // DFA state during SLL parsing. When we have predicates, requiresFullContext + // is false, since full context prediction evaluates predicates on-the-fly. If + // d is + // not nil, then prediction is ATN.INVALID_ALT_NUMBER. + // + // We only use these for non-requiresFullContext but conflicting states. That + // means we know from the context (it's $ or we don't dip into outer context) + // that it's an ambiguity not a conflict. + // + // This list is computed by + // ParserATNSimulator.predicateDFAState. + predicates []*PredPrediction +} + +func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState { + if configs == nil { + configs = NewBaseATNConfigSet(false) + } + + return &DFAState{configs: configs, stateNumber: stateNumber} +} + +// GetAltSet gets the set of all alts mentioned by all ATN configurations in d. +func (d *DFAState) GetAltSet() []int { + var alts []int + + if d.configs != nil { + for _, c := range d.configs.GetItems() { + alts = append(alts, c.GetAlt()) + } + } + + if len(alts) == 0 { + return nil + } + + return alts +} + +func (d *DFAState) getEdges() []*DFAState { + return d.edges +} + +func (d *DFAState) numEdges() int { + return len(d.edges) +} + +func (d *DFAState) getIthEdge(i int) *DFAState { + return d.edges[i] +} + +func (d *DFAState) setEdges(newEdges []*DFAState) { + d.edges = newEdges +} + +func (d *DFAState) setIthEdge(i int, edge *DFAState) { + d.edges[i] = edge +} + +func (d *DFAState) setPrediction(v int) { + d.prediction = v +} + +func (d *DFAState) String() string { + var s string + if d.isAcceptState { + if d.predicates != nil { + s = "=>" + fmt.Sprint(d.predicates) + } else { + s = "=>" + fmt.Sprint(d.prediction) + } + } + + return fmt.Sprintf("%d:%s%s", d.stateNumber, fmt.Sprint(d.configs), s) +} + +func (d *DFAState) Hash() int { + h := murmurInit(7) + h = murmurUpdate(h, d.configs.Hash()) + return murmurFinish(h, 1) +} + +// Equals returns whether d equals other. Two DFAStates are equal if their ATN +// configuration sets are the same. This method is used to see if a state +// already exists. +// +// Because the number of alternatives and number of ATN configurations are +// finite, there is a finite number of DFA states that can be processed. This is +// necessary to show that the algorithm terminates. +// +// Cannot test the DFA state numbers here because in +// ParserATNSimulator.addDFAState we need to know if any other state exists that +// has d exact set of ATN configurations. The stateNumber is irrelevant. +func (d *DFAState) Equals(o Collectable[*DFAState]) bool { + if d == o { + return true + } + + return d.configs.Equals(o.(*DFAState).configs) +} diff --git a/runtime/Go/antlr/v4/diagnostic_error_listener.go b/runtime/Go/antlr/v4/diagnostic_error_listener.go new file mode 100644 index 0000000000..c55bcc19b2 --- /dev/null +++ b/runtime/Go/antlr/v4/diagnostic_error_listener.go @@ -0,0 +1,109 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" +) + +// +// This implementation of {@link ANTLRErrorListener} can be used to identify +// certain potential correctness and performance problems in grammars. "reports" +// are made by calling {@link Parser//NotifyErrorListeners} with the appropriate +// message. +// +// + +type DiagnosticErrorListener struct { + *DefaultErrorListener + + exactOnly bool +} + +func NewDiagnosticErrorListener(exactOnly bool) *DiagnosticErrorListener { + + n := new(DiagnosticErrorListener) + + // whether all ambiguities or only exact ambiguities are Reported. + n.exactOnly = exactOnly + return n +} + +func (d *DiagnosticErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + if d.exactOnly && !exact { + return + } + msg := "reportAmbiguity d=" + + d.getDecisionDescription(recognizer, dfa) + + ": ambigAlts=" + + d.getConflictingAlts(ambigAlts, configs).String() + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { + + msg := "reportAttemptingFullContext d=" + + d.getDecisionDescription(recognizer, dfa) + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { + msg := "reportContextSensitivity d=" + + d.getDecisionDescription(recognizer, dfa) + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) getDecisionDescription(recognizer Parser, dfa *DFA) string { + decision := dfa.decision + ruleIndex := dfa.atnStartState.GetRuleIndex() + + ruleNames := recognizer.GetRuleNames() + if ruleIndex < 0 || ruleIndex >= len(ruleNames) { + return strconv.Itoa(decision) + } + ruleName := ruleNames[ruleIndex] + if ruleName == "" { + return strconv.Itoa(decision) + } + return strconv.Itoa(decision) + " (" + ruleName + ")" +} + +// Computes the set of conflicting or ambiguous alternatives from a +// configuration set, if that information was not already provided by the +// parser. +// +// @param ReportedAlts The set of conflicting or ambiguous alternatives, as +// Reported by the parser. +// @param configs The conflicting or ambiguous configuration set. +// @return Returns {@code ReportedAlts} if it is not {@code nil}, otherwise +// returns the set of alternatives represented in {@code configs}. +func (d *DiagnosticErrorListener) getConflictingAlts(ReportedAlts *BitSet, set ATNConfigSet) *BitSet { + if ReportedAlts != nil { + return ReportedAlts + } + result := NewBitSet() + for _, c := range set.GetItems() { + result.add(c.GetAlt()) + } + + return result +} diff --git a/runtime/Go/antlr/v4/error_listener.go b/runtime/Go/antlr/v4/error_listener.go new file mode 100644 index 0000000000..f679f0dcd5 --- /dev/null +++ b/runtime/Go/antlr/v4/error_listener.go @@ -0,0 +1,104 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "os" + "strconv" +) + +// Provides an empty default implementation of {@link ANTLRErrorListener}. The +// default implementation of each method does nothing, but can be overridden as +// necessary. + +type ErrorListener interface { + SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) + ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) + ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) + ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) +} + +type DefaultErrorListener struct { +} + +func NewDefaultErrorListener() *DefaultErrorListener { + return new(DefaultErrorListener) +} + +func (d *DefaultErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { +} + +func (d *DefaultErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { +} + +func (d *DefaultErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { +} + +func (d *DefaultErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { +} + +type ConsoleErrorListener struct { + *DefaultErrorListener +} + +func NewConsoleErrorListener() *ConsoleErrorListener { + return new(ConsoleErrorListener) +} + +// Provides a default instance of {@link ConsoleErrorListener}. +var ConsoleErrorListenerINSTANCE = NewConsoleErrorListener() + +// {@inheritDoc} +// +//

+// This implementation prints messages to {@link System//err} containing the +// values of {@code line}, {@code charPositionInLine}, and {@code msg} using +// the following format.

+// +//
+// line line:charPositionInLine msg
+// 
+func (c *ConsoleErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { + fmt.Fprintln(os.Stderr, "line "+strconv.Itoa(line)+":"+strconv.Itoa(column)+" "+msg) +} + +type ProxyErrorListener struct { + *DefaultErrorListener + delegates []ErrorListener +} + +func NewProxyErrorListener(delegates []ErrorListener) *ProxyErrorListener { + if delegates == nil { + panic("delegates is not provided") + } + l := new(ProxyErrorListener) + l.delegates = delegates + return l +} + +func (p *ProxyErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { + for _, d := range p.delegates { + d.SyntaxError(recognizer, offendingSymbol, line, column, msg, e) + } +} + +func (p *ProxyErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + } +} + +func (p *ProxyErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs) + } +} + +func (p *ProxyErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs) + } +} diff --git a/runtime/Go/antlr/v4/error_strategy.go b/runtime/Go/antlr/v4/error_strategy.go new file mode 100644 index 0000000000..5c0a637ba4 --- /dev/null +++ b/runtime/Go/antlr/v4/error_strategy.go @@ -0,0 +1,734 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "reflect" + "strconv" + "strings" +) + +type ErrorStrategy interface { + reset(Parser) + RecoverInline(Parser) Token + Recover(Parser, RecognitionException) + Sync(Parser) + InErrorRecoveryMode(Parser) bool + ReportError(Parser, RecognitionException) + ReportMatch(Parser) +} + +// This is the default implementation of {@link ANTLRErrorStrategy} used for +// error Reporting and recovery in ANTLR parsers. +type DefaultErrorStrategy struct { + errorRecoveryMode bool + lastErrorIndex int + lastErrorStates *IntervalSet +} + +var _ ErrorStrategy = &DefaultErrorStrategy{} + +func NewDefaultErrorStrategy() *DefaultErrorStrategy { + + d := new(DefaultErrorStrategy) + + // Indicates whether the error strategy is currently "recovering from an + // error". This is used to suppress Reporting multiple error messages while + // attempting to recover from a detected syntax error. + // + // @see //InErrorRecoveryMode + // + d.errorRecoveryMode = false + + // The index into the input stream where the last error occurred. + // This is used to prevent infinite loops where an error is found + // but no token is consumed during recovery...another error is found, + // ad nauseum. This is a failsafe mechanism to guarantee that at least + // one token/tree node is consumed for two errors. + // + d.lastErrorIndex = -1 + d.lastErrorStates = nil + return d +} + +//

The default implementation simply calls {@link //endErrorCondition} to +// ensure that the handler is not in error recovery mode.

+func (d *DefaultErrorStrategy) reset(recognizer Parser) { + d.endErrorCondition(recognizer) +} + +// This method is called to enter error recovery mode when a recognition +// exception is Reported. +// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) beginErrorCondition(recognizer Parser) { + d.errorRecoveryMode = true +} + +func (d *DefaultErrorStrategy) InErrorRecoveryMode(recognizer Parser) bool { + return d.errorRecoveryMode +} + +// This method is called to leave error recovery mode after recovering from +// a recognition exception. +// +// @param recognizer +func (d *DefaultErrorStrategy) endErrorCondition(recognizer Parser) { + d.errorRecoveryMode = false + d.lastErrorStates = nil + d.lastErrorIndex = -1 +} + +// {@inheritDoc} +// +//

The default implementation simply calls {@link //endErrorCondition}.

+func (d *DefaultErrorStrategy) ReportMatch(recognizer Parser) { + d.endErrorCondition(recognizer) +} + +// {@inheritDoc} +// +//

The default implementation returns immediately if the handler is already +// in error recovery mode. Otherwise, it calls {@link //beginErrorCondition} +// and dispatches the Reporting task based on the runtime type of {@code e} +// according to the following table.

+// +// +func (d *DefaultErrorStrategy) ReportError(recognizer Parser, e RecognitionException) { + // if we've already Reported an error and have not Matched a token + // yet successfully, don't Report any errors. + if d.InErrorRecoveryMode(recognizer) { + return // don't Report spurious errors + } + d.beginErrorCondition(recognizer) + + switch t := e.(type) { + default: + fmt.Println("unknown recognition error type: " + reflect.TypeOf(e).Name()) + // fmt.Println(e.stack) + recognizer.NotifyErrorListeners(e.GetMessage(), e.GetOffendingToken(), e) + case *NoViableAltException: + d.ReportNoViableAlternative(recognizer, t) + case *InputMisMatchException: + d.ReportInputMisMatch(recognizer, t) + case *FailedPredicateException: + d.ReportFailedPredicate(recognizer, t) + } +} + +// {@inheritDoc} +// +//

The default implementation reSynchronizes the parser by consuming tokens +// until we find one in the reSynchronization set--loosely the set of tokens +// that can follow the current rule.

+func (d *DefaultErrorStrategy) Recover(recognizer Parser, e RecognitionException) { + + if d.lastErrorIndex == recognizer.GetInputStream().Index() && + d.lastErrorStates != nil && d.lastErrorStates.contains(recognizer.GetState()) { + // uh oh, another error at same token index and previously-Visited + // state in ATN must be a case where LT(1) is in the recovery + // token set so nothing got consumed. Consume a single token + // at least to prevent an infinite loop d is a failsafe. + recognizer.Consume() + } + d.lastErrorIndex = recognizer.GetInputStream().Index() + if d.lastErrorStates == nil { + d.lastErrorStates = NewIntervalSet() + } + d.lastErrorStates.addOne(recognizer.GetState()) + followSet := d.getErrorRecoverySet(recognizer) + d.consumeUntil(recognizer, followSet) +} + +// The default implementation of {@link ANTLRErrorStrategy//Sync} makes sure +// that the current lookahead symbol is consistent with what were expecting +// at d point in the ATN. You can call d anytime but ANTLR only +// generates code to check before subrules/loops and each iteration. +// +//

Implements Jim Idle's magic Sync mechanism in closures and optional +// subrules. E.g.,

+// +//
+// a : Sync ( stuff Sync )*
+// Sync : {consume to what can follow Sync}
+// 
+// +// At the start of a sub rule upon error, {@link //Sync} performs single +// token deletion, if possible. If it can't do that, it bails on the current +// rule and uses the default error recovery, which consumes until the +// reSynchronization set of the current rule. +// +//

If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block +// with an empty alternative), then the expected set includes what follows +// the subrule.

+// +//

During loop iteration, it consumes until it sees a token that can start a +// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to +// stay in the loop as long as possible.

+// +//

ORIGINS

+// +//

Previous versions of ANTLR did a poor job of their recovery within loops. +// A single mismatch token or missing token would force the parser to bail +// out of the entire rules surrounding the loop. So, for rule

+// +//
+// classfunc : 'class' ID '{' member* '}'
+// 
+// +// input with an extra token between members would force the parser to +// consume until it found the next class definition rather than the next +// member definition of the current class. +// +//

This functionality cost a little bit of effort because the parser has to +// compare token set at the start of the loop and at each iteration. If for +// some reason speed is suffering for you, you can turn off d +// functionality by simply overriding d method as a blank { }.

+func (d *DefaultErrorStrategy) Sync(recognizer Parser) { + // If already recovering, don't try to Sync + if d.InErrorRecoveryMode(recognizer) { + return + } + + s := recognizer.GetInterpreter().atn.states[recognizer.GetState()] + la := recognizer.GetTokenStream().LA(1) + + // try cheaper subset first might get lucky. seems to shave a wee bit off + nextTokens := recognizer.GetATN().NextTokens(s, nil) + if nextTokens.contains(TokenEpsilon) || nextTokens.contains(la) { + return + } + + switch s.GetStateType() { + case ATNStateBlockStart, ATNStateStarBlockStart, ATNStatePlusBlockStart, ATNStateStarLoopEntry: + // Report error and recover if possible + if d.SingleTokenDeletion(recognizer) != nil { + return + } + panic(NewInputMisMatchException(recognizer)) + case ATNStatePlusLoopBack, ATNStateStarLoopBack: + d.ReportUnwantedToken(recognizer) + expecting := NewIntervalSet() + expecting.addSet(recognizer.GetExpectedTokens()) + whatFollowsLoopIterationOrRule := expecting.addSet(d.getErrorRecoverySet(recognizer)) + d.consumeUntil(recognizer, whatFollowsLoopIterationOrRule) + default: + // do nothing if we can't identify the exact kind of ATN state + } +} + +// This is called by {@link //ReportError} when the exception is a +// {@link NoViableAltException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (d *DefaultErrorStrategy) ReportNoViableAlternative(recognizer Parser, e *NoViableAltException) { + tokens := recognizer.GetTokenStream() + var input string + if tokens != nil { + if e.startToken.GetTokenType() == TokenEOF { + input = "" + } else { + input = tokens.GetTextFromTokens(e.startToken, e.offendingToken) + } + } else { + input = "" + } + msg := "no viable alternative at input " + d.escapeWSAndQuote(input) + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This is called by {@link //ReportError} when the exception is an +// {@link InputMisMatchException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (this *DefaultErrorStrategy) ReportInputMisMatch(recognizer Parser, e *InputMisMatchException) { + msg := "mismatched input " + this.GetTokenErrorDisplay(e.offendingToken) + + " expecting " + e.getExpectedTokens().StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This is called by {@link //ReportError} when the exception is a +// {@link FailedPredicateException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (d *DefaultErrorStrategy) ReportFailedPredicate(recognizer Parser, e *FailedPredicateException) { + ruleName := recognizer.GetRuleNames()[recognizer.GetParserRuleContext().GetRuleIndex()] + msg := "rule " + ruleName + " " + e.message + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This method is called to Report a syntax error which requires the removal +// of a token from the input stream. At the time d method is called, the +// erroneous symbol is current {@code LT(1)} symbol and has not yet been +// removed from the input stream. When d method returns, +// {@code recognizer} is in error recovery mode. +// +//

This method is called when {@link //singleTokenDeletion} identifies +// single-token deletion as a viable recovery strategy for a mismatched +// input error.

+// +//

The default implementation simply returns if the handler is already in +// error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to +// enter error recovery mode, followed by calling +// {@link Parser//NotifyErrorListeners}.

+// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) ReportUnwantedToken(recognizer Parser) { + if d.InErrorRecoveryMode(recognizer) { + return + } + d.beginErrorCondition(recognizer) + t := recognizer.GetCurrentToken() + tokenName := d.GetTokenErrorDisplay(t) + expecting := d.GetExpectedTokens(recognizer) + msg := "extraneous input " + tokenName + " expecting " + + expecting.StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + recognizer.NotifyErrorListeners(msg, t, nil) +} + +// This method is called to Report a syntax error which requires the +// insertion of a missing token into the input stream. At the time d +// method is called, the missing token has not yet been inserted. When d +// method returns, {@code recognizer} is in error recovery mode. +// +//

This method is called when {@link //singleTokenInsertion} identifies +// single-token insertion as a viable recovery strategy for a mismatched +// input error.

+// +//

The default implementation simply returns if the handler is already in +// error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to +// enter error recovery mode, followed by calling +// {@link Parser//NotifyErrorListeners}.

+// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) ReportMissingToken(recognizer Parser) { + if d.InErrorRecoveryMode(recognizer) { + return + } + d.beginErrorCondition(recognizer) + t := recognizer.GetCurrentToken() + expecting := d.GetExpectedTokens(recognizer) + msg := "missing " + expecting.StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + + " at " + d.GetTokenErrorDisplay(t) + recognizer.NotifyErrorListeners(msg, t, nil) +} + +//

The default implementation attempts to recover from the mismatched input +// by using single token insertion and deletion as described below. If the +// recovery attempt fails, d method panics an +// {@link InputMisMatchException}.

+// +//

EXTRA TOKEN (single token deletion)

+// +//

{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the +// right token, however, then assume {@code LA(1)} is some extra spurious +// token and delete it. Then consume and return the next token (which was +// the {@code LA(2)} token) as the successful result of the Match operation.

+// +//

This recovery strategy is implemented by {@link +// //singleTokenDeletion}.

+// +//

MISSING TOKEN (single token insertion)

+// +//

If current token (at {@code LA(1)}) is consistent with what could come +// after the expected {@code LA(1)} token, then assume the token is missing +// and use the parser's {@link TokenFactory} to create it on the fly. The +// "insertion" is performed by returning the created token as the successful +// result of the Match operation.

+// +//

This recovery strategy is implemented by {@link +// //singleTokenInsertion}.

+// +//

EXAMPLE

+// +//

For example, Input {@code i=(3} is clearly missing the {@code ')'}. When +// the parser returns from the nested call to {@code expr}, it will have +// call chain:

+// +//
+// stat &rarr expr &rarr atom
+// 
+// +// and it will be trying to Match the {@code ')'} at d point in the +// derivation: +// +//
+// => ID '=' '(' INT ')' ('+' atom)* ”
+// ^
+// 
+// +// The attempt to Match {@code ')'} will fail when it sees {@code ”} and +// call {@link //recoverInline}. To recover, it sees that {@code LA(1)==”} +// is in the set of tokens that can follow the {@code ')'} token reference +// in rule {@code atom}. It can assume that you forgot the {@code ')'}. +func (d *DefaultErrorStrategy) RecoverInline(recognizer Parser) Token { + // SINGLE TOKEN DELETION + MatchedSymbol := d.SingleTokenDeletion(recognizer) + if MatchedSymbol != nil { + // we have deleted the extra token. + // now, move past ttype token as if all were ok + recognizer.Consume() + return MatchedSymbol + } + // SINGLE TOKEN INSERTION + if d.SingleTokenInsertion(recognizer) { + return d.GetMissingSymbol(recognizer) + } + // even that didn't work must panic the exception + panic(NewInputMisMatchException(recognizer)) +} + +// This method implements the single-token insertion inline error recovery +// strategy. It is called by {@link //recoverInline} if the single-token +// deletion strategy fails to recover from the mismatched input. If this +// method returns {@code true}, {@code recognizer} will be in error recovery +// mode. +// +//

This method determines whether or not single-token insertion is viable by +// checking if the {@code LA(1)} input symbol could be successfully Matched +// if it were instead the {@code LA(2)} symbol. If d method returns +// {@code true}, the caller is responsible for creating and inserting a +// token with the correct type to produce d behavior.

+// +// @param recognizer the parser instance +// @return {@code true} if single-token insertion is a viable recovery +// strategy for the current mismatched input, otherwise {@code false} +func (d *DefaultErrorStrategy) SingleTokenInsertion(recognizer Parser) bool { + currentSymbolType := recognizer.GetTokenStream().LA(1) + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token error recovery + // is free to conjure up and insert the missing token + atn := recognizer.GetInterpreter().atn + currentState := atn.states[recognizer.GetState()] + next := currentState.GetTransitions()[0].getTarget() + expectingAtLL2 := atn.NextTokens(next, recognizer.GetParserRuleContext()) + if expectingAtLL2.contains(currentSymbolType) { + d.ReportMissingToken(recognizer) + return true + } + + return false +} + +// This method implements the single-token deletion inline error recovery +// strategy. It is called by {@link //recoverInline} to attempt to recover +// from mismatched input. If this method returns nil, the parser and error +// handler state will not have changed. If this method returns non-nil, +// {@code recognizer} will not be in error recovery mode since the +// returned token was a successful Match. +// +//

If the single-token deletion is successful, d method calls +// {@link //ReportUnwantedToken} to Report the error, followed by +// {@link Parser//consume} to actually "delete" the extraneous token. Then, +// before returning {@link //ReportMatch} is called to signal a successful +// Match.

+// +// @param recognizer the parser instance +// @return the successfully Matched {@link Token} instance if single-token +// deletion successfully recovers from the mismatched input, otherwise +// {@code nil} +func (d *DefaultErrorStrategy) SingleTokenDeletion(recognizer Parser) Token { + NextTokenType := recognizer.GetTokenStream().LA(2) + expecting := d.GetExpectedTokens(recognizer) + if expecting.contains(NextTokenType) { + d.ReportUnwantedToken(recognizer) + // print("recoverFromMisMatchedToken deleting " \ + // + str(recognizer.GetTokenStream().LT(1)) \ + // + " since " + str(recognizer.GetTokenStream().LT(2)) \ + // + " is what we want", file=sys.stderr) + recognizer.Consume() // simply delete extra token + // we want to return the token we're actually Matching + MatchedSymbol := recognizer.GetCurrentToken() + d.ReportMatch(recognizer) // we know current token is correct + return MatchedSymbol + } + + return nil +} + +// Conjure up a missing token during error recovery. +// +// The recognizer attempts to recover from single missing +// symbols. But, actions might refer to that missing symbol. +// For example, x=ID {f($x)}. The action clearly assumes +// that there has been an identifier Matched previously and that +// $x points at that token. If that token is missing, but +// the next token in the stream is what we want we assume that +// d token is missing and we keep going. Because we +// have to return some token to replace the missing token, +// we have to conjure one up. This method gives the user control +// over the tokens returned for missing tokens. Mostly, +// you will want to create something special for identifier +// tokens. For literals such as '{' and ',', the default +// action in the parser or tree parser works. It simply creates +// a CommonToken of the appropriate type. The text will be the token. +// If you change what tokens must be created by the lexer, +// override d method to create the appropriate tokens. +func (d *DefaultErrorStrategy) GetMissingSymbol(recognizer Parser) Token { + currentSymbol := recognizer.GetCurrentToken() + expecting := d.GetExpectedTokens(recognizer) + expectedTokenType := expecting.first() + var tokenText string + + if expectedTokenType == TokenEOF { + tokenText = "" + } else { + ln := recognizer.GetLiteralNames() + if expectedTokenType > 0 && expectedTokenType < len(ln) { + tokenText = "" + } else { + tokenText = "" // TODO matches the JS impl + } + } + current := currentSymbol + lookback := recognizer.GetTokenStream().LT(-1) + if current.GetTokenType() == TokenEOF && lookback != nil { + current = lookback + } + + tf := recognizer.GetTokenFactory() + + return tf.Create(current.GetSource(), expectedTokenType, tokenText, TokenDefaultChannel, -1, -1, current.GetLine(), current.GetColumn()) +} + +func (d *DefaultErrorStrategy) GetExpectedTokens(recognizer Parser) *IntervalSet { + return recognizer.GetExpectedTokens() +} + +// How should a token be displayed in an error message? The default +// is to display just the text, but during development you might +// want to have a lot of information spit out. Override in that case +// to use t.String() (which, for CommonToken, dumps everything about +// the token). This is better than forcing you to override a method in +// your token objects because you don't have to go modify your lexer +// so that it creates a NewJava type. +func (d *DefaultErrorStrategy) GetTokenErrorDisplay(t Token) string { + if t == nil { + return "" + } + s := t.GetText() + if s == "" { + if t.GetTokenType() == TokenEOF { + s = "" + } else { + s = "<" + strconv.Itoa(t.GetTokenType()) + ">" + } + } + return d.escapeWSAndQuote(s) +} + +func (d *DefaultErrorStrategy) escapeWSAndQuote(s string) string { + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + return "'" + s + "'" +} + +// Compute the error recovery set for the current rule. During +// rule invocation, the parser pushes the set of tokens that can +// follow that rule reference on the stack d amounts to +// computing FIRST of what follows the rule reference in the +// enclosing rule. See LinearApproximator.FIRST(). +// This local follow set only includes tokens +// from within the rule i.e., the FIRST computation done by +// ANTLR stops at the end of a rule. +// +// # EXAMPLE +// +// When you find a "no viable alt exception", the input is not +// consistent with any of the alternatives for rule r. The best +// thing to do is to consume tokens until you see something that +// can legally follow a call to r//or* any rule that called r. +// You don't want the exact set of viable next tokens because the +// input might just be missing a token--you might consume the +// rest of the input looking for one of the missing tokens. +// +// Consider grammar: +// +// a : '[' b ']' +// | '(' b ')' +// +// b : c '^' INT +// c : ID +// | INT +// +// At each rule invocation, the set of tokens that could follow +// that rule is pushed on a stack. Here are the various +// context-sensitive follow sets: +// +// FOLLOW(b1_in_a) = FIRST(']') = ']' +// FOLLOW(b2_in_a) = FIRST(')') = ')' +// FOLLOW(c_in_b) = FIRST('^') = '^' +// +// Upon erroneous input "[]", the call chain is +// +// a -> b -> c +// +// and, hence, the follow context stack is: +// +// depth follow set start of rule execution +// 0 a (from main()) +// 1 ']' b +// 2 '^' c +// +// Notice that ')' is not included, because b would have to have +// been called from a different context in rule a for ')' to be +// included. +// +// For error recovery, we cannot consider FOLLOW(c) +// (context-sensitive or otherwise). We need the combined set of +// all context-sensitive FOLLOW sets--the set of all tokens that +// could follow any reference in the call chain. We need to +// reSync to one of those tokens. Note that FOLLOW(c)='^' and if +// we reSync'd to that token, we'd consume until EOF. We need to +// Sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. +// In this case, for input "[]", LA(1) is ']' and in the set, so we would +// not consume anything. After printing an error, rule c would +// return normally. Rule b would not find the required '^' though. +// At this point, it gets a mismatched token error and panics an +// exception (since LA(1) is not in the viable following token +// set). The rule exception handler tries to recover, but finds +// the same recovery set and doesn't consume anything. Rule b +// exits normally returning to rule a. Now it finds the ']' (and +// with the successful Match exits errorRecovery mode). +// +// So, you can see that the parser walks up the call chain looking +// for the token that was a member of the recovery set. +// +// Errors are not generated in errorRecovery mode. +// +// ANTLR's error recovery mechanism is based upon original ideas: +// +// "Algorithms + Data Structures = Programs" by Niklaus Wirth +// +// and +// +// "A note on error recovery in recursive descent parsers": +// http://portal.acm.org/citation.cfm?id=947902.947905 +// +// Later, Josef Grosch had some good ideas: +// +// "Efficient and Comfortable Error Recovery in Recursive Descent +// Parsers": +// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip +// +// Like Grosch I implement context-sensitive FOLLOW sets that are combined +// at run-time upon error to avoid overhead during parsing. +func (d *DefaultErrorStrategy) getErrorRecoverySet(recognizer Parser) *IntervalSet { + atn := recognizer.GetInterpreter().atn + ctx := recognizer.GetParserRuleContext() + recoverSet := NewIntervalSet() + for ctx != nil && ctx.GetInvokingState() >= 0 { + // compute what follows who invoked us + invokingState := atn.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + follow := atn.NextTokens(rt.(*RuleTransition).followState, nil) + recoverSet.addSet(follow) + ctx = ctx.GetParent().(ParserRuleContext) + } + recoverSet.removeOne(TokenEpsilon) + return recoverSet +} + +// Consume tokens until one Matches the given token set.// +func (d *DefaultErrorStrategy) consumeUntil(recognizer Parser, set *IntervalSet) { + ttype := recognizer.GetTokenStream().LA(1) + for ttype != TokenEOF && !set.contains(ttype) { + recognizer.Consume() + ttype = recognizer.GetTokenStream().LA(1) + } +} + +// +// This implementation of {@link ANTLRErrorStrategy} responds to syntax errors +// by immediately canceling the parse operation with a +// {@link ParseCancellationException}. The implementation ensures that the +// {@link ParserRuleContext//exception} field is set for all parse tree nodes +// that were not completed prior to encountering the error. +// +//

+// This error strategy is useful in the following scenarios.

+// +//
    +//
  • Two-stage parsing: This error strategy allows the first +// stage of two-stage parsing to immediately terminate if an error is +// encountered, and immediately fall back to the second stage. In addition to +// avoiding wasted work by attempting to recover from errors here, the empty +// implementation of {@link BailErrorStrategy//Sync} improves the performance of +// the first stage.
  • +//
  • Silent validation: When syntax errors are not being +// Reported or logged, and the parse result is simply ignored if errors occur, +// the {@link BailErrorStrategy} avoids wasting work on recovering from errors +// when the result will be ignored either way.
  • +//
+// +//

+// {@code myparser.setErrorHandler(NewBailErrorStrategy())}

+// +// @see Parser//setErrorHandler(ANTLRErrorStrategy) + +type BailErrorStrategy struct { + *DefaultErrorStrategy +} + +var _ ErrorStrategy = &BailErrorStrategy{} + +func NewBailErrorStrategy() *BailErrorStrategy { + + b := new(BailErrorStrategy) + + b.DefaultErrorStrategy = NewDefaultErrorStrategy() + + return b +} + +// Instead of recovering from exception {@code e}, re-panic it wrapped +// in a {@link ParseCancellationException} so it is not caught by the +// rule func catches. Use {@link Exception//getCause()} to get the +// original {@link RecognitionException}. +func (b *BailErrorStrategy) Recover(recognizer Parser, e RecognitionException) { + context := recognizer.GetParserRuleContext() + for context != nil { + context.SetException(e) + if parent, ok := context.GetParent().(ParserRuleContext); ok { + context = parent + } else { + context = nil + } + } + panic(NewParseCancellationException()) // TODO we don't emit e properly +} + +// Make sure we don't attempt to recover inline if the parser +// successfully recovers, it won't panic an exception. +func (b *BailErrorStrategy) RecoverInline(recognizer Parser) Token { + b.Recover(recognizer, NewInputMisMatchException(recognizer)) + + return nil +} + +// Make sure we don't attempt to recover from problems in subrules.// +func (b *BailErrorStrategy) Sync(recognizer Parser) { + // pass +} diff --git a/runtime/Go/antlr/v4/errors.go b/runtime/Go/antlr/v4/errors.go new file mode 100644 index 0000000000..3954c13782 --- /dev/null +++ b/runtime/Go/antlr/v4/errors.go @@ -0,0 +1,238 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just +// 3 kinds of errors: prediction errors, failed predicate errors, and +// mismatched input errors. In each case, the parser knows where it is +// in the input, where it is in the ATN, the rule invocation stack, +// and what kind of problem occurred. + +type RecognitionException interface { + GetOffendingToken() Token + GetMessage() string + GetInputStream() IntStream +} + +type BaseRecognitionException struct { + message string + recognizer Recognizer + offendingToken Token + offendingState int + ctx RuleContext + input IntStream +} + +func NewBaseRecognitionException(message string, recognizer Recognizer, input IntStream, ctx RuleContext) *BaseRecognitionException { + + // todo + // Error.call(this) + // + // if (!!Error.captureStackTrace) { + // Error.captureStackTrace(this, RecognitionException) + // } else { + // stack := NewError().stack + // } + // TODO may be able to use - "runtime" func Stack(buf []byte, all bool) int + + t := new(BaseRecognitionException) + + t.message = message + t.recognizer = recognizer + t.input = input + t.ctx = ctx + // The current {@link Token} when an error occurred. Since not all streams + // support accessing symbols by index, we have to track the {@link Token} + // instance itself. + t.offendingToken = nil + // Get the ATN state number the parser was in at the time the error + // occurred. For {@link NoViableAltException} and + // {@link LexerNoViableAltException} exceptions, this is the + // {@link DecisionState} number. For others, it is the state whose outgoing + // edge we couldn't Match. + t.offendingState = -1 + if t.recognizer != nil { + t.offendingState = t.recognizer.GetState() + } + + return t +} + +func (b *BaseRecognitionException) GetMessage() string { + return b.message +} + +func (b *BaseRecognitionException) GetOffendingToken() Token { + return b.offendingToken +} + +func (b *BaseRecognitionException) GetInputStream() IntStream { + return b.input +} + +//

If the state number is not known, b method returns -1.

+ +// Gets the set of input symbols which could potentially follow the +// previously Matched symbol at the time b exception was panicn. +// +//

If the set of expected tokens is not known and could not be computed, +// b method returns {@code nil}.

+// +// @return The set of token types that could potentially follow the current +// state in the ATN, or {@code nil} if the information is not available. +// / +func (b *BaseRecognitionException) getExpectedTokens() *IntervalSet { + if b.recognizer != nil { + return b.recognizer.GetATN().getExpectedTokens(b.offendingState, b.ctx) + } + + return nil +} + +func (b *BaseRecognitionException) String() string { + return b.message +} + +type LexerNoViableAltException struct { + *BaseRecognitionException + + startIndex int + deadEndConfigs ATNConfigSet +} + +func NewLexerNoViableAltException(lexer Lexer, input CharStream, startIndex int, deadEndConfigs ATNConfigSet) *LexerNoViableAltException { + + l := new(LexerNoViableAltException) + + l.BaseRecognitionException = NewBaseRecognitionException("", lexer, input, nil) + + l.startIndex = startIndex + l.deadEndConfigs = deadEndConfigs + + return l +} + +func (l *LexerNoViableAltException) String() string { + symbol := "" + if l.startIndex >= 0 && l.startIndex < l.input.Size() { + symbol = l.input.(CharStream).GetTextFromInterval(NewInterval(l.startIndex, l.startIndex)) + } + return "LexerNoViableAltException" + symbol +} + +type NoViableAltException struct { + *BaseRecognitionException + + startToken Token + offendingToken Token + ctx ParserRuleContext + deadEndConfigs ATNConfigSet +} + +// Indicates that the parser could not decide which of two or more paths +// to take based upon the remaining input. It tracks the starting token +// of the offending input and also knows where the parser was +// in the various paths when the error. Reported by ReportNoViableAlternative() +func NewNoViableAltException(recognizer Parser, input TokenStream, startToken Token, offendingToken Token, deadEndConfigs ATNConfigSet, ctx ParserRuleContext) *NoViableAltException { + + if ctx == nil { + ctx = recognizer.GetParserRuleContext() + } + + if offendingToken == nil { + offendingToken = recognizer.GetCurrentToken() + } + + if startToken == nil { + startToken = recognizer.GetCurrentToken() + } + + if input == nil { + input = recognizer.GetInputStream().(TokenStream) + } + + n := new(NoViableAltException) + n.BaseRecognitionException = NewBaseRecognitionException("", recognizer, input, ctx) + + // Which configurations did we try at input.Index() that couldn't Match + // input.LT(1)?// + n.deadEndConfigs = deadEndConfigs + // The token object at the start index the input stream might + // not be buffering tokens so get a reference to it. (At the + // time the error occurred, of course the stream needs to keep a + // buffer all of the tokens but later we might not have access to those.) + n.startToken = startToken + n.offendingToken = offendingToken + + return n +} + +type InputMisMatchException struct { + *BaseRecognitionException +} + +// This signifies any kind of mismatched input exceptions such as +// when the current input does not Match the expected token. +func NewInputMisMatchException(recognizer Parser) *InputMisMatchException { + + i := new(InputMisMatchException) + i.BaseRecognitionException = NewBaseRecognitionException("", recognizer, recognizer.GetInputStream(), recognizer.GetParserRuleContext()) + + i.offendingToken = recognizer.GetCurrentToken() + + return i + +} + +// A semantic predicate failed during validation. Validation of predicates +// occurs when normally parsing the alternative just like Matching a token. +// Disambiguating predicate evaluation occurs when we test a predicate during +// prediction. + +type FailedPredicateException struct { + *BaseRecognitionException + + ruleIndex int + predicateIndex int + predicate string +} + +func NewFailedPredicateException(recognizer Parser, predicate string, message string) *FailedPredicateException { + + f := new(FailedPredicateException) + + f.BaseRecognitionException = NewBaseRecognitionException(f.formatMessage(predicate, message), recognizer, recognizer.GetInputStream(), recognizer.GetParserRuleContext()) + + s := recognizer.GetInterpreter().atn.states[recognizer.GetState()] + trans := s.GetTransitions()[0] + if trans2, ok := trans.(*PredicateTransition); ok { + f.ruleIndex = trans2.ruleIndex + f.predicateIndex = trans2.predIndex + } else { + f.ruleIndex = 0 + f.predicateIndex = 0 + } + f.predicate = predicate + f.offendingToken = recognizer.GetCurrentToken() + + return f +} + +func (f *FailedPredicateException) formatMessage(predicate, message string) string { + if message != "" { + return message + } + + return "failed predicate: {" + predicate + "}?" +} + +type ParseCancellationException struct { +} + +func NewParseCancellationException() *ParseCancellationException { + // Error.call(this) + // Error.captureStackTrace(this, ParseCancellationException) + return new(ParseCancellationException) +} diff --git a/runtime/Go/antlr/v4/file_stream.go b/runtime/Go/antlr/v4/file_stream.go new file mode 100644 index 0000000000..bd6ad5efe3 --- /dev/null +++ b/runtime/Go/antlr/v4/file_stream.go @@ -0,0 +1,49 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "io" + "os" +) + +// This is an InputStream that is loaded from a file all at once +// when you construct the object. + +type FileStream struct { + *InputStream + + filename string +} + +func NewFileStream(fileName string) (*FileStream, error) { + + buf := bytes.NewBuffer(nil) + + f, err := os.Open(fileName) + if err != nil { + return nil, err + } + defer f.Close() + _, err = io.Copy(buf, f) + if err != nil { + return nil, err + } + + fs := new(FileStream) + + fs.filename = fileName + s := string(buf.Bytes()) + + fs.InputStream = NewInputStream(s) + + return fs, nil + +} + +func (f *FileStream) GetSourceName() string { + return f.filename +} diff --git a/runtime/Go/antlr/v4/go.mod b/runtime/Go/antlr/v4/go.mod new file mode 100644 index 0000000000..0d8feb6214 --- /dev/null +++ b/runtime/Go/antlr/v4/go.mod @@ -0,0 +1,5 @@ +module github.com/antlr/antlr4/runtime/Go/antlr/v4 + +go 1.18 + +require golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e diff --git a/runtime/Go/antlr/v4/go.sum b/runtime/Go/antlr/v4/go.sum new file mode 100644 index 0000000000..2b05f22a47 --- /dev/null +++ b/runtime/Go/antlr/v4/go.sum @@ -0,0 +1,2 @@ +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= diff --git a/runtime/Go/antlr/v4/input_stream.go b/runtime/Go/antlr/v4/input_stream.go new file mode 100644 index 0000000000..a8b889cedb --- /dev/null +++ b/runtime/Go/antlr/v4/input_stream.go @@ -0,0 +1,113 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type InputStream struct { + name string + index int + data []rune + size int +} + +func NewInputStream(data string) *InputStream { + + is := new(InputStream) + + is.name = "" + is.index = 0 + is.data = []rune(data) + is.size = len(is.data) // number of runes + + return is +} + +func (is *InputStream) reset() { + is.index = 0 +} + +func (is *InputStream) Consume() { + if is.index >= is.size { + // assert is.LA(1) == TokenEOF + panic("cannot consume EOF") + } + is.index++ +} + +func (is *InputStream) LA(offset int) int { + + if offset == 0 { + return 0 // nil + } + if offset < 0 { + offset++ // e.g., translate LA(-1) to use offset=0 + } + pos := is.index + offset - 1 + + if pos < 0 || pos >= is.size { // invalid + return TokenEOF + } + + return int(is.data[pos]) +} + +func (is *InputStream) LT(offset int) int { + return is.LA(offset) +} + +func (is *InputStream) Index() int { + return is.index +} + +func (is *InputStream) Size() int { + return is.size +} + +// mark/release do nothing we have entire buffer +func (is *InputStream) Mark() int { + return -1 +} + +func (is *InputStream) Release(marker int) { +} + +func (is *InputStream) Seek(index int) { + if index <= is.index { + is.index = index // just jump don't update stream state (line,...) + return + } + // seek forward + is.index = intMin(index, is.size) +} + +func (is *InputStream) GetText(start int, stop int) string { + if stop >= is.size { + stop = is.size - 1 + } + if start >= is.size { + return "" + } + + return string(is.data[start : stop+1]) +} + +func (is *InputStream) GetTextFromTokens(start, stop Token) string { + if start != nil && stop != nil { + return is.GetTextFromInterval(NewInterval(start.GetTokenIndex(), stop.GetTokenIndex())) + } + + return "" +} + +func (is *InputStream) GetTextFromInterval(i *Interval) string { + return is.GetText(i.Start, i.Stop) +} + +func (*InputStream) GetSourceName() string { + return "Obtained from string" +} + +func (is *InputStream) String() string { + return string(is.data) +} diff --git a/runtime/Go/antlr/v4/int_stream.go b/runtime/Go/antlr/v4/int_stream.go new file mode 100644 index 0000000000..4778878bd0 --- /dev/null +++ b/runtime/Go/antlr/v4/int_stream.go @@ -0,0 +1,16 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type IntStream interface { + Consume() + LA(int) int + Mark() int + Release(marker int) + Index() int + Seek(index int) + Size() int + GetSourceName() string +} diff --git a/runtime/Go/antlr/v4/interval_set.go b/runtime/Go/antlr/v4/interval_set.go new file mode 100644 index 0000000000..c1e155e818 --- /dev/null +++ b/runtime/Go/antlr/v4/interval_set.go @@ -0,0 +1,312 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" + "strings" +) + +type Interval struct { + Start int + Stop int +} + +/* stop is not included! */ +func NewInterval(start, stop int) *Interval { + i := new(Interval) + + i.Start = start + i.Stop = stop + return i +} + +func (i *Interval) Contains(item int) bool { + return item >= i.Start && item < i.Stop +} + +func (i *Interval) String() string { + if i.Start == i.Stop-1 { + return strconv.Itoa(i.Start) + } + + return strconv.Itoa(i.Start) + ".." + strconv.Itoa(i.Stop-1) +} + +func (i *Interval) length() int { + return i.Stop - i.Start +} + +type IntervalSet struct { + intervals []*Interval + readOnly bool +} + +func NewIntervalSet() *IntervalSet { + + i := new(IntervalSet) + + i.intervals = nil + i.readOnly = false + + return i +} + +func (i *IntervalSet) first() int { + if len(i.intervals) == 0 { + return TokenInvalidType + } + + return i.intervals[0].Start +} + +func (i *IntervalSet) addOne(v int) { + i.addInterval(NewInterval(v, v+1)) +} + +func (i *IntervalSet) addRange(l, h int) { + i.addInterval(NewInterval(l, h+1)) +} + +func (i *IntervalSet) addInterval(v *Interval) { + if i.intervals == nil { + i.intervals = make([]*Interval, 0) + i.intervals = append(i.intervals, v) + } else { + // find insert pos + for k, interval := range i.intervals { + // distinct range -> insert + if v.Stop < interval.Start { + i.intervals = append(i.intervals[0:k], append([]*Interval{v}, i.intervals[k:]...)...) + return + } else if v.Stop == interval.Start { + i.intervals[k].Start = v.Start + return + } else if v.Start <= interval.Stop { + i.intervals[k] = NewInterval(intMin(interval.Start, v.Start), intMax(interval.Stop, v.Stop)) + + // if not applying to end, merge potential overlaps + if k < len(i.intervals)-1 { + l := i.intervals[k] + r := i.intervals[k+1] + // if r contained in l + if l.Stop >= r.Stop { + i.intervals = append(i.intervals[0:k+1], i.intervals[k+2:]...) + } else if l.Stop >= r.Start { // partial overlap + i.intervals[k] = NewInterval(l.Start, r.Stop) + i.intervals = append(i.intervals[0:k+1], i.intervals[k+2:]...) + } + } + return + } + } + // greater than any exiting + i.intervals = append(i.intervals, v) + } +} + +func (i *IntervalSet) addSet(other *IntervalSet) *IntervalSet { + if other.intervals != nil { + for k := 0; k < len(other.intervals); k++ { + i2 := other.intervals[k] + i.addInterval(NewInterval(i2.Start, i2.Stop)) + } + } + return i +} + +func (i *IntervalSet) complement(start int, stop int) *IntervalSet { + result := NewIntervalSet() + result.addInterval(NewInterval(start, stop+1)) + for j := 0; j < len(i.intervals); j++ { + result.removeRange(i.intervals[j]) + } + return result +} + +func (i *IntervalSet) contains(item int) bool { + if i.intervals == nil { + return false + } + for k := 0; k < len(i.intervals); k++ { + if i.intervals[k].Contains(item) { + return true + } + } + return false +} + +func (i *IntervalSet) length() int { + len := 0 + + for _, v := range i.intervals { + len += v.length() + } + + return len +} + +func (i *IntervalSet) removeRange(v *Interval) { + if v.Start == v.Stop-1 { + i.removeOne(v.Start) + } else if i.intervals != nil { + k := 0 + for n := 0; n < len(i.intervals); n++ { + ni := i.intervals[k] + // intervals are ordered + if v.Stop <= ni.Start { + return + } else if v.Start > ni.Start && v.Stop < ni.Stop { + i.intervals[k] = NewInterval(ni.Start, v.Start) + x := NewInterval(v.Stop, ni.Stop) + // i.intervals.splice(k, 0, x) + i.intervals = append(i.intervals[0:k], append([]*Interval{x}, i.intervals[k:]...)...) + return + } else if v.Start <= ni.Start && v.Stop >= ni.Stop { + // i.intervals.splice(k, 1) + i.intervals = append(i.intervals[0:k], i.intervals[k+1:]...) + k = k - 1 // need another pass + } else if v.Start < ni.Stop { + i.intervals[k] = NewInterval(ni.Start, v.Start) + } else if v.Stop < ni.Stop { + i.intervals[k] = NewInterval(v.Stop, ni.Stop) + } + k++ + } + } +} + +func (i *IntervalSet) removeOne(v int) { + if i.intervals != nil { + for k := 0; k < len(i.intervals); k++ { + ki := i.intervals[k] + // intervals i ordered + if v < ki.Start { + return + } else if v == ki.Start && v == ki.Stop-1 { + // i.intervals.splice(k, 1) + i.intervals = append(i.intervals[0:k], i.intervals[k+1:]...) + return + } else if v == ki.Start { + i.intervals[k] = NewInterval(ki.Start+1, ki.Stop) + return + } else if v == ki.Stop-1 { + i.intervals[k] = NewInterval(ki.Start, ki.Stop-1) + return + } else if v < ki.Stop-1 { + x := NewInterval(ki.Start, v) + ki.Start = v + 1 + // i.intervals.splice(k, 0, x) + i.intervals = append(i.intervals[0:k], append([]*Interval{x}, i.intervals[k:]...)...) + return + } + } + } +} + +func (i *IntervalSet) String() string { + return i.StringVerbose(nil, nil, false) +} + +func (i *IntervalSet) StringVerbose(literalNames []string, symbolicNames []string, elemsAreChar bool) string { + + if i.intervals == nil { + return "{}" + } else if literalNames != nil || symbolicNames != nil { + return i.toTokenString(literalNames, symbolicNames) + } else if elemsAreChar { + return i.toCharString() + } + + return i.toIndexString() +} + +func (i *IntervalSet) GetIntervals() []*Interval { + return i.intervals +} + +func (i *IntervalSet) toCharString() string { + names := make([]string, len(i.intervals)) + + var sb strings.Builder + + for j := 0; j < len(i.intervals); j++ { + v := i.intervals[j] + if v.Stop == v.Start+1 { + if v.Start == TokenEOF { + names = append(names, "") + } else { + sb.WriteByte('\'') + sb.WriteRune(rune(v.Start)) + sb.WriteByte('\'') + names = append(names, sb.String()) + sb.Reset() + } + } else { + sb.WriteByte('\'') + sb.WriteRune(rune(v.Start)) + sb.WriteString("'..'") + sb.WriteRune(rune(v.Stop - 1)) + sb.WriteByte('\'') + names = append(names, sb.String()) + sb.Reset() + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) toIndexString() string { + + names := make([]string, 0) + for j := 0; j < len(i.intervals); j++ { + v := i.intervals[j] + if v.Stop == v.Start+1 { + if v.Start == TokenEOF { + names = append(names, "") + } else { + names = append(names, strconv.Itoa(v.Start)) + } + } else { + names = append(names, strconv.Itoa(v.Start)+".."+strconv.Itoa(v.Stop-1)) + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) toTokenString(literalNames []string, symbolicNames []string) string { + names := make([]string, 0) + for _, v := range i.intervals { + for j := v.Start; j < v.Stop; j++ { + names = append(names, i.elementName(literalNames, symbolicNames, j)) + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) elementName(literalNames []string, symbolicNames []string, a int) string { + if a == TokenEOF { + return "" + } else if a == TokenEpsilon { + return "" + } else { + if a < len(literalNames) && literalNames[a] != "" { + return literalNames[a] + } + + return symbolicNames[a] + } +} diff --git a/runtime/Go/antlr/v4/jcollect.go b/runtime/Go/antlr/v4/jcollect.go new file mode 100644 index 0000000000..8fb01c5bd9 --- /dev/null +++ b/runtime/Go/antlr/v4/jcollect.go @@ -0,0 +1,195 @@ +package antlr + +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +import "sort" + +// Collectable is an interface that a struct should implement if it is to be +// usable as a key in these collections. +type Collectable[T any] interface { + Hash() int + Equals(other Collectable[T]) bool +} + +type Comparator[T any] interface { + Hash1(o T) int + Equals2(T, T) bool +} + +// JStore implements a container that allows the use of a struct to calculate the key +// for a collection of values akin to map. This is not meant to be a full-blown HashMap but just +// serve the needs of the ANTLR Go runtime. +// +// For ease of porting the logic of the runtime from the master target (Java), this collection +// operates in a similar way to Java, in that it can use any struct that supplies a Hash() and Equals() +// function as the key. The values are stored in a standard go map which internally is a form of hashmap +// itself, the key for the go map is the hash supplied by the key object. The collection is able to deal with +// hash conflicts by using a simple slice of values associated with the hash code indexed bucket. That isn't +// particularly efficient, but it is simple, and it works. As this is specifically for the ANTLR runtime, and +// we understand the requirements, then this is fine - this is not a general purpose collection. +type JStore[T any, C Comparator[T]] struct { + store map[int][]T + len int + comparator Comparator[T] +} + +func NewJStore[T any, C Comparator[T]](comparator Comparator[T]) *JStore[T, C] { + + if comparator == nil { + panic("comparator cannot be nil") + } + + s := &JStore[T, C]{ + store: make(map[int][]T, 1), + comparator: comparator, + } + return s +} + +// Put will store given value in the collection. Note that the key for storage is generated from +// the value itself - this is specifically because that is what ANTLR needs - this would not be useful +// as any kind of general collection. +// +// If the key has a hash conflict, then the value will be added to the slice of values associated with the +// hash, unless the value is already in the slice, in which case the existing value is returned. Value equivalence is +// tested by calling the equals() method on the key. +// +// # If the given value is already present in the store, then the existing value is returned as v and exists is set to true +// +// If the given value is not present in the store, then the value is added to the store and returned as v and exists is set to false. +func (s *JStore[T, C]) Put(value T) (v T, exists bool) { //nolint:ireturn + + kh := s.comparator.Hash1(value) + + for _, v1 := range s.store[kh] { + if s.comparator.Equals2(value, v1) { + return v1, true + } + } + s.store[kh] = append(s.store[kh], value) + s.len++ + return value, false +} + +// Get will return the value associated with the key - the type of the key is the same type as the value +// which would not generally be useful, but this is a specific thing for ANTLR where the key is +// generated using the object we are going to store. +func (s *JStore[T, C]) Get(key T) (T, bool) { //nolint:ireturn + + kh := s.comparator.Hash1(key) + + for _, v := range s.store[kh] { + if s.comparator.Equals2(key, v) { + return v, true + } + } + return key, false +} + +// Contains returns true if the given key is present in the store +func (s *JStore[T, C]) Contains(key T) bool { //nolint:ireturn + + _, present := s.Get(key) + return present +} + +func (s *JStore[T, C]) SortedSlice(less func(i, j T) bool) []T { + vs := make([]T, 0, len(s.store)) + for _, v := range s.store { + vs = append(vs, v...) + } + sort.Slice(vs, func(i, j int) bool { + return less(vs[i], vs[j]) + }) + + return vs +} + +func (s *JStore[T, C]) Each(f func(T) bool) { + for _, e := range s.store { + for _, v := range e { + f(v) + } + } +} + +func (s *JStore[T, C]) Len() int { + return s.len +} + +func (s *JStore[T, C]) Values() []T { + vs := make([]T, 0, len(s.store)) + for _, e := range s.store { + for _, v := range e { + vs = append(vs, v) + } + } + return vs +} + +type entry[K, V any] struct { + key K + val V +} + +type JMap[K, V any, C Comparator[K]] struct { + store map[int][]*entry[K, V] + len int + comparator Comparator[K] +} + +func NewJMap[K, V any, C Comparator[K]](comparator Comparator[K]) *JMap[K, V, C] { + return &JMap[K, V, C]{ + store: make(map[int][]*entry[K, V], 1), + comparator: comparator, + } +} + +func (m *JMap[K, V, C]) Put(key K, val V) { + kh := m.comparator.Hash1(key) + m.store[kh] = append(m.store[kh], &entry[K, V]{key, val}) + m.len++ +} + +func (m *JMap[K, V, C]) Values() []V { + vs := make([]V, 0, len(m.store)) + for _, e := range m.store { + for _, v := range e { + vs = append(vs, v.val) + } + } + return vs +} + +func (m *JMap[K, V, C]) Get(key K) (V, bool) { + + var none V + kh := m.comparator.Hash1(key) + for _, e := range m.store[kh] { + if m.comparator.Equals2(e.key, key) { + return e.val, true + } + } + return none, false +} + +func (m *JMap[K, V, C]) Len() int { + return len(m.store) +} + +func (m *JMap[K, V, C]) Delete(key K) { + kh := m.comparator.Hash1(key) + for i, e := range m.store[kh] { + if m.comparator.Equals2(e.key, key) { + m.store[kh] = append(m.store[kh][:i], m.store[kh][i+1:]...) + m.len-- + return + } + } +} + +func (m *JMap[K, V, C]) Clear() { + m.store = make(map[int][]*entry[K, V]) +} diff --git a/runtime/Go/antlr/v4/jcollect_test.go b/runtime/Go/antlr/v4/jcollect_test.go new file mode 100644 index 0000000000..816307a02c --- /dev/null +++ b/runtime/Go/antlr/v4/jcollect_test.go @@ -0,0 +1,15 @@ +package antlr + +import "testing" + +func Test_try(t *testing.T) { + tests := []struct { + name string + }{ + {"Test_try"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + }) + } +} diff --git a/runtime/Go/antlr/v4/lexer.go b/runtime/Go/antlr/v4/lexer.go new file mode 100644 index 0000000000..6533f05164 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer.go @@ -0,0 +1,416 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +// A lexer is recognizer that draws input symbols from a character stream. +// lexer grammars result in a subclass of this object. A Lexer object +// uses simplified Match() and error recovery mechanisms in the interest +// of speed. +/// + +type Lexer interface { + TokenSource + Recognizer + + Emit() Token + + SetChannel(int) + PushMode(int) + PopMode() int + SetType(int) + SetMode(int) +} + +type BaseLexer struct { + *BaseRecognizer + + Interpreter ILexerATNSimulator + TokenStartCharIndex int + TokenStartLine int + TokenStartColumn int + ActionType int + Virt Lexer // The most derived lexer implementation. Allows virtual method calls. + + input CharStream + factory TokenFactory + tokenFactorySourcePair *TokenSourceCharStreamPair + token Token + hitEOF bool + channel int + thetype int + modeStack IntStack + mode int + text string +} + +func NewBaseLexer(input CharStream) *BaseLexer { + + lexer := new(BaseLexer) + + lexer.BaseRecognizer = NewBaseRecognizer() + + lexer.input = input + lexer.factory = CommonTokenFactoryDEFAULT + lexer.tokenFactorySourcePair = &TokenSourceCharStreamPair{lexer, input} + + lexer.Virt = lexer + + lexer.Interpreter = nil // child classes must populate it + + // The goal of all lexer rules/methods is to create a token object. + // l is an instance variable as multiple rules may collaborate to + // create a single token. NextToken will return l object after + // Matching lexer rule(s). If you subclass to allow multiple token + // emissions, then set l to the last token to be Matched or + // something nonnil so that the auto token emit mechanism will not + // emit another token. + lexer.token = nil + + // What character index in the stream did the current token start at? + // Needed, for example, to get the text for current token. Set at + // the start of NextToken. + lexer.TokenStartCharIndex = -1 + + // The line on which the first character of the token resides/// + lexer.TokenStartLine = -1 + + // The character position of first character within the line/// + lexer.TokenStartColumn = -1 + + // Once we see EOF on char stream, next token will be EOF. + // If you have DONE : EOF then you see DONE EOF. + lexer.hitEOF = false + + // The channel number for the current token/// + lexer.channel = TokenDefaultChannel + + // The token type for the current token/// + lexer.thetype = TokenInvalidType + + lexer.modeStack = make([]int, 0) + lexer.mode = LexerDefaultMode + + // You can set the text for the current token to override what is in + // the input char buffer. Use setText() or can set l instance var. + // / + lexer.text = "" + + return lexer +} + +const ( + LexerDefaultMode = 0 + LexerMore = -2 + LexerSkip = -3 +) + +const ( + LexerDefaultTokenChannel = TokenDefaultChannel + LexerHidden = TokenHiddenChannel + LexerMinCharValue = 0x0000 + LexerMaxCharValue = 0x10FFFF +) + +func (b *BaseLexer) reset() { + // wack Lexer state variables + if b.input != nil { + b.input.Seek(0) // rewind the input + } + b.token = nil + b.thetype = TokenInvalidType + b.channel = TokenDefaultChannel + b.TokenStartCharIndex = -1 + b.TokenStartColumn = -1 + b.TokenStartLine = -1 + b.text = "" + + b.hitEOF = false + b.mode = LexerDefaultMode + b.modeStack = make([]int, 0) + + b.Interpreter.reset() +} + +func (b *BaseLexer) GetInterpreter() ILexerATNSimulator { + return b.Interpreter +} + +func (b *BaseLexer) GetInputStream() CharStream { + return b.input +} + +func (b *BaseLexer) GetSourceName() string { + return b.GrammarFileName +} + +func (b *BaseLexer) SetChannel(v int) { + b.channel = v +} + +func (b *BaseLexer) GetTokenFactory() TokenFactory { + return b.factory +} + +func (b *BaseLexer) setTokenFactory(f TokenFactory) { + b.factory = f +} + +func (b *BaseLexer) safeMatch() (ret int) { + defer func() { + if e := recover(); e != nil { + if re, ok := e.(RecognitionException); ok { + b.notifyListeners(re) // Report error + b.Recover(re) + ret = LexerSkip // default + } + } + }() + + return b.Interpreter.Match(b.input, b.mode) +} + +// Return a token from l source i.e., Match a token on the char stream. +func (b *BaseLexer) NextToken() Token { + if b.input == nil { + panic("NextToken requires a non-nil input stream.") + } + + tokenStartMarker := b.input.Mark() + + // previously in finally block + defer func() { + // make sure we release marker after Match or + // unbuffered char stream will keep buffering + b.input.Release(tokenStartMarker) + }() + + for { + if b.hitEOF { + b.EmitEOF() + return b.token + } + b.token = nil + b.channel = TokenDefaultChannel + b.TokenStartCharIndex = b.input.Index() + b.TokenStartColumn = b.Interpreter.GetCharPositionInLine() + b.TokenStartLine = b.Interpreter.GetLine() + b.text = "" + continueOuter := false + for { + b.thetype = TokenInvalidType + ttype := LexerSkip + + ttype = b.safeMatch() + + if b.input.LA(1) == TokenEOF { + b.hitEOF = true + } + if b.thetype == TokenInvalidType { + b.thetype = ttype + } + if b.thetype == LexerSkip { + continueOuter = true + break + } + if b.thetype != LexerMore { + break + } + } + + if continueOuter { + continue + } + if b.token == nil { + b.Virt.Emit() + } + return b.token + } +} + +// Instruct the lexer to Skip creating a token for current lexer rule +// and look for another token. NextToken() knows to keep looking when +// a lexer rule finishes with token set to SKIPTOKEN. Recall that +// if token==nil at end of any token rule, it creates one for you +// and emits it. +// / +func (b *BaseLexer) Skip() { + b.thetype = LexerSkip +} + +func (b *BaseLexer) More() { + b.thetype = LexerMore +} + +func (b *BaseLexer) SetMode(m int) { + b.mode = m +} + +func (b *BaseLexer) PushMode(m int) { + if LexerATNSimulatorDebug { + fmt.Println("pushMode " + strconv.Itoa(m)) + } + b.modeStack.Push(b.mode) + b.mode = m +} + +func (b *BaseLexer) PopMode() int { + if len(b.modeStack) == 0 { + panic("Empty Stack") + } + if LexerATNSimulatorDebug { + fmt.Println("popMode back to " + fmt.Sprint(b.modeStack[0:len(b.modeStack)-1])) + } + i, _ := b.modeStack.Pop() + b.mode = i + return b.mode +} + +func (b *BaseLexer) inputStream() CharStream { + return b.input +} + +// SetInputStream resets the lexer input stream and associated lexer state. +func (b *BaseLexer) SetInputStream(input CharStream) { + b.input = nil + b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input} + b.reset() + b.input = input + b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input} +} + +func (b *BaseLexer) GetTokenSourceCharStreamPair() *TokenSourceCharStreamPair { + return b.tokenFactorySourcePair +} + +// By default does not support multiple emits per NextToken invocation +// for efficiency reasons. Subclass and override l method, NextToken, +// and GetToken (to push tokens into a list and pull from that list +// rather than a single variable as l implementation does). +// / +func (b *BaseLexer) EmitToken(token Token) { + b.token = token +} + +// The standard method called to automatically emit a token at the +// outermost lexical rule. The token object should point into the +// char buffer start..stop. If there is a text override in 'text', +// use that to set the token's text. Override l method to emit +// custom Token objects or provide a Newfactory. +// / +func (b *BaseLexer) Emit() Token { + t := b.factory.Create(b.tokenFactorySourcePair, b.thetype, b.text, b.channel, b.TokenStartCharIndex, b.GetCharIndex()-1, b.TokenStartLine, b.TokenStartColumn) + b.EmitToken(t) + return t +} + +func (b *BaseLexer) EmitEOF() Token { + cpos := b.GetCharPositionInLine() + lpos := b.GetLine() + eof := b.factory.Create(b.tokenFactorySourcePair, TokenEOF, "", TokenDefaultChannel, b.input.Index(), b.input.Index()-1, lpos, cpos) + b.EmitToken(eof) + return eof +} + +func (b *BaseLexer) GetCharPositionInLine() int { + return b.Interpreter.GetCharPositionInLine() +} + +func (b *BaseLexer) GetLine() int { + return b.Interpreter.GetLine() +} + +func (b *BaseLexer) GetType() int { + return b.thetype +} + +func (b *BaseLexer) SetType(t int) { + b.thetype = t +} + +// What is the index of the current character of lookahead?/// +func (b *BaseLexer) GetCharIndex() int { + return b.input.Index() +} + +// Return the text Matched so far for the current token or any text override. +// Set the complete text of l token it wipes any previous changes to the text. +func (b *BaseLexer) GetText() string { + if b.text != "" { + return b.text + } + + return b.Interpreter.GetText(b.input) +} + +func (b *BaseLexer) SetText(text string) { + b.text = text +} + +func (b *BaseLexer) GetATN() *ATN { + return b.Interpreter.ATN() +} + +// Return a list of all Token objects in input char stream. +// Forces load of all tokens. Does not include EOF token. +// / +func (b *BaseLexer) GetAllTokens() []Token { + vl := b.Virt + tokens := make([]Token, 0) + t := vl.NextToken() + for t.GetTokenType() != TokenEOF { + tokens = append(tokens, t) + t = vl.NextToken() + } + return tokens +} + +func (b *BaseLexer) notifyListeners(e RecognitionException) { + start := b.TokenStartCharIndex + stop := b.input.Index() + text := b.input.GetTextFromInterval(NewInterval(start, stop)) + msg := "token recognition error at: '" + text + "'" + listener := b.GetErrorListenerDispatch() + listener.SyntaxError(b, nil, b.TokenStartLine, b.TokenStartColumn, msg, e) +} + +func (b *BaseLexer) getErrorDisplayForChar(c rune) string { + if c == TokenEOF { + return "" + } else if c == '\n' { + return "\\n" + } else if c == '\t' { + return "\\t" + } else if c == '\r' { + return "\\r" + } else { + return string(c) + } +} + +func (b *BaseLexer) getCharErrorDisplay(c rune) string { + return "'" + b.getErrorDisplayForChar(c) + "'" +} + +// Lexers can normally Match any char in it's vocabulary after Matching +// a token, so do the easy thing and just kill a character and hope +// it all works out. You can instead use the rule invocation stack +// to do sophisticated error recovery if you are in a fragment rule. +// / +func (b *BaseLexer) Recover(re RecognitionException) { + if b.input.LA(1) != TokenEOF { + if _, ok := re.(*LexerNoViableAltException); ok { + // Skip a char and try again + b.Interpreter.Consume(b.input) + } else { + // TODO: Do we lose character or line position information? + b.input.Consume() + } + } +} diff --git a/runtime/Go/antlr/v4/lexer_action.go b/runtime/Go/antlr/v4/lexer_action.go new file mode 100644 index 0000000000..111656c295 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_action.go @@ -0,0 +1,432 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "strconv" + +const ( + LexerActionTypeChannel = 0 //The type of a {@link LexerChannelAction} action. + LexerActionTypeCustom = 1 //The type of a {@link LexerCustomAction} action. + LexerActionTypeMode = 2 //The type of a {@link LexerModeAction} action. + LexerActionTypeMore = 3 //The type of a {@link LexerMoreAction} action. + LexerActionTypePopMode = 4 //The type of a {@link LexerPopModeAction} action. + LexerActionTypePushMode = 5 //The type of a {@link LexerPushModeAction} action. + LexerActionTypeSkip = 6 //The type of a {@link LexerSkipAction} action. + LexerActionTypeType = 7 //The type of a {@link LexerTypeAction} action. +) + +type LexerAction interface { + getActionType() int + getIsPositionDependent() bool + execute(lexer Lexer) + Hash() int + Equals(other LexerAction) bool +} + +type BaseLexerAction struct { + actionType int + isPositionDependent bool +} + +func NewBaseLexerAction(action int) *BaseLexerAction { + la := new(BaseLexerAction) + + la.actionType = action + la.isPositionDependent = false + + return la +} + +func (b *BaseLexerAction) execute(lexer Lexer) { + panic("Not implemented") +} + +func (b *BaseLexerAction) getActionType() int { + return b.actionType +} + +func (b *BaseLexerAction) getIsPositionDependent() bool { + return b.isPositionDependent +} + +func (b *BaseLexerAction) Hash() int { + return b.actionType +} + +func (b *BaseLexerAction) Equals(other LexerAction) bool { + return b == other +} + +// Implements the {@code Skip} lexer action by calling {@link Lexer//Skip}. +// +//

The {@code Skip} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+type LexerSkipAction struct { + *BaseLexerAction +} + +func NewLexerSkipAction() *LexerSkipAction { + la := new(LexerSkipAction) + la.BaseLexerAction = NewBaseLexerAction(LexerActionTypeSkip) + return la +} + +// Provides a singleton instance of l parameterless lexer action. +var LexerSkipActionINSTANCE = NewLexerSkipAction() + +func (l *LexerSkipAction) execute(lexer Lexer) { + lexer.Skip() +} + +func (l *LexerSkipAction) String() string { + return "skip" +} + +// Implements the {@code type} lexer action by calling {@link Lexer//setType} +// +// with the assigned type. +type LexerTypeAction struct { + *BaseLexerAction + + thetype int +} + +func NewLexerTypeAction(thetype int) *LexerTypeAction { + l := new(LexerTypeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeType) + l.thetype = thetype + return l +} + +func (l *LexerTypeAction) execute(lexer Lexer) { + lexer.SetType(l.thetype) +} + +func (l *LexerTypeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.thetype) + return murmurFinish(h, 2) +} + +func (l *LexerTypeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerTypeAction); !ok { + return false + } else { + return l.thetype == other.(*LexerTypeAction).thetype + } +} + +func (l *LexerTypeAction) String() string { + return "actionType(" + strconv.Itoa(l.thetype) + ")" +} + +// Implements the {@code pushMode} lexer action by calling +// {@link Lexer//pushMode} with the assigned mode. +type LexerPushModeAction struct { + *BaseLexerAction + + mode int +} + +func NewLexerPushModeAction(mode int) *LexerPushModeAction { + + l := new(LexerPushModeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypePushMode) + + l.mode = mode + return l +} + +//

This action is implemented by calling {@link Lexer//pushMode} with the +// value provided by {@link //getMode}.

+func (l *LexerPushModeAction) execute(lexer Lexer) { + lexer.PushMode(l.mode) +} + +func (l *LexerPushModeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.mode) + return murmurFinish(h, 2) +} + +func (l *LexerPushModeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerPushModeAction); !ok { + return false + } else { + return l.mode == other.(*LexerPushModeAction).mode + } +} + +func (l *LexerPushModeAction) String() string { + return "pushMode(" + strconv.Itoa(l.mode) + ")" +} + +// Implements the {@code popMode} lexer action by calling {@link Lexer//popMode}. +// +//

The {@code popMode} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+type LexerPopModeAction struct { + *BaseLexerAction +} + +func NewLexerPopModeAction() *LexerPopModeAction { + + l := new(LexerPopModeAction) + + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypePopMode) + + return l +} + +var LexerPopModeActionINSTANCE = NewLexerPopModeAction() + +//

This action is implemented by calling {@link Lexer//popMode}.

+func (l *LexerPopModeAction) execute(lexer Lexer) { + lexer.PopMode() +} + +func (l *LexerPopModeAction) String() string { + return "popMode" +} + +// Implements the {@code more} lexer action by calling {@link Lexer//more}. +// +//

The {@code more} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+ +type LexerMoreAction struct { + *BaseLexerAction +} + +func NewLexerMoreAction() *LexerMoreAction { + l := new(LexerMoreAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeMore) + + return l +} + +var LexerMoreActionINSTANCE = NewLexerMoreAction() + +//

This action is implemented by calling {@link Lexer//popMode}.

+func (l *LexerMoreAction) execute(lexer Lexer) { + lexer.More() +} + +func (l *LexerMoreAction) String() string { + return "more" +} + +// Implements the {@code mode} lexer action by calling {@link Lexer//mode} with +// the assigned mode. +type LexerModeAction struct { + *BaseLexerAction + + mode int +} + +func NewLexerModeAction(mode int) *LexerModeAction { + l := new(LexerModeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeMode) + l.mode = mode + return l +} + +//

This action is implemented by calling {@link Lexer//mode} with the +// value provided by {@link //getMode}.

+func (l *LexerModeAction) execute(lexer Lexer) { + lexer.SetMode(l.mode) +} + +func (l *LexerModeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.mode) + return murmurFinish(h, 2) +} + +func (l *LexerModeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerModeAction); !ok { + return false + } else { + return l.mode == other.(*LexerModeAction).mode + } +} + +func (l *LexerModeAction) String() string { + return "mode(" + strconv.Itoa(l.mode) + ")" +} + +// Executes a custom lexer action by calling {@link Recognizer//action} with the +// rule and action indexes assigned to the custom action. The implementation of +// a custom action is added to the generated code for the lexer in an override +// of {@link Recognizer//action} when the grammar is compiled. +// +//

This class may represent embedded actions created with the {...} +// syntax in ANTLR 4, as well as actions created for lexer commands where the +// command argument could not be evaluated when the grammar was compiled.

+ +// Constructs a custom lexer action with the specified rule and action +// indexes. +// +// @param ruleIndex The rule index to use for calls to +// {@link Recognizer//action}. +// @param actionIndex The action index to use for calls to +// {@link Recognizer//action}. + +type LexerCustomAction struct { + *BaseLexerAction + ruleIndex, actionIndex int +} + +func NewLexerCustomAction(ruleIndex, actionIndex int) *LexerCustomAction { + l := new(LexerCustomAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeCustom) + l.ruleIndex = ruleIndex + l.actionIndex = actionIndex + l.isPositionDependent = true + return l +} + +//

Custom actions are implemented by calling {@link Lexer//action} with the +// appropriate rule and action indexes.

+func (l *LexerCustomAction) execute(lexer Lexer) { + lexer.Action(nil, l.ruleIndex, l.actionIndex) +} + +func (l *LexerCustomAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.ruleIndex) + h = murmurUpdate(h, l.actionIndex) + return murmurFinish(h, 3) +} + +func (l *LexerCustomAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerCustomAction); !ok { + return false + } else { + return l.ruleIndex == other.(*LexerCustomAction).ruleIndex && + l.actionIndex == other.(*LexerCustomAction).actionIndex + } +} + +// Implements the {@code channel} lexer action by calling +// {@link Lexer//setChannel} with the assigned channel. +// Constructs a New{@code channel} action with the specified channel value. +// @param channel The channel value to pass to {@link Lexer//setChannel}. +type LexerChannelAction struct { + *BaseLexerAction + + channel int +} + +func NewLexerChannelAction(channel int) *LexerChannelAction { + l := new(LexerChannelAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeChannel) + l.channel = channel + return l +} + +//

This action is implemented by calling {@link Lexer//setChannel} with the +// value provided by {@link //getChannel}.

+func (l *LexerChannelAction) execute(lexer Lexer) { + lexer.SetChannel(l.channel) +} + +func (l *LexerChannelAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.channel) + return murmurFinish(h, 2) +} + +func (l *LexerChannelAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerChannelAction); !ok { + return false + } else { + return l.channel == other.(*LexerChannelAction).channel + } +} + +func (l *LexerChannelAction) String() string { + return "channel(" + strconv.Itoa(l.channel) + ")" +} + +// This implementation of {@link LexerAction} is used for tracking input offsets +// for position-dependent actions within a {@link LexerActionExecutor}. +// +//

This action is not serialized as part of the ATN, and is only required for +// position-dependent lexer actions which appear at a location other than the +// end of a rule. For more information about DFA optimizations employed for +// lexer actions, see {@link LexerActionExecutor//append} and +// {@link LexerActionExecutor//fixOffsetBeforeMatch}.

+ +// Constructs a Newindexed custom action by associating a character offset +// with a {@link LexerAction}. +// +//

Note: This class is only required for lexer actions for which +// {@link LexerAction//isPositionDependent} returns {@code true}.

+// +// @param offset The offset into the input {@link CharStream}, relative to +// the token start index, at which the specified lexer action should be +// executed. +// @param action The lexer action to execute at a particular offset in the +// input {@link CharStream}. +type LexerIndexedCustomAction struct { + *BaseLexerAction + + offset int + lexerAction LexerAction + isPositionDependent bool +} + +func NewLexerIndexedCustomAction(offset int, lexerAction LexerAction) *LexerIndexedCustomAction { + + l := new(LexerIndexedCustomAction) + l.BaseLexerAction = NewBaseLexerAction(lexerAction.getActionType()) + + l.offset = offset + l.lexerAction = lexerAction + l.isPositionDependent = true + + return l +} + +//

This method calls {@link //execute} on the result of {@link //getAction} +// using the provided {@code lexer}.

+func (l *LexerIndexedCustomAction) execute(lexer Lexer) { + // assume the input stream position was properly set by the calling code + l.lexerAction.execute(lexer) +} + +func (l *LexerIndexedCustomAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.offset) + h = murmurUpdate(h, l.lexerAction.Hash()) + return murmurFinish(h, 2) +} + +func (l *LexerIndexedCustomAction) equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerIndexedCustomAction); !ok { + return false + } else { + return l.offset == other.(*LexerIndexedCustomAction).offset && + l.lexerAction.Equals(other.(*LexerIndexedCustomAction).lexerAction) + } +} diff --git a/runtime/Go/antlr/v4/lexer_action_executor.go b/runtime/Go/antlr/v4/lexer_action_executor.go new file mode 100644 index 0000000000..be1ba7a7e3 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_action_executor.go @@ -0,0 +1,186 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "golang.org/x/exp/slices" + +// Represents an executor for a sequence of lexer actions which traversed during +// the Matching operation of a lexer rule (token). +// +//

The executor tracks position information for position-dependent lexer actions +// efficiently, ensuring that actions appearing only at the end of the rule do +// not cause bloating of the {@link DFA} created for the lexer.

+ +type LexerActionExecutor struct { + lexerActions []LexerAction + cachedHash int +} + +func NewLexerActionExecutor(lexerActions []LexerAction) *LexerActionExecutor { + + if lexerActions == nil { + lexerActions = make([]LexerAction, 0) + } + + l := new(LexerActionExecutor) + + l.lexerActions = lexerActions + + // Caches the result of {@link //hashCode} since the hash code is an element + // of the performance-critical {@link LexerATNConfig//hashCode} operation. + l.cachedHash = murmurInit(57) + for _, a := range lexerActions { + l.cachedHash = murmurUpdate(l.cachedHash, a.Hash()) + } + + return l +} + +// Creates a {@link LexerActionExecutor} which executes the actions for +// the input {@code lexerActionExecutor} followed by a specified +// {@code lexerAction}. +// +// @param lexerActionExecutor The executor for actions already traversed by +// the lexer while Matching a token within a particular +// {@link LexerATNConfig}. If this is {@code nil}, the method behaves as +// though it were an empty executor. +// @param lexerAction The lexer action to execute after the actions +// specified in {@code lexerActionExecutor}. +// +// @return A {@link LexerActionExecutor} for executing the combine actions +// of {@code lexerActionExecutor} and {@code lexerAction}. +func LexerActionExecutorappend(lexerActionExecutor *LexerActionExecutor, lexerAction LexerAction) *LexerActionExecutor { + if lexerActionExecutor == nil { + return NewLexerActionExecutor([]LexerAction{lexerAction}) + } + + return NewLexerActionExecutor(append(lexerActionExecutor.lexerActions, lexerAction)) +} + +// Creates a {@link LexerActionExecutor} which encodes the current offset +// for position-dependent lexer actions. +// +//

Normally, when the executor encounters lexer actions where +// {@link LexerAction//isPositionDependent} returns {@code true}, it calls +// {@link IntStream//seek} on the input {@link CharStream} to set the input +// position to the end of the current token. This behavior provides +// for efficient DFA representation of lexer actions which appear at the end +// of a lexer rule, even when the lexer rule Matches a variable number of +// characters.

+// +//

Prior to traversing a Match transition in the ATN, the current offset +// from the token start index is assigned to all position-dependent lexer +// actions which have not already been assigned a fixed offset. By storing +// the offsets relative to the token start index, the DFA representation of +// lexer actions which appear in the middle of tokens remains efficient due +// to sharing among tokens of the same length, regardless of their absolute +// position in the input stream.

+// +//

If the current executor already has offsets assigned to all +// position-dependent lexer actions, the method returns {@code this}.

+// +// @param offset The current offset to assign to all position-dependent +// lexer actions which do not already have offsets assigned. +// +// @return A {@link LexerActionExecutor} which stores input stream offsets +// for all position-dependent lexer actions. +// / +func (l *LexerActionExecutor) fixOffsetBeforeMatch(offset int) *LexerActionExecutor { + var updatedLexerActions []LexerAction + for i := 0; i < len(l.lexerActions); i++ { + _, ok := l.lexerActions[i].(*LexerIndexedCustomAction) + if l.lexerActions[i].getIsPositionDependent() && !ok { + if updatedLexerActions == nil { + updatedLexerActions = make([]LexerAction, 0) + + for _, a := range l.lexerActions { + updatedLexerActions = append(updatedLexerActions, a) + } + } + + updatedLexerActions[i] = NewLexerIndexedCustomAction(offset, l.lexerActions[i]) + } + } + if updatedLexerActions == nil { + return l + } + + return NewLexerActionExecutor(updatedLexerActions) +} + +// Execute the actions encapsulated by l executor within the context of a +// particular {@link Lexer}. +// +//

This method calls {@link IntStream//seek} to set the position of the +// {@code input} {@link CharStream} prior to calling +// {@link LexerAction//execute} on a position-dependent action. Before the +// method returns, the input position will be restored to the same position +// it was in when the method was invoked.

+// +// @param lexer The lexer instance. +// @param input The input stream which is the source for the current token. +// When l method is called, the current {@link IntStream//index} for +// {@code input} should be the start of the following token, i.e. 1 +// character past the end of the current token. +// @param startIndex The token start index. This value may be passed to +// {@link IntStream//seek} to set the {@code input} position to the beginning +// of the token. +// / +func (l *LexerActionExecutor) execute(lexer Lexer, input CharStream, startIndex int) { + requiresSeek := false + stopIndex := input.Index() + + defer func() { + if requiresSeek { + input.Seek(stopIndex) + } + }() + + for i := 0; i < len(l.lexerActions); i++ { + lexerAction := l.lexerActions[i] + if la, ok := lexerAction.(*LexerIndexedCustomAction); ok { + offset := la.offset + input.Seek(startIndex + offset) + lexerAction = la.lexerAction + requiresSeek = (startIndex + offset) != stopIndex + } else if lexerAction.getIsPositionDependent() { + input.Seek(stopIndex) + requiresSeek = false + } + lexerAction.execute(lexer) + } +} + +func (l *LexerActionExecutor) Hash() int { + if l == nil { + // TODO: Why is this here? l should not be nil + return 61 + } + + // TODO: This is created from the action itself when the struct is created - will this be an issue at some point? Java uses the runtime assign hashcode + return l.cachedHash +} + +func (l *LexerActionExecutor) Equals(other interface{}) bool { + if l == other { + return true + } + othert, ok := other.(*LexerActionExecutor) + if !ok { + return false + } + if othert == nil { + return false + } + if l.cachedHash != othert.cachedHash { + return false + } + if len(l.lexerActions) != len(othert.lexerActions) { + return false + } + return slices.EqualFunc(l.lexerActions, othert.lexerActions, func(i, j LexerAction) bool { + return i.Equals(j) + }) +} diff --git a/runtime/Go/antlr/v4/lexer_atn_simulator.go b/runtime/Go/antlr/v4/lexer_atn_simulator.go new file mode 100644 index 0000000000..c573b75210 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_atn_simulator.go @@ -0,0 +1,684 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +var ( + LexerATNSimulatorDebug = false + LexerATNSimulatorDFADebug = false + + LexerATNSimulatorMinDFAEdge = 0 + LexerATNSimulatorMaxDFAEdge = 127 // forces unicode to stay in ATN + + LexerATNSimulatorMatchCalls = 0 +) + +type ILexerATNSimulator interface { + IATNSimulator + + reset() + Match(input CharStream, mode int) int + GetCharPositionInLine() int + GetLine() int + GetText(input CharStream) string + Consume(input CharStream) +} + +type LexerATNSimulator struct { + *BaseATNSimulator + + recog Lexer + predictionMode int + mergeCache DoubleDict + startIndex int + Line int + CharPositionInLine int + mode int + prevAccept *SimState + MatchCalls int +} + +func NewLexerATNSimulator(recog Lexer, atn *ATN, decisionToDFA []*DFA, sharedContextCache *PredictionContextCache) *LexerATNSimulator { + l := new(LexerATNSimulator) + + l.BaseATNSimulator = NewBaseATNSimulator(atn, sharedContextCache) + + l.decisionToDFA = decisionToDFA + l.recog = recog + // The current token's starting index into the character stream. + // Shared across DFA to ATN simulation in case the ATN fails and the + // DFA did not have a previous accept state. In l case, we use the + // ATN-generated exception object. + l.startIndex = -1 + // line number 1..n within the input/// + l.Line = 1 + // The index of the character relative to the beginning of the line + // 0..n-1/// + l.CharPositionInLine = 0 + l.mode = LexerDefaultMode + // Used during DFA/ATN exec to record the most recent accept configuration + // info + l.prevAccept = NewSimState() + // done + return l +} + +func (l *LexerATNSimulator) copyState(simulator *LexerATNSimulator) { + l.CharPositionInLine = simulator.CharPositionInLine + l.Line = simulator.Line + l.mode = simulator.mode + l.startIndex = simulator.startIndex +} + +func (l *LexerATNSimulator) Match(input CharStream, mode int) int { + l.MatchCalls++ + l.mode = mode + mark := input.Mark() + + defer func() { + input.Release(mark) + }() + + l.startIndex = input.Index() + l.prevAccept.reset() + + dfa := l.decisionToDFA[mode] + + var s0 *DFAState + l.atn.stateMu.RLock() + s0 = dfa.getS0() + l.atn.stateMu.RUnlock() + + if s0 == nil { + return l.MatchATN(input) + } + + return l.execATN(input, s0) +} + +func (l *LexerATNSimulator) reset() { + l.prevAccept.reset() + l.startIndex = -1 + l.Line = 1 + l.CharPositionInLine = 0 + l.mode = LexerDefaultMode +} + +func (l *LexerATNSimulator) MatchATN(input CharStream) int { + startState := l.atn.modeToStartState[l.mode] + + if LexerATNSimulatorDebug { + fmt.Println("MatchATN mode " + strconv.Itoa(l.mode) + " start: " + startState.String()) + } + oldMode := l.mode + s0Closure := l.computeStartState(input, startState) + suppressEdge := s0Closure.hasSemanticContext + s0Closure.hasSemanticContext = false + + next := l.addDFAState(s0Closure, suppressEdge) + + predict := l.execATN(input, next) + + if LexerATNSimulatorDebug { + fmt.Println("DFA after MatchATN: " + l.decisionToDFA[oldMode].ToLexerString()) + } + return predict +} + +func (l *LexerATNSimulator) execATN(input CharStream, ds0 *DFAState) int { + + if LexerATNSimulatorDebug { + fmt.Println("start state closure=" + ds0.configs.String()) + } + if ds0.isAcceptState { + // allow zero-length tokens + l.captureSimState(l.prevAccept, input, ds0) + } + t := input.LA(1) + s := ds0 // s is current/from DFA state + + for { // while more work + if LexerATNSimulatorDebug { + fmt.Println("execATN loop starting closure: " + s.configs.String()) + } + + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=nil, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=nil, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + target := l.getExistingTargetState(s, t) + if target == nil { + target = l.computeTargetState(input, s, t) + // print("Computed:" + str(target)) + } + if target == ATNSimulatorError { + break + } + // If l is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if t != TokenEOF { + l.Consume(input) + } + if target.isAcceptState { + l.captureSimState(l.prevAccept, input, target) + if t == TokenEOF { + break + } + } + t = input.LA(1) + s = target // flip current DFA target becomes Newsrc/from state + } + + return l.failOrAccept(l.prevAccept, input, s.configs, t) +} + +// Get an existing target state for an edge in the DFA. If the target state +// for the edge has not yet been computed or is otherwise not available, +// l method returns {@code nil}. +// +// @param s The current DFA state +// @param t The next input symbol +// @return The existing target DFA state for the given input symbol +// {@code t}, or {@code nil} if the target state for l edge is not +// already cached +func (l *LexerATNSimulator) getExistingTargetState(s *DFAState, t int) *DFAState { + if t < LexerATNSimulatorMinDFAEdge || t > LexerATNSimulatorMaxDFAEdge { + return nil + } + + l.atn.edgeMu.RLock() + defer l.atn.edgeMu.RUnlock() + if s.getEdges() == nil { + return nil + } + target := s.getIthEdge(t - LexerATNSimulatorMinDFAEdge) + if LexerATNSimulatorDebug && target != nil { + fmt.Println("reuse state " + strconv.Itoa(s.stateNumber) + " edge to " + strconv.Itoa(target.stateNumber)) + } + return target +} + +// Compute a target state for an edge in the DFA, and attempt to add the +// computed state and corresponding edge to the DFA. +// +// @param input The input stream +// @param s The current DFA state +// @param t The next input symbol +// +// @return The computed target DFA state for the given input symbol +// {@code t}. If {@code t} does not lead to a valid DFA state, l method +// returns {@link //ERROR}. +func (l *LexerATNSimulator) computeTargetState(input CharStream, s *DFAState, t int) *DFAState { + reach := NewOrderedATNConfigSet() + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + l.getReachableConfigSet(input, s.configs, reach.BaseATNConfigSet, t) + + if len(reach.configs) == 0 { // we got nowhere on t from s + if !reach.hasSemanticContext { + // we got nowhere on t, don't panic out l knowledge it'd + // cause a failover from DFA later. + l.addDFAEdge(s, t, ATNSimulatorError, nil) + } + // stop when we can't Match any more char + return ATNSimulatorError + } + // Add an edge from s to target DFA found/created for reach + return l.addDFAEdge(s, t, nil, reach.BaseATNConfigSet) +} + +func (l *LexerATNSimulator) failOrAccept(prevAccept *SimState, input CharStream, reach ATNConfigSet, t int) int { + if l.prevAccept.dfaState != nil { + lexerActionExecutor := prevAccept.dfaState.lexerActionExecutor + l.accept(input, lexerActionExecutor, l.startIndex, prevAccept.index, prevAccept.line, prevAccept.column) + return prevAccept.dfaState.prediction + } + + // if no accept and EOF is first char, return EOF + if t == TokenEOF && input.Index() == l.startIndex { + return TokenEOF + } + + panic(NewLexerNoViableAltException(l.recog, input, l.startIndex, reach)) +} + +// Given a starting configuration set, figure out all ATN configurations +// we can reach upon input {@code t}. Parameter {@code reach} is a return +// parameter. +func (l *LexerATNSimulator) getReachableConfigSet(input CharStream, closure ATNConfigSet, reach ATNConfigSet, t int) { + // l is used to Skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + SkipAlt := ATNInvalidAltNumber + + for _, cfg := range closure.GetItems() { + currentAltReachedAcceptState := (cfg.GetAlt() == SkipAlt) + if currentAltReachedAcceptState && cfg.(*LexerATNConfig).passedThroughNonGreedyDecision { + continue + } + + if LexerATNSimulatorDebug { + + fmt.Printf("testing %s at %s\n", l.GetTokenName(t), cfg.String()) // l.recog, true)) + } + + for _, trans := range cfg.GetState().GetTransitions() { + target := l.getReachableTarget(trans, t) + if target != nil { + lexerActionExecutor := cfg.(*LexerATNConfig).lexerActionExecutor + if lexerActionExecutor != nil { + lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.Index() - l.startIndex) + } + treatEOFAsEpsilon := (t == TokenEOF) + config := NewLexerATNConfig3(cfg.(*LexerATNConfig), target, lexerActionExecutor) + if l.closure(input, config, reach, + currentAltReachedAcceptState, true, treatEOFAsEpsilon) { + // any remaining configs for l alt have a lower priority + // than the one that just reached an accept state. + SkipAlt = cfg.GetAlt() + } + } + } + } +} + +func (l *LexerATNSimulator) accept(input CharStream, lexerActionExecutor *LexerActionExecutor, startIndex, index, line, charPos int) { + if LexerATNSimulatorDebug { + fmt.Printf("ACTION %v\n", lexerActionExecutor) + } + // seek to after last char in token + input.Seek(index) + l.Line = line + l.CharPositionInLine = charPos + if lexerActionExecutor != nil && l.recog != nil { + lexerActionExecutor.execute(l.recog, input, startIndex) + } +} + +func (l *LexerATNSimulator) getReachableTarget(trans Transition, t int) ATNState { + if trans.Matches(t, 0, LexerMaxCharValue) { + return trans.getTarget() + } + + return nil +} + +func (l *LexerATNSimulator) computeStartState(input CharStream, p ATNState) *OrderedATNConfigSet { + configs := NewOrderedATNConfigSet() + for i := 0; i < len(p.GetTransitions()); i++ { + target := p.GetTransitions()[i].getTarget() + cfg := NewLexerATNConfig6(target, i+1, BasePredictionContextEMPTY) + l.closure(input, cfg, configs, false, false, false) + } + + return configs +} + +// Since the alternatives within any lexer decision are ordered by +// preference, l method stops pursuing the closure as soon as an accept +// state is reached. After the first accept state is reached by depth-first +// search from {@code config}, all other (potentially reachable) states for +// l rule would have a lower priority. +// +// @return {@code true} if an accept state is reached, otherwise +// {@code false}. +func (l *LexerATNSimulator) closure(input CharStream, config *LexerATNConfig, configs ATNConfigSet, + currentAltReachedAcceptState, speculative, treatEOFAsEpsilon bool) bool { + + if LexerATNSimulatorDebug { + fmt.Println("closure(" + config.String() + ")") // config.String(l.recog, true) + ")") + } + + _, ok := config.state.(*RuleStopState) + if ok { + + if LexerATNSimulatorDebug { + if l.recog != nil { + fmt.Printf("closure at %s rule stop %s\n", l.recog.GetRuleNames()[config.state.GetRuleIndex()], config) + } else { + fmt.Printf("closure at rule stop %s\n", config) + } + } + + if config.context == nil || config.context.hasEmptyPath() { + if config.context == nil || config.context.isEmpty() { + configs.Add(config, nil) + return true + } + + configs.Add(NewLexerATNConfig2(config, config.state, BasePredictionContextEMPTY), nil) + currentAltReachedAcceptState = true + } + if config.context != nil && !config.context.isEmpty() { + for i := 0; i < config.context.length(); i++ { + if config.context.getReturnState(i) != BasePredictionContextEmptyReturnState { + newContext := config.context.GetParent(i) // "pop" return state + returnState := l.atn.states[config.context.getReturnState(i)] + cfg := NewLexerATNConfig2(config, returnState, newContext) + currentAltReachedAcceptState = l.closure(input, cfg, configs, currentAltReachedAcceptState, speculative, treatEOFAsEpsilon) + } + } + } + return currentAltReachedAcceptState + } + // optimization + if !config.state.GetEpsilonOnlyTransitions() { + if !currentAltReachedAcceptState || !config.passedThroughNonGreedyDecision { + configs.Add(config, nil) + } + } + for j := 0; j < len(config.state.GetTransitions()); j++ { + trans := config.state.GetTransitions()[j] + cfg := l.getEpsilonTarget(input, config, trans, configs, speculative, treatEOFAsEpsilon) + if cfg != nil { + currentAltReachedAcceptState = l.closure(input, cfg, configs, + currentAltReachedAcceptState, speculative, treatEOFAsEpsilon) + } + } + return currentAltReachedAcceptState +} + +// side-effect: can alter configs.hasSemanticContext +func (l *LexerATNSimulator) getEpsilonTarget(input CharStream, config *LexerATNConfig, trans Transition, + configs ATNConfigSet, speculative, treatEOFAsEpsilon bool) *LexerATNConfig { + + var cfg *LexerATNConfig + + if trans.getSerializationType() == TransitionRULE { + + rt := trans.(*RuleTransition) + newContext := SingletonBasePredictionContextCreate(config.context, rt.followState.GetStateNumber()) + cfg = NewLexerATNConfig2(config, trans.getTarget(), newContext) + + } else if trans.getSerializationType() == TransitionPRECEDENCE { + panic("Precedence predicates are not supported in lexers.") + } else if trans.getSerializationType() == TransitionPREDICATE { + // Track traversing semantic predicates. If we traverse, + // we cannot add a DFA state for l "reach" computation + // because the DFA would not test the predicate again in the + // future. Rather than creating collections of semantic predicates + // like v3 and testing them on prediction, v4 will test them on the + // fly all the time using the ATN not the DFA. This is slower but + // semantically it's not used that often. One of the key elements to + // l predicate mechanism is not adding DFA states that see + // predicates immediately afterwards in the ATN. For example, + + // a : ID {p1}? | ID {p2}? + + // should create the start state for rule 'a' (to save start state + // competition), but should not create target of ID state. The + // collection of ATN states the following ID references includes + // states reached by traversing predicates. Since l is when we + // test them, we cannot cash the DFA state target of ID. + + pt := trans.(*PredicateTransition) + + if LexerATNSimulatorDebug { + fmt.Println("EVAL rule " + strconv.Itoa(trans.(*PredicateTransition).ruleIndex) + ":" + strconv.Itoa(pt.predIndex)) + } + configs.SetHasSemanticContext(true) + if l.evaluatePredicate(input, pt.ruleIndex, pt.predIndex, speculative) { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } else if trans.getSerializationType() == TransitionACTION { + if config.context == nil || config.context.hasEmptyPath() { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In l case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. + lexerActionExecutor := LexerActionExecutorappend(config.lexerActionExecutor, l.atn.lexerActions[trans.(*ActionTransition).actionIndex]) + cfg = NewLexerATNConfig3(config, trans.getTarget(), lexerActionExecutor) + } else { + // ignore actions in referenced rules + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } else if trans.getSerializationType() == TransitionEPSILON { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } else if trans.getSerializationType() == TransitionATOM || + trans.getSerializationType() == TransitionRANGE || + trans.getSerializationType() == TransitionSET { + if treatEOFAsEpsilon { + if trans.Matches(TokenEOF, 0, LexerMaxCharValue) { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } + } + return cfg +} + +// Evaluate a predicate specified in the lexer. +// +//

If {@code speculative} is {@code true}, l method was called before +// {@link //consume} for the Matched character. This method should call +// {@link //consume} before evaluating the predicate to ensure position +// sensitive values, including {@link Lexer//GetText}, {@link Lexer//GetLine}, +// and {@link Lexer//getcolumn}, properly reflect the current +// lexer state. This method should restore {@code input} and the simulator +// to the original state before returning (i.e. undo the actions made by the +// call to {@link //consume}.

+// +// @param input The input stream. +// @param ruleIndex The rule containing the predicate. +// @param predIndex The index of the predicate within the rule. +// @param speculative {@code true} if the current index in {@code input} is +// one character before the predicate's location. +// +// @return {@code true} if the specified predicate evaluates to +// {@code true}. +// / +func (l *LexerATNSimulator) evaluatePredicate(input CharStream, ruleIndex, predIndex int, speculative bool) bool { + // assume true if no recognizer was provided + if l.recog == nil { + return true + } + if !speculative { + return l.recog.Sempred(nil, ruleIndex, predIndex) + } + savedcolumn := l.CharPositionInLine + savedLine := l.Line + index := input.Index() + marker := input.Mark() + + defer func() { + l.CharPositionInLine = savedcolumn + l.Line = savedLine + input.Seek(index) + input.Release(marker) + }() + + l.Consume(input) + return l.recog.Sempred(nil, ruleIndex, predIndex) +} + +func (l *LexerATNSimulator) captureSimState(settings *SimState, input CharStream, dfaState *DFAState) { + settings.index = input.Index() + settings.line = l.Line + settings.column = l.CharPositionInLine + settings.dfaState = dfaState +} + +func (l *LexerATNSimulator) addDFAEdge(from *DFAState, tk int, to *DFAState, cfgs ATNConfigSet) *DFAState { + if to == nil && cfgs != nil { + // leading to l call, ATNConfigSet.hasSemanticContext is used as a + // marker indicating dynamic predicate evaluation makes l edge + // dependent on the specific input sequence, so the static edge in the + // DFA should be omitted. The target DFAState is still created since + // execATN has the ability to reSynchronize with the DFA state cache + // following the predicate evaluation step. + // + // TJP notes: next time through the DFA, we see a pred again and eval. + // If that gets us to a previously created (but dangling) DFA + // state, we can continue in pure DFA mode from there. + // / + suppressEdge := cfgs.HasSemanticContext() + cfgs.SetHasSemanticContext(false) + + to = l.addDFAState(cfgs, true) + + if suppressEdge { + return to + } + } + // add the edge + if tk < LexerATNSimulatorMinDFAEdge || tk > LexerATNSimulatorMaxDFAEdge { + // Only track edges within the DFA bounds + return to + } + if LexerATNSimulatorDebug { + fmt.Println("EDGE " + from.String() + " -> " + to.String() + " upon " + strconv.Itoa(tk)) + } + l.atn.edgeMu.Lock() + defer l.atn.edgeMu.Unlock() + if from.getEdges() == nil { + // make room for tokens 1..n and -1 masquerading as index 0 + from.setEdges(make([]*DFAState, LexerATNSimulatorMaxDFAEdge-LexerATNSimulatorMinDFAEdge+1)) + } + from.setIthEdge(tk-LexerATNSimulatorMinDFAEdge, to) // connect + + return to +} + +// Add a NewDFA state if there isn't one with l set of +// configurations already. This method also detects the first +// configuration containing an ATN rule stop state. Later, when +// traversing the DFA, we will know which rule to accept. +func (l *LexerATNSimulator) addDFAState(configs ATNConfigSet, suppressEdge bool) *DFAState { + + proposed := NewDFAState(-1, configs) + var firstConfigWithRuleStopState ATNConfig + + for _, cfg := range configs.GetItems() { + + _, ok := cfg.GetState().(*RuleStopState) + + if ok { + firstConfigWithRuleStopState = cfg + break + } + } + if firstConfigWithRuleStopState != nil { + proposed.isAcceptState = true + proposed.lexerActionExecutor = firstConfigWithRuleStopState.(*LexerATNConfig).lexerActionExecutor + proposed.setPrediction(l.atn.ruleToTokenType[firstConfigWithRuleStopState.GetState().GetRuleIndex()]) + } + dfa := l.decisionToDFA[l.mode] + + l.atn.stateMu.Lock() + defer l.atn.stateMu.Unlock() + existing, present := dfa.states.Get(proposed) + if present { + + // This state was already present, so just return it. + // + proposed = existing + } else { + + // We need to add the new state + // + proposed.stateNumber = dfa.states.Len() + configs.SetReadOnly(true) + proposed.configs = configs + dfa.states.Put(proposed) + } + if !suppressEdge { + dfa.setS0(proposed) + } + return proposed +} + +func (l *LexerATNSimulator) getDFA(mode int) *DFA { + return l.decisionToDFA[mode] +} + +// Get the text Matched so far for the current token. +func (l *LexerATNSimulator) GetText(input CharStream) string { + // index is first lookahead char, don't include. + return input.GetTextFromInterval(NewInterval(l.startIndex, input.Index()-1)) +} + +func (l *LexerATNSimulator) Consume(input CharStream) { + curChar := input.LA(1) + if curChar == int('\n') { + l.Line++ + l.CharPositionInLine = 0 + } else { + l.CharPositionInLine++ + } + input.Consume() +} + +func (l *LexerATNSimulator) GetCharPositionInLine() int { + return l.CharPositionInLine +} + +func (l *LexerATNSimulator) GetLine() int { + return l.Line +} + +func (l *LexerATNSimulator) GetTokenName(tt int) string { + if tt == -1 { + return "EOF" + } + + var sb strings.Builder + sb.Grow(6) + sb.WriteByte('\'') + sb.WriteRune(rune(tt)) + sb.WriteByte('\'') + + return sb.String() +} + +func resetSimState(sim *SimState) { + sim.index = -1 + sim.line = 0 + sim.column = -1 + sim.dfaState = nil +} + +type SimState struct { + index int + line int + column int + dfaState *DFAState +} + +func NewSimState() *SimState { + s := new(SimState) + resetSimState(s) + return s +} + +func (s *SimState) reset() { + resetSimState(s) +} diff --git a/runtime/Go/antlr/v4/ll1_analyzer.go b/runtime/Go/antlr/v4/ll1_analyzer.go new file mode 100644 index 0000000000..a9e202d041 --- /dev/null +++ b/runtime/Go/antlr/v4/ll1_analyzer.go @@ -0,0 +1,216 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type LL1Analyzer struct { + atn *ATN +} + +func NewLL1Analyzer(atn *ATN) *LL1Analyzer { + la := new(LL1Analyzer) + la.atn = atn + return la +} + +// - Special value added to the lookahead sets to indicate that we hit +// a predicate during analysis if {@code seeThruPreds==false}. +// +// / +const ( + LL1AnalyzerHitPred = TokenInvalidType +) + +// * +// Calculates the SLL(1) expected lookahead set for each outgoing transition +// of an {@link ATNState}. The returned array has one element for each +// outgoing transition in {@code s}. If the closure from transition +// i leads to a semantic predicate before Matching a symbol, the +// element at index i of the result will be {@code nil}. +// +// @param s the ATN state +// @return the expected symbols for each outgoing transition of {@code s}. +func (la *LL1Analyzer) getDecisionLookahead(s ATNState) []*IntervalSet { + if s == nil { + return nil + } + count := len(s.GetTransitions()) + look := make([]*IntervalSet, count) + for alt := 0; alt < count; alt++ { + look[alt] = NewIntervalSet() + lookBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + seeThruPreds := false // fail to get lookahead upon pred + la.look1(s.GetTransitions()[alt].getTarget(), nil, BasePredictionContextEMPTY, look[alt], lookBusy, NewBitSet(), seeThruPreds, false) + // Wipe out lookahead for la alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if look[alt].length() == 0 || look[alt].contains(LL1AnalyzerHitPred) { + look[alt] = nil + } + } + return look +} + +// * +// Compute set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// +//

If {@code ctx} is {@code nil} and the end of the rule containing +// {@code s} is reached, {@link Token//EPSILON} is added to the result set. +// If {@code ctx} is not {@code nil} and the end of the outermost rule is +// reached, {@link Token//EOF} is added to the result set.

+// +// @param s the ATN state +// @param stopState the ATN state to stop at. This can be a +// {@link BlockEndState} to detect epsilon paths through a closure. +// @param ctx the complete parser context, or {@code nil} if the context +// should be ignored +// +// @return The set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// / +func (la *LL1Analyzer) Look(s, stopState ATNState, ctx RuleContext) *IntervalSet { + r := NewIntervalSet() + seeThruPreds := true // ignore preds get all lookahead + var lookContext PredictionContext + if ctx != nil { + lookContext = predictionContextFromRuleContext(s.GetATN(), ctx) + } + la.look1(s, stopState, lookContext, r, NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}), NewBitSet(), seeThruPreds, true) + return r +} + +//* +// Compute set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// +//

If {@code ctx} is {@code nil} and {@code stopState} or the end of the +// rule containing {@code s} is reached, {@link Token//EPSILON} is added to +// the result set. If {@code ctx} is not {@code nil} and {@code addEOF} is +// {@code true} and {@code stopState} or the end of the outermost rule is +// reached, {@link Token//EOF} is added to the result set.

+// +// @param s the ATN state. +// @param stopState the ATN state to stop at. This can be a +// {@link BlockEndState} to detect epsilon paths through a closure. +// @param ctx The outer context, or {@code nil} if the outer context should +// not be used. +// @param look The result lookahead set. +// @param lookBusy A set used for preventing epsilon closures in the ATN +// from causing a stack overflow. Outside code should pass +// {@code NewSet} for la argument. +// @param calledRuleStack A set used for preventing left recursion in the +// ATN from causing a stack overflow. Outside code should pass +// {@code NewBitSet()} for la argument. +// @param seeThruPreds {@code true} to true semantic predicates as +// implicitly {@code true} and "see through them", otherwise {@code false} +// to treat semantic predicates as opaque and add {@link //HitPred} to the +// result if one is encountered. +// @param addEOF Add {@link Token//EOF} to the result if the end of the +// outermost context is reached. This parameter has no effect if {@code ctx} +// is {@code nil}. + +func (la *LL1Analyzer) look2(s, stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool, i int) { + + returnState := la.atn.states[ctx.getReturnState(i)] + la.look1(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + +} + +func (la *LL1Analyzer) look1(s, stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool) { + + c := NewBaseATNConfig6(s, 0, ctx) + + if lookBusy.Contains(c) { + return + } + + _, present := lookBusy.Put(c) + if present { + return + + } + if s == stopState { + if ctx == nil { + look.addOne(TokenEpsilon) + return + } else if ctx.isEmpty() && addEOF { + look.addOne(TokenEOF) + return + } + } + + _, ok := s.(*RuleStopState) + + if ok { + if ctx == nil { + look.addOne(TokenEpsilon) + return + } else if ctx.isEmpty() && addEOF { + look.addOne(TokenEOF) + return + } + + if ctx != BasePredictionContextEMPTY { + removed := calledRuleStack.contains(s.GetRuleIndex()) + defer func() { + if removed { + calledRuleStack.add(s.GetRuleIndex()) + } + }() + calledRuleStack.remove(s.GetRuleIndex()) + // run thru all possible stack tops in ctx + for i := 0; i < ctx.length(); i++ { + returnState := la.atn.states[ctx.getReturnState(i)] + la.look2(returnState, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF, i) + } + return + } + } + + n := len(s.GetTransitions()) + + for i := 0; i < n; i++ { + t := s.GetTransitions()[i] + + if t1, ok := t.(*RuleTransition); ok { + if calledRuleStack.contains(t1.getTarget().GetRuleIndex()) { + continue + } + + newContext := SingletonBasePredictionContextCreate(ctx, t1.followState.GetStateNumber()) + la.look3(stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF, t1) + } else if t2, ok := t.(AbstractPredicateTransition); ok { + if seeThruPreds { + la.look1(t2.getTarget(), stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + } else { + look.addOne(LL1AnalyzerHitPred) + } + } else if t.getIsEpsilon() { + la.look1(t.getTarget(), stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + } else if _, ok := t.(*WildcardTransition); ok { + look.addRange(TokenMinUserTokenType, la.atn.maxTokenType) + } else { + set := t.getLabel() + if set != nil { + if _, ok := t.(*NotSetTransition); ok { + set = set.complement(TokenMinUserTokenType, la.atn.maxTokenType) + } + look.addSet(set) + } + } + } +} + +func (la *LL1Analyzer) look3(stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool, t1 *RuleTransition) { + + newContext := SingletonBasePredictionContextCreate(ctx, t1.followState.GetStateNumber()) + + defer func() { + calledRuleStack.remove(t1.getTarget().GetRuleIndex()) + }() + + calledRuleStack.add(t1.getTarget().GetRuleIndex()) + la.look1(t1.getTarget(), stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + +} diff --git a/runtime/Go/antlr/v4/parser.go b/runtime/Go/antlr/v4/parser.go new file mode 100644 index 0000000000..d26bf06392 --- /dev/null +++ b/runtime/Go/antlr/v4/parser.go @@ -0,0 +1,708 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +type Parser interface { + Recognizer + + GetInterpreter() *ParserATNSimulator + + GetTokenStream() TokenStream + GetTokenFactory() TokenFactory + GetParserRuleContext() ParserRuleContext + SetParserRuleContext(ParserRuleContext) + Consume() Token + GetParseListeners() []ParseTreeListener + + GetErrorHandler() ErrorStrategy + SetErrorHandler(ErrorStrategy) + GetInputStream() IntStream + GetCurrentToken() Token + GetExpectedTokens() *IntervalSet + NotifyErrorListeners(string, Token, RecognitionException) + IsExpectedToken(int) bool + GetPrecedence() int + GetRuleInvocationStack(ParserRuleContext) []string +} + +type BaseParser struct { + *BaseRecognizer + + Interpreter *ParserATNSimulator + BuildParseTrees bool + + input TokenStream + errHandler ErrorStrategy + precedenceStack IntStack + ctx ParserRuleContext + + tracer *TraceListener + parseListeners []ParseTreeListener + _SyntaxErrors int +} + +// p.is all the parsing support code essentially most of it is error +// recovery stuff.// +func NewBaseParser(input TokenStream) *BaseParser { + + p := new(BaseParser) + + p.BaseRecognizer = NewBaseRecognizer() + + // The input stream. + p.input = nil + // The error handling strategy for the parser. The default value is a new + // instance of {@link DefaultErrorStrategy}. + p.errHandler = NewDefaultErrorStrategy() + p.precedenceStack = make([]int, 0) + p.precedenceStack.Push(0) + // The {@link ParserRuleContext} object for the currently executing rule. + // p.is always non-nil during the parsing process. + p.ctx = nil + // Specifies whether or not the parser should construct a parse tree during + // the parsing process. The default value is {@code true}. + p.BuildParseTrees = true + // When {@link //setTrace}{@code (true)} is called, a reference to the + // {@link TraceListener} is stored here so it can be easily removed in a + // later call to {@link //setTrace}{@code (false)}. The listener itself is + // implemented as a parser listener so p.field is not directly used by + // other parser methods. + p.tracer = nil + // The list of {@link ParseTreeListener} listeners registered to receive + // events during the parse. + p.parseListeners = nil + // The number of syntax errors Reported during parsing. p.value is + // incremented each time {@link //NotifyErrorListeners} is called. + p._SyntaxErrors = 0 + p.SetInputStream(input) + + return p +} + +// p.field maps from the serialized ATN string to the deserialized {@link +// ATN} with +// bypass alternatives. +// +// @see ATNDeserializationOptions//isGenerateRuleBypassTransitions() +var bypassAltsAtnCache = make(map[string]int) + +// reset the parser's state// +func (p *BaseParser) reset() { + if p.input != nil { + p.input.Seek(0) + } + p.errHandler.reset(p) + p.ctx = nil + p._SyntaxErrors = 0 + p.SetTrace(nil) + p.precedenceStack = make([]int, 0) + p.precedenceStack.Push(0) + if p.Interpreter != nil { + p.Interpreter.reset() + } +} + +func (p *BaseParser) GetErrorHandler() ErrorStrategy { + return p.errHandler +} + +func (p *BaseParser) SetErrorHandler(e ErrorStrategy) { + p.errHandler = e +} + +// Match current input symbol against {@code ttype}. If the symbol type +// Matches, {@link ANTLRErrorStrategy//ReportMatch} and {@link //consume} are +// called to complete the Match process. +// +//

If the symbol type does not Match, +// {@link ANTLRErrorStrategy//recoverInline} is called on the current error +// strategy to attempt recovery. If {@link //getBuildParseTree} is +// {@code true} and the token index of the symbol returned by +// {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to +// the parse tree by calling {@link ParserRuleContext//addErrorNode}.

+// +// @param ttype the token type to Match +// @return the Matched symbol +// @panics RecognitionException if the current input symbol did not Match +// {@code ttype} and the error strategy could not recover from the +// mismatched symbol + +func (p *BaseParser) Match(ttype int) Token { + + t := p.GetCurrentToken() + + if t.GetTokenType() == ttype { + p.errHandler.ReportMatch(p) + p.Consume() + } else { + t = p.errHandler.RecoverInline(p) + if p.BuildParseTrees && t.GetTokenIndex() == -1 { + // we must have conjured up a Newtoken during single token + // insertion + // if it's not the current symbol + p.ctx.AddErrorNode(t) + } + } + + return t +} + +// Match current input symbol as a wildcard. If the symbol type Matches +// (i.e. has a value greater than 0), {@link ANTLRErrorStrategy//ReportMatch} +// and {@link //consume} are called to complete the Match process. +// +//

If the symbol type does not Match, +// {@link ANTLRErrorStrategy//recoverInline} is called on the current error +// strategy to attempt recovery. If {@link //getBuildParseTree} is +// {@code true} and the token index of the symbol returned by +// {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to +// the parse tree by calling {@link ParserRuleContext//addErrorNode}.

+// +// @return the Matched symbol +// @panics RecognitionException if the current input symbol did not Match +// a wildcard and the error strategy could not recover from the mismatched +// symbol + +func (p *BaseParser) MatchWildcard() Token { + t := p.GetCurrentToken() + if t.GetTokenType() > 0 { + p.errHandler.ReportMatch(p) + p.Consume() + } else { + t = p.errHandler.RecoverInline(p) + if p.BuildParseTrees && t.GetTokenIndex() == -1 { + // we must have conjured up a Newtoken during single token + // insertion + // if it's not the current symbol + p.ctx.AddErrorNode(t) + } + } + return t +} + +func (p *BaseParser) GetParserRuleContext() ParserRuleContext { + return p.ctx +} + +func (p *BaseParser) SetParserRuleContext(v ParserRuleContext) { + p.ctx = v +} + +func (p *BaseParser) GetParseListeners() []ParseTreeListener { + if p.parseListeners == nil { + return make([]ParseTreeListener, 0) + } + return p.parseListeners +} + +// Registers {@code listener} to receive events during the parsing process. +// +//

To support output-preserving grammar transformations (including but not +// limited to left-recursion removal, automated left-factoring, and +// optimized code generation), calls to listener methods during the parse +// may differ substantially from calls made by +// {@link ParseTreeWalker//DEFAULT} used after the parse is complete. In +// particular, rule entry and exit events may occur in a different order +// during the parse than after the parser. In addition, calls to certain +// rule entry methods may be omitted.

+// +//

With the following specific exceptions, calls to listener events are +// deterministic, i.e. for identical input the calls to listener +// methods will be the same.

+// +//
    +//
  • Alterations to the grammar used to generate code may change the +// behavior of the listener calls.
  • +//
  • Alterations to the command line options passed to ANTLR 4 when +// generating the parser may change the behavior of the listener calls.
  • +//
  • Changing the version of the ANTLR Tool used to generate the parser +// may change the behavior of the listener calls.
  • +//
+// +// @param listener the listener to add +// +// @panics nilPointerException if {@code} listener is {@code nil} +func (p *BaseParser) AddParseListener(listener ParseTreeListener) { + if listener == nil { + panic("listener") + } + if p.parseListeners == nil { + p.parseListeners = make([]ParseTreeListener, 0) + } + p.parseListeners = append(p.parseListeners, listener) +} + +// Remove {@code listener} from the list of parse listeners. +// +//

If {@code listener} is {@code nil} or has not been added as a parse +// listener, p.method does nothing.

+// @param listener the listener to remove +func (p *BaseParser) RemoveParseListener(listener ParseTreeListener) { + + if p.parseListeners != nil { + + idx := -1 + for i, v := range p.parseListeners { + if v == listener { + idx = i + break + } + } + + if idx == -1 { + return + } + + // remove the listener from the slice + p.parseListeners = append(p.parseListeners[0:idx], p.parseListeners[idx+1:]...) + + if len(p.parseListeners) == 0 { + p.parseListeners = nil + } + } +} + +// Remove all parse listeners. +func (p *BaseParser) removeParseListeners() { + p.parseListeners = nil +} + +// Notify any parse listeners of an enter rule event. +func (p *BaseParser) TriggerEnterRuleEvent() { + if p.parseListeners != nil { + ctx := p.ctx + for _, listener := range p.parseListeners { + listener.EnterEveryRule(ctx) + ctx.EnterRule(listener) + } + } +} + +// Notify any parse listeners of an exit rule event. +// +// @see //addParseListener +func (p *BaseParser) TriggerExitRuleEvent() { + if p.parseListeners != nil { + // reverse order walk of listeners + ctx := p.ctx + l := len(p.parseListeners) - 1 + + for i := range p.parseListeners { + listener := p.parseListeners[l-i] + ctx.ExitRule(listener) + listener.ExitEveryRule(ctx) + } + } +} + +func (p *BaseParser) GetInterpreter() *ParserATNSimulator { + return p.Interpreter +} + +func (p *BaseParser) GetATN() *ATN { + return p.Interpreter.atn +} + +func (p *BaseParser) GetTokenFactory() TokenFactory { + return p.input.GetTokenSource().GetTokenFactory() +} + +// Tell our token source and error strategy about a Newway to create tokens.// +func (p *BaseParser) setTokenFactory(factory TokenFactory) { + p.input.GetTokenSource().setTokenFactory(factory) +} + +// The ATN with bypass alternatives is expensive to create so we create it +// lazily. +// +// @panics UnsupportedOperationException if the current parser does not +// implement the {@link //getSerializedATN()} method. +func (p *BaseParser) GetATNWithBypassAlts() { + + // TODO + panic("Not implemented!") + + // serializedAtn := p.getSerializedATN() + // if (serializedAtn == nil) { + // panic("The current parser does not support an ATN with bypass alternatives.") + // } + // result := p.bypassAltsAtnCache[serializedAtn] + // if (result == nil) { + // deserializationOptions := NewATNDeserializationOptions(nil) + // deserializationOptions.generateRuleBypassTransitions = true + // result = NewATNDeserializer(deserializationOptions).deserialize(serializedAtn) + // p.bypassAltsAtnCache[serializedAtn] = result + // } + // return result +} + +// The preferred method of getting a tree pattern. For example, here's a +// sample use: +// +//
+// ParseTree t = parser.expr()
+// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0",
+// MyParser.RULE_expr)
+// ParseTreeMatch m = p.Match(t)
+// String id = m.Get("ID")
+// 
+ +func (p *BaseParser) compileParseTreePattern(pattern, patternRuleIndex, lexer Lexer) { + + panic("NewParseTreePatternMatcher not implemented!") + // + // if (lexer == nil) { + // if (p.GetTokenStream() != nil) { + // tokenSource := p.GetTokenStream().GetTokenSource() + // if _, ok := tokenSource.(ILexer); ok { + // lexer = tokenSource + // } + // } + // } + // if (lexer == nil) { + // panic("Parser can't discover a lexer to use") + // } + + // m := NewParseTreePatternMatcher(lexer, p) + // return m.compile(pattern, patternRuleIndex) +} + +func (p *BaseParser) GetInputStream() IntStream { + return p.GetTokenStream() +} + +func (p *BaseParser) SetInputStream(input TokenStream) { + p.SetTokenStream(input) +} + +func (p *BaseParser) GetTokenStream() TokenStream { + return p.input +} + +// Set the token stream and reset the parser.// +func (p *BaseParser) SetTokenStream(input TokenStream) { + p.input = nil + p.reset() + p.input = input +} + +// Match needs to return the current input symbol, which gets put +// into the label for the associated token ref e.g., x=ID. +func (p *BaseParser) GetCurrentToken() Token { + return p.input.LT(1) +} + +func (p *BaseParser) NotifyErrorListeners(msg string, offendingToken Token, err RecognitionException) { + if offendingToken == nil { + offendingToken = p.GetCurrentToken() + } + p._SyntaxErrors++ + line := offendingToken.GetLine() + column := offendingToken.GetColumn() + listener := p.GetErrorListenerDispatch() + listener.SyntaxError(p, offendingToken, line, column, msg, err) +} + +func (p *BaseParser) Consume() Token { + o := p.GetCurrentToken() + if o.GetTokenType() != TokenEOF { + p.GetInputStream().Consume() + } + hasListener := p.parseListeners != nil && len(p.parseListeners) > 0 + if p.BuildParseTrees || hasListener { + if p.errHandler.InErrorRecoveryMode(p) { + node := p.ctx.AddErrorNode(o) + if p.parseListeners != nil { + for _, l := range p.parseListeners { + l.VisitErrorNode(node) + } + } + + } else { + node := p.ctx.AddTokenNode(o) + if p.parseListeners != nil { + for _, l := range p.parseListeners { + l.VisitTerminal(node) + } + } + } + // node.invokingState = p.state + } + + return o +} + +func (p *BaseParser) addContextToParseTree() { + // add current context to parent if we have a parent + if p.ctx.GetParent() != nil { + p.ctx.GetParent().(ParserRuleContext).AddChild(p.ctx) + } +} + +func (p *BaseParser) EnterRule(localctx ParserRuleContext, state, ruleIndex int) { + p.SetState(state) + p.ctx = localctx + p.ctx.SetStart(p.input.LT(1)) + if p.BuildParseTrees { + p.addContextToParseTree() + } + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() + } +} + +func (p *BaseParser) ExitRule() { + p.ctx.SetStop(p.input.LT(-1)) + // trigger event on ctx, before it reverts to parent + if p.parseListeners != nil { + p.TriggerExitRuleEvent() + } + p.SetState(p.ctx.GetInvokingState()) + if p.ctx.GetParent() != nil { + p.ctx = p.ctx.GetParent().(ParserRuleContext) + } else { + p.ctx = nil + } +} + +func (p *BaseParser) EnterOuterAlt(localctx ParserRuleContext, altNum int) { + localctx.SetAltNumber(altNum) + // if we have Newlocalctx, make sure we replace existing ctx + // that is previous child of parse tree + if p.BuildParseTrees && p.ctx != localctx { + if p.ctx.GetParent() != nil { + p.ctx.GetParent().(ParserRuleContext).RemoveLastChild() + p.ctx.GetParent().(ParserRuleContext).AddChild(localctx) + } + } + p.ctx = localctx +} + +// Get the precedence level for the top-most precedence rule. +// +// @return The precedence level for the top-most precedence rule, or -1 if +// the parser context is not nested within a precedence rule. + +func (p *BaseParser) GetPrecedence() int { + if len(p.precedenceStack) == 0 { + return -1 + } + + return p.precedenceStack[len(p.precedenceStack)-1] +} + +func (p *BaseParser) EnterRecursionRule(localctx ParserRuleContext, state, ruleIndex, precedence int) { + p.SetState(state) + p.precedenceStack.Push(precedence) + p.ctx = localctx + p.ctx.SetStart(p.input.LT(1)) + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() // simulates rule entry for + // left-recursive rules + } +} + +// +// Like {@link //EnterRule} but for recursive rules. + +func (p *BaseParser) PushNewRecursionContext(localctx ParserRuleContext, state, ruleIndex int) { + previous := p.ctx + previous.SetParent(localctx) + previous.SetInvokingState(state) + previous.SetStop(p.input.LT(-1)) + + p.ctx = localctx + p.ctx.SetStart(previous.GetStart()) + if p.BuildParseTrees { + p.ctx.AddChild(previous) + } + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() // simulates rule entry for + // left-recursive rules + } +} + +func (p *BaseParser) UnrollRecursionContexts(parentCtx ParserRuleContext) { + p.precedenceStack.Pop() + p.ctx.SetStop(p.input.LT(-1)) + retCtx := p.ctx // save current ctx (return value) + // unroll so ctx is as it was before call to recursive method + if p.parseListeners != nil { + for p.ctx != parentCtx { + p.TriggerExitRuleEvent() + p.ctx = p.ctx.GetParent().(ParserRuleContext) + } + } else { + p.ctx = parentCtx + } + // hook into tree + retCtx.SetParent(parentCtx) + if p.BuildParseTrees && parentCtx != nil { + // add return ctx into invoking rule's tree + parentCtx.AddChild(retCtx) + } +} + +func (p *BaseParser) GetInvokingContext(ruleIndex int) ParserRuleContext { + ctx := p.ctx + for ctx != nil { + if ctx.GetRuleIndex() == ruleIndex { + return ctx + } + ctx = ctx.GetParent().(ParserRuleContext) + } + return nil +} + +func (p *BaseParser) Precpred(localctx RuleContext, precedence int) bool { + return precedence >= p.precedenceStack[len(p.precedenceStack)-1] +} + +func (p *BaseParser) inContext(context ParserRuleContext) bool { + // TODO: useful in parser? + return false +} + +// +// Checks whether or not {@code symbol} can follow the current state in the +// ATN. The behavior of p.method is equivalent to the following, but is +// implemented such that the complete context-sensitive follow set does not +// need to be explicitly constructed. +// +//
+// return getExpectedTokens().contains(symbol)
+// 
+// +// @param symbol the symbol type to check +// @return {@code true} if {@code symbol} can follow the current state in +// the ATN, otherwise {@code false}. + +func (p *BaseParser) IsExpectedToken(symbol int) bool { + atn := p.Interpreter.atn + ctx := p.ctx + s := atn.states[p.state] + following := atn.NextTokens(s, nil) + if following.contains(symbol) { + return true + } + if !following.contains(TokenEpsilon) { + return false + } + for ctx != nil && ctx.GetInvokingState() >= 0 && following.contains(TokenEpsilon) { + invokingState := atn.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + following = atn.NextTokens(rt.(*RuleTransition).followState, nil) + if following.contains(symbol) { + return true + } + ctx = ctx.GetParent().(ParserRuleContext) + } + if following.contains(TokenEpsilon) && symbol == TokenEOF { + return true + } + + return false +} + +// Computes the set of input symbols which could follow the current parser +// state and context, as given by {@link //GetState} and {@link //GetContext}, +// respectively. +// +// @see ATN//getExpectedTokens(int, RuleContext) +func (p *BaseParser) GetExpectedTokens() *IntervalSet { + return p.Interpreter.atn.getExpectedTokens(p.state, p.ctx) +} + +func (p *BaseParser) GetExpectedTokensWithinCurrentRule() *IntervalSet { + atn := p.Interpreter.atn + s := atn.states[p.state] + return atn.NextTokens(s, nil) +} + +// Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.// +func (p *BaseParser) GetRuleIndex(ruleName string) int { + var ruleIndex, ok = p.GetRuleIndexMap()[ruleName] + if ok { + return ruleIndex + } + + return -1 +} + +// Return List<String> of the rule names in your parser instance +// leading up to a call to the current rule. You could override if +// you want more details such as the file/line info of where +// in the ATN a rule is invoked. +// +// this very useful for error messages. + +func (p *BaseParser) GetRuleInvocationStack(c ParserRuleContext) []string { + if c == nil { + c = p.ctx + } + stack := make([]string, 0) + for c != nil { + // compute what follows who invoked us + ruleIndex := c.GetRuleIndex() + if ruleIndex < 0 { + stack = append(stack, "n/a") + } else { + stack = append(stack, p.GetRuleNames()[ruleIndex]) + } + + vp := c.GetParent() + + if vp == nil { + break + } + + c = vp.(ParserRuleContext) + } + return stack +} + +// For debugging and other purposes.// +func (p *BaseParser) GetDFAStrings() string { + return fmt.Sprint(p.Interpreter.decisionToDFA) +} + +// For debugging and other purposes.// +func (p *BaseParser) DumpDFA() { + seenOne := false + for _, dfa := range p.Interpreter.decisionToDFA { + if dfa.states.Len() > 0 { + if seenOne { + fmt.Println() + } + fmt.Println("Decision " + strconv.Itoa(dfa.decision) + ":") + fmt.Print(dfa.String(p.LiteralNames, p.SymbolicNames)) + seenOne = true + } + } +} + +func (p *BaseParser) GetSourceName() string { + return p.GrammarFileName +} + +// During a parse is sometimes useful to listen in on the rule entry and exit +// events as well as token Matches. p.is for quick and dirty debugging. +func (p *BaseParser) SetTrace(trace *TraceListener) { + if trace == nil { + p.RemoveParseListener(p.tracer) + p.tracer = nil + } else { + if p.tracer != nil { + p.RemoveParseListener(p.tracer) + } + p.tracer = NewTraceListener(p) + p.AddParseListener(p.tracer) + } +} diff --git a/runtime/Go/antlr/v4/parser_atn_simulator.go b/runtime/Go/antlr/v4/parser_atn_simulator.go new file mode 100644 index 0000000000..c780e3c5f2 --- /dev/null +++ b/runtime/Go/antlr/v4/parser_atn_simulator.go @@ -0,0 +1,1549 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +var ( + ParserATNSimulatorDebug = false + ParserATNSimulatorListATNDecisions = false + ParserATNSimulatorDFADebug = false + ParserATNSimulatorRetryDebug = false + TurnOffLRLoopEntryBranchOpt = false +) + +type ParserATNSimulator struct { + *BaseATNSimulator + + parser Parser + predictionMode int + input TokenStream + startIndex int + dfa *DFA + mergeCache *DoubleDict + outerContext ParserRuleContext +} + +func NewParserATNSimulator(parser Parser, atn *ATN, decisionToDFA []*DFA, sharedContextCache *PredictionContextCache) *ParserATNSimulator { + + p := new(ParserATNSimulator) + + p.BaseATNSimulator = NewBaseATNSimulator(atn, sharedContextCache) + + p.parser = parser + p.decisionToDFA = decisionToDFA + // SLL, LL, or LL + exact ambig detection?// + p.predictionMode = PredictionModeLL + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + p.input = nil + p.startIndex = 0 + p.outerContext = nil + p.dfa = nil + // Each prediction operation uses a cache for merge of prediction contexts. + // Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + // isn't Synchronized but we're ok since two threads shouldn't reuse same + // parser/atnsim object because it can only handle one input at a time. + // This maps graphs a and b to merged result c. (a,b)&rarrc. We can avoid + // the merge if we ever see a and b again. Note that (b,a)&rarrc should + // also be examined during cache lookup. + // + p.mergeCache = nil + + return p +} + +func (p *ParserATNSimulator) GetPredictionMode() int { + return p.predictionMode +} + +func (p *ParserATNSimulator) SetPredictionMode(v int) { + p.predictionMode = v +} + +func (p *ParserATNSimulator) reset() { +} + +func (p *ParserATNSimulator) AdaptivePredict(input TokenStream, decision int, outerContext ParserRuleContext) int { + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("AdaptivePredict decision " + strconv.Itoa(decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + " line " + strconv.Itoa(input.LT(1).GetLine()) + ":" + + strconv.Itoa(input.LT(1).GetColumn())) + } + + p.input = input + p.startIndex = input.Index() + p.outerContext = outerContext + + dfa := p.decisionToDFA[decision] + p.dfa = dfa + m := input.Mark() + index := input.Index() + + defer func() { + p.dfa = nil + p.mergeCache = nil // wack cache after each prediction + input.Seek(index) + input.Release(m) + }() + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + var s0 *DFAState + p.atn.stateMu.RLock() + if dfa.getPrecedenceDfa() { + p.atn.edgeMu.RLock() + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(p.parser.GetPrecedence()) + p.atn.edgeMu.RUnlock() + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.getS0() + } + p.atn.stateMu.RUnlock() + + if s0 == nil { + if outerContext == nil { + outerContext = ParserRuleContextEmpty + } + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("predictATN decision " + strconv.Itoa(dfa.decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + ", outerContext=" + outerContext.String(p.parser.GetRuleNames(), nil)) + } + fullCtx := false + s0Closure := p.computeStartState(dfa.atnStartState, ParserRuleContextEmpty, fullCtx) + + p.atn.stateMu.Lock() + if dfa.getPrecedenceDfa() { + // If p is a precedence DFA, we use applyPrecedenceFilter + // to convert the computed start state to a precedence start + // state. We then use DFA.setPrecedenceStartState to set the + // appropriate start state for the precedence level rather + // than simply setting DFA.s0. + // + dfa.s0.configs = s0Closure + s0Closure = p.applyPrecedenceFilter(s0Closure) + s0 = p.addDFAState(dfa, NewDFAState(-1, s0Closure)) + p.atn.edgeMu.Lock() + dfa.setPrecedenceStartState(p.parser.GetPrecedence(), s0) + p.atn.edgeMu.Unlock() + } else { + s0 = p.addDFAState(dfa, NewDFAState(-1, s0Closure)) + dfa.setS0(s0) + } + p.atn.stateMu.Unlock() + } + + alt := p.execATN(dfa, s0, input, index, outerContext) + if ParserATNSimulatorDebug { + fmt.Println("DFA after predictATN: " + dfa.String(p.parser.GetLiteralNames(), nil)) + } + return alt + +} + +// Performs ATN simulation to compute a predicted alternative based +// upon the remaining input, but also updates the DFA cache to avoid +// having to traverse the ATN again for the same input sequence. + +// There are some key conditions we're looking for after computing a new +// set of ATN configs (proposed DFA state): +// if the set is empty, there is no viable alternative for current symbol +// does the state uniquely predict an alternative? +// does the state have a conflict that would prevent us from +// putting it on the work list? + +// We also have some key operations to do: +// add an edge from previous DFA state to potentially NewDFA state, D, +// upon current symbol but only if adding to work list, which means in all +// cases except no viable alternative (and possibly non-greedy decisions?) +// collecting predicates and adding semantic context to DFA accept states +// adding rule context to context-sensitive DFA accept states +// consuming an input symbol +// Reporting a conflict +// Reporting an ambiguity +// Reporting a context sensitivity +// Reporting insufficient predicates + +// cover these cases: +// +// dead end +// single alt +// single alt + preds +// conflict +// conflict + preds +func (p *ParserATNSimulator) execATN(dfa *DFA, s0 *DFAState, input TokenStream, startIndex int, outerContext ParserRuleContext) int { + + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("execATN decision " + strconv.Itoa(dfa.decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + " line " + strconv.Itoa(input.LT(1).GetLine()) + ":" + strconv.Itoa(input.LT(1).GetColumn())) + } + + previousD := s0 + + if ParserATNSimulatorDebug { + fmt.Println("s0 = " + s0.String()) + } + t := input.LA(1) + for { // for more work + D := p.getExistingTargetState(previousD, t) + if D == nil { + D = p.computeTargetState(dfa, previousD, t) + } + if D == ATNSimulatorError { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + e := p.noViableAlt(input, outerContext, previousD.configs, startIndex) + input.Seek(startIndex) + alt := p.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext) + if alt != ATNInvalidAltNumber { + return alt + } + + panic(e) + } + if D.requiresFullContext && p.predictionMode != PredictionModeSLL { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + conflictingAlts := D.configs.GetConflictingAlts() + if D.predicates != nil { + if ParserATNSimulatorDebug { + fmt.Println("DFA state has preds in DFA sim LL failover") + } + conflictIndex := input.Index() + if conflictIndex != startIndex { + input.Seek(startIndex) + } + conflictingAlts = p.evalSemanticContext(D.predicates, outerContext, true) + if conflictingAlts.length() == 1 { + if ParserATNSimulatorDebug { + fmt.Println("Full LL avoided") + } + return conflictingAlts.minValue() + } + if conflictIndex != startIndex { + // restore the index so Reporting the fallback to full + // context occurs with the index at the correct spot + input.Seek(conflictIndex) + } + } + if ParserATNSimulatorDFADebug { + fmt.Println("ctx sensitive state " + outerContext.String(nil, nil) + " in " + D.String()) + } + fullCtx := true + s0Closure := p.computeStartState(dfa.atnStartState, outerContext, fullCtx) + p.ReportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.Index()) + alt := p.execATNWithFullContext(dfa, D, s0Closure, input, startIndex, outerContext) + return alt + } + if D.isAcceptState { + if D.predicates == nil { + return D.prediction + } + stopIndex := input.Index() + input.Seek(startIndex) + alts := p.evalSemanticContext(D.predicates, outerContext, true) + + switch alts.length() { + case 0: + panic(p.noViableAlt(input, outerContext, D.configs, startIndex)) + case 1: + return alts.minValue() + default: + // Report ambiguity after predicate evaluation to make sure the correct set of ambig alts is Reported. + p.ReportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs) + return alts.minValue() + } + } + previousD = D + + if t != TokenEOF { + input.Consume() + t = input.LA(1) + } + } +} + +// Get an existing target state for an edge in the DFA. If the target state +// for the edge has not yet been computed or is otherwise not available, +// p method returns {@code nil}. +// +// @param previousD The current DFA state +// @param t The next input symbol +// @return The existing target DFA state for the given input symbol +// {@code t}, or {@code nil} if the target state for p edge is not +// already cached + +func (p *ParserATNSimulator) getExistingTargetState(previousD *DFAState, t int) *DFAState { + if t+1 < 0 { + return nil + } + + p.atn.edgeMu.RLock() + defer p.atn.edgeMu.RUnlock() + edges := previousD.getEdges() + if edges == nil || t+1 >= len(edges) { + return nil + } + return previousD.getIthEdge(t + 1) +} + +// Compute a target state for an edge in the DFA, and attempt to add the +// computed state and corresponding edge to the DFA. +// +// @param dfa The DFA +// @param previousD The current DFA state +// @param t The next input symbol +// +// @return The computed target DFA state for the given input symbol +// {@code t}. If {@code t} does not lead to a valid DFA state, p method +// returns {@link //ERROR}. + +func (p *ParserATNSimulator) computeTargetState(dfa *DFA, previousD *DFAState, t int) *DFAState { + reach := p.computeReachSet(previousD.configs, t, false) + + if reach == nil { + p.addDFAEdge(dfa, previousD, t, ATNSimulatorError) + return ATNSimulatorError + } + // create Newtarget state we'll add to DFA after it's complete + D := NewDFAState(-1, reach) + + predictedAlt := p.getUniqueAlt(reach) + + if ParserATNSimulatorDebug { + altSubSets := PredictionModegetConflictingAltSubsets(reach) + fmt.Println("SLL altSubSets=" + fmt.Sprint(altSubSets) + + ", previous=" + previousD.configs.String() + + ", configs=" + reach.String() + + ", predict=" + strconv.Itoa(predictedAlt) + + ", allSubsetsConflict=" + + fmt.Sprint(PredictionModeallSubsetsConflict(altSubSets)) + + ", conflictingAlts=" + p.getConflictingAlts(reach).String()) + } + if predictedAlt != ATNInvalidAltNumber { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D.isAcceptState = true + D.configs.SetUniqueAlt(predictedAlt) + D.setPrediction(predictedAlt) + } else if PredictionModehasSLLConflictTerminatingPrediction(p.predictionMode, reach) { + // MORE THAN ONE VIABLE ALTERNATIVE + D.configs.SetConflictingAlts(p.getConflictingAlts(reach)) + D.requiresFullContext = true + // in SLL-only mode, we will stop at p state and return the minimum alt + D.isAcceptState = true + D.setPrediction(D.configs.GetConflictingAlts().minValue()) + } + if D.isAcceptState && D.configs.HasSemanticContext() { + p.predicateDFAState(D, p.atn.getDecisionState(dfa.decision)) + if D.predicates != nil { + D.setPrediction(ATNInvalidAltNumber) + } + } + // all adds to dfa are done after we've created full D state + D = p.addDFAEdge(dfa, previousD, t, D) + return D +} + +func (p *ParserATNSimulator) predicateDFAState(dfaState *DFAState, decisionState DecisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. + nalts := len(decisionState.GetTransitions()) + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + altsToCollectPredsFrom := p.getConflictingAltsOrUniqueAlt(dfaState.configs) + altToPred := p.getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts) + if altToPred != nil { + dfaState.predicates = p.getPredicatePredictions(altsToCollectPredsFrom, altToPred) + dfaState.setPrediction(ATNInvalidAltNumber) // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState.setPrediction(altsToCollectPredsFrom.minValue()) + } +} + +// comes back with reach.uniqueAlt set to a valid alt +func (p *ParserATNSimulator) execATNWithFullContext(dfa *DFA, D *DFAState, s0 ATNConfigSet, input TokenStream, startIndex int, outerContext ParserRuleContext) int { + + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("execATNWithFullContext " + s0.String()) + } + + fullCtx := true + foundExactAmbig := false + var reach ATNConfigSet + previous := s0 + input.Seek(startIndex) + t := input.LA(1) + predictedAlt := -1 + + for { // for more work + reach = p.computeReachSet(previous, t, fullCtx) + if reach == nil { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + e := p.noViableAlt(input, outerContext, previous, startIndex) + input.Seek(startIndex) + alt := p.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext) + if alt != ATNInvalidAltNumber { + return alt + } + + panic(e) + } + altSubSets := PredictionModegetConflictingAltSubsets(reach) + if ParserATNSimulatorDebug { + fmt.Println("LL altSubSets=" + fmt.Sprint(altSubSets) + ", predict=" + + strconv.Itoa(PredictionModegetUniqueAlt(altSubSets)) + ", resolvesToJustOneViableAlt=" + + fmt.Sprint(PredictionModeresolvesToJustOneViableAlt(altSubSets))) + } + reach.SetUniqueAlt(p.getUniqueAlt(reach)) + // unique prediction? + if reach.GetUniqueAlt() != ATNInvalidAltNumber { + predictedAlt = reach.GetUniqueAlt() + break + } + if p.predictionMode != PredictionModeLLExactAmbigDetection { + predictedAlt = PredictionModeresolvesToJustOneViableAlt(altSubSets) + if predictedAlt != ATNInvalidAltNumber { + break + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if PredictionModeallSubsetsConflict(altSubSets) && PredictionModeallSubsetsEqual(altSubSets) { + foundExactAmbig = true + predictedAlt = PredictionModegetSingleViableAlt(altSubSets) + break + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + previous = reach + if t != TokenEOF { + input.Consume() + t = input.LA(1) + } + } + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if reach.GetUniqueAlt() != ATNInvalidAltNumber { + p.ReportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.Index()) + return predictedAlt + } + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. + + // + // In non-exact ambiguity detection mode, we might actually be able to + // detect an exact ambiguity, but I'm not going to spend the cycles + // needed to check. We only emit ambiguity warnings in exact ambiguity + // mode. + // + // For example, we might know that we have conflicting configurations. + // But, that does not mean that there is no way forward without a + // conflict. It's possible to have nonconflicting alt subsets as in: + + // altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + // from + // + // [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + // (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + // + // In p case, (17,1,[5 $]) indicates there is some next sequence that + // would resolve p without conflict to alternative 1. Any other viable + // next sequence, however, is associated with a conflict. We stop + // looking for input because no amount of further lookahead will alter + // the fact that we should predict alternative 1. We just can't say for + // sure that there is an ambiguity without looking further. + + p.ReportAmbiguity(dfa, D, startIndex, input.Index(), foundExactAmbig, reach.Alts(), reach) + + return predictedAlt +} + +func (p *ParserATNSimulator) computeReachSet(closure ATNConfigSet, t int, fullCtx bool) ATNConfigSet { + if ParserATNSimulatorDebug { + fmt.Println("in computeReachSet, starting closure: " + closure.String()) + } + if p.mergeCache == nil { + p.mergeCache = NewDoubleDict() + } + intermediate := NewBaseATNConfigSet(fullCtx) + + // Configurations already in a rule stop state indicate reaching the end + // of the decision rule (local context) or end of the start rule (full + // context). Once reached, these configurations are never updated by a + // closure operation, so they are handled separately for the performance + // advantage of having a smaller intermediate set when calling closure. + // + // For full-context reach operations, separate handling is required to + // ensure that the alternative Matching the longest overall sequence is + // chosen when multiple such configurations can Match the input. + + var skippedStopStates []*BaseATNConfig + + // First figure out where we can reach on input t + for _, c := range closure.GetItems() { + if ParserATNSimulatorDebug { + fmt.Println("testing " + p.GetTokenName(t) + " at " + c.String()) + } + + if _, ok := c.GetState().(*RuleStopState); ok { + if fullCtx || t == TokenEOF { + skippedStopStates = append(skippedStopStates, c.(*BaseATNConfig)) + if ParserATNSimulatorDebug { + fmt.Println("added " + c.String() + " to SkippedStopStates") + } + } + continue + } + + for _, trans := range c.GetState().GetTransitions() { + target := p.getReachableTarget(trans, t) + if target != nil { + cfg := NewBaseATNConfig4(c, target) + intermediate.Add(cfg, p.mergeCache) + if ParserATNSimulatorDebug { + fmt.Println("added " + cfg.String() + " to intermediate") + } + } + } + } + + // Now figure out where the reach operation can take us... + var reach ATNConfigSet + + // This block optimizes the reach operation for intermediate sets which + // trivially indicate a termination state for the overall + // AdaptivePredict operation. + // + // The conditions assume that intermediate + // contains all configurations relevant to the reach set, but p + // condition is not true when one or more configurations have been + // withheld in SkippedStopStates, or when the current symbol is EOF. + // + if skippedStopStates == nil && t != TokenEOF { + if len(intermediate.configs) == 1 { + // Don't pursue the closure if there is just one state. + // It can only have one alternative just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate + } else if p.getUniqueAlt(intermediate) != ATNInvalidAltNumber { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate + } + } + // If the reach set could not be trivially determined, perform a closure + // operation on the intermediate set to compute its initial value. + // + if reach == nil { + reach = NewBaseATNConfigSet(fullCtx) + closureBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + treatEOFAsEpsilon := t == TokenEOF + amount := len(intermediate.configs) + for k := 0; k < amount; k++ { + p.closure(intermediate.configs[k], reach, closureBusy, false, fullCtx, treatEOFAsEpsilon) + } + } + if t == TokenEOF { + // After consuming EOF no additional input is possible, so we are + // only interested in configurations which reached the end of the + // decision rule (local context) or end of the start rule (full + // context). Update reach to contain only these configurations. This + // handles both explicit EOF transitions in the grammar and implicit + // EOF transitions following the end of the decision or start rule. + // + // When reach==intermediate, no closure operation was performed. In + // p case, removeAllConfigsNotInRuleStopState needs to check for + // reachable rule stop states as well as configurations already in + // a rule stop state. + // + // This is handled before the configurations in SkippedStopStates, + // because any configurations potentially added from that list are + // already guaranteed to meet p condition whether or not it's + // required. + // + reach = p.removeAllConfigsNotInRuleStopState(reach, reach == intermediate) + } + // If SkippedStopStates!=nil, then it contains at least one + // configuration. For full-context reach operations, these + // configurations reached the end of the start rule, in which case we + // only add them back to reach if no configuration during the current + // closure operation reached such a state. This ensures AdaptivePredict + // chooses an alternative Matching the longest overall sequence when + // multiple alternatives are viable. + // + if skippedStopStates != nil && ((!fullCtx) || (!PredictionModehasConfigInRuleStopState(reach))) { + for l := 0; l < len(skippedStopStates); l++ { + reach.Add(skippedStopStates[l], p.mergeCache) + } + } + if len(reach.GetItems()) == 0 { + return nil + } + + return reach +} + +// Return a configuration set containing only the configurations from +// {@code configs} which are in a {@link RuleStopState}. If all +// configurations in {@code configs} are already in a rule stop state, p +// method simply returns {@code configs}. +// +//

When {@code lookToEndOfRule} is true, p method uses +// {@link ATN//NextTokens} for each configuration in {@code configs} which is +// not already in a rule stop state to see if a rule stop state is reachable +// from the configuration via epsilon-only transitions.

+// +// @param configs the configuration set to update +// @param lookToEndOfRule when true, p method checks for rule stop states +// reachable by epsilon-only transitions from each configuration in +// {@code configs}. +// +// @return {@code configs} if all configurations in {@code configs} are in a +// rule stop state, otherwise return a Newconfiguration set containing only +// the configurations from {@code configs} which are in a rule stop state +func (p *ParserATNSimulator) removeAllConfigsNotInRuleStopState(configs ATNConfigSet, lookToEndOfRule bool) ATNConfigSet { + if PredictionModeallConfigsInRuleStopStates(configs) { + return configs + } + result := NewBaseATNConfigSet(configs.FullContext()) + for _, config := range configs.GetItems() { + if _, ok := config.GetState().(*RuleStopState); ok { + result.Add(config, p.mergeCache) + continue + } + if lookToEndOfRule && config.GetState().GetEpsilonOnlyTransitions() { + NextTokens := p.atn.NextTokens(config.GetState(), nil) + if NextTokens.contains(TokenEpsilon) { + endOfRuleState := p.atn.ruleToStopState[config.GetState().GetRuleIndex()] + result.Add(NewBaseATNConfig4(config, endOfRuleState), p.mergeCache) + } + } + } + return result +} + +func (p *ParserATNSimulator) computeStartState(a ATNState, ctx RuleContext, fullCtx bool) ATNConfigSet { + // always at least the implicit call to start rule + initialContext := predictionContextFromRuleContext(p.atn, ctx) + configs := NewBaseATNConfigSet(fullCtx) + for i := 0; i < len(a.GetTransitions()); i++ { + target := a.GetTransitions()[i].getTarget() + c := NewBaseATNConfig6(target, i+1, initialContext) + closureBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&BaseATNConfigComparator[ATNConfig]{}) + p.closure(c, configs, closureBusy, true, fullCtx, false) + } + return configs +} + +// This method transforms the start state computed by +// {@link //computeStartState} to the special start state used by a +// precedence DFA for a particular precedence value. The transformation +// process applies the following changes to the start state's configuration +// set. +// +//
    +//
  1. Evaluate the precedence predicates for each configuration using +// {@link SemanticContext//evalPrecedence}.
  2. +//
  3. Remove all configurations which predict an alternative greater than +// 1, for which another configuration that predicts alternative 1 is in the +// same ATN state with the same prediction context. This transformation is +// valid for the following reasons: +//
      +//
    • The closure block cannot contain any epsilon transitions which bypass +// the body of the closure, so all states reachable via alternative 1 are +// part of the precedence alternatives of the transformed left-recursive +// rule.
    • +//
    • The "primary" portion of a left recursive rule cannot contain an +// epsilon transition, so the only way an alternative other than 1 can exist +// in a state that is also reachable via alternative 1 is by nesting calls +// to the left-recursive rule, with the outer calls not being at the +// preferred precedence level.
    • +//
    +//
  4. +//
+// +//

+// The prediction context must be considered by p filter to address +// situations like the following. +//

+// +//
+// grammar TA
+// prog: statement* EOF
+// statement: letterA | statement letterA 'b'
+// letterA: 'a'
+// 
+//
+//

+// If the above grammar, the ATN state immediately before the token +// reference {@code 'a'} in {@code letterA} is reachable from the left edge +// of both the primary and closure blocks of the left-recursive rule +// {@code statement}. The prediction context associated with each of these +// configurations distinguishes between them, and prevents the alternative +// which stepped out to {@code prog} (and then back in to {@code statement} +// from being eliminated by the filter. +//

+// +// @param configs The configuration set computed by +// {@link //computeStartState} as the start state for the DFA. +// @return The transformed configuration set representing the start state +// for a precedence DFA at a particular precedence level (determined by +// calling {@link Parser//getPrecedence}). +func (p *ParserATNSimulator) applyPrecedenceFilter(configs ATNConfigSet) ATNConfigSet { + + statesFromAlt1 := make(map[int]PredictionContext) + configSet := NewBaseATNConfigSet(configs.FullContext()) + + for _, config := range configs.GetItems() { + // handle alt 1 first + if config.GetAlt() != 1 { + continue + } + updatedContext := config.GetSemanticContext().evalPrecedence(p.parser, p.outerContext) + if updatedContext == nil { + // the configuration was eliminated + continue + } + statesFromAlt1[config.GetState().GetStateNumber()] = config.GetContext() + if updatedContext != config.GetSemanticContext() { + configSet.Add(NewBaseATNConfig2(config, updatedContext), p.mergeCache) + } else { + configSet.Add(config, p.mergeCache) + } + } + for _, config := range configs.GetItems() { + + if config.GetAlt() == 1 { + // already handled + continue + } + // In the future, p elimination step could be updated to also + // filter the prediction context for alternatives predicting alt>1 + // (basically a graph subtraction algorithm). + if !config.getPrecedenceFilterSuppressed() { + context := statesFromAlt1[config.GetState().GetStateNumber()] + if context != nil && context.Equals(config.GetContext()) { + // eliminated + continue + } + } + configSet.Add(config, p.mergeCache) + } + return configSet +} + +func (p *ParserATNSimulator) getReachableTarget(trans Transition, ttype int) ATNState { + if trans.Matches(ttype, 0, p.atn.maxTokenType) { + return trans.getTarget() + } + + return nil +} + +func (p *ParserATNSimulator) getPredsForAmbigAlts(ambigAlts *BitSet, configs ATNConfigSet, nalts int) []SemanticContext { + + altToPred := make([]SemanticContext, nalts+1) + for _, c := range configs.GetItems() { + if ambigAlts.contains(c.GetAlt()) { + altToPred[c.GetAlt()] = SemanticContextorContext(altToPred[c.GetAlt()], c.GetSemanticContext()) + } + } + nPredAlts := 0 + for i := 1; i <= nalts; i++ { + pred := altToPred[i] + if pred == nil { + altToPred[i] = SemanticContextNone + } else if pred != SemanticContextNone { + nPredAlts++ + } + } + // nonambig alts are nil in altToPred + if nPredAlts == 0 { + altToPred = nil + } + if ParserATNSimulatorDebug { + fmt.Println("getPredsForAmbigAlts result " + fmt.Sprint(altToPred)) + } + return altToPred +} + +func (p *ParserATNSimulator) getPredicatePredictions(ambigAlts *BitSet, altToPred []SemanticContext) []*PredPrediction { + pairs := make([]*PredPrediction, 0) + containsPredicate := false + for i := 1; i < len(altToPred); i++ { + pred := altToPred[i] + // unpredicated is indicated by SemanticContextNONE + if ambigAlts != nil && ambigAlts.contains(i) { + pairs = append(pairs, NewPredPrediction(pred, i)) + } + if pred != SemanticContextNone { + containsPredicate = true + } + } + if !containsPredicate { + return nil + } + return pairs +} + +// This method is used to improve the localization of error messages by +// choosing an alternative rather than panicing a +// {@link NoViableAltException} in particular prediction scenarios where the +// {@link //ERROR} state was reached during ATN simulation. +// +//

+// The default implementation of p method uses the following +// algorithm to identify an ATN configuration which successfully parsed the +// decision entry rule. Choosing such an alternative ensures that the +// {@link ParserRuleContext} returned by the calling rule will be complete +// and valid, and the syntax error will be Reported later at a more +// localized location.

+// +//
    +//
  • If a syntactically valid path or paths reach the end of the decision rule and +// they are semantically valid if predicated, return the min associated alt.
  • +//
  • Else, if a semantically invalid but syntactically valid path exist +// or paths exist, return the minimum associated alt. +//
  • +//
  • Otherwise, return {@link ATN//INVALID_ALT_NUMBER}.
  • +//
+// +//

+// In some scenarios, the algorithm described above could predict an +// alternative which will result in a {@link FailedPredicateException} in +// the parser. Specifically, p could occur if the only configuration +// capable of successfully parsing to the end of the decision rule is +// blocked by a semantic predicate. By choosing p alternative within +// {@link //AdaptivePredict} instead of panicing a +// {@link NoViableAltException}, the resulting +// {@link FailedPredicateException} in the parser will identify the specific +// predicate which is preventing the parser from successfully parsing the +// decision rule, which helps developers identify and correct logic errors +// in semantic predicates. +//

+// +// @param configs The ATN configurations which were valid immediately before +// the {@link //ERROR} state was reached +// @param outerContext The is the \gamma_0 initial parser context from the paper +// or the parser stack at the instant before prediction commences. +// +// @return The value to return from {@link //AdaptivePredict}, or +// {@link ATN//INVALID_ALT_NUMBER} if a suitable alternative was not +// identified and {@link //AdaptivePredict} should Report an error instead. +func (p *ParserATNSimulator) getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs ATNConfigSet, outerContext ParserRuleContext) int { + cfgs := p.splitAccordingToSemanticValidity(configs, outerContext) + semValidConfigs := cfgs[0] + semInvalidConfigs := cfgs[1] + alt := p.GetAltThatFinishedDecisionEntryRule(semValidConfigs) + if alt != ATNInvalidAltNumber { // semantically/syntactically viable path exists + return alt + } + // Is there a syntactically valid path with a failed pred? + if len(semInvalidConfigs.GetItems()) > 0 { + alt = p.GetAltThatFinishedDecisionEntryRule(semInvalidConfigs) + if alt != ATNInvalidAltNumber { // syntactically viable path exists + return alt + } + } + return ATNInvalidAltNumber +} + +func (p *ParserATNSimulator) GetAltThatFinishedDecisionEntryRule(configs ATNConfigSet) int { + alts := NewIntervalSet() + + for _, c := range configs.GetItems() { + _, ok := c.GetState().(*RuleStopState) + + if c.GetReachesIntoOuterContext() > 0 || (ok && c.GetContext().hasEmptyPath()) { + alts.addOne(c.GetAlt()) + } + } + if alts.length() == 0 { + return ATNInvalidAltNumber + } + + return alts.first() +} + +// Walk the list of configurations and split them according to +// those that have preds evaluating to true/false. If no pred, assume +// true pred and include in succeeded set. Returns Pair of sets. +// +// Create a NewSet so as not to alter the incoming parameter. +// +// Assumption: the input stream has been restored to the starting point +// prediction, which is where predicates need to evaluate. + +type ATNConfigSetPair struct { + item0, item1 ATNConfigSet +} + +func (p *ParserATNSimulator) splitAccordingToSemanticValidity(configs ATNConfigSet, outerContext ParserRuleContext) []ATNConfigSet { + succeeded := NewBaseATNConfigSet(configs.FullContext()) + failed := NewBaseATNConfigSet(configs.FullContext()) + + for _, c := range configs.GetItems() { + if c.GetSemanticContext() != SemanticContextNone { + predicateEvaluationResult := c.GetSemanticContext().evaluate(p.parser, outerContext) + if predicateEvaluationResult { + succeeded.Add(c, nil) + } else { + failed.Add(c, nil) + } + } else { + succeeded.Add(c, nil) + } + } + return []ATNConfigSet{succeeded, failed} +} + +// Look through a list of predicate/alt pairs, returning alts for the +// +// pairs that win. A {@code NONE} predicate indicates an alt containing an +// unpredicated config which behaves as "always true." If !complete +// then we stop at the first predicate that evaluates to true. This +// includes pairs with nil predicates. +func (p *ParserATNSimulator) evalSemanticContext(predPredictions []*PredPrediction, outerContext ParserRuleContext, complete bool) *BitSet { + predictions := NewBitSet() + for i := 0; i < len(predPredictions); i++ { + pair := predPredictions[i] + if pair.pred == SemanticContextNone { + predictions.add(pair.alt) + if !complete { + break + } + continue + } + + predicateEvaluationResult := pair.pred.evaluate(p.parser, outerContext) + if ParserATNSimulatorDebug || ParserATNSimulatorDFADebug { + fmt.Println("eval pred " + pair.String() + "=" + fmt.Sprint(predicateEvaluationResult)) + } + if predicateEvaluationResult { + if ParserATNSimulatorDebug || ParserATNSimulatorDFADebug { + fmt.Println("PREDICT " + fmt.Sprint(pair.alt)) + } + predictions.add(pair.alt) + if !complete { + break + } + } + } + return predictions +} + +func (p *ParserATNSimulator) closure(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx, treatEOFAsEpsilon bool) { + initialDepth := 0 + p.closureCheckingStopState(config, configs, closureBusy, collectPredicates, + fullCtx, initialDepth, treatEOFAsEpsilon) +} + +func (p *ParserATNSimulator) closureCheckingStopState(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx bool, depth int, treatEOFAsEpsilon bool) { + if ParserATNSimulatorDebug { + fmt.Println("closure(" + config.String() + ")") + fmt.Println("configs(" + configs.String() + ")") + if config.GetReachesIntoOuterContext() > 50 { + panic("problem") + } + } + + if _, ok := config.GetState().(*RuleStopState); ok { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if !config.GetContext().isEmpty() { + for i := 0; i < config.GetContext().length(); i++ { + if config.GetContext().getReturnState(i) == BasePredictionContextEmptyReturnState { + if fullCtx { + configs.Add(NewBaseATNConfig1(config, config.GetState(), BasePredictionContextEMPTY), p.mergeCache) + continue + } else { + // we have no context info, just chase follow links (if greedy) + if ParserATNSimulatorDebug { + fmt.Println("FALLING off rule " + p.getRuleName(config.GetState().GetRuleIndex())) + } + p.closureWork(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEOFAsEpsilon) + } + continue + } + returnState := p.atn.states[config.GetContext().getReturnState(i)] + newContext := config.GetContext().GetParent(i) // "pop" return state + + c := NewBaseATNConfig5(returnState, config.GetAlt(), newContext, config.GetSemanticContext()) + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + c.SetReachesIntoOuterContext(config.GetReachesIntoOuterContext()) + p.closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth-1, treatEOFAsEpsilon) + } + return + } else if fullCtx { + // reached end of start rule + configs.Add(config, p.mergeCache) + return + } else { + // else if we have no context info, just chase follow links (if greedy) + if ParserATNSimulatorDebug { + fmt.Println("FALLING off rule " + p.getRuleName(config.GetState().GetRuleIndex())) + } + } + } + p.closureWork(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEOFAsEpsilon) +} + +// Do the actual work of walking epsilon edges// +func (p *ParserATNSimulator) closureWork(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx bool, depth int, treatEOFAsEpsilon bool) { + state := config.GetState() + // optimization + if !state.GetEpsilonOnlyTransitions() { + configs.Add(config, p.mergeCache) + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + } + for i := 0; i < len(state.GetTransitions()); i++ { + if i == 0 && p.canDropLoopEntryEdgeInLeftRecursiveRule(config) { + continue + } + + t := state.GetTransitions()[i] + _, ok := t.(*ActionTransition) + continueCollecting := collectPredicates && !ok + c := p.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEOFAsEpsilon) + if ci, ok := c.(*BaseATNConfig); ok && ci != nil { + newDepth := depth + + if _, ok := config.GetState().(*RuleStopState); ok { + // target fell off end of rule mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if p is > 0. + + if p.dfa != nil && p.dfa.getPrecedenceDfa() { + if t.(*EpsilonTransition).outermostPrecedenceReturn == p.dfa.atnStartState.GetRuleIndex() { + c.setPrecedenceFilterSuppressed(true) + } + } + + c.SetReachesIntoOuterContext(c.GetReachesIntoOuterContext() + 1) + + _, present := closureBusy.Put(c) + if present { + // avoid infinite recursion for right-recursive rules + continue + } + + configs.SetDipsIntoOuterContext(true) // TODO: can remove? only care when we add to set per middle of p method + newDepth-- + if ParserATNSimulatorDebug { + fmt.Println("dips into outer ctx: " + c.String()) + } + } else { + + if !t.getIsEpsilon() { + _, present := closureBusy.Put(c) + if present { + // avoid infinite recursion for EOF* and EOF+ + continue + } + } + if _, ok := t.(*RuleTransition); ok { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if newDepth >= 0 { + newDepth++ + } + } + } + p.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEOFAsEpsilon) + } + } +} + +func (p *ParserATNSimulator) canDropLoopEntryEdgeInLeftRecursiveRule(config ATNConfig) bool { + if TurnOffLRLoopEntryBranchOpt { + return false + } + + _p := config.GetState() + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if _p.GetStateType() != ATNStateStarLoopEntry { + return false + } + startLoop, ok := _p.(*StarLoopEntryState) + if !ok { + return false + } + if !startLoop.precedenceRuleDecision || + config.GetContext().isEmpty() || + config.GetContext().hasEmptyPath() { + return false + } + + // Require all return states to return back to the same rule + // that p is in. + numCtxs := config.GetContext().length() + for i := 0; i < numCtxs; i++ { + returnState := p.atn.states[config.GetContext().getReturnState(i)] + if returnState.GetRuleIndex() != _p.GetRuleIndex() { + return false + } + } + x := _p.GetTransitions()[0].getTarget() + decisionStartState := x.(BlockStartState) + blockEndStateNum := decisionStartState.getEndState().stateNumber + blockEndState := p.atn.states[blockEndStateNum].(*BlockEndState) + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + + for i := 0; i < numCtxs; i++ { // for each stack context + returnStateNumber := config.GetContext().getReturnState(i) + returnState := p.atn.states[returnStateNumber] + + // all states must have single outgoing epsilon edge + if len(returnState.GetTransitions()) != 1 || !returnState.GetTransitions()[0].getIsEpsilon() { + return false + } + + // Look for prefix op case like 'not expr', (' type ')' expr + returnStateTarget := returnState.GetTransitions()[0].getTarget() + if returnState.GetStateType() == ATNStateBlockEnd && returnStateTarget == _p { + continue + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if returnState == blockEndState { + continue + } + + // Look for ternary expr ? expr : expr. The return state points at block end, + // which points at loop entry state + if returnStateTarget == blockEndState { + continue + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if returnStateTarget.GetStateType() == ATNStateBlockEnd && + len(returnStateTarget.GetTransitions()) == 1 && + returnStateTarget.GetTransitions()[0].getIsEpsilon() && + returnStateTarget.GetTransitions()[0].getTarget() == _p { + continue + } + + // anything else ain't conforming + return false + } + + return true +} + +func (p *ParserATNSimulator) getRuleName(index int) string { + if p.parser != nil && index >= 0 { + return p.parser.GetRuleNames()[index] + } + var sb strings.Builder + sb.Grow(32) + + sb.WriteString("') + return sb.String() +} + +func (p *ParserATNSimulator) getEpsilonTarget(config ATNConfig, t Transition, collectPredicates, inContext, fullCtx, treatEOFAsEpsilon bool) ATNConfig { + + switch t.getSerializationType() { + case TransitionRULE: + return p.ruleTransition(config, t.(*RuleTransition)) + case TransitionPRECEDENCE: + return p.precedenceTransition(config, t.(*PrecedencePredicateTransition), collectPredicates, inContext, fullCtx) + case TransitionPREDICATE: + return p.predTransition(config, t.(*PredicateTransition), collectPredicates, inContext, fullCtx) + case TransitionACTION: + return p.actionTransition(config, t.(*ActionTransition)) + case TransitionEPSILON: + return NewBaseATNConfig4(config, t.getTarget()) + case TransitionATOM, TransitionRANGE, TransitionSET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if treatEOFAsEpsilon { + if t.Matches(TokenEOF, 0, 1) { + return NewBaseATNConfig4(config, t.getTarget()) + } + } + return nil + default: + return nil + } +} + +func (p *ParserATNSimulator) actionTransition(config ATNConfig, t *ActionTransition) *BaseATNConfig { + if ParserATNSimulatorDebug { + fmt.Println("ACTION edge " + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.actionIndex)) + } + return NewBaseATNConfig4(config, t.getTarget()) +} + +func (p *ParserATNSimulator) precedenceTransition(config ATNConfig, + pt *PrecedencePredicateTransition, collectPredicates, inContext, fullCtx bool) *BaseATNConfig { + + if ParserATNSimulatorDebug { + fmt.Println("PRED (collectPredicates=" + fmt.Sprint(collectPredicates) + ") " + + strconv.Itoa(pt.precedence) + ">=_p, ctx dependent=true") + if p.parser != nil { + fmt.Println("context surrounding pred is " + fmt.Sprint(p.parser.GetRuleInvocationStack(nil))) + } + } + var c *BaseATNConfig + if collectPredicates && inContext { + if fullCtx { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + currentPosition := p.input.Index() + p.input.Seek(p.startIndex) + predSucceeds := pt.getPredicate().evaluate(p.parser, p.outerContext) + p.input.Seek(currentPosition) + if predSucceeds { + c = NewBaseATNConfig4(config, pt.getTarget()) // no pred context + } + } else { + newSemCtx := SemanticContextandContext(config.GetSemanticContext(), pt.getPredicate()) + c = NewBaseATNConfig3(config, pt.getTarget(), newSemCtx) + } + } else { + c = NewBaseATNConfig4(config, pt.getTarget()) + } + if ParserATNSimulatorDebug { + fmt.Println("config from pred transition=" + c.String()) + } + return c +} + +func (p *ParserATNSimulator) predTransition(config ATNConfig, pt *PredicateTransition, collectPredicates, inContext, fullCtx bool) *BaseATNConfig { + + if ParserATNSimulatorDebug { + fmt.Println("PRED (collectPredicates=" + fmt.Sprint(collectPredicates) + ") " + strconv.Itoa(pt.ruleIndex) + + ":" + strconv.Itoa(pt.predIndex) + ", ctx dependent=" + fmt.Sprint(pt.isCtxDependent)) + if p.parser != nil { + fmt.Println("context surrounding pred is " + fmt.Sprint(p.parser.GetRuleInvocationStack(nil))) + } + } + var c *BaseATNConfig + if collectPredicates && (!pt.isCtxDependent || inContext) { + if fullCtx { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + currentPosition := p.input.Index() + p.input.Seek(p.startIndex) + predSucceeds := pt.getPredicate().evaluate(p.parser, p.outerContext) + p.input.Seek(currentPosition) + if predSucceeds { + c = NewBaseATNConfig4(config, pt.getTarget()) // no pred context + } + } else { + newSemCtx := SemanticContextandContext(config.GetSemanticContext(), pt.getPredicate()) + c = NewBaseATNConfig3(config, pt.getTarget(), newSemCtx) + } + } else { + c = NewBaseATNConfig4(config, pt.getTarget()) + } + if ParserATNSimulatorDebug { + fmt.Println("config from pred transition=" + c.String()) + } + return c +} + +func (p *ParserATNSimulator) ruleTransition(config ATNConfig, t *RuleTransition) *BaseATNConfig { + if ParserATNSimulatorDebug { + fmt.Println("CALL rule " + p.getRuleName(t.getTarget().GetRuleIndex()) + ", ctx=" + config.GetContext().String()) + } + returnState := t.followState + newContext := SingletonBasePredictionContextCreate(config.GetContext(), returnState.GetStateNumber()) + return NewBaseATNConfig1(config, t.getTarget(), newContext) +} + +func (p *ParserATNSimulator) getConflictingAlts(configs ATNConfigSet) *BitSet { + altsets := PredictionModegetConflictingAltSubsets(configs) + return PredictionModeGetAlts(altsets) +} + +// Sam pointed out a problem with the previous definition, v3, of +// ambiguous states. If we have another state associated with conflicting +// alternatives, we should keep going. For example, the following grammar +// +// s : (ID | ID ID?) '' +// +// When the ATN simulation reaches the state before '', it has a DFA +// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally +// 12|1|[] and 12|2|[] conflict, but we cannot stop processing p node +// because alternative to has another way to continue, via [6|2|[]]. +// The key is that we have a single state that has config's only associated +// with a single alternative, 2, and crucially the state transitions +// among the configurations are all non-epsilon transitions. That means +// we don't consider any conflicts that include alternative 2. So, we +// ignore the conflict between alts 1 and 2. We ignore a set of +// conflicting alts when there is an intersection with an alternative +// associated with a single alt state in the state&rarrconfig-list map. +// +// It's also the case that we might have two conflicting configurations but +// also a 3rd nonconflicting configuration for a different alternative: +// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: +// +// a : A | A | A B +// +// After Matching input A, we reach the stop state for rule A, state 1. +// State 8 is the state right before B. Clearly alternatives 1 and 2 +// conflict and no amount of further lookahead will separate the two. +// However, alternative 3 will be able to continue and so we do not +// stop working on p state. In the previous example, we're concerned +// with states associated with the conflicting alternatives. Here alt +// 3 is not associated with the conflicting configs, but since we can continue +// looking for input reasonably, I don't declare the state done. We +// ignore a set of conflicting alts when we have an alternative +// that we still need to pursue. +// + +func (p *ParserATNSimulator) getConflictingAltsOrUniqueAlt(configs ATNConfigSet) *BitSet { + var conflictingAlts *BitSet + if configs.GetUniqueAlt() != ATNInvalidAltNumber { + conflictingAlts = NewBitSet() + conflictingAlts.add(configs.GetUniqueAlt()) + } else { + conflictingAlts = configs.GetConflictingAlts() + } + return conflictingAlts +} + +func (p *ParserATNSimulator) GetTokenName(t int) string { + if t == TokenEOF { + return "EOF" + } + + if p.parser != nil && p.parser.GetLiteralNames() != nil { + if t >= len(p.parser.GetLiteralNames()) { + fmt.Println(strconv.Itoa(t) + " ttype out of range: " + strings.Join(p.parser.GetLiteralNames(), ",")) + // fmt.Println(p.parser.GetInputStream().(TokenStream).GetAllText()) // p seems incorrect + } else { + return p.parser.GetLiteralNames()[t] + "<" + strconv.Itoa(t) + ">" + } + } + + return strconv.Itoa(t) +} + +func (p *ParserATNSimulator) getLookaheadName(input TokenStream) string { + return p.GetTokenName(input.LA(1)) +} + +// Used for debugging in AdaptivePredict around execATN but I cut +// +// it out for clarity now that alg. works well. We can leave p +// "dead" code for a bit. +func (p *ParserATNSimulator) dumpDeadEndConfigs(nvae *NoViableAltException) { + + panic("Not implemented") + + // fmt.Println("dead end configs: ") + // var decs = nvae.deadEndConfigs + // + // for i:=0; i0) { + // var t = c.state.GetTransitions()[0] + // if t2, ok := t.(*AtomTransition); ok { + // trans = "Atom "+ p.GetTokenName(t2.label) + // } else if t3, ok := t.(SetTransition); ok { + // _, ok := t.(*NotSetTransition) + // + // var s string + // if (ok){ + // s = "~" + // } + // + // trans = s + "Set " + t3.set + // } + // } + // fmt.Errorf(c.String(p.parser, true) + ":" + trans) + // } +} + +func (p *ParserATNSimulator) noViableAlt(input TokenStream, outerContext ParserRuleContext, configs ATNConfigSet, startIndex int) *NoViableAltException { + return NewNoViableAltException(p.parser, input, input.Get(startIndex), input.LT(1), configs, outerContext) +} + +func (p *ParserATNSimulator) getUniqueAlt(configs ATNConfigSet) int { + alt := ATNInvalidAltNumber + for _, c := range configs.GetItems() { + if alt == ATNInvalidAltNumber { + alt = c.GetAlt() // found first alt + } else if c.GetAlt() != alt { + return ATNInvalidAltNumber + } + } + return alt +} + +// Add an edge to the DFA, if possible. This method calls +// {@link //addDFAState} to ensure the {@code to} state is present in the +// DFA. If {@code from} is {@code nil}, or if {@code t} is outside the +// range of edges that can be represented in the DFA tables, p method +// returns without adding the edge to the DFA. +// +//

If {@code to} is {@code nil}, p method returns {@code nil}. +// Otherwise, p method returns the {@link DFAState} returned by calling +// {@link //addDFAState} for the {@code to} state.

+// +// @param dfa The DFA +// @param from The source state for the edge +// @param t The input symbol +// @param to The target state for the edge +// +// @return If {@code to} is {@code nil}, p method returns {@code nil} +// otherwise p method returns the result of calling {@link //addDFAState} +// on {@code to} +func (p *ParserATNSimulator) addDFAEdge(dfa *DFA, from *DFAState, t int, to *DFAState) *DFAState { + if ParserATNSimulatorDebug { + fmt.Println("EDGE " + from.String() + " -> " + to.String() + " upon " + p.GetTokenName(t)) + } + if to == nil { + return nil + } + p.atn.stateMu.Lock() + to = p.addDFAState(dfa, to) // used existing if possible not incoming + p.atn.stateMu.Unlock() + if from == nil || t < -1 || t > p.atn.maxTokenType { + return to + } + p.atn.edgeMu.Lock() + if from.getEdges() == nil { + from.setEdges(make([]*DFAState, p.atn.maxTokenType+1+1)) + } + from.setIthEdge(t+1, to) // connect + p.atn.edgeMu.Unlock() + + if ParserATNSimulatorDebug { + var names []string + if p.parser != nil { + names = p.parser.GetLiteralNames() + } + + fmt.Println("DFA=\n" + dfa.String(names, nil)) + } + return to +} + +// Add state {@code D} to the DFA if it is not already present, and return +// the actual instance stored in the DFA. If a state equivalent to {@code D} +// is already in the DFA, the existing state is returned. Otherwise p +// method returns {@code D} after adding it to the DFA. +// +//

If {@code D} is {@link //ERROR}, p method returns {@link //ERROR} and +// does not change the DFA.

+// +// @param dfa The dfa +// @param D The DFA state to add +// @return The state stored in the DFA. This will be either the existing +// state if {@code D} is already in the DFA, or {@code D} itself if the +// state was not already present. +func (p *ParserATNSimulator) addDFAState(dfa *DFA, d *DFAState) *DFAState { + if d == ATNSimulatorError { + return d + } + existing, present := dfa.states.Get(d) + if present { + return existing + } + + // The state was not present, so update it with configs + // + d.stateNumber = dfa.states.Len() + if !d.configs.ReadOnly() { + d.configs.OptimizeConfigs(p.BaseATNSimulator) + d.configs.SetReadOnly(true) + } + dfa.states.Put(d) + if ParserATNSimulatorDebug { + fmt.Println("adding NewDFA state: " + d.String()) + } + + return d +} + +func (p *ParserATNSimulator) ReportAttemptingFullContext(dfa *DFA, conflictingAlts *BitSet, configs ATNConfigSet, startIndex, stopIndex int) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportAttemptingFullContext decision=" + strconv.Itoa(dfa.decision) + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportAttemptingFullContext(p.parser, dfa, startIndex, stopIndex, conflictingAlts, configs) + } +} + +func (p *ParserATNSimulator) ReportContextSensitivity(dfa *DFA, prediction int, configs ATNConfigSet, startIndex, stopIndex int) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportContextSensitivity decision=" + strconv.Itoa(dfa.decision) + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportContextSensitivity(p.parser, dfa, startIndex, stopIndex, prediction, configs) + } +} + +// If context sensitive parsing, we know it's ambiguity not conflict// +func (p *ParserATNSimulator) ReportAmbiguity(dfa *DFA, D *DFAState, startIndex, stopIndex int, + exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportAmbiguity " + ambigAlts.String() + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportAmbiguity(p.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + } +} diff --git a/runtime/Go/antlr/v4/parser_rule_context.go b/runtime/Go/antlr/v4/parser_rule_context.go new file mode 100644 index 0000000000..1c8cee7479 --- /dev/null +++ b/runtime/Go/antlr/v4/parser_rule_context.go @@ -0,0 +1,362 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "reflect" + "strconv" +) + +type ParserRuleContext interface { + RuleContext + + SetException(RecognitionException) + + AddTokenNode(token Token) *TerminalNodeImpl + AddErrorNode(badToken Token) *ErrorNodeImpl + + EnterRule(listener ParseTreeListener) + ExitRule(listener ParseTreeListener) + + SetStart(Token) + GetStart() Token + + SetStop(Token) + GetStop() Token + + AddChild(child RuleContext) RuleContext + RemoveLastChild() +} + +type BaseParserRuleContext struct { + *BaseRuleContext + + start, stop Token + exception RecognitionException + children []Tree +} + +func NewBaseParserRuleContext(parent ParserRuleContext, invokingStateNumber int) *BaseParserRuleContext { + prc := new(BaseParserRuleContext) + + prc.BaseRuleContext = NewBaseRuleContext(parent, invokingStateNumber) + + prc.RuleIndex = -1 + // * If we are debugging or building a parse tree for a Visitor, + // we need to track all of the tokens and rule invocations associated + // with prc rule's context. This is empty for parsing w/o tree constr. + // operation because we don't the need to track the details about + // how we parse prc rule. + // / + prc.children = nil + prc.start = nil + prc.stop = nil + // The exception that forced prc rule to return. If the rule successfully + // completed, prc is {@code nil}. + prc.exception = nil + + return prc +} + +func (prc *BaseParserRuleContext) SetException(e RecognitionException) { + prc.exception = e +} + +func (prc *BaseParserRuleContext) GetChildren() []Tree { + return prc.children +} + +func (prc *BaseParserRuleContext) CopyFrom(ctx *BaseParserRuleContext) { + // from RuleContext + prc.parentCtx = ctx.parentCtx + prc.invokingState = ctx.invokingState + prc.children = nil + prc.start = ctx.start + prc.stop = ctx.stop +} + +func (prc *BaseParserRuleContext) GetText() string { + if prc.GetChildCount() == 0 { + return "" + } + + var s string + for _, child := range prc.children { + s += child.(ParseTree).GetText() + } + + return s +} + +// Double dispatch methods for listeners +func (prc *BaseParserRuleContext) EnterRule(listener ParseTreeListener) { +} + +func (prc *BaseParserRuleContext) ExitRule(listener ParseTreeListener) { +} + +// * Does not set parent link other add methods do that/// +func (prc *BaseParserRuleContext) addTerminalNodeChild(child TerminalNode) TerminalNode { + if prc.children == nil { + prc.children = make([]Tree, 0) + } + if child == nil { + panic("Child may not be null") + } + prc.children = append(prc.children, child) + return child +} + +func (prc *BaseParserRuleContext) AddChild(child RuleContext) RuleContext { + if prc.children == nil { + prc.children = make([]Tree, 0) + } + if child == nil { + panic("Child may not be null") + } + prc.children = append(prc.children, child) + return child +} + +// * Used by EnterOuterAlt to toss out a RuleContext previously added as +// we entered a rule. If we have // label, we will need to remove +// generic ruleContext object. +// / +func (prc *BaseParserRuleContext) RemoveLastChild() { + if prc.children != nil && len(prc.children) > 0 { + prc.children = prc.children[0 : len(prc.children)-1] + } +} + +func (prc *BaseParserRuleContext) AddTokenNode(token Token) *TerminalNodeImpl { + + node := NewTerminalNodeImpl(token) + prc.addTerminalNodeChild(node) + node.parentCtx = prc + return node + +} + +func (prc *BaseParserRuleContext) AddErrorNode(badToken Token) *ErrorNodeImpl { + node := NewErrorNodeImpl(badToken) + prc.addTerminalNodeChild(node) + node.parentCtx = prc + return node +} + +func (prc *BaseParserRuleContext) GetChild(i int) Tree { + if prc.children != nil && len(prc.children) >= i { + return prc.children[i] + } + + return nil +} + +func (prc *BaseParserRuleContext) GetChildOfType(i int, childType reflect.Type) RuleContext { + if childType == nil { + return prc.GetChild(i).(RuleContext) + } + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if reflect.TypeOf(child) == childType { + if i == 0 { + return child.(RuleContext) + } + + i-- + } + } + + return nil +} + +func (prc *BaseParserRuleContext) ToStringTree(ruleNames []string, recog Recognizer) string { + return TreesStringTree(prc, ruleNames, recog) +} + +func (prc *BaseParserRuleContext) GetRuleContext() RuleContext { + return prc +} + +func (prc *BaseParserRuleContext) Accept(visitor ParseTreeVisitor) interface{} { + return visitor.VisitChildren(prc) +} + +func (prc *BaseParserRuleContext) SetStart(t Token) { + prc.start = t +} + +func (prc *BaseParserRuleContext) GetStart() Token { + return prc.start +} + +func (prc *BaseParserRuleContext) SetStop(t Token) { + prc.stop = t +} + +func (prc *BaseParserRuleContext) GetStop() Token { + return prc.stop +} + +func (prc *BaseParserRuleContext) GetToken(ttype int, i int) TerminalNode { + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if c2, ok := child.(TerminalNode); ok { + if c2.GetSymbol().GetTokenType() == ttype { + if i == 0 { + return c2 + } + + i-- + } + } + } + return nil +} + +func (prc *BaseParserRuleContext) GetTokens(ttype int) []TerminalNode { + if prc.children == nil { + return make([]TerminalNode, 0) + } + + tokens := make([]TerminalNode, 0) + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if tchild, ok := child.(TerminalNode); ok { + if tchild.GetSymbol().GetTokenType() == ttype { + tokens = append(tokens, tchild) + } + } + } + + return tokens +} + +func (prc *BaseParserRuleContext) GetPayload() interface{} { + return prc +} + +func (prc *BaseParserRuleContext) getChild(ctxType reflect.Type, i int) RuleContext { + if prc.children == nil || i < 0 || i >= len(prc.children) { + return nil + } + + j := -1 // what element have we found with ctxType? + for _, o := range prc.children { + + childType := reflect.TypeOf(o) + + if childType.Implements(ctxType) { + j++ + if j == i { + return o.(RuleContext) + } + } + } + return nil +} + +// Go lacks generics, so it's not possible for us to return the child with the correct type, but we do +// check for convertibility + +func (prc *BaseParserRuleContext) GetTypedRuleContext(ctxType reflect.Type, i int) RuleContext { + return prc.getChild(ctxType, i) +} + +func (prc *BaseParserRuleContext) GetTypedRuleContexts(ctxType reflect.Type) []RuleContext { + if prc.children == nil { + return make([]RuleContext, 0) + } + + contexts := make([]RuleContext, 0) + + for _, child := range prc.children { + childType := reflect.TypeOf(child) + + if childType.ConvertibleTo(ctxType) { + contexts = append(contexts, child.(RuleContext)) + } + } + return contexts +} + +func (prc *BaseParserRuleContext) GetChildCount() int { + if prc.children == nil { + return 0 + } + + return len(prc.children) +} + +func (prc *BaseParserRuleContext) GetSourceInterval() *Interval { + if prc.start == nil || prc.stop == nil { + return TreeInvalidInterval + } + + return NewInterval(prc.start.GetTokenIndex(), prc.stop.GetTokenIndex()) +} + +//need to manage circular dependencies, so export now + +// Print out a whole tree, not just a node, in LISP format +// (root child1 .. childN). Print just a node if b is a leaf. +// + +func (prc *BaseParserRuleContext) String(ruleNames []string, stop RuleContext) string { + + var p ParserRuleContext = prc + s := "[" + for p != nil && p != stop { + if ruleNames == nil { + if !p.IsEmpty() { + s += strconv.Itoa(p.GetInvokingState()) + } + } else { + ri := p.GetRuleIndex() + var ruleName string + if ri >= 0 && ri < len(ruleNames) { + ruleName = ruleNames[ri] + } else { + ruleName = strconv.Itoa(ri) + } + s += ruleName + } + if p.GetParent() != nil && (ruleNames != nil || !p.GetParent().(ParserRuleContext).IsEmpty()) { + s += " " + } + pi := p.GetParent() + if pi != nil { + p = pi.(ParserRuleContext) + } else { + p = nil + } + } + s += "]" + return s +} + +var ParserRuleContextEmpty = NewBaseParserRuleContext(nil, -1) + +type InterpreterRuleContext interface { + ParserRuleContext +} + +type BaseInterpreterRuleContext struct { + *BaseParserRuleContext +} + +func NewBaseInterpreterRuleContext(parent BaseInterpreterRuleContext, invokingStateNumber, ruleIndex int) *BaseInterpreterRuleContext { + + prc := new(BaseInterpreterRuleContext) + + prc.BaseParserRuleContext = NewBaseParserRuleContext(parent, invokingStateNumber) + + prc.RuleIndex = ruleIndex + + return prc +} diff --git a/runtime/Go/antlr/v4/prediction_context.go b/runtime/Go/antlr/v4/prediction_context.go new file mode 100644 index 0000000000..72d24c3260 --- /dev/null +++ b/runtime/Go/antlr/v4/prediction_context.go @@ -0,0 +1,764 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "golang.org/x/exp/slices" + "strconv" +) + +// Represents {@code $} in local context prediction, which means wildcard. +// {@code//+x =//}. +// / +const ( + BasePredictionContextEmptyReturnState = 0x7FFFFFFF +) + +// Represents {@code $} in an array in full context mode, when {@code $} +// doesn't mean wildcard: {@code $ + x = [$,x]}. Here, +// {@code $} = {@link //EmptyReturnState}. +// / + +var ( + BasePredictionContextglobalNodeCount = 1 + BasePredictionContextid = BasePredictionContextglobalNodeCount +) + +type PredictionContext interface { + Hash() int + Equals(interface{}) bool + GetParent(int) PredictionContext + getReturnState(int) int + length() int + isEmpty() bool + hasEmptyPath() bool + String() string +} + +type BasePredictionContext struct { + cachedHash int +} + +func NewBasePredictionContext(cachedHash int) *BasePredictionContext { + pc := new(BasePredictionContext) + pc.cachedHash = cachedHash + + return pc +} + +func (b *BasePredictionContext) isEmpty() bool { + return false +} + +func calculateHash(parent PredictionContext, returnState int) int { + h := murmurInit(1) + h = murmurUpdate(h, parent.Hash()) + h = murmurUpdate(h, returnState) + return murmurFinish(h, 2) +} + +var _emptyPredictionContextHash int + +func init() { + _emptyPredictionContextHash = murmurInit(1) + _emptyPredictionContextHash = murmurFinish(_emptyPredictionContextHash, 0) +} + +func calculateEmptyHash() int { + return _emptyPredictionContextHash +} + +// Used to cache {@link BasePredictionContext} objects. Its used for the shared +// context cash associated with contexts in DFA states. This cache +// can be used for both lexers and parsers. + +type PredictionContextCache struct { + cache map[PredictionContext]PredictionContext +} + +func NewPredictionContextCache() *PredictionContextCache { + t := new(PredictionContextCache) + t.cache = make(map[PredictionContext]PredictionContext) + return t +} + +// Add a context to the cache and return it. If the context already exists, +// return that one instead and do not add a Newcontext to the cache. +// Protect shared cache from unsafe thread access. +func (p *PredictionContextCache) add(ctx PredictionContext) PredictionContext { + if ctx == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY + } + existing := p.cache[ctx] + if existing != nil { + return existing + } + p.cache[ctx] = ctx + return ctx +} + +func (p *PredictionContextCache) Get(ctx PredictionContext) PredictionContext { + return p.cache[ctx] +} + +func (p *PredictionContextCache) length() int { + return len(p.cache) +} + +type SingletonPredictionContext interface { + PredictionContext +} + +type BaseSingletonPredictionContext struct { + *BasePredictionContext + + parentCtx PredictionContext + returnState int +} + +func NewBaseSingletonPredictionContext(parent PredictionContext, returnState int) *BaseSingletonPredictionContext { + var cachedHash int + if parent != nil { + cachedHash = calculateHash(parent, returnState) + } else { + cachedHash = calculateEmptyHash() + } + + s := new(BaseSingletonPredictionContext) + s.BasePredictionContext = NewBasePredictionContext(cachedHash) + + s.parentCtx = parent + s.returnState = returnState + + return s +} + +func SingletonBasePredictionContextCreate(parent PredictionContext, returnState int) PredictionContext { + if returnState == BasePredictionContextEmptyReturnState && parent == nil { + // someone can pass in the bits of an array ctx that mean $ + return BasePredictionContextEMPTY + } + + return NewBaseSingletonPredictionContext(parent, returnState) +} + +func (b *BaseSingletonPredictionContext) length() int { + return 1 +} + +func (b *BaseSingletonPredictionContext) GetParent(index int) PredictionContext { + return b.parentCtx +} + +func (b *BaseSingletonPredictionContext) getReturnState(index int) int { + return b.returnState +} + +func (b *BaseSingletonPredictionContext) hasEmptyPath() bool { + return b.returnState == BasePredictionContextEmptyReturnState +} + +func (b *BaseSingletonPredictionContext) Hash() int { + return b.cachedHash +} + +func (b *BaseSingletonPredictionContext) Equals(other interface{}) bool { + if b == other { + return true + } + if _, ok := other.(*BaseSingletonPredictionContext); !ok { + return false + } + + otherP := other.(*BaseSingletonPredictionContext) + + if b.returnState != otherP.getReturnState(0) { + return false + } + if b.parentCtx == nil { + return otherP.parentCtx == nil + } + + return b.parentCtx.Equals(otherP.parentCtx) +} + +func (b *BaseSingletonPredictionContext) String() string { + var up string + + if b.parentCtx == nil { + up = "" + } else { + up = b.parentCtx.String() + } + + if len(up) == 0 { + if b.returnState == BasePredictionContextEmptyReturnState { + return "$" + } + + return strconv.Itoa(b.returnState) + } + + return strconv.Itoa(b.returnState) + " " + up +} + +var BasePredictionContextEMPTY = NewEmptyPredictionContext() + +type EmptyPredictionContext struct { + *BaseSingletonPredictionContext +} + +func NewEmptyPredictionContext() *EmptyPredictionContext { + + p := new(EmptyPredictionContext) + + p.BaseSingletonPredictionContext = NewBaseSingletonPredictionContext(nil, BasePredictionContextEmptyReturnState) + p.cachedHash = calculateEmptyHash() + return p +} + +func (e *EmptyPredictionContext) isEmpty() bool { + return true +} + +func (e *EmptyPredictionContext) GetParent(index int) PredictionContext { + return nil +} + +func (e *EmptyPredictionContext) getReturnState(index int) int { + return e.returnState +} + +func (e *EmptyPredictionContext) Hash() int { + return e.cachedHash +} + +func (e *EmptyPredictionContext) Equals(other interface{}) bool { + return e == other +} + +func (e *EmptyPredictionContext) String() string { + return "$" +} + +type ArrayPredictionContext struct { + *BasePredictionContext + + parents []PredictionContext + returnStates []int +} + +func NewArrayPredictionContext(parents []PredictionContext, returnStates []int) *ArrayPredictionContext { + // Parent can be nil only if full ctx mode and we make an array + // from {@link //EMPTY} and non-empty. We merge {@link //EMPTY} by using + // nil parent and + // returnState == {@link //EmptyReturnState}. + hash := murmurInit(1) + + for _, parent := range parents { + hash = murmurUpdate(hash, parent.Hash()) + } + + for _, returnState := range returnStates { + hash = murmurUpdate(hash, returnState) + } + + hash = murmurFinish(hash, len(parents)<<1) + + c := new(ArrayPredictionContext) + c.BasePredictionContext = NewBasePredictionContext(hash) + + c.parents = parents + c.returnStates = returnStates + + return c +} + +func (a *ArrayPredictionContext) GetReturnStates() []int { + return a.returnStates +} + +func (a *ArrayPredictionContext) hasEmptyPath() bool { + return a.getReturnState(a.length()-1) == BasePredictionContextEmptyReturnState +} + +func (a *ArrayPredictionContext) isEmpty() bool { + // since EmptyReturnState can only appear in the last position, we + // don't need to verify that size==1 + return a.returnStates[0] == BasePredictionContextEmptyReturnState +} + +func (a *ArrayPredictionContext) length() int { + return len(a.returnStates) +} + +func (a *ArrayPredictionContext) GetParent(index int) PredictionContext { + return a.parents[index] +} + +func (a *ArrayPredictionContext) getReturnState(index int) int { + return a.returnStates[index] +} + +// Equals is the default comparison function for ArrayPredictionContext when no specialized +// implementation is needed for a collection +func (a *ArrayPredictionContext) Equals(o interface{}) bool { + if a == o { + return true + } + other, ok := o.(*ArrayPredictionContext) + if !ok { + return false + } + if a.cachedHash != other.Hash() { + return false // can't be same if hash is different + } + + // Must compare the actual array elements and not just the array address + // + return slices.Equal(a.returnStates, other.returnStates) && + slices.EqualFunc(a.parents, other.parents, func(x, y PredictionContext) bool { + return x.Equals(y) + }) +} + +// Hash is the default hash function for ArrayPredictionContext when no specialized +// implementation is needed for a collection +func (a *ArrayPredictionContext) Hash() int { + return a.BasePredictionContext.cachedHash +} + +func (a *ArrayPredictionContext) String() string { + if a.isEmpty() { + return "[]" + } + + s := "[" + for i := 0; i < len(a.returnStates); i++ { + if i > 0 { + s = s + ", " + } + if a.returnStates[i] == BasePredictionContextEmptyReturnState { + s = s + "$" + continue + } + s = s + strconv.Itoa(a.returnStates[i]) + if a.parents[i] != nil { + s = s + " " + a.parents[i].String() + } else { + s = s + "nil" + } + } + + return s + "]" +} + +// Convert a {@link RuleContext} tree to a {@link BasePredictionContext} graph. +// Return {@link //EMPTY} if {@code outerContext} is empty or nil. +// / +func predictionContextFromRuleContext(a *ATN, outerContext RuleContext) PredictionContext { + if outerContext == nil { + outerContext = ParserRuleContextEmpty + } + // if we are in RuleContext of start rule, s, then BasePredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if outerContext.GetParent() == nil || outerContext == ParserRuleContextEmpty { + return BasePredictionContextEMPTY + } + // If we have a parent, convert it to a BasePredictionContext graph + parent := predictionContextFromRuleContext(a, outerContext.GetParent().(RuleContext)) + state := a.states[outerContext.GetInvokingState()] + transition := state.GetTransitions()[0] + + return SingletonBasePredictionContextCreate(parent, transition.(*RuleTransition).followState.GetStateNumber()) +} + +func merge(a, b PredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + // share same graph if both same + if a == b { + return a + } + + ac, ok1 := a.(*BaseSingletonPredictionContext) + bc, ok2 := b.(*BaseSingletonPredictionContext) + + if ok1 && ok2 { + return mergeSingletons(ac, bc, rootIsWildcard, mergeCache) + } + // At least one of a or b is array + // If one is $ and rootIsWildcard, return $ as// wildcard + if rootIsWildcard { + if _, ok := a.(*EmptyPredictionContext); ok { + return a + } + if _, ok := b.(*EmptyPredictionContext); ok { + return b + } + } + // convert singleton so both are arrays to normalize + if _, ok := a.(*BaseSingletonPredictionContext); ok { + a = NewArrayPredictionContext([]PredictionContext{a.GetParent(0)}, []int{a.getReturnState(0)}) + } + if _, ok := b.(*BaseSingletonPredictionContext); ok { + b = NewArrayPredictionContext([]PredictionContext{b.GetParent(0)}, []int{b.getReturnState(0)}) + } + return mergeArrays(a.(*ArrayPredictionContext), b.(*ArrayPredictionContext), rootIsWildcard, mergeCache) +} + +// Merge two {@link SingletonBasePredictionContext} instances. +// +//

Stack tops equal, parents merge is same return left graph.
+//

+// +//

Same stack top, parents differ merge parents giving array node, then +// remainders of those graphs. A Newroot node is created to point to the +// merged parents.
+//

+// +//

Different stack tops pointing to same parent. Make array node for the +// root where both element in the root point to the same (original) +// parent.
+//

+// +//

Different stack tops pointing to different parents. Make array node for +// the root where each element points to the corresponding original +// parent.
+//

+// +// @param a the first {@link SingletonBasePredictionContext} +// @param b the second {@link SingletonBasePredictionContext} +// @param rootIsWildcard {@code true} if this is a local-context merge, +// otherwise false to indicate a full-context merge +// @param mergeCache +// / +func mergeSingletons(a, b *BaseSingletonPredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + if mergeCache != nil { + previous := mergeCache.Get(a.Hash(), b.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + previous = mergeCache.Get(b.Hash(), a.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + } + + rootMerge := mergeRoot(a, b, rootIsWildcard) + if rootMerge != nil { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), rootMerge) + } + return rootMerge + } + if a.returnState == b.returnState { + parent := merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache) + // if parent is same as existing a or b parent or reduced to a parent, + // return it + if parent == a.parentCtx { + return a // ax + bx = ax, if a=b + } + if parent == b.parentCtx { + return b // ax + bx = bx, if a=b + } + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // Newjoined parent so create Newsingleton pointing to it, a' + spc := SingletonBasePredictionContextCreate(parent, a.returnState) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), spc) + } + return spc + } + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + var singleParent PredictionContext + if a == b || (a.parentCtx != nil && a.parentCtx == b.parentCtx) { // ax + + // bx = + // [a,b]x + singleParent = a.parentCtx + } + if singleParent != nil { // parents are same + // sort payloads and use same parent + payloads := []int{a.returnState, b.returnState} + if a.returnState > b.returnState { + payloads[0] = b.returnState + payloads[1] = a.returnState + } + parents := []PredictionContext{singleParent, singleParent} + apc := NewArrayPredictionContext(parents, payloads) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), apc) + } + return apc + } + // parents differ and can't merge them. Just pack together + // into array can't merge. + // ax + by = [ax,by] + payloads := []int{a.returnState, b.returnState} + parents := []PredictionContext{a.parentCtx, b.parentCtx} + if a.returnState > b.returnState { // sort by payload + payloads[0] = b.returnState + payloads[1] = a.returnState + parents = []PredictionContext{b.parentCtx, a.parentCtx} + } + apc := NewArrayPredictionContext(parents, payloads) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), apc) + } + return apc +} + +// Handle case where at least one of {@code a} or {@code b} is +// {@link //EMPTY}. In the following diagrams, the symbol {@code $} is used +// to represent {@link //EMPTY}. +// +//

Local-Context Merges

+// +//

These local-context merge operations are used when {@code rootIsWildcard} +// is true.

+// +//

{@link //EMPTY} is superset of any graph return {@link //EMPTY}.
+//

+// +//

{@link //EMPTY} and anything is {@code //EMPTY}, so merged parent is +// {@code //EMPTY} return left graph.
+//

+// +//

Special case of last merge if local context.
+//

+// +//

Full-Context Merges

+// +//

These full-context merge operations are used when {@code rootIsWildcard} +// is false.

+// +//

+// +//

Must keep all contexts {@link //EMPTY} in array is a special value (and +// nil parent).
+//

+// +//

+// +// @param a the first {@link SingletonBasePredictionContext} +// @param b the second {@link SingletonBasePredictionContext} +// @param rootIsWildcard {@code true} if this is a local-context merge, +// otherwise false to indicate a full-context merge +// / +func mergeRoot(a, b SingletonPredictionContext, rootIsWildcard bool) PredictionContext { + if rootIsWildcard { + if a == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // // + b =// + } + if b == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // a +// =// + } + } else { + if a == BasePredictionContextEMPTY && b == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // $ + $ = $ + } else if a == BasePredictionContextEMPTY { // $ + x = [$,x] + payloads := []int{b.getReturnState(-1), BasePredictionContextEmptyReturnState} + parents := []PredictionContext{b.GetParent(-1), nil} + return NewArrayPredictionContext(parents, payloads) + } else if b == BasePredictionContextEMPTY { // x + $ = [$,x] ($ is always first if present) + payloads := []int{a.getReturnState(-1), BasePredictionContextEmptyReturnState} + parents := []PredictionContext{a.GetParent(-1), nil} + return NewArrayPredictionContext(parents, payloads) + } + } + return nil +} + +// Merge two {@link ArrayBasePredictionContext} instances. +// +//

Different tops, different parents.
+//

+// +//

Shared top, same parents.
+//

+// +//

Shared top, different parents.
+//

+// +//

Shared top, all shared parents.
+//

+// +//

Equal tops, merge parents and reduce top to +// {@link SingletonBasePredictionContext}.
+//

+// / +func mergeArrays(a, b *ArrayPredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + if mergeCache != nil { + previous := mergeCache.Get(a.Hash(), b.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + previous = mergeCache.Get(b.Hash(), a.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + } + // merge sorted payloads a + b => M + i := 0 // walks a + j := 0 // walks b + k := 0 // walks target M array + + mergedReturnStates := make([]int, len(a.returnStates)+len(b.returnStates)) + mergedParents := make([]PredictionContext, len(a.returnStates)+len(b.returnStates)) + // walk and merge to yield mergedParents, mergedReturnStates + for i < len(a.returnStates) && j < len(b.returnStates) { + aParent := a.parents[i] + bParent := b.parents[j] + if a.returnStates[i] == b.returnStates[j] { + // same payload (stack tops are equal), must yield merged singleton + payload := a.returnStates[i] + // $+$ = $ + bothDollars := payload == BasePredictionContextEmptyReturnState && aParent == nil && bParent == nil + axAX := aParent != nil && bParent != nil && aParent == bParent // ax+ax + // -> + // ax + if bothDollars || axAX { + mergedParents[k] = aParent // choose left + mergedReturnStates[k] = payload + } else { // ax+ay -> a'[x,y] + mergedParent := merge(aParent, bParent, rootIsWildcard, mergeCache) + mergedParents[k] = mergedParent + mergedReturnStates[k] = payload + } + i++ // hop over left one as usual + j++ // but also Skip one in right side since we merge + } else if a.returnStates[i] < b.returnStates[j] { // copy a[i] to M + mergedParents[k] = aParent + mergedReturnStates[k] = a.returnStates[i] + i++ + } else { // b > a, copy b[j] to M + mergedParents[k] = bParent + mergedReturnStates[k] = b.returnStates[j] + j++ + } + k++ + } + // copy over any payloads remaining in either array + if i < len(a.returnStates) { + for p := i; p < len(a.returnStates); p++ { + mergedParents[k] = a.parents[p] + mergedReturnStates[k] = a.returnStates[p] + k++ + } + } else { + for p := j; p < len(b.returnStates); p++ { + mergedParents[k] = b.parents[p] + mergedReturnStates[k] = b.returnStates[p] + k++ + } + } + // trim merged if we combined a few that had same stack tops + if k < len(mergedParents) { // write index < last position trim + if k == 1 { // for just one merged element, return singleton top + pc := SingletonBasePredictionContextCreate(mergedParents[0], mergedReturnStates[0]) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), pc) + } + return pc + } + mergedParents = mergedParents[0:k] + mergedReturnStates = mergedReturnStates[0:k] + } + + M := NewArrayPredictionContext(mergedParents, mergedReturnStates) + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if M == a { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), a) + } + return a + } + if M == b { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), b) + } + return b + } + combineCommonParents(mergedParents) + + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), M) + } + return M +} + +// Make pass over all M {@code parents} merge any {@code equals()} +// ones. +// / +func combineCommonParents(parents []PredictionContext) { + uniqueParents := make(map[PredictionContext]PredictionContext) + + for p := 0; p < len(parents); p++ { + parent := parents[p] + if uniqueParents[parent] == nil { + uniqueParents[parent] = parent + } + } + for q := 0; q < len(parents); q++ { + parents[q] = uniqueParents[parents[q]] + } +} + +func getCachedBasePredictionContext(context PredictionContext, contextCache *PredictionContextCache, visited map[PredictionContext]PredictionContext) PredictionContext { + + if context.isEmpty() { + return context + } + existing := visited[context] + if existing != nil { + return existing + } + existing = contextCache.Get(context) + if existing != nil { + visited[context] = existing + return existing + } + changed := false + parents := make([]PredictionContext, context.length()) + for i := 0; i < len(parents); i++ { + parent := getCachedBasePredictionContext(context.GetParent(i), contextCache, visited) + if changed || parent != context.GetParent(i) { + if !changed { + parents = make([]PredictionContext, context.length()) + for j := 0; j < context.length(); j++ { + parents[j] = context.GetParent(j) + } + changed = true + } + parents[i] = parent + } + } + if !changed { + contextCache.add(context) + visited[context] = context + return context + } + var updated PredictionContext + if len(parents) == 0 { + updated = BasePredictionContextEMPTY + } else if len(parents) == 1 { + updated = SingletonBasePredictionContextCreate(parents[0], context.getReturnState(0)) + } else { + updated = NewArrayPredictionContext(parents, context.(*ArrayPredictionContext).GetReturnStates()) + } + contextCache.add(updated) + visited[updated] = updated + visited[context] = updated + + return updated +} diff --git a/runtime/Go/antlr/v4/prediction_mode.go b/runtime/Go/antlr/v4/prediction_mode.go new file mode 100644 index 0000000000..270a89d393 --- /dev/null +++ b/runtime/Go/antlr/v4/prediction_mode.go @@ -0,0 +1,529 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// This enumeration defines the prediction modes available in ANTLR 4 along with +// utility methods for analyzing configuration sets for conflicts and/or +// ambiguities. + +const ( + // + // The SLL(*) prediction mode. This prediction mode ignores the current + // parser context when making predictions. This is the fastest prediction + // mode, and provides correct results for many grammars. This prediction + // mode is more powerful than the prediction mode provided by ANTLR 3, but + // may result in syntax errors for grammar and input combinations which are + // not SLL. + // + //

+ // When using this prediction mode, the parser will either return a correct + // parse tree (i.e. the same parse tree that would be returned with the + // {@link //LL} prediction mode), or it will Report a syntax error. If a + // syntax error is encountered when using the {@link //SLL} prediction mode, + // it may be due to either an actual syntax error in the input or indicate + // that the particular combination of grammar and input requires the more + // powerful {@link //LL} prediction abilities to complete successfully.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeSLL = 0 + // + // The LL(*) prediction mode. This prediction mode allows the current parser + // context to be used for resolving SLL conflicts that occur during + // prediction. This is the fastest prediction mode that guarantees correct + // parse results for all combinations of grammars with syntactically correct + // inputs. + // + //

+ // When using this prediction mode, the parser will make correct decisions + // for all syntactically-correct grammar and input combinations. However, in + // cases where the grammar is truly ambiguous this prediction mode might not + // Report a precise answer for exactly which alternatives are + // ambiguous.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeLL = 1 + // + // The LL(*) prediction mode with exact ambiguity detection. In addition to + // the correctness guarantees provided by the {@link //LL} prediction mode, + // this prediction mode instructs the prediction algorithm to determine the + // complete and exact set of ambiguous alternatives for every ambiguous + // decision encountered while parsing. + // + //

+ // This prediction mode may be used for diagnosing ambiguities during + // grammar development. Due to the performance overhead of calculating sets + // of ambiguous alternatives, this prediction mode should be avoided when + // the exact results are not necessary.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeLLExactAmbigDetection = 2 +) + +// Computes the SLL prediction termination condition. +// +//

+// This method computes the SLL prediction termination condition for both of +// the following cases.

+// +//
    +//
  • The usual SLL+LL fallback upon SLL conflict
  • +//
  • Pure SLL without LL fallback
  • +//
+// +//

COMBINED SLL+LL PARSING

+// +//

When LL-fallback is enabled upon SLL conflict, correct predictions are +// ensured regardless of how the termination condition is computed by this +// method. Due to the substantially higher cost of LL prediction, the +// prediction should only fall back to LL when the additional lookahead +// cannot lead to a unique SLL prediction.

+// +//

Assuming combined SLL+LL parsing, an SLL configuration set with only +// conflicting subsets should fall back to full LL, even if the +// configuration sets don't resolve to the same alternative (e.g. +// {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting +// configuration, SLL could continue with the hopes that more lookahead will +// resolve via one of those non-conflicting configurations.

+// +//

Here's the prediction termination rule them: SLL (for SLL+LL parsing) +// stops when it sees only conflicting configuration subsets. In contrast, +// full LL keeps going when there is uncertainty.

+// +//

HEURISTIC

+// +//

As a heuristic, we stop prediction when we see any conflicting subset +// unless we see a state that only has one alternative associated with it. +// The single-alt-state thing lets prediction continue upon rules like +// (otherwise, it would admit defeat too soon):

+// +//

{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ” }

+// +//

When the ATN simulation reaches the state before {@code ”}, it has a +// DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally +// {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop +// processing this node because alternative to has another way to continue, +// via {@code [6|2|[]]}.

+// +//

It also let's us continue for this rule:

+// +//

{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B }

+// +//

After Matching input A, we reach the stop state for rule A, state 1. +// State 8 is the state right before B. Clearly alternatives 1 and 2 +// conflict and no amount of further lookahead will separate the two. +// However, alternative 3 will be able to continue and so we do not stop +// working on this state. In the previous example, we're concerned with +// states associated with the conflicting alternatives. Here alt 3 is not +// associated with the conflicting configs, but since we can continue +// looking for input reasonably, don't declare the state done.

+// +//

PURE SLL PARSING

+// +//

To handle pure SLL parsing, all we have to do is make sure that we +// combine stack contexts for configurations that differ only by semantic +// predicate. From there, we can do the usual SLL termination heuristic.

+// +//

PREDICATES IN SLL+LL PARSING

+// +//

SLL decisions don't evaluate predicates until after they reach DFA stop +// states because they need to create the DFA cache that works in all +// semantic situations. In contrast, full LL evaluates predicates collected +// during start state computation so it can ignore predicates thereafter. +// This means that SLL termination detection can totally ignore semantic +// predicates.

+// +//

Implementation-wise, {@link ATNConfigSet} combines stack contexts but not +// semantic predicate contexts so we might see two configurations like the +// following.

+// +//

{@code (s, 1, x, {}), (s, 1, x', {p})}

+// +//

Before testing these configurations against others, we have to merge +// {@code x} and {@code x'} (without modifying the existing configurations). +// For example, we test {@code (x+x')==x”} when looking for conflicts in +// the following configurations.

+// +//

{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x”, {})}

+// +//

If the configuration set has predicates (as indicated by +// {@link ATNConfigSet//hasSemanticContext}), this algorithm makes a copy of +// the configurations to strip out all of the predicates so that a standard +// {@link ATNConfigSet} will merge everything ignoring predicates.

+func PredictionModehasSLLConflictTerminatingPrediction(mode int, configs ATNConfigSet) bool { + // Configs in rule stop states indicate reaching the end of the decision + // rule (local context) or end of start rule (full context). If all + // configs meet this condition, then none of the configurations is able + // to Match additional input so we terminate prediction. + // + if PredictionModeallConfigsInRuleStopStates(configs) { + return true + } + // pure SLL mode parsing + if mode == PredictionModeSLL { + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL costs more time + // since we'll often fail over anyway. + if configs.HasSemanticContext() { + // dup configs, tossing out semantic predicates + dup := NewBaseATNConfigSet(false) + for _, c := range configs.GetItems() { + + // NewBaseATNConfig({semanticContext:}, c) + c = NewBaseATNConfig2(c, SemanticContextNone) + dup.Add(c, nil) + } + configs = dup + } + // now we have combined contexts for configs with dissimilar preds + } + // pure SLL or combined SLL+LL mode parsing + altsets := PredictionModegetConflictingAltSubsets(configs) + return PredictionModehasConflictingAltSet(altsets) && !PredictionModehasStateAssociatedWithOneAlt(configs) +} + +// Checks if any configuration in {@code configs} is in a +// {@link RuleStopState}. Configurations meeting this condition have reached +// the end of the decision rule (local context) or end of start rule (full +// context). +// +// @param configs the configuration set to test +// @return {@code true} if any configuration in {@code configs} is in a +// {@link RuleStopState}, otherwise {@code false} +func PredictionModehasConfigInRuleStopState(configs ATNConfigSet) bool { + for _, c := range configs.GetItems() { + if _, ok := c.GetState().(*RuleStopState); ok { + return true + } + } + return false +} + +// Checks if all configurations in {@code configs} are in a +// {@link RuleStopState}. Configurations meeting this condition have reached +// the end of the decision rule (local context) or end of start rule (full +// context). +// +// @param configs the configuration set to test +// @return {@code true} if all configurations in {@code configs} are in a +// {@link RuleStopState}, otherwise {@code false} +func PredictionModeallConfigsInRuleStopStates(configs ATNConfigSet) bool { + + for _, c := range configs.GetItems() { + if _, ok := c.GetState().(*RuleStopState); !ok { + return false + } + } + return true +} + +// Full LL prediction termination. +// +//

Can we stop looking ahead during ATN simulation or is there some +// uncertainty as to which alternative we will ultimately pick, after +// consuming more input? Even if there are partial conflicts, we might know +// that everything is going to resolve to the same minimum alternative. That +// means we can stop since no more lookahead will change that fact. On the +// other hand, there might be multiple conflicts that resolve to different +// minimums. That means we need more look ahead to decide which of those +// alternatives we should predict.

+// +//

The basic idea is to split the set of configurations {@code C}, into +// conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with +// non-conflicting configurations. Two configurations conflict if they have +// identical {@link ATNConfig//state} and {@link ATNConfig//context} values +// but different {@link ATNConfig//alt} value, e.g. {@code (s, i, ctx, _)} +// and {@code (s, j, ctx, _)} for {@code i!=j}.

+// +//

Reduce these configuration subsets to the set of possible alternatives. +// You can compute the alternative subsets in one pass as follows:

+// +//

{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in +// {@code C} holding {@code s} and {@code ctx} fixed.

+// +//

Or in pseudo-code, for each configuration {@code c} in {@code C}:

+// +//
+// map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not
+// alt and not pred
+// 
+// +//

The values in {@code map} are the set of {@code A_s,ctx} sets.

+// +//

If {@code |A_s,ctx|=1} then there is no conflict associated with +// {@code s} and {@code ctx}.

+// +//

Reduce the subsets to singletons by choosing a minimum of each subset. If +// the union of these alternative subsets is a singleton, then no amount of +// more lookahead will help us. We will always pick that alternative. If, +// however, there is more than one alternative, then we are uncertain which +// alternative to predict and must continue looking for resolution. We may +// or may not discover an ambiguity in the future, even if there are no +// conflicting subsets this round.

+// +//

The biggest sin is to terminate early because it means we've made a +// decision but were uncertain as to the eventual outcome. We haven't used +// enough lookahead. On the other hand, announcing a conflict too late is no +// big deal you will still have the conflict. It's just inefficient. It +// might even look until the end of file.

+// +//

No special consideration for semantic predicates is required because +// predicates are evaluated on-the-fly for full LL prediction, ensuring that +// no configuration contains a semantic context during the termination +// check.

+// +//

CONFLICTING CONFIGS

+// +//

Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict +// when {@code i!=j} but {@code x=x'}. Because we merge all +// {@code (s, i, _)} configurations together, that means that there are at +// most {@code n} configurations associated with state {@code s} for +// {@code n} possible alternatives in the decision. The merged stacks +// complicate the comparison of configuration contexts {@code x} and +// {@code x'}. Sam checks to see if one is a subset of the other by calling +// merge and checking to see if the merged result is either {@code x} or +// {@code x'}. If the {@code x} associated with lowest alternative {@code i} +// is the superset, then {@code i} is the only possible prediction since the +// others resolve to {@code min(i)} as well. However, if {@code x} is +// associated with {@code j>i} then at least one stack configuration for +// {@code j} is not in conflict with alternative {@code i}. The algorithm +// should keep going, looking for more lookahead due to the uncertainty.

+// +//

For simplicity, I'm doing a equality check between {@code x} and +// {@code x'} that lets the algorithm continue to consume lookahead longer +// than necessary. The reason I like the equality is of course the +// simplicity but also because that is the test you need to detect the +// alternatives that are actually in conflict.

+// +//

CONTINUE/STOP RULE

+// +//

Continue if union of resolved alternative sets from non-conflicting and +// conflicting alternative subsets has more than one alternative. We are +// uncertain about which alternative to predict.

+// +//

The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which +// alternatives are still in the running for the amount of input we've +// consumed at this point. The conflicting sets let us to strip away +// configurations that won't lead to more states because we resolve +// conflicts to the configuration with a minimum alternate for the +// conflicting set.

+// +//

CASES

+// +//
    +// +//
  • no conflicts and more than 1 alternative in set => continue
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, +// {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set +// {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = +// {@code {1,3}} => continue +//
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, +// {@code (s', 2, y)}, {@code (s”, 1, z)} yields non-conflicting set +// {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = +// {@code {1}} => stop and predict 1
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, +// {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {1}} = {@code {1}} => stop and predict 1, can announce +// ambiguity {@code {1,2}}
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, +// {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {2}} = {@code {1,2}} => continue
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, +// {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {3}} = {@code {1,3}} => continue
  • +// +//
+// +//

EXACT AMBIGUITY DETECTION

+// +//

If all states Report the same conflicting set of alternatives, then we +// know we have the exact ambiguity set.

+// +//

|A_i|>1 and +// A_i = A_j for all i, j.

+// +//

In other words, we continue examining lookahead until all {@code A_i} +// have more than one alternative and all {@code A_i} are the same. If +// {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate +// because the resolved set is {@code {1}}. To determine what the real +// ambiguity is, we have to know whether the ambiguity is between one and +// two or one and three so we keep going. We can only stop prediction when +// we need exact ambiguity detection when the sets look like +// {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...

+func PredictionModeresolvesToJustOneViableAlt(altsets []*BitSet) int { + return PredictionModegetSingleViableAlt(altsets) +} + +// Determines if every alternative subset in {@code altsets} contains more +// than one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if every {@link BitSet} in {@code altsets} has +// {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} +func PredictionModeallSubsetsConflict(altsets []*BitSet) bool { + return !PredictionModehasNonConflictingAltSet(altsets) +} + +// Determines if any single alternative subset in {@code altsets} contains +// exactly one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if {@code altsets} contains a {@link BitSet} with +// {@link BitSet//cardinality cardinality} 1, otherwise {@code false} +func PredictionModehasNonConflictingAltSet(altsets []*BitSet) bool { + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if alts.length() == 1 { + return true + } + } + return false +} + +// Determines if any single alternative subset in {@code altsets} contains +// more than one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if {@code altsets} contains a {@link BitSet} with +// {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} +func PredictionModehasConflictingAltSet(altsets []*BitSet) bool { + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if alts.length() > 1 { + return true + } + } + return false +} + +// Determines if every alternative subset in {@code altsets} is equivalent. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if every member of {@code altsets} is equal to the +// others, otherwise {@code false} +func PredictionModeallSubsetsEqual(altsets []*BitSet) bool { + var first *BitSet + + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if first == nil { + first = alts + } else if alts != first { + return false + } + } + + return true +} + +// Returns the unique alternative predicted by all alternative subsets in +// {@code altsets}. If no such alternative exists, this method returns +// {@link ATN//INVALID_ALT_NUMBER}. +// +// @param altsets a collection of alternative subsets +func PredictionModegetUniqueAlt(altsets []*BitSet) int { + all := PredictionModeGetAlts(altsets) + if all.length() == 1 { + return all.minValue() + } + + return ATNInvalidAltNumber +} + +// Gets the complete set of represented alternatives for a collection of +// alternative subsets. This method returns the union of each {@link BitSet} +// in {@code altsets}. +// +// @param altsets a collection of alternative subsets +// @return the set of represented alternatives in {@code altsets} +func PredictionModeGetAlts(altsets []*BitSet) *BitSet { + all := NewBitSet() + for _, alts := range altsets { + all.or(alts) + } + return all +} + +// PredictionModegetConflictingAltSubsets gets the conflicting alt subsets from a configuration set. +// For each configuration {@code c} in {@code configs}: +// +//
+// map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not
+// alt and not pred
+// 
+func PredictionModegetConflictingAltSubsets(configs ATNConfigSet) []*BitSet { + configToAlts := NewJMap[ATNConfig, *BitSet, *ATNAltConfigComparator[ATNConfig]](&ATNAltConfigComparator[ATNConfig]{}) + + for _, c := range configs.GetItems() { + + alts, ok := configToAlts.Get(c) + if !ok { + alts = NewBitSet() + configToAlts.Put(c, alts) + } + alts.add(c.GetAlt()) + } + + return configToAlts.Values() +} + +// PredictionModeGetStateToAltMap gets a map from state to alt subset from a configuration set. For each +// configuration {@code c} in {@code configs}: +// +//
+// map[c.{@link ATNConfig//state state}] U= c.{@link ATNConfig//alt alt}
+// 
+func PredictionModeGetStateToAltMap(configs ATNConfigSet) *AltDict { + m := NewAltDict() + + for _, c := range configs.GetItems() { + alts := m.Get(c.GetState().String()) + if alts == nil { + alts = NewBitSet() + m.put(c.GetState().String(), alts) + } + alts.(*BitSet).add(c.GetAlt()) + } + return m +} + +func PredictionModehasStateAssociatedWithOneAlt(configs ATNConfigSet) bool { + values := PredictionModeGetStateToAltMap(configs).values() + for i := 0; i < len(values); i++ { + if values[i].(*BitSet).length() == 1 { + return true + } + } + return false +} + +func PredictionModegetSingleViableAlt(altsets []*BitSet) int { + result := ATNInvalidAltNumber + + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + minAlt := alts.minValue() + if result == ATNInvalidAltNumber { + result = minAlt + } else if result != minAlt { // more than 1 viable alt + return ATNInvalidAltNumber + } + } + return result +} diff --git a/runtime/Go/antlr/v4/recognizer.go b/runtime/Go/antlr/v4/recognizer.go new file mode 100644 index 0000000000..63e9b08adb --- /dev/null +++ b/runtime/Go/antlr/v4/recognizer.go @@ -0,0 +1,216 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strings" + + "strconv" +) + +type Recognizer interface { + GetLiteralNames() []string + GetSymbolicNames() []string + GetRuleNames() []string + + Sempred(RuleContext, int, int) bool + Precpred(RuleContext, int) bool + + GetState() int + SetState(int) + Action(RuleContext, int, int) + AddErrorListener(ErrorListener) + RemoveErrorListeners() + GetATN() *ATN + GetErrorListenerDispatch() ErrorListener +} + +type BaseRecognizer struct { + listeners []ErrorListener + state int + + RuleNames []string + LiteralNames []string + SymbolicNames []string + GrammarFileName string +} + +func NewBaseRecognizer() *BaseRecognizer { + rec := new(BaseRecognizer) + rec.listeners = []ErrorListener{ConsoleErrorListenerINSTANCE} + rec.state = -1 + return rec +} + +var tokenTypeMapCache = make(map[string]int) +var ruleIndexMapCache = make(map[string]int) + +func (b *BaseRecognizer) checkVersion(toolVersion string) { + runtimeVersion := "4.10.1" + if runtimeVersion != toolVersion { + fmt.Println("ANTLR runtime and generated code versions disagree: " + runtimeVersion + "!=" + toolVersion) + } +} + +func (b *BaseRecognizer) Action(context RuleContext, ruleIndex, actionIndex int) { + panic("action not implemented on Recognizer!") +} + +func (b *BaseRecognizer) AddErrorListener(listener ErrorListener) { + b.listeners = append(b.listeners, listener) +} + +func (b *BaseRecognizer) RemoveErrorListeners() { + b.listeners = make([]ErrorListener, 0) +} + +func (b *BaseRecognizer) GetRuleNames() []string { + return b.RuleNames +} + +func (b *BaseRecognizer) GetTokenNames() []string { + return b.LiteralNames +} + +func (b *BaseRecognizer) GetSymbolicNames() []string { + return b.SymbolicNames +} + +func (b *BaseRecognizer) GetLiteralNames() []string { + return b.LiteralNames +} + +func (b *BaseRecognizer) GetState() int { + return b.state +} + +func (b *BaseRecognizer) SetState(v int) { + b.state = v +} + +//func (b *Recognizer) GetTokenTypeMap() { +// var tokenNames = b.GetTokenNames() +// if (tokenNames==nil) { +// panic("The current recognizer does not provide a list of token names.") +// } +// var result = tokenTypeMapCache[tokenNames] +// if(result==nil) { +// result = tokenNames.reduce(function(o, k, i) { o[k] = i }) +// result.EOF = TokenEOF +// tokenTypeMapCache[tokenNames] = result +// } +// return result +//} + +// Get a map from rule names to rule indexes. +// +//

Used for XPath and tree pattern compilation.

+func (b *BaseRecognizer) GetRuleIndexMap() map[string]int { + + panic("Method not defined!") + // var ruleNames = b.GetRuleNames() + // if (ruleNames==nil) { + // panic("The current recognizer does not provide a list of rule names.") + // } + // + // var result = ruleIndexMapCache[ruleNames] + // if(result==nil) { + // result = ruleNames.reduce(function(o, k, i) { o[k] = i }) + // ruleIndexMapCache[ruleNames] = result + // } + // return result +} + +func (b *BaseRecognizer) GetTokenType(tokenName string) int { + panic("Method not defined!") + // var ttype = b.GetTokenTypeMap()[tokenName] + // if (ttype !=nil) { + // return ttype + // } else { + // return TokenInvalidType + // } +} + +//func (b *Recognizer) GetTokenTypeMap() map[string]int { +// Vocabulary vocabulary = getVocabulary() +// +// Synchronized (tokenTypeMapCache) { +// Map result = tokenTypeMapCache.Get(vocabulary) +// if (result == null) { +// result = new HashMap() +// for (int i = 0; i < GetATN().maxTokenType; i++) { +// String literalName = vocabulary.getLiteralName(i) +// if (literalName != null) { +// result.put(literalName, i) +// } +// +// String symbolicName = vocabulary.GetSymbolicName(i) +// if (symbolicName != null) { +// result.put(symbolicName, i) +// } +// } +// +// result.put("EOF", Token.EOF) +// result = Collections.unmodifiableMap(result) +// tokenTypeMapCache.put(vocabulary, result) +// } +// +// return result +// } +//} + +// What is the error header, normally line/character position information?// +func (b *BaseRecognizer) GetErrorHeader(e RecognitionException) string { + line := e.GetOffendingToken().GetLine() + column := e.GetOffendingToken().GetColumn() + return "line " + strconv.Itoa(line) + ":" + strconv.Itoa(column) +} + +// How should a token be displayed in an error message? The default +// +// is to display just the text, but during development you might +// want to have a lot of information spit out. Override in that case +// to use t.String() (which, for CommonToken, dumps everything about +// the token). This is better than forcing you to override a method in +// your token objects because you don't have to go modify your lexer +// so that it creates a NewJava type. +// +// @deprecated This method is not called by the ANTLR 4 Runtime. Specific +// implementations of {@link ANTLRErrorStrategy} may provide a similar +// feature when necessary. For example, see +// {@link DefaultErrorStrategy//GetTokenErrorDisplay}. +func (b *BaseRecognizer) GetTokenErrorDisplay(t Token) string { + if t == nil { + return "" + } + s := t.GetText() + if s == "" { + if t.GetTokenType() == TokenEOF { + s = "" + } else { + s = "<" + strconv.Itoa(t.GetTokenType()) + ">" + } + } + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + + return "'" + s + "'" +} + +func (b *BaseRecognizer) GetErrorListenerDispatch() ErrorListener { + return NewProxyErrorListener(b.listeners) +} + +// subclass needs to override these if there are sempreds or actions +// that the ATN interp needs to execute +func (b *BaseRecognizer) Sempred(localctx RuleContext, ruleIndex int, actionIndex int) bool { + return true +} + +func (b *BaseRecognizer) Precpred(localctx RuleContext, precedence int) bool { + return true +} diff --git a/runtime/Go/antlr/v4/rule_context.go b/runtime/Go/antlr/v4/rule_context.go new file mode 100644 index 0000000000..210699ba23 --- /dev/null +++ b/runtime/Go/antlr/v4/rule_context.go @@ -0,0 +1,114 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// A rule context is a record of a single rule invocation. It knows +// which context invoked it, if any. If there is no parent context, then +// naturally the invoking state is not valid. The parent link +// provides a chain upwards from the current rule invocation to the root +// of the invocation tree, forming a stack. We actually carry no +// information about the rule associated with b context (except +// when parsing). We keep only the state number of the invoking state from +// the ATN submachine that invoked b. Contrast b with the s +// pointer inside ParserRuleContext that tracks the current state +// being "executed" for the current rule. +// +// The parent contexts are useful for computing lookahead sets and +// getting error information. +// +// These objects are used during parsing and prediction. +// For the special case of parsers, we use the subclass +// ParserRuleContext. +// +// @see ParserRuleContext +// + +type RuleContext interface { + RuleNode + + GetInvokingState() int + SetInvokingState(int) + + GetRuleIndex() int + IsEmpty() bool + + GetAltNumber() int + SetAltNumber(altNumber int) + + String([]string, RuleContext) string +} + +type BaseRuleContext struct { + parentCtx RuleContext + invokingState int + RuleIndex int +} + +func NewBaseRuleContext(parent RuleContext, invokingState int) *BaseRuleContext { + + rn := new(BaseRuleContext) + + // What context invoked b rule? + rn.parentCtx = parent + + // What state invoked the rule associated with b context? + // The "return address" is the followState of invokingState + // If parent is nil, b should be -1. + if parent == nil { + rn.invokingState = -1 + } else { + rn.invokingState = invokingState + } + + return rn +} + +func (b *BaseRuleContext) GetBaseRuleContext() *BaseRuleContext { + return b +} + +func (b *BaseRuleContext) SetParent(v Tree) { + if v == nil { + b.parentCtx = nil + } else { + b.parentCtx = v.(RuleContext) + } +} + +func (b *BaseRuleContext) GetInvokingState() int { + return b.invokingState +} + +func (b *BaseRuleContext) SetInvokingState(t int) { + b.invokingState = t +} + +func (b *BaseRuleContext) GetRuleIndex() int { + return b.RuleIndex +} + +func (b *BaseRuleContext) GetAltNumber() int { + return ATNInvalidAltNumber +} + +func (b *BaseRuleContext) SetAltNumber(altNumber int) {} + +// A context is empty if there is no invoking state meaning nobody call +// current context. +func (b *BaseRuleContext) IsEmpty() bool { + return b.invokingState == -1 +} + +// Return the combined text of all child nodes. This method only considers +// tokens which have been added to the parse tree. +//

+// Since tokens on hidden channels (e.g. whitespace or comments) are not +// added to the parse trees, they will not appear in the output of b +// method. +// + +func (b *BaseRuleContext) GetParent() Tree { + return b.parentCtx +} diff --git a/runtime/Go/antlr/v4/semantic_context.go b/runtime/Go/antlr/v4/semantic_context.go new file mode 100644 index 0000000000..f54926e760 --- /dev/null +++ b/runtime/Go/antlr/v4/semantic_context.go @@ -0,0 +1,469 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +// A tree structure used to record the semantic context in which +// an ATN configuration is valid. It's either a single predicate, +// a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}. +// +//

I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of +// {@link SemanticContext} within the scope of this outer class.

+// + +type SemanticContext interface { + Equals(other Collectable[SemanticContext]) bool + Hash() int + + evaluate(parser Recognizer, outerContext RuleContext) bool + evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext + + String() string +} + +func SemanticContextandContext(a, b SemanticContext) SemanticContext { + if a == nil || a == SemanticContextNone { + return b + } + if b == nil || b == SemanticContextNone { + return a + } + result := NewAND(a, b) + if len(result.opnds) == 1 { + return result.opnds[0] + } + + return result +} + +func SemanticContextorContext(a, b SemanticContext) SemanticContext { + if a == nil { + return b + } + if b == nil { + return a + } + if a == SemanticContextNone || b == SemanticContextNone { + return SemanticContextNone + } + result := NewOR(a, b) + if len(result.opnds) == 1 { + return result.opnds[0] + } + + return result +} + +type Predicate struct { + ruleIndex int + predIndex int + isCtxDependent bool +} + +func NewPredicate(ruleIndex, predIndex int, isCtxDependent bool) *Predicate { + p := new(Predicate) + + p.ruleIndex = ruleIndex + p.predIndex = predIndex + p.isCtxDependent = isCtxDependent // e.g., $i ref in pred + return p +} + +//The default {@link SemanticContext}, which is semantically equivalent to +//a predicate of the form {@code {true}?}. + +var SemanticContextNone = NewPredicate(-1, -1, false) + +func (p *Predicate) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + return p +} + +func (p *Predicate) evaluate(parser Recognizer, outerContext RuleContext) bool { + + var localctx RuleContext + + if p.isCtxDependent { + localctx = outerContext + } + + return parser.Sempred(localctx, p.ruleIndex, p.predIndex) +} + +func (p *Predicate) Equals(other Collectable[SemanticContext]) bool { + if p == other { + return true + } else if _, ok := other.(*Predicate); !ok { + return false + } else { + return p.ruleIndex == other.(*Predicate).ruleIndex && + p.predIndex == other.(*Predicate).predIndex && + p.isCtxDependent == other.(*Predicate).isCtxDependent + } +} + +func (p *Predicate) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, p.ruleIndex) + h = murmurUpdate(h, p.predIndex) + if p.isCtxDependent { + h = murmurUpdate(h, 1) + } else { + h = murmurUpdate(h, 0) + } + return murmurFinish(h, 3) +} + +func (p *Predicate) String() string { + return "{" + strconv.Itoa(p.ruleIndex) + ":" + strconv.Itoa(p.predIndex) + "}?" +} + +type PrecedencePredicate struct { + precedence int +} + +func NewPrecedencePredicate(precedence int) *PrecedencePredicate { + + p := new(PrecedencePredicate) + p.precedence = precedence + + return p +} + +func (p *PrecedencePredicate) evaluate(parser Recognizer, outerContext RuleContext) bool { + return parser.Precpred(outerContext, p.precedence) +} + +func (p *PrecedencePredicate) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + if parser.Precpred(outerContext, p.precedence) { + return SemanticContextNone + } + + return nil +} + +func (p *PrecedencePredicate) compareTo(other *PrecedencePredicate) int { + return p.precedence - other.precedence +} + +func (p *PrecedencePredicate) Equals(other Collectable[SemanticContext]) bool { + + var op *PrecedencePredicate + var ok bool + if op, ok = other.(*PrecedencePredicate); !ok { + return false + } + + if p == op { + return true + } + + return p.precedence == other.(*PrecedencePredicate).precedence +} + +func (p *PrecedencePredicate) Hash() int { + h := uint32(1) + h = 31*h + uint32(p.precedence) + return int(h) +} + +func (p *PrecedencePredicate) String() string { + return "{" + strconv.Itoa(p.precedence) + ">=prec}?" +} + +func PrecedencePredicatefilterPrecedencePredicates(set *JStore[SemanticContext, Comparator[SemanticContext]]) []*PrecedencePredicate { + result := make([]*PrecedencePredicate, 0) + + set.Each(func(v SemanticContext) bool { + if c2, ok := v.(*PrecedencePredicate); ok { + result = append(result, c2) + } + return true + }) + + return result +} + +// A semantic context which is true whenever none of the contained contexts +// is false.` + +type AND struct { + opnds []SemanticContext +} + +func NewAND(a, b SemanticContext) *AND { + + operands := NewJStore[SemanticContext, Comparator[SemanticContext]](&ObjEqComparator[SemanticContext]{}) + if aa, ok := a.(*AND); ok { + for _, o := range aa.opnds { + operands.Put(o) + } + } else { + operands.Put(a) + } + + if ba, ok := b.(*AND); ok { + for _, o := range ba.opnds { + operands.Put(o) + } + } else { + operands.Put(b) + } + precedencePredicates := PrecedencePredicatefilterPrecedencePredicates(operands) + if len(precedencePredicates) > 0 { + // interested in the transition with the lowest precedence + var reduced *PrecedencePredicate + + for _, p := range precedencePredicates { + if reduced == nil || p.precedence < reduced.precedence { + reduced = p + } + } + + operands.Put(reduced) + } + + vs := operands.Values() + opnds := make([]SemanticContext, len(vs)) + for i, v := range vs { + opnds[i] = v.(SemanticContext) + } + + and := new(AND) + and.opnds = opnds + + return and +} + +func (a *AND) Equals(other Collectable[SemanticContext]) bool { + if a == other { + return true + } + if _, ok := other.(*AND); !ok { + return false + } else { + for i, v := range other.(*AND).opnds { + if !a.opnds[i].Equals(v) { + return false + } + } + return true + } +} + +// {@inheritDoc} +// +//

+// The evaluation of predicates by a context is short-circuiting, but +// unordered.

+func (a *AND) evaluate(parser Recognizer, outerContext RuleContext) bool { + for i := 0; i < len(a.opnds); i++ { + if !a.opnds[i].evaluate(parser, outerContext) { + return false + } + } + return true +} + +func (a *AND) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + differs := false + operands := make([]SemanticContext, 0) + + for i := 0; i < len(a.opnds); i++ { + context := a.opnds[i] + evaluated := context.evalPrecedence(parser, outerContext) + differs = differs || (evaluated != context) + if evaluated == nil { + // The AND context is false if any element is false + return nil + } else if evaluated != SemanticContextNone { + // Reduce the result by Skipping true elements + operands = append(operands, evaluated) + } + } + if !differs { + return a + } + + if len(operands) == 0 { + // all elements were true, so the AND context is true + return SemanticContextNone + } + + var result SemanticContext + + for _, o := range operands { + if result == nil { + result = o + } else { + result = SemanticContextandContext(result, o) + } + } + + return result +} + +func (a *AND) Hash() int { + h := murmurInit(37) // Init with a value different from OR + for _, op := range a.opnds { + h = murmurUpdate(h, op.Hash()) + } + return murmurFinish(h, len(a.opnds)) +} + +func (a *OR) Hash() int { + h := murmurInit(41) // Init with a value different from AND + for _, op := range a.opnds { + h = murmurUpdate(h, op.Hash()) + } + return murmurFinish(h, len(a.opnds)) +} + +func (a *AND) String() string { + s := "" + + for _, o := range a.opnds { + s += "&& " + fmt.Sprint(o) + } + + if len(s) > 3 { + return s[0:3] + } + + return s +} + +// +// A semantic context which is true whenever at least one of the contained +// contexts is true. +// + +type OR struct { + opnds []SemanticContext +} + +func NewOR(a, b SemanticContext) *OR { + + operands := NewJStore[SemanticContext, Comparator[SemanticContext]](&ObjEqComparator[SemanticContext]{}) + if aa, ok := a.(*OR); ok { + for _, o := range aa.opnds { + operands.Put(o) + } + } else { + operands.Put(a) + } + + if ba, ok := b.(*OR); ok { + for _, o := range ba.opnds { + operands.Put(o) + } + } else { + operands.Put(b) + } + precedencePredicates := PrecedencePredicatefilterPrecedencePredicates(operands) + if len(precedencePredicates) > 0 { + // interested in the transition with the lowest precedence + var reduced *PrecedencePredicate + + for _, p := range precedencePredicates { + if reduced == nil || p.precedence > reduced.precedence { + reduced = p + } + } + + operands.Put(reduced) + } + + vs := operands.Values() + + opnds := make([]SemanticContext, len(vs)) + for i, v := range vs { + opnds[i] = v.(SemanticContext) + } + + o := new(OR) + o.opnds = opnds + + return o +} + +func (o *OR) Equals(other Collectable[SemanticContext]) bool { + if o == other { + return true + } else if _, ok := other.(*OR); !ok { + return false + } else { + for i, v := range other.(*OR).opnds { + if !o.opnds[i].Equals(v) { + return false + } + } + return true + } +} + +//

+// The evaluation of predicates by o context is short-circuiting, but +// unordered.

+func (o *OR) evaluate(parser Recognizer, outerContext RuleContext) bool { + for i := 0; i < len(o.opnds); i++ { + if o.opnds[i].evaluate(parser, outerContext) { + return true + } + } + return false +} + +func (o *OR) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + differs := false + operands := make([]SemanticContext, 0) + for i := 0; i < len(o.opnds); i++ { + context := o.opnds[i] + evaluated := context.evalPrecedence(parser, outerContext) + differs = differs || (evaluated != context) + if evaluated == SemanticContextNone { + // The OR context is true if any element is true + return SemanticContextNone + } else if evaluated != nil { + // Reduce the result by Skipping false elements + operands = append(operands, evaluated) + } + } + if !differs { + return o + } + if len(operands) == 0 { + // all elements were false, so the OR context is false + return nil + } + var result SemanticContext + + for _, o := range operands { + if result == nil { + result = o + } else { + result = SemanticContextorContext(result, o) + } + } + + return result +} + +func (o *OR) String() string { + s := "" + + for _, o := range o.opnds { + s += "|| " + fmt.Sprint(o) + } + + if len(s) > 3 { + return s[0:3] + } + + return s +} diff --git a/runtime/Go/antlr/v4/testing_assert_test.go b/runtime/Go/antlr/v4/testing_assert_test.go new file mode 100644 index 0000000000..4a402a34f3 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_assert_test.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +// These assert functions are borrowed from https://github.com/stretchr/testify/ (MIT License) + +package antlr + +import ( + "fmt" + "reflect" + "testing" +) + +type assert struct { + t *testing.T +} + +func assertNew(t *testing.T) *assert { + return &assert{ + t: t, + } +} + +func (a *assert) Equal(expected, actual interface{}) bool { + if !objectsAreEqual(expected, actual) { + return a.Fail(fmt.Sprintf("Not equal:\n"+ + "expected: %#v\n"+ + " actual: %#v\n", expected, actual)) + } + return true +} + +func objectsAreEqual(expected, actual interface{}) bool { + if expected == nil || actual == nil { + return expected == actual + } + return reflect.DeepEqual(expected, actual) +} + +func (a *assert) Nil(object interface{}) bool { + if isNil(object) { + return true + } + return a.Fail(fmt.Sprintf("Expected nil, but got: %#v", object)) +} + +func (a *assert) NotNil(object interface{}) bool { + if !isNil(object) { + return true + } + return a.Fail("Expected value not to be nil.") +} + +// isNil checks if a specified object is nil or not, without Failing. +func isNil(object interface{}) bool { + if object == nil { + return true + } + + value := reflect.ValueOf(object) + kind := value.Kind() + if kind >= reflect.Chan && kind <= reflect.Slice && value.IsNil() { + return true + } + + return false +} + +func (a *assert) Panics(f func()) bool { + if funcDidPanic, panicValue := didPanic(f); !funcDidPanic { + return a.Fail(fmt.Sprintf("func %p should panic\n\r\tPanic value:\t%v", f, panicValue)) + } + + return true +} + +// Fail reports a failure through +func (a *assert) Fail(failureMessage string) bool { + a.t.Errorf("%s", failureMessage) + return false +} + +// didPanic returns true if the function passed to it panics. Otherwise, it returns false. +func didPanic(f func()) (bool, interface{}) { + didPanic := false + var message interface{} + func() { + defer func() { + if message = recover(); message != nil { + didPanic = true + } + }() + // call the target function + f() + }() + return didPanic, message +} diff --git a/runtime/Go/antlr/v4/testing_lexer_b_test.go b/runtime/Go/antlr/v4/testing_lexer_b_test.go new file mode 100644 index 0000000000..2485abf780 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_lexer_b_test.go @@ -0,0 +1,137 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +/* +LexerB is a lexer for testing purpose. + +This file is generated from this grammer. + +lexer grammar LexerB; + +ID : 'a'..'z'+; +INT : '0'..'9'+; +SEMI : ';'; +ASSIGN : '='; +PLUS : '+'; +MULT : '*'; +WS : ' '+; +*/ + +import ( + "fmt" + "sync" + "unicode" +) + +// Suppress unused import error +var _ = fmt.Printf +var _ = sync.Once{} +var _ = unicode.IsLetter + +type LexerB struct { + *BaseLexer + channelNames []string + modeNames []string + // TODO: EOF string +} + +var lexerbLexerStaticData struct { + once sync.Once + serializedATN []int32 + channelNames []string + modeNames []string + literalNames []string + symbolicNames []string + ruleNames []string + predictionContextCache *PredictionContextCache + atn *ATN + decisionToDFA []*DFA +} + +func lexerbLexerInit() { + staticData := &lexerbLexerStaticData + staticData.channelNames = []string{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", + } + staticData.modeNames = []string{ + "DEFAULT_MODE", + } + staticData.literalNames = []string{ + "", "", "", "';'", "'='", "'+'", "'*'", + } + staticData.symbolicNames = []string{ + "", "ID", "INT", "SEMI", "ASSIGN", "PLUS", "MULT", "WS", + } + staticData.ruleNames = []string{ + "ID", "INT", "SEMI", "ASSIGN", "PLUS", "MULT", "WS", + } + staticData.predictionContextCache = NewPredictionContextCache() + staticData.serializedATN = []int32{ + 4, 0, 7, 38, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, + 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 1, 0, 4, 0, 17, 8, 0, 11, 0, 12, 0, 18, + 1, 1, 4, 1, 22, 8, 1, 11, 1, 12, 1, 23, 1, 2, 1, 2, 1, 3, 1, 3, 1, 4, 1, + 4, 1, 5, 1, 5, 1, 6, 4, 6, 35, 8, 6, 11, 6, 12, 6, 36, 0, 0, 7, 1, 1, 3, + 2, 5, 3, 7, 4, 9, 5, 11, 6, 13, 7, 1, 0, 0, 40, 0, 1, 1, 0, 0, 0, 0, 3, + 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 9, 1, 0, 0, 0, 0, 11, + 1, 0, 0, 0, 0, 13, 1, 0, 0, 0, 1, 16, 1, 0, 0, 0, 3, 21, 1, 0, 0, 0, 5, + 25, 1, 0, 0, 0, 7, 27, 1, 0, 0, 0, 9, 29, 1, 0, 0, 0, 11, 31, 1, 0, 0, + 0, 13, 34, 1, 0, 0, 0, 15, 17, 2, 97, 122, 0, 16, 15, 1, 0, 0, 0, 17, 18, + 1, 0, 0, 0, 18, 16, 1, 0, 0, 0, 18, 19, 1, 0, 0, 0, 19, 2, 1, 0, 0, 0, + 20, 22, 2, 48, 57, 0, 21, 20, 1, 0, 0, 0, 22, 23, 1, 0, 0, 0, 23, 21, 1, + 0, 0, 0, 23, 24, 1, 0, 0, 0, 24, 4, 1, 0, 0, 0, 25, 26, 5, 59, 0, 0, 26, + 6, 1, 0, 0, 0, 27, 28, 5, 61, 0, 0, 28, 8, 1, 0, 0, 0, 29, 30, 5, 43, 0, + 0, 30, 10, 1, 0, 0, 0, 31, 32, 5, 42, 0, 0, 32, 12, 1, 0, 0, 0, 33, 35, + 5, 32, 0, 0, 34, 33, 1, 0, 0, 0, 35, 36, 1, 0, 0, 0, 36, 34, 1, 0, 0, 0, + 36, 37, 1, 0, 0, 0, 37, 14, 1, 0, 0, 0, 4, 0, 18, 23, 36, 0, + } + deserializer := NewATNDeserializer(nil) + staticData.atn = deserializer.Deserialize(staticData.serializedATN) + atn := staticData.atn + staticData.decisionToDFA = make([]*DFA, len(atn.DecisionToState)) + decisionToDFA := staticData.decisionToDFA + for index, state := range atn.DecisionToState { + decisionToDFA[index] = NewDFA(state, index) + } +} + +// LexerBInit initializes any static state used to implement LexerB. By default the +// static state used to implement the lexer is lazily initialized during the first call to +// NewLexerB(). You can call this function if you wish to initialize the static state ahead +// of time. +func LexerBInit() { + staticData := &lexerbLexerStaticData + staticData.once.Do(lexerbLexerInit) +} + +// NewLexerB produces a new lexer instance for the optional input antlr.CharStream. +func NewLexerB(input CharStream) *LexerB { + LexerBInit() + l := new(LexerB) + + l.BaseLexer = NewBaseLexer(input) + staticData := &lexerbLexerStaticData + l.Interpreter = NewLexerATNSimulator(l, staticData.atn, staticData.decisionToDFA, staticData.predictionContextCache) + l.channelNames = staticData.channelNames + l.modeNames = staticData.modeNames + l.RuleNames = staticData.ruleNames + l.LiteralNames = staticData.literalNames + l.SymbolicNames = staticData.symbolicNames + l.GrammarFileName = "LexerB.g4" + // TODO: l.EOF = antlr.TokenEOF + + return l +} + +// LexerB tokens. +const ( + LexerBID = 1 + LexerBINT = 2 + LexerBSEMI = 3 + LexerBASSIGN = 4 + LexerBPLUS = 5 + LexerBMULT = 6 + LexerBWS = 7 +) diff --git a/runtime/Go/antlr/v4/testing_util_test.go b/runtime/Go/antlr/v4/testing_util_test.go new file mode 100644 index 0000000000..20428831b3 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_util_test.go @@ -0,0 +1,30 @@ +package antlr + +import ( + "fmt" + "strings" +) + +// newTestCommonToken create common token with tokentype, text and channel +// notice: test purpose only +func newTestCommonToken(tokenType int, text string, channel int) *CommonToken { + t := new(CommonToken) + t.BaseToken = new(BaseToken) + t.tokenType = tokenType + t.channel = channel + t.text = text + t.line = 0 + t.column = -1 + return t +} + +// tokensToString returnes []Tokens string +// notice: test purpose only +func tokensToString(tokens []Token) string { + buf := make([]string, len(tokens)) + for i, token := range tokens { + buf[i] = fmt.Sprintf("%v", token) + } + + return "[" + strings.Join(buf, ", ") + "]" +} diff --git a/runtime/Go/antlr/v4/token.go b/runtime/Go/antlr/v4/token.go new file mode 100644 index 0000000000..f73b06bc6a --- /dev/null +++ b/runtime/Go/antlr/v4/token.go @@ -0,0 +1,209 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" + "strings" +) + +type TokenSourceCharStreamPair struct { + tokenSource TokenSource + charStream CharStream +} + +// A token has properties: text, type, line, character position in the line +// (so we can ignore tabs), token channel, index, and source from which +// we obtained this token. + +type Token interface { + GetSource() *TokenSourceCharStreamPair + GetTokenType() int + GetChannel() int + GetStart() int + GetStop() int + GetLine() int + GetColumn() int + + GetText() string + SetText(s string) + + GetTokenIndex() int + SetTokenIndex(v int) + + GetTokenSource() TokenSource + GetInputStream() CharStream +} + +type BaseToken struct { + source *TokenSourceCharStreamPair + tokenType int // token type of the token + channel int // The parser ignores everything not on DEFAULT_CHANNEL + start int // optional return -1 if not implemented. + stop int // optional return -1 if not implemented. + tokenIndex int // from 0..n-1 of the token object in the input stream + line int // line=1..n of the 1st character + column int // beginning of the line at which it occurs, 0..n-1 + text string // text of the token. + readOnly bool +} + +const ( + TokenInvalidType = 0 + + // During lookahead operations, this "token" signifies we hit rule end ATN state + // and did not follow it despite needing to. + TokenEpsilon = -2 + + TokenMinUserTokenType = 1 + + TokenEOF = -1 + + // All tokens go to the parser (unless Skip() is called in that rule) + // on a particular "channel". The parser tunes to a particular channel + // so that whitespace etc... can go to the parser on a "hidden" channel. + + TokenDefaultChannel = 0 + + // Anything on different channel than DEFAULT_CHANNEL is not parsed + // by parser. + + TokenHiddenChannel = 1 +) + +func (b *BaseToken) GetChannel() int { + return b.channel +} + +func (b *BaseToken) GetStart() int { + return b.start +} + +func (b *BaseToken) GetStop() int { + return b.stop +} + +func (b *BaseToken) GetLine() int { + return b.line +} + +func (b *BaseToken) GetColumn() int { + return b.column +} + +func (b *BaseToken) GetTokenType() int { + return b.tokenType +} + +func (b *BaseToken) GetSource() *TokenSourceCharStreamPair { + return b.source +} + +func (b *BaseToken) GetTokenIndex() int { + return b.tokenIndex +} + +func (b *BaseToken) SetTokenIndex(v int) { + b.tokenIndex = v +} + +func (b *BaseToken) GetTokenSource() TokenSource { + return b.source.tokenSource +} + +func (b *BaseToken) GetInputStream() CharStream { + return b.source.charStream +} + +type CommonToken struct { + *BaseToken +} + +func NewCommonToken(source *TokenSourceCharStreamPair, tokenType, channel, start, stop int) *CommonToken { + + t := new(CommonToken) + + t.BaseToken = new(BaseToken) + + t.source = source + t.tokenType = tokenType + t.channel = channel + t.start = start + t.stop = stop + t.tokenIndex = -1 + if t.source.tokenSource != nil { + t.line = source.tokenSource.GetLine() + t.column = source.tokenSource.GetCharPositionInLine() + } else { + t.column = -1 + } + return t +} + +// An empty {@link Pair} which is used as the default value of +// {@link //source} for tokens that do not have a source. + +//CommonToken.EMPTY_SOURCE = [ nil, nil ] + +// Constructs a New{@link CommonToken} as a copy of another {@link Token}. +// +//

+// If {@code oldToken} is also a {@link CommonToken} instance, the newly +// constructed token will share a reference to the {@link //text} field and +// the {@link Pair} stored in {@link //source}. Otherwise, {@link //text} will +// be assigned the result of calling {@link //GetText}, and {@link //source} +// will be constructed from the result of {@link Token//GetTokenSource} and +// {@link Token//GetInputStream}.

+// +// @param oldToken The token to copy. +func (c *CommonToken) clone() *CommonToken { + t := NewCommonToken(c.source, c.tokenType, c.channel, c.start, c.stop) + t.tokenIndex = c.GetTokenIndex() + t.line = c.GetLine() + t.column = c.GetColumn() + t.text = c.GetText() + return t +} + +func (c *CommonToken) GetText() string { + if c.text != "" { + return c.text + } + input := c.GetInputStream() + if input == nil { + return "" + } + n := input.Size() + if c.start < n && c.stop < n { + return input.GetTextFromInterval(NewInterval(c.start, c.stop)) + } + return "" +} + +func (c *CommonToken) SetText(text string) { + c.text = text +} + +func (c *CommonToken) String() string { + txt := c.GetText() + if txt != "" { + txt = strings.Replace(txt, "\n", "\\n", -1) + txt = strings.Replace(txt, "\r", "\\r", -1) + txt = strings.Replace(txt, "\t", "\\t", -1) + } else { + txt = "" + } + + var ch string + if c.channel > 0 { + ch = ",channel=" + strconv.Itoa(c.channel) + } else { + ch = "" + } + + return "[@" + strconv.Itoa(c.tokenIndex) + "," + strconv.Itoa(c.start) + ":" + strconv.Itoa(c.stop) + "='" + + txt + "',<" + strconv.Itoa(c.tokenType) + ">" + + ch + "," + strconv.Itoa(c.line) + ":" + strconv.Itoa(c.column) + "]" +} diff --git a/runtime/Go/antlr/v4/token_source.go b/runtime/Go/antlr/v4/token_source.go new file mode 100644 index 0000000000..a3f36eaa67 --- /dev/null +++ b/runtime/Go/antlr/v4/token_source.go @@ -0,0 +1,17 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type TokenSource interface { + NextToken() Token + Skip() + More() + GetLine() int + GetCharPositionInLine() int + GetInputStream() CharStream + GetSourceName() string + setTokenFactory(factory TokenFactory) + GetTokenFactory() TokenFactory +} diff --git a/runtime/Go/antlr/v4/token_stream.go b/runtime/Go/antlr/v4/token_stream.go new file mode 100644 index 0000000000..1527d43f60 --- /dev/null +++ b/runtime/Go/antlr/v4/token_stream.go @@ -0,0 +1,20 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type TokenStream interface { + IntStream + + LT(k int) Token + + Get(index int) Token + GetTokenSource() TokenSource + SetTokenSource(TokenSource) + + GetAllText() string + GetTextFromInterval(*Interval) string + GetTextFromRuleContext(RuleContext) string + GetTextFromTokens(Token, Token) string +} diff --git a/runtime/Go/antlr/v4/tokenstream_rewriter.go b/runtime/Go/antlr/v4/tokenstream_rewriter.go new file mode 100644 index 0000000000..b3e38af344 --- /dev/null +++ b/runtime/Go/antlr/v4/tokenstream_rewriter.go @@ -0,0 +1,659 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "fmt" +) + +// +// Useful for rewriting out a buffered input token stream after doing some +// augmentation or other manipulations on it. + +//

+// You can insert stuff, replace, and delete chunks. Note that the operations +// are done lazily--only if you convert the buffer to a {@link String} with +// {@link TokenStream#getText()}. This is very efficient because you are not +// moving data around all the time. As the buffer of tokens is converted to +// strings, the {@link #getText()} method(s) scan the input token stream and +// check to see if there is an operation at the current index. If so, the +// operation is done and then normal {@link String} rendering continues on the +// buffer. This is like having multiple Turing machine instruction streams +// (programs) operating on a single input tape. :)

+//

+ +// This rewriter makes no modifications to the token stream. It does not ask the +// stream to fill itself up nor does it advance the input cursor. The token +// stream {@link TokenStream#index()} will return the same value before and +// after any {@link #getText()} call.

+ +//

+// The rewriter only works on tokens that you have in the buffer and ignores the +// current input cursor. If you are buffering tokens on-demand, calling +// {@link #getText()} halfway through the input will only do rewrites for those +// tokens in the first half of the file.

+ +//

+// Since the operations are done lazily at {@link #getText}-time, operations do +// not screw up the token index values. That is, an insert operation at token +// index {@code i} does not change the index values for tokens +// {@code i}+1..n-1.

+ +//

+// Because operations never actually alter the buffer, you may always get the +// original token stream back without undoing anything. Since the instructions +// are queued up, you can easily simulate transactions and roll back any changes +// if there is an error just by removing instructions. For example,

+ +//
+// CharStream input = new ANTLRFileStream("input");
+// TLexer lex = new TLexer(input);
+// CommonTokenStream tokens = new CommonTokenStream(lex);
+// T parser = new T(tokens);
+// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+// parser.startRule();
+// 
+ +//

+// Then in the rules, you can execute (assuming rewriter is visible):

+ +//
+// Token t,u;
+// ...
+// rewriter.insertAfter(t, "text to put after t");}
+// rewriter.insertAfter(u, "text after u");}
+// System.out.println(rewriter.getText());
+// 
+ +//

+// You can also have multiple "instruction streams" and get multiple rewrites +// from a single pass over the input. Just name the instruction streams and use +// that name again when printing the buffer. This could be useful for generating +// a C file and also its header file--all from the same buffer:

+ +//
+// rewriter.insertAfter("pass1", t, "text to put after t");}
+// rewriter.insertAfter("pass2", u, "text after u");}
+// System.out.println(rewriter.getText("pass1"));
+// System.out.println(rewriter.getText("pass2"));
+// 
+ +//

+// If you don't use named rewrite streams, a "default" stream is used as the +// first example shows.

+ +const ( + Default_Program_Name = "default" + Program_Init_Size = 100 + Min_Token_Index = 0 +) + +// Define the rewrite operation hierarchy + +type RewriteOperation interface { + // Execute the rewrite operation by possibly adding to the buffer. + // Return the index of the next token to operate on. + Execute(buffer *bytes.Buffer) int + String() string + GetInstructionIndex() int + GetIndex() int + GetText() string + GetOpName() string + GetTokens() TokenStream + SetInstructionIndex(val int) + SetIndex(int) + SetText(string) + SetOpName(string) + SetTokens(TokenStream) +} + +type BaseRewriteOperation struct { + //Current index of rewrites list + instruction_index int + //Token buffer index + index int + //Substitution text + text string + //Actual operation name + op_name string + //Pointer to token steam + tokens TokenStream +} + +func (op *BaseRewriteOperation) GetInstructionIndex() int { + return op.instruction_index +} + +func (op *BaseRewriteOperation) GetIndex() int { + return op.index +} + +func (op *BaseRewriteOperation) GetText() string { + return op.text +} + +func (op *BaseRewriteOperation) GetOpName() string { + return op.op_name +} + +func (op *BaseRewriteOperation) GetTokens() TokenStream { + return op.tokens +} + +func (op *BaseRewriteOperation) SetInstructionIndex(val int) { + op.instruction_index = val +} + +func (op *BaseRewriteOperation) SetIndex(val int) { + op.index = val +} + +func (op *BaseRewriteOperation) SetText(val string) { + op.text = val +} + +func (op *BaseRewriteOperation) SetOpName(val string) { + op.op_name = val +} + +func (op *BaseRewriteOperation) SetTokens(val TokenStream) { + op.tokens = val +} + +func (op *BaseRewriteOperation) Execute(buffer *bytes.Buffer) int { + return op.index +} + +func (op *BaseRewriteOperation) String() string { + return fmt.Sprintf("<%s@%d:\"%s\">", + op.op_name, + op.tokens.Get(op.GetIndex()), + op.text, + ) + +} + +type InsertBeforeOp struct { + BaseRewriteOperation +} + +func NewInsertBeforeOp(index int, text string, stream TokenStream) *InsertBeforeOp { + return &InsertBeforeOp{BaseRewriteOperation: BaseRewriteOperation{ + index: index, + text: text, + op_name: "InsertBeforeOp", + tokens: stream, + }} +} + +func (op *InsertBeforeOp) Execute(buffer *bytes.Buffer) int { + buffer.WriteString(op.text) + if op.tokens.Get(op.index).GetTokenType() != TokenEOF { + buffer.WriteString(op.tokens.Get(op.index).GetText()) + } + return op.index + 1 +} + +func (op *InsertBeforeOp) String() string { + return op.BaseRewriteOperation.String() +} + +// Distinguish between insert after/before to do the "insert afters" +// first and then the "insert befores" at same index. Implementation +// of "insert after" is "insert before index+1". + +type InsertAfterOp struct { + BaseRewriteOperation +} + +func NewInsertAfterOp(index int, text string, stream TokenStream) *InsertAfterOp { + return &InsertAfterOp{BaseRewriteOperation: BaseRewriteOperation{ + index: index + 1, + text: text, + tokens: stream, + }} +} + +func (op *InsertAfterOp) Execute(buffer *bytes.Buffer) int { + buffer.WriteString(op.text) + if op.tokens.Get(op.index).GetTokenType() != TokenEOF { + buffer.WriteString(op.tokens.Get(op.index).GetText()) + } + return op.index + 1 +} + +func (op *InsertAfterOp) String() string { + return op.BaseRewriteOperation.String() +} + +// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp +// instructions. +type ReplaceOp struct { + BaseRewriteOperation + LastIndex int +} + +func NewReplaceOp(from, to int, text string, stream TokenStream) *ReplaceOp { + return &ReplaceOp{ + BaseRewriteOperation: BaseRewriteOperation{ + index: from, + text: text, + op_name: "ReplaceOp", + tokens: stream, + }, + LastIndex: to, + } +} + +func (op *ReplaceOp) Execute(buffer *bytes.Buffer) int { + if op.text != "" { + buffer.WriteString(op.text) + } + return op.LastIndex + 1 +} + +func (op *ReplaceOp) String() string { + if op.text == "" { + return fmt.Sprintf("", + op.tokens.Get(op.index), op.tokens.Get(op.LastIndex)) + } + return fmt.Sprintf("", + op.tokens.Get(op.index), op.tokens.Get(op.LastIndex), op.text) +} + +type TokenStreamRewriter struct { + //Our source stream + tokens TokenStream + // You may have multiple, named streams of rewrite operations. + // I'm calling these things "programs." + // Maps String (name) → rewrite (List) + programs map[string][]RewriteOperation + last_rewrite_token_indexes map[string]int +} + +func NewTokenStreamRewriter(tokens TokenStream) *TokenStreamRewriter { + return &TokenStreamRewriter{ + tokens: tokens, + programs: map[string][]RewriteOperation{ + Default_Program_Name: make([]RewriteOperation, 0, Program_Init_Size), + }, + last_rewrite_token_indexes: map[string]int{}, + } +} + +func (tsr *TokenStreamRewriter) GetTokenStream() TokenStream { + return tsr.tokens +} + +// Rollback the instruction stream for a program so that +// the indicated instruction (via instructionIndex) is no +// longer in the stream. UNTESTED! +func (tsr *TokenStreamRewriter) Rollback(program_name string, instruction_index int) { + is, ok := tsr.programs[program_name] + if ok { + tsr.programs[program_name] = is[Min_Token_Index:instruction_index] + } +} + +func (tsr *TokenStreamRewriter) RollbackDefault(instruction_index int) { + tsr.Rollback(Default_Program_Name, instruction_index) +} + +// Reset the program so that no instructions exist +func (tsr *TokenStreamRewriter) DeleteProgram(program_name string) { + tsr.Rollback(program_name, Min_Token_Index) //TODO: double test on that cause lower bound is not included +} + +func (tsr *TokenStreamRewriter) DeleteProgramDefault() { + tsr.DeleteProgram(Default_Program_Name) +} + +func (tsr *TokenStreamRewriter) InsertAfter(program_name string, index int, text string) { + // to insert after, just insert before next index (even if past end) + var op RewriteOperation = NewInsertAfterOp(index, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) InsertAfterDefault(index int, text string) { + tsr.InsertAfter(Default_Program_Name, index, text) +} + +func (tsr *TokenStreamRewriter) InsertAfterToken(program_name string, token Token, text string) { + tsr.InsertAfter(program_name, token.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) InsertBefore(program_name string, index int, text string) { + var op RewriteOperation = NewInsertBeforeOp(index, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) InsertBeforeDefault(index int, text string) { + tsr.InsertBefore(Default_Program_Name, index, text) +} + +func (tsr *TokenStreamRewriter) InsertBeforeToken(program_name string, token Token, text string) { + tsr.InsertBefore(program_name, token.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) Replace(program_name string, from, to int, text string) { + if from > to || from < 0 || to < 0 || to >= tsr.tokens.Size() { + panic(fmt.Sprintf("replace: range invalid: %d..%d(size=%d)", + from, to, tsr.tokens.Size())) + } + var op RewriteOperation = NewReplaceOp(from, to, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) ReplaceDefault(from, to int, text string) { + tsr.Replace(Default_Program_Name, from, to, text) +} + +func (tsr *TokenStreamRewriter) ReplaceDefaultPos(index int, text string) { + tsr.ReplaceDefault(index, index, text) +} + +func (tsr *TokenStreamRewriter) ReplaceToken(program_name string, from, to Token, text string) { + tsr.Replace(program_name, from.GetTokenIndex(), to.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) ReplaceTokenDefault(from, to Token, text string) { + tsr.ReplaceToken(Default_Program_Name, from, to, text) +} + +func (tsr *TokenStreamRewriter) ReplaceTokenDefaultPos(index Token, text string) { + tsr.ReplaceTokenDefault(index, index, text) +} + +func (tsr *TokenStreamRewriter) Delete(program_name string, from, to int) { + tsr.Replace(program_name, from, to, "") +} + +func (tsr *TokenStreamRewriter) DeleteDefault(from, to int) { + tsr.Delete(Default_Program_Name, from, to) +} + +func (tsr *TokenStreamRewriter) DeleteDefaultPos(index int) { + tsr.DeleteDefault(index, index) +} + +func (tsr *TokenStreamRewriter) DeleteToken(program_name string, from, to Token) { + tsr.ReplaceToken(program_name, from, to, "") +} + +func (tsr *TokenStreamRewriter) DeleteTokenDefault(from, to Token) { + tsr.DeleteToken(Default_Program_Name, from, to) +} + +func (tsr *TokenStreamRewriter) GetLastRewriteTokenIndex(program_name string) int { + i, ok := tsr.last_rewrite_token_indexes[program_name] + if !ok { + return -1 + } + return i +} + +func (tsr *TokenStreamRewriter) GetLastRewriteTokenIndexDefault() int { + return tsr.GetLastRewriteTokenIndex(Default_Program_Name) +} + +func (tsr *TokenStreamRewriter) SetLastRewriteTokenIndex(program_name string, i int) { + tsr.last_rewrite_token_indexes[program_name] = i +} + +func (tsr *TokenStreamRewriter) InitializeProgram(name string) []RewriteOperation { + is := make([]RewriteOperation, 0, Program_Init_Size) + tsr.programs[name] = is + return is +} + +func (tsr *TokenStreamRewriter) AddToProgram(name string, op RewriteOperation) { + is := tsr.GetProgram(name) + is = append(is, op) + tsr.programs[name] = is +} + +func (tsr *TokenStreamRewriter) GetProgram(name string) []RewriteOperation { + is, ok := tsr.programs[name] + if !ok { + is = tsr.InitializeProgram(name) + } + return is +} + +// Return the text from the original tokens altered per the +// instructions given to this rewriter. +func (tsr *TokenStreamRewriter) GetTextDefault() string { + return tsr.GetText( + Default_Program_Name, + NewInterval(0, tsr.tokens.Size()-1)) +} + +// Return the text from the original tokens altered per the +// instructions given to this rewriter. +func (tsr *TokenStreamRewriter) GetText(program_name string, interval *Interval) string { + rewrites := tsr.programs[program_name] + start := interval.Start + stop := interval.Stop + // ensure start/end are in range + stop = min(stop, tsr.tokens.Size()-1) + start = max(start, 0) + if rewrites == nil || len(rewrites) == 0 { + return tsr.tokens.GetTextFromInterval(interval) // no instructions to execute + } + buf := bytes.Buffer{} + // First, optimize instruction stream + indexToOp := reduceToSingleOperationPerIndex(rewrites) + // Walk buffer, executing instructions and emitting tokens + for i := start; i <= stop && i < tsr.tokens.Size(); { + op := indexToOp[i] + delete(indexToOp, i) // remove so any left have index size-1 + t := tsr.tokens.Get(i) + if op == nil { + // no operation at that index, just dump token + if t.GetTokenType() != TokenEOF { + buf.WriteString(t.GetText()) + } + i++ // move to next token + } else { + i = op.Execute(&buf) // execute operation and skip + } + } + // include stuff after end if it's last index in buffer + // So, if they did an insertAfter(lastValidIndex, "foo"), include + // foo if end==lastValidIndex. + if stop == tsr.tokens.Size()-1 { + // Scan any remaining operations after last token + // should be included (they will be inserts). + for _, op := range indexToOp { + if op.GetIndex() >= tsr.tokens.Size()-1 { + buf.WriteString(op.GetText()) + } + } + } + return buf.String() +} + +// We need to combine operations and report invalid operations (like +// overlapping replaces that are not completed nested). Inserts to +// same index need to be combined etc... Here are the cases: +// +// I.i.u I.j.v leave alone, nonoverlapping +// I.i.u I.i.v combine: Iivu +// +// R.i-j.u R.x-y.v | i-j in x-y delete first R +// R.i-j.u R.i-j.v delete first R +// R.i-j.u R.x-y.v | x-y in i-j ERROR +// R.i-j.u R.x-y.v | boundaries overlap ERROR +// +// Delete special case of replace (text==null): +// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) +// +// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before +// we're not deleting i) +// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping +// R.x-y.v I.i.u | i in x-y ERROR +// R.x-y.v I.x.u R.x-y.uv (combine, delete I) +// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping +// +// I.i.u = insert u before op @ index i +// R.x-y.u = replace x-y indexed tokens with u +// +// First we need to examine replaces. For any replace op: +// +// 1. wipe out any insertions before op within that range. +// 2. Drop any replace op before that is contained completely within +// that range. +// 3. Throw exception upon boundary overlap with any previous replace. +// +// Then we can deal with inserts: +// +// 1. for any inserts to same index, combine even if not adjacent. +// 2. for any prior replace with same left boundary, combine this +// insert with replace and delete this replace. +// 3. throw exception if index in same range as previous replace +// +// Don't actually delete; make op null in list. Easier to walk list. +// Later we can throw as we add to index → op map. +// +// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the +// inserted stuff would be before the replace range. But, if you +// add tokens in front of a method body '{' and then delete the method +// body, I think the stuff before the '{' you added should disappear too. +// +// Return a map from token index to operation. +func reduceToSingleOperationPerIndex(rewrites []RewriteOperation) map[int]RewriteOperation { + // WALK REPLACES + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + rop, ok := op.(*ReplaceOp) + if !ok { + continue + } + // Wipe prior inserts within range + for j := 0; j < i && j < len(rewrites); j++ { + if iop, ok := rewrites[j].(*InsertBeforeOp); ok { + if iop.index == rop.index { + // E.g., insert before 2, delete 2..2; update replace + // text to include insert before, kill insert + rewrites[iop.instruction_index] = nil + if rop.text != "" { + rop.text = iop.text + rop.text + } else { + rop.text = iop.text + } + } else if iop.index > rop.index && iop.index <= rop.LastIndex { + // delete insert as it's a no-op. + rewrites[iop.instruction_index] = nil + } + } + } + // Drop any prior replaces contained within + for j := 0; j < i && j < len(rewrites); j++ { + if prevop, ok := rewrites[j].(*ReplaceOp); ok { + if prevop.index >= rop.index && prevop.LastIndex <= rop.LastIndex { + // delete replace as it's a no-op. + rewrites[prevop.instruction_index] = nil + continue + } + // throw exception unless disjoint or identical + disjoint := prevop.LastIndex < rop.index || prevop.index > rop.LastIndex + // Delete special case of replace (text==null): + // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + if prevop.text == "" && rop.text == "" && !disjoint { + rewrites[prevop.instruction_index] = nil + rop.index = min(prevop.index, rop.index) + rop.LastIndex = max(prevop.LastIndex, rop.LastIndex) + println("new rop" + rop.String()) //TODO: remove console write, taken from Java version + } else if !disjoint { + panic("replace op boundaries of " + rop.String() + " overlap with previous " + prevop.String()) + } + } + } + } + // WALK INSERTS + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + //hack to replicate inheritance in composition + _, iok := rewrites[i].(*InsertBeforeOp) + _, aok := rewrites[i].(*InsertAfterOp) + if !iok && !aok { + continue + } + iop := rewrites[i] + // combine current insert with prior if any at same index + // deviating a bit from TokenStreamRewriter.java - hard to incorporate inheritance logic + for j := 0; j < i && j < len(rewrites); j++ { + if nextIop, ok := rewrites[j].(*InsertAfterOp); ok { + if nextIop.index == iop.GetIndex() { + iop.SetText(nextIop.text + iop.GetText()) + rewrites[j] = nil + } + } + if prevIop, ok := rewrites[j].(*InsertBeforeOp); ok { + if prevIop.index == iop.GetIndex() { + iop.SetText(iop.GetText() + prevIop.text) + rewrites[prevIop.instruction_index] = nil + } + } + } + // look for replaces where iop.index is in range; error + for j := 0; j < i && j < len(rewrites); j++ { + if rop, ok := rewrites[j].(*ReplaceOp); ok { + if iop.GetIndex() == rop.index { + rop.text = iop.GetText() + rop.text + rewrites[i] = nil + continue + } + if iop.GetIndex() >= rop.index && iop.GetIndex() <= rop.LastIndex { + panic("insert op " + iop.String() + " within boundaries of previous " + rop.String()) + } + } + } + } + m := map[int]RewriteOperation{} + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + if _, ok := m[op.GetIndex()]; ok { + panic("should only be one op per index") + } + m[op.GetIndex()] = op + } + return m +} + +/* + Quick fixing Go lack of overloads +*/ + +func max(a, b int) int { + if a > b { + return a + } else { + return b + } +} +func min(a, b int) int { + if a < b { + return a + } else { + return b + } +} diff --git a/runtime/Go/antlr/v4/tokenstream_rewriter_test.go b/runtime/Go/antlr/v4/tokenstream_rewriter_test.go new file mode 100644 index 0000000000..a00265128a --- /dev/null +++ b/runtime/Go/antlr/v4/tokenstream_rewriter_test.go @@ -0,0 +1,417 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. +package antlr + +import ( + "fmt" + "strings" + "sync" + "testing" + "unicode" +) + +/* Assume the following grammar for this test. + +lexer grammar LexerA; +A : 'a'; +B : 'b'; +C : 'c'; + +*/ + +func TestInsertBeforeIndex0(t *testing.T) { + input := NewInputStream("abc") + lexer := NewLexerA(input) + stream := NewCommonTokenStream(lexer, 0) + stream.Fill() + tokens := NewTokenStreamRewriter(stream) + tokens.InsertBeforeDefault(0, "0") + result := tokens.GetTextDefault() + if result != "0abc" { + t.Errorf("test failed, got %s", result) + } +} + +func prepare_rewriter(str string) *TokenStreamRewriter { + input := NewInputStream(str) + lexer := NewLexerA(input) + stream := NewCommonTokenStream(lexer, 0) + stream.Fill() + return NewTokenStreamRewriter(stream) +} + +type LexerTest struct { + input string + expected string + description string + expected_exception []string + ops func(*TokenStreamRewriter) +} + +func NewLexerTest(input, expected, desc string, ops func(*TokenStreamRewriter)) LexerTest { + return LexerTest{input: input, expected: expected, description: desc, ops: ops} +} + +func NewLexerExceptionTest(input string, expected_err []string, desc string, ops func(*TokenStreamRewriter)) LexerTest { + return LexerTest{input: input, expected_exception: expected_err, description: desc, ops: ops} +} + +func panic_tester(t *testing.T, expected_msg []string, r *TokenStreamRewriter) { + defer func() { + r := recover() + if r == nil { + t.Errorf("Panic is expected, but finished normally") + } else { + s_e := r.(string) + for _, e := range expected_msg { + if !strings.Contains(s_e, e) { + t.Errorf("Element [%s] is not in error message: [%s]", e, s_e) + } + } + } + }() + r.GetTextDefault() +} + +func TestLexerA(t *testing.T) { + tests := []LexerTest{ + NewLexerTest("abc", "0abc", "InsertBeforeIndex0", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "0") + }), + NewLexerTest("abc", "abcx", "InsertAfterLastIndex", + func(r *TokenStreamRewriter) { + r.InsertAfterDefault(2, "x") + }), + NewLexerTest("abc", "axbxc", "2InsertBeforeAfterMiddleIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertAfterDefault(1, "x") + }), + NewLexerTest("abc", "xbc", "ReplaceIndex0", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(0, "x") + }), + NewLexerTest("abc", "abx", "ReplaceLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + }), + NewLexerTest("abc", "axc", "ReplaceMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + }), + NewLexerTest("abc", "ayc", "2ReplaceMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + r.ReplaceDefaultPos(1, "y") + }), + NewLexerTest("abc", "_ayc", "2ReplaceMiddleIndex1InsertBefore", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "_") + r.ReplaceDefaultPos(1, "x") + r.ReplaceDefaultPos(1, "y") + }), + NewLexerTest("abc", "ac", "ReplaceThenDeleteMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + r.DeleteDefaultPos(1) + }), + NewLexerExceptionTest("abc", []string{"insert op", "within boundaries of previous"}, + "InsertInPriorReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 2, "x") + r.InsertBeforeDefault(1, "0") + }), + NewLexerTest("abc", "0xbc", "InsertThenReplaceSameIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "0") + r.ReplaceDefaultPos(0, "x") + }), + NewLexerTest("abc", "ayxbc", "2InsertMiddleIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(1, "y") + }), + NewLexerTest("abc", "yxzbc", "2InsertThenReplaceIndex0", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "x") + r.InsertBeforeDefault(0, "y") + r.ReplaceDefaultPos(0, "z") + }), + NewLexerTest("abc", "abyx", "ReplaceThenInsertBeforeLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerTest("abc", "abyx", "InsertThenReplaceLastIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(2, "y") + r.ReplaceDefaultPos(2, "x") + }), + NewLexerTest("abc", "abxy", "ReplaceThenInsertAfterLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + r.InsertAfterDefault(2, "y") + }), + NewLexerTest("abcccba", "abyxba", "ReplaceThenInsertAtLeftEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerTest("abcccba", "abyxba", "ReplaceThenInsertAtLeftEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerExceptionTest("abcccba", + []string{"insert op", "InsertBeforeOp", "within boundaries of previous", "ReplaceOp"}, + "ReplaceRangeThenInsertAtRightEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(4, "y") + }), + NewLexerTest("abcccba", "abxyba", "ReplaceRangeThenInsertAfterRightEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertAfterDefault(4, "y") + }), + NewLexerTest("abcccba", "x", "ReplaceAll", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 6, "x") + }), + NewLexerTest("abcccba", "abxyzba", "ReplaceSubsetThenFetch", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + }), + NewLexerExceptionTest("abcccba", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "ReplaceThenReplaceSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + r.ReplaceDefault(3, 5, "foo") + }), + NewLexerExceptionTest("abcccba", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "ReplaceThenReplaceLowerIndexedSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + r.ReplaceDefault(1, 3, "foo") + }), + NewLexerTest("abcba", "fooa", "ReplaceSingleMiddleThenOverlappingSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 2, "xyz") + r.ReplaceDefault(0, 3, "foo") + }), + NewLexerTest("abc", "yxabc", "CombineInserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "x") + r.InsertBeforeDefault(0, "y") + }), + NewLexerTest("abc", "yazxbc", "Combine3Inserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(0, "y") + r.InsertBeforeDefault(1, "z") + }), + NewLexerTest("abc", "zfoo", "CombineInsertOnLeftWithReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 2, "foo") + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abc", "z", "CombineInsertOnLeftWithDelete", + func(r *TokenStreamRewriter) { + r.DeleteDefault(0, 2) + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abc", "zaxbyc", "DisjointInserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(2, "y") + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abcc", "bar", "OverlappingReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(0, 3, "bar") + }), + NewLexerExceptionTest("abcc", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "OverlappingReplace2", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 3, "bar") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abcc", "barc", "OverlappingReplace3", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(0, 2, "bar") + }), + NewLexerTest("abcc", "abar", "OverlappingReplace4", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(1, 3, "bar") + }), + NewLexerTest("abcc", "afooc", "DropIdenticalReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abc", "afoofoo", "DropPrevCoveredInsert", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "foo") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abcc", "axbfoo", "LeaveAloneDisjointInsert", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.ReplaceDefault(2, 3, "foo") + }), + NewLexerTest("abcc", "axbfoo", "LeaveAloneDisjointInsert2", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 3, "foo") + r.InsertBeforeDefault(1, "x") + }), + NewLexerTest("abc", "aby", "InsertBeforeTokenThenDeleteThatToken", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(2, "y") + r.DeleteDefaultPos(2) + }), + NewLexerTest("aa", "aa", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "") + r.InsertAfterDefault(0, "") + r.InsertBeforeDefault(1, "") + r.InsertAfterDefault(1, "") + }), + NewLexerTest("aa", "

a

a", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder2", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "

") + r.InsertBeforeDefault(0, "") + r.InsertAfterDefault(0, "

") + r.InsertAfterDefault(0, "") + r.InsertBeforeDefault(1, "") + r.InsertAfterDefault(1, "") + }), + NewLexerTest("ab", "

a

!b", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder2", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "

") + r.InsertBeforeDefault(0, "") + r.InsertBeforeDefault(0, "

") + r.InsertAfterDefault(0, "

") + r.InsertAfterDefault(0, "
") + r.InsertAfterDefault(0, "
") + r.InsertBeforeDefault(1, "!") + }), + } + + for _, c := range tests { + t.Run(c.description, func(t *testing.T) { + rewriter := prepare_rewriter(c.input) + c.ops(rewriter) + if len(c.expected_exception) > 0 { + panic_tester(t, c.expected_exception, rewriter) + } else { + result := rewriter.GetTextDefault() + if result != c.expected { + t.Errorf("Expected:%s | Result: %s", c.expected, result) + } + } + }) + } +} + +// Suppress unused import error +var _ = fmt.Printf +var _ = sync.Once{} +var _ = unicode.IsLetter + +type LexerA struct { + *BaseLexer + channelNames []string + modeNames []string + // TODO: EOF string +} + +var lexeraLexerStaticData struct { + once sync.Once + serializedATN []int32 + channelNames []string + modeNames []string + literalNames []string + symbolicNames []string + ruleNames []string + predictionContextCache *PredictionContextCache + atn *ATN + decisionToDFA []*DFA +} + +func lexeraLexerInit() { + staticData := &lexeraLexerStaticData + staticData.channelNames = []string{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", + } + staticData.modeNames = []string{ + "DEFAULT_MODE", + } + staticData.literalNames = []string{ + "", "'a'", "'b'", "'c'", + } + staticData.symbolicNames = []string{ + "", "A", "B", "C", + } + staticData.ruleNames = []string{ + "A", "B", "C", + } + staticData.predictionContextCache = NewPredictionContextCache() + staticData.serializedATN = []int32{ + 4, 0, 3, 13, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 2, 1, 2, 0, 0, 3, 1, 1, 3, 2, 5, 3, 1, 0, 0, 12, 0, 1, 1, 0, + 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 1, 7, 1, 0, 0, 0, 3, 9, 1, 0, + 0, 0, 5, 11, 1, 0, 0, 0, 7, 8, 5, 97, 0, 0, 8, 2, 1, 0, 0, 0, 9, 10, 5, + 98, 0, 0, 10, 4, 1, 0, 0, 0, 11, 12, 5, 99, 0, 0, 12, 6, 1, 0, 0, 0, 1, + 0, 0, + } + deserializer := NewATNDeserializer(nil) + staticData.atn = deserializer.Deserialize(staticData.serializedATN) + atn := staticData.atn + staticData.decisionToDFA = make([]*DFA, len(atn.DecisionToState)) + decisionToDFA := staticData.decisionToDFA + for index, state := range atn.DecisionToState { + decisionToDFA[index] = NewDFA(state, index) + } +} + +// LexerAInit initializes any static state used to implement LexerA. By default the +// static state used to implement the lexer is lazily initialized during the first call to +// NewLexerA(). You can call this function if you wish to initialize the static state ahead +// of time. +func LexerAInit() { + staticData := &lexeraLexerStaticData + staticData.once.Do(lexeraLexerInit) +} + +// NewLexerA produces a new lexer instance for the optional input antlr.CharStream. +func NewLexerA(input CharStream) *LexerA { + LexerAInit() + l := new(LexerA) + l.BaseLexer = NewBaseLexer(input) + staticData := &lexeraLexerStaticData + l.Interpreter = NewLexerATNSimulator(l, staticData.atn, staticData.decisionToDFA, staticData.predictionContextCache) + l.channelNames = staticData.channelNames + l.modeNames = staticData.modeNames + l.RuleNames = staticData.ruleNames + l.LiteralNames = staticData.literalNames + l.SymbolicNames = staticData.symbolicNames + l.GrammarFileName = "LexerA.g4" + // TODO: l.EOF = antlr.TokenEOF + + return l +} + +// LexerA tokens. +const ( + LexerAA = 1 + LexerAB = 2 + LexerAC = 3 +) diff --git a/runtime/Go/antlr/v4/trace_listener.go b/runtime/Go/antlr/v4/trace_listener.go new file mode 100644 index 0000000000..7b663bf849 --- /dev/null +++ b/runtime/Go/antlr/v4/trace_listener.go @@ -0,0 +1,32 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +type TraceListener struct { + parser *BaseParser +} + +func NewTraceListener(parser *BaseParser) *TraceListener { + tl := new(TraceListener) + tl.parser = parser + return tl +} + +func (t *TraceListener) VisitErrorNode(_ ErrorNode) { +} + +func (t *TraceListener) EnterEveryRule(ctx ParserRuleContext) { + fmt.Println("enter " + t.parser.GetRuleNames()[ctx.GetRuleIndex()] + ", LT(1)=" + t.parser.input.LT(1).GetText()) +} + +func (t *TraceListener) VisitTerminal(node TerminalNode) { + fmt.Println("consume " + fmt.Sprint(node.GetSymbol()) + " rule " + t.parser.GetRuleNames()[t.parser.ctx.GetRuleIndex()]) +} + +func (t *TraceListener) ExitEveryRule(ctx ParserRuleContext) { + fmt.Println("exit " + t.parser.GetRuleNames()[ctx.GetRuleIndex()] + ", LT(1)=" + t.parser.input.LT(1).GetText()) +} diff --git a/runtime/Go/antlr/v4/transition.go b/runtime/Go/antlr/v4/transition.go new file mode 100644 index 0000000000..36be4f7331 --- /dev/null +++ b/runtime/Go/antlr/v4/transition.go @@ -0,0 +1,428 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +// atom, set, epsilon, action, predicate, rule transitions. +// +//

This is a one way link. It emanates from a state (usually via a list of +// transitions) and has a target state.

+// +//

Since we never have to change the ATN transitions once we construct it, +// the states. We'll use the term Edge for the DFA to distinguish them from +// ATN transitions.

+ +type Transition interface { + getTarget() ATNState + setTarget(ATNState) + getIsEpsilon() bool + getLabel() *IntervalSet + getSerializationType() int + Matches(int, int, int) bool +} + +type BaseTransition struct { + target ATNState + isEpsilon bool + label int + intervalSet *IntervalSet + serializationType int +} + +func NewBaseTransition(target ATNState) *BaseTransition { + + if target == nil { + panic("target cannot be nil.") + } + + t := new(BaseTransition) + + t.target = target + // Are we epsilon, action, sempred? + t.isEpsilon = false + t.intervalSet = nil + + return t +} + +func (t *BaseTransition) getTarget() ATNState { + return t.target +} + +func (t *BaseTransition) setTarget(s ATNState) { + t.target = s +} + +func (t *BaseTransition) getIsEpsilon() bool { + return t.isEpsilon +} + +func (t *BaseTransition) getLabel() *IntervalSet { + return t.intervalSet +} + +func (t *BaseTransition) getSerializationType() int { + return t.serializationType +} + +func (t *BaseTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + panic("Not implemented") +} + +const ( + TransitionEPSILON = 1 + TransitionRANGE = 2 + TransitionRULE = 3 + TransitionPREDICATE = 4 // e.g., {isType(input.LT(1))}? + TransitionATOM = 5 + TransitionACTION = 6 + TransitionSET = 7 // ~(A|B) or ~atom, wildcard, which convert to next 2 + TransitionNOTSET = 8 + TransitionWILDCARD = 9 + TransitionPRECEDENCE = 10 +) + +var TransitionserializationNames = []string{ + "INVALID", + "EPSILON", + "RANGE", + "RULE", + "PREDICATE", + "ATOM", + "ACTION", + "SET", + "NOT_SET", + "WILDCARD", + "PRECEDENCE", +} + +//var TransitionserializationTypes struct { +// EpsilonTransition int +// RangeTransition int +// RuleTransition int +// PredicateTransition int +// AtomTransition int +// ActionTransition int +// SetTransition int +// NotSetTransition int +// WildcardTransition int +// PrecedencePredicateTransition int +//}{ +// TransitionEPSILON, +// TransitionRANGE, +// TransitionRULE, +// TransitionPREDICATE, +// TransitionATOM, +// TransitionACTION, +// TransitionSET, +// TransitionNOTSET, +// TransitionWILDCARD, +// TransitionPRECEDENCE +//} + +// TODO: make all transitions sets? no, should remove set edges +type AtomTransition struct { + *BaseTransition +} + +func NewAtomTransition(target ATNState, intervalSet int) *AtomTransition { + + t := new(AtomTransition) + t.BaseTransition = NewBaseTransition(target) + + t.label = intervalSet // The token type or character value or, signifies special intervalSet. + t.intervalSet = t.makeLabel() + t.serializationType = TransitionATOM + + return t +} + +func (t *AtomTransition) makeLabel() *IntervalSet { + s := NewIntervalSet() + s.addOne(t.label) + return s +} + +func (t *AtomTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return t.label == symbol +} + +func (t *AtomTransition) String() string { + return strconv.Itoa(t.label) +} + +type RuleTransition struct { + *BaseTransition + + followState ATNState + ruleIndex, precedence int +} + +func NewRuleTransition(ruleStart ATNState, ruleIndex, precedence int, followState ATNState) *RuleTransition { + + t := new(RuleTransition) + t.BaseTransition = NewBaseTransition(ruleStart) + + t.ruleIndex = ruleIndex + t.precedence = precedence + t.followState = followState + t.serializationType = TransitionRULE + t.isEpsilon = true + + return t +} + +func (t *RuleTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +type EpsilonTransition struct { + *BaseTransition + + outermostPrecedenceReturn int +} + +func NewEpsilonTransition(target ATNState, outermostPrecedenceReturn int) *EpsilonTransition { + + t := new(EpsilonTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionEPSILON + t.isEpsilon = true + t.outermostPrecedenceReturn = outermostPrecedenceReturn + return t +} + +func (t *EpsilonTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *EpsilonTransition) String() string { + return "epsilon" +} + +type RangeTransition struct { + *BaseTransition + + start, stop int +} + +func NewRangeTransition(target ATNState, start, stop int) *RangeTransition { + + t := new(RangeTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionRANGE + t.start = start + t.stop = stop + t.intervalSet = t.makeLabel() + return t +} + +func (t *RangeTransition) makeLabel() *IntervalSet { + s := NewIntervalSet() + s.addRange(t.start, t.stop) + return s +} + +func (t *RangeTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= t.start && symbol <= t.stop +} + +func (t *RangeTransition) String() string { + var sb strings.Builder + sb.WriteByte('\'') + sb.WriteRune(rune(t.start)) + sb.WriteString("'..'") + sb.WriteRune(rune(t.stop)) + sb.WriteByte('\'') + return sb.String() +} + +type AbstractPredicateTransition interface { + Transition + IAbstractPredicateTransitionFoo() +} + +type BaseAbstractPredicateTransition struct { + *BaseTransition +} + +func NewBasePredicateTransition(target ATNState) *BaseAbstractPredicateTransition { + + t := new(BaseAbstractPredicateTransition) + t.BaseTransition = NewBaseTransition(target) + + return t +} + +func (a *BaseAbstractPredicateTransition) IAbstractPredicateTransitionFoo() {} + +type PredicateTransition struct { + *BaseAbstractPredicateTransition + + isCtxDependent bool + ruleIndex, predIndex int +} + +func NewPredicateTransition(target ATNState, ruleIndex, predIndex int, isCtxDependent bool) *PredicateTransition { + + t := new(PredicateTransition) + t.BaseAbstractPredicateTransition = NewBasePredicateTransition(target) + + t.serializationType = TransitionPREDICATE + t.ruleIndex = ruleIndex + t.predIndex = predIndex + t.isCtxDependent = isCtxDependent // e.g., $i ref in pred + t.isEpsilon = true + return t +} + +func (t *PredicateTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *PredicateTransition) getPredicate() *Predicate { + return NewPredicate(t.ruleIndex, t.predIndex, t.isCtxDependent) +} + +func (t *PredicateTransition) String() string { + return "pred_" + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.predIndex) +} + +type ActionTransition struct { + *BaseTransition + + isCtxDependent bool + ruleIndex, actionIndex, predIndex int +} + +func NewActionTransition(target ATNState, ruleIndex, actionIndex int, isCtxDependent bool) *ActionTransition { + + t := new(ActionTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionACTION + t.ruleIndex = ruleIndex + t.actionIndex = actionIndex + t.isCtxDependent = isCtxDependent // e.g., $i ref in pred + t.isEpsilon = true + return t +} + +func (t *ActionTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *ActionTransition) String() string { + return "action_" + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.actionIndex) +} + +type SetTransition struct { + *BaseTransition +} + +func NewSetTransition(target ATNState, set *IntervalSet) *SetTransition { + + t := new(SetTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionSET + if set != nil { + t.intervalSet = set + } else { + t.intervalSet = NewIntervalSet() + t.intervalSet.addOne(TokenInvalidType) + } + + return t +} + +func (t *SetTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return t.intervalSet.contains(symbol) +} + +func (t *SetTransition) String() string { + return t.intervalSet.String() +} + +type NotSetTransition struct { + *SetTransition +} + +func NewNotSetTransition(target ATNState, set *IntervalSet) *NotSetTransition { + + t := new(NotSetTransition) + + t.SetTransition = NewSetTransition(target, set) + + t.serializationType = TransitionNOTSET + + return t +} + +func (t *NotSetTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol && !t.intervalSet.contains(symbol) +} + +func (t *NotSetTransition) String() string { + return "~" + t.intervalSet.String() +} + +type WildcardTransition struct { + *BaseTransition +} + +func NewWildcardTransition(target ATNState) *WildcardTransition { + + t := new(WildcardTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionWILDCARD + return t +} + +func (t *WildcardTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol +} + +func (t *WildcardTransition) String() string { + return "." +} + +type PrecedencePredicateTransition struct { + *BaseAbstractPredicateTransition + + precedence int +} + +func NewPrecedencePredicateTransition(target ATNState, precedence int) *PrecedencePredicateTransition { + + t := new(PrecedencePredicateTransition) + t.BaseAbstractPredicateTransition = NewBasePredicateTransition(target) + + t.serializationType = TransitionPRECEDENCE + t.precedence = precedence + t.isEpsilon = true + + return t +} + +func (t *PrecedencePredicateTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *PrecedencePredicateTransition) getPredicate() *PrecedencePredicate { + return NewPrecedencePredicate(t.precedence) +} + +func (t *PrecedencePredicateTransition) String() string { + return fmt.Sprint(t.precedence) + " >= _p" +} diff --git a/runtime/Go/antlr/v4/tree.go b/runtime/Go/antlr/v4/tree.go new file mode 100644 index 0000000000..85b4f137b5 --- /dev/null +++ b/runtime/Go/antlr/v4/tree.go @@ -0,0 +1,253 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// The basic notion of a tree has a parent, a payload, and a list of children. +// It is the most abstract interface for all the trees used by ANTLR. +/// + +var TreeInvalidInterval = NewInterval(-1, -2) + +type Tree interface { + GetParent() Tree + SetParent(Tree) + GetPayload() interface{} + GetChild(i int) Tree + GetChildCount() int + GetChildren() []Tree +} + +type SyntaxTree interface { + Tree + + GetSourceInterval() *Interval +} + +type ParseTree interface { + SyntaxTree + + Accept(Visitor ParseTreeVisitor) interface{} + GetText() string + + ToStringTree([]string, Recognizer) string +} + +type RuleNode interface { + ParseTree + + GetRuleContext() RuleContext + GetBaseRuleContext() *BaseRuleContext +} + +type TerminalNode interface { + ParseTree + + GetSymbol() Token +} + +type ErrorNode interface { + TerminalNode + + errorNode() +} + +type ParseTreeVisitor interface { + Visit(tree ParseTree) interface{} + VisitChildren(node RuleNode) interface{} + VisitTerminal(node TerminalNode) interface{} + VisitErrorNode(node ErrorNode) interface{} +} + +type BaseParseTreeVisitor struct{} + +var _ ParseTreeVisitor = &BaseParseTreeVisitor{} + +func (v *BaseParseTreeVisitor) Visit(tree ParseTree) interface{} { return tree.Accept(v) } +func (v *BaseParseTreeVisitor) VisitChildren(node RuleNode) interface{} { return nil } +func (v *BaseParseTreeVisitor) VisitTerminal(node TerminalNode) interface{} { return nil } +func (v *BaseParseTreeVisitor) VisitErrorNode(node ErrorNode) interface{} { return nil } + +// TODO +//func (this ParseTreeVisitor) Visit(ctx) { +// if (Utils.isArray(ctx)) { +// self := this +// return ctx.map(function(child) { return VisitAtom(self, child)}) +// } else { +// return VisitAtom(this, ctx) +// } +//} +// +//func VisitAtom(Visitor, ctx) { +// if (ctx.parser == nil) { //is terminal +// return +// } +// +// name := ctx.parser.ruleNames[ctx.ruleIndex] +// funcName := "Visit" + Utils.titleCase(name) +// +// return Visitor[funcName](ctx) +//} + +type ParseTreeListener interface { + VisitTerminal(node TerminalNode) + VisitErrorNode(node ErrorNode) + EnterEveryRule(ctx ParserRuleContext) + ExitEveryRule(ctx ParserRuleContext) +} + +type BaseParseTreeListener struct{} + +var _ ParseTreeListener = &BaseParseTreeListener{} + +func (l *BaseParseTreeListener) VisitTerminal(node TerminalNode) {} +func (l *BaseParseTreeListener) VisitErrorNode(node ErrorNode) {} +func (l *BaseParseTreeListener) EnterEveryRule(ctx ParserRuleContext) {} +func (l *BaseParseTreeListener) ExitEveryRule(ctx ParserRuleContext) {} + +type TerminalNodeImpl struct { + parentCtx RuleContext + + symbol Token +} + +var _ TerminalNode = &TerminalNodeImpl{} + +func NewTerminalNodeImpl(symbol Token) *TerminalNodeImpl { + tn := new(TerminalNodeImpl) + + tn.parentCtx = nil + tn.symbol = symbol + + return tn +} + +func (t *TerminalNodeImpl) GetChild(i int) Tree { + return nil +} + +func (t *TerminalNodeImpl) GetChildren() []Tree { + return nil +} + +func (t *TerminalNodeImpl) SetChildren(tree []Tree) { + panic("Cannot set children on terminal node") +} + +func (t *TerminalNodeImpl) GetSymbol() Token { + return t.symbol +} + +func (t *TerminalNodeImpl) GetParent() Tree { + return t.parentCtx +} + +func (t *TerminalNodeImpl) SetParent(tree Tree) { + t.parentCtx = tree.(RuleContext) +} + +func (t *TerminalNodeImpl) GetPayload() interface{} { + return t.symbol +} + +func (t *TerminalNodeImpl) GetSourceInterval() *Interval { + if t.symbol == nil { + return TreeInvalidInterval + } + tokenIndex := t.symbol.GetTokenIndex() + return NewInterval(tokenIndex, tokenIndex) +} + +func (t *TerminalNodeImpl) GetChildCount() int { + return 0 +} + +func (t *TerminalNodeImpl) Accept(v ParseTreeVisitor) interface{} { + return v.VisitTerminal(t) +} + +func (t *TerminalNodeImpl) GetText() string { + return t.symbol.GetText() +} + +func (t *TerminalNodeImpl) String() string { + if t.symbol.GetTokenType() == TokenEOF { + return "" + } + + return t.symbol.GetText() +} + +func (t *TerminalNodeImpl) ToStringTree(s []string, r Recognizer) string { + return t.String() +} + +// Represents a token that was consumed during reSynchronization +// rather than during a valid Match operation. For example, +// we will create this kind of a node during single token insertion +// and deletion as well as during "consume until error recovery set" +// upon no viable alternative exceptions. + +type ErrorNodeImpl struct { + *TerminalNodeImpl +} + +var _ ErrorNode = &ErrorNodeImpl{} + +func NewErrorNodeImpl(token Token) *ErrorNodeImpl { + en := new(ErrorNodeImpl) + en.TerminalNodeImpl = NewTerminalNodeImpl(token) + return en +} + +func (e *ErrorNodeImpl) errorNode() {} + +func (e *ErrorNodeImpl) Accept(v ParseTreeVisitor) interface{} { + return v.VisitErrorNode(e) +} + +type ParseTreeWalker struct { +} + +func NewParseTreeWalker() *ParseTreeWalker { + return new(ParseTreeWalker) +} + +// Performs a walk on the given parse tree starting at the root and going down recursively +// with depth-first search. On each node, EnterRule is called before +// recursively walking down into child nodes, then +// ExitRule is called after the recursive call to wind up. +func (p *ParseTreeWalker) Walk(listener ParseTreeListener, t Tree) { + switch tt := t.(type) { + case ErrorNode: + listener.VisitErrorNode(tt) + case TerminalNode: + listener.VisitTerminal(tt) + default: + p.EnterRule(listener, t.(RuleNode)) + for i := 0; i < t.GetChildCount(); i++ { + child := t.GetChild(i) + p.Walk(listener, child) + } + p.ExitRule(listener, t.(RuleNode)) + } +} + +// Enters a grammar rule by first triggering the generic event {@link ParseTreeListener//EnterEveryRule} +// then by triggering the event specific to the given parse tree node +func (p *ParseTreeWalker) EnterRule(listener ParseTreeListener, r RuleNode) { + ctx := r.GetRuleContext().(ParserRuleContext) + listener.EnterEveryRule(ctx) + ctx.EnterRule(listener) +} + +// Exits a grammar rule by first triggering the event specific to the given parse tree node +// then by triggering the generic event {@link ParseTreeListener//ExitEveryRule} +func (p *ParseTreeWalker) ExitRule(listener ParseTreeListener, r RuleNode) { + ctx := r.GetRuleContext().(ParserRuleContext) + ctx.ExitRule(listener) + listener.ExitEveryRule(ctx) +} + +var ParseTreeWalkerDefault = NewParseTreeWalker() diff --git a/runtime/Go/antlr/v4/trees.go b/runtime/Go/antlr/v4/trees.go new file mode 100644 index 0000000000..d7dbb03228 --- /dev/null +++ b/runtime/Go/antlr/v4/trees.go @@ -0,0 +1,138 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +/** A set of utility routines useful for all kinds of ANTLR trees. */ + +// Print out a whole tree in LISP form. {@link //getNodeText} is used on the +// +// node payloads to get the text for the nodes. Detect +// parse trees and extract data appropriately. +func TreesStringTree(tree Tree, ruleNames []string, recog Recognizer) string { + + if recog != nil { + ruleNames = recog.GetRuleNames() + } + + s := TreesGetNodeText(tree, ruleNames, nil) + + s = EscapeWhitespace(s, false) + c := tree.GetChildCount() + if c == 0 { + return s + } + res := "(" + s + " " + if c > 0 { + s = TreesStringTree(tree.GetChild(0), ruleNames, nil) + res += s + } + for i := 1; i < c; i++ { + s = TreesStringTree(tree.GetChild(i), ruleNames, nil) + res += (" " + s) + } + res += ")" + return res +} + +func TreesGetNodeText(t Tree, ruleNames []string, recog Parser) string { + if recog != nil { + ruleNames = recog.GetRuleNames() + } + + if ruleNames != nil { + switch t2 := t.(type) { + case RuleNode: + t3 := t2.GetRuleContext() + altNumber := t3.GetAltNumber() + + if altNumber != ATNInvalidAltNumber { + return fmt.Sprintf("%s:%d", ruleNames[t3.GetRuleIndex()], altNumber) + } + return ruleNames[t3.GetRuleIndex()] + case ErrorNode: + return fmt.Sprint(t2) + case TerminalNode: + if t2.GetSymbol() != nil { + return t2.GetSymbol().GetText() + } + } + } + + // no recog for rule names + payload := t.GetPayload() + if p2, ok := payload.(Token); ok { + return p2.GetText() + } + + return fmt.Sprint(t.GetPayload()) +} + +// Return ordered list of all children of this node +func TreesGetChildren(t Tree) []Tree { + list := make([]Tree, 0) + for i := 0; i < t.GetChildCount(); i++ { + list = append(list, t.GetChild(i)) + } + return list +} + +// Return a list of all ancestors of this node. The first node of +// +// list is the root and the last is the parent of this node. +func TreesgetAncestors(t Tree) []Tree { + ancestors := make([]Tree, 0) + t = t.GetParent() + for t != nil { + f := []Tree{t} + ancestors = append(f, ancestors...) + t = t.GetParent() + } + return ancestors +} + +func TreesFindAllTokenNodes(t ParseTree, ttype int) []ParseTree { + return TreesfindAllNodes(t, ttype, true) +} + +func TreesfindAllRuleNodes(t ParseTree, ruleIndex int) []ParseTree { + return TreesfindAllNodes(t, ruleIndex, false) +} + +func TreesfindAllNodes(t ParseTree, index int, findTokens bool) []ParseTree { + nodes := make([]ParseTree, 0) + treesFindAllNodes(t, index, findTokens, &nodes) + return nodes +} + +func treesFindAllNodes(t ParseTree, index int, findTokens bool, nodes *[]ParseTree) { + // check this node (the root) first + + t2, ok := t.(TerminalNode) + t3, ok2 := t.(ParserRuleContext) + + if findTokens && ok { + if t2.GetSymbol().GetTokenType() == index { + *nodes = append(*nodes, t2) + } + } else if !findTokens && ok2 { + if t3.GetRuleIndex() == index { + *nodes = append(*nodes, t3) + } + } + // check children + for i := 0; i < t.GetChildCount(); i++ { + treesFindAllNodes(t.GetChild(i).(ParseTree), index, findTokens, nodes) + } +} + +func TreesDescendants(t ParseTree) []ParseTree { + nodes := []ParseTree{t} + for i := 0; i < t.GetChildCount(); i++ { + nodes = append(nodes, TreesDescendants(t.GetChild(i).(ParseTree))...) + } + return nodes +} diff --git a/runtime/Go/antlr/v4/utils.go b/runtime/Go/antlr/v4/utils.go new file mode 100644 index 0000000000..9fad5d916b --- /dev/null +++ b/runtime/Go/antlr/v4/utils.go @@ -0,0 +1,352 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "errors" + "fmt" + "math/bits" + "strconv" + "strings" +) + +func intMin(a, b int) int { + if a < b { + return a + } + return b +} + +func intMax(a, b int) int { + if a > b { + return a + } + return b +} + +// A simple integer stack + +type IntStack []int + +var ErrEmptyStack = errors.New("Stack is empty") + +func (s *IntStack) Pop() (int, error) { + l := len(*s) - 1 + if l < 0 { + return 0, ErrEmptyStack + } + v := (*s)[l] + *s = (*s)[0:l] + return v, nil +} + +func (s *IntStack) Push(e int) { + *s = append(*s, e) +} + +type comparable interface { + Equals(other Collectable[any]) bool +} + +func standardEqualsFunction(a Collectable[any], b Collectable[any]) bool { + + return a.Equals(b) +} + +func standardHashFunction(a interface{}) int { + if h, ok := a.(hasher); ok { + return h.Hash() + } + + panic("Not Hasher") +} + +type hasher interface { + Hash() int +} + +const bitsPerWord = 64 + +func indexForBit(bit int) int { + return bit / bitsPerWord +} + +func wordForBit(data []uint64, bit int) uint64 { + idx := indexForBit(bit) + if idx >= len(data) { + return 0 + } + return data[idx] +} + +func maskForBit(bit int) uint64 { + return uint64(1) << (bit % bitsPerWord) +} + +func wordsNeeded(bit int) int { + return indexForBit(bit) + 1 +} + +type BitSet struct { + data []uint64 +} + +func NewBitSet() *BitSet { + return &BitSet{} +} + +func (b *BitSet) add(value int) { + idx := indexForBit(value) + if idx >= len(b.data) { + size := wordsNeeded(value) + data := make([]uint64, size) + copy(data, b.data) + b.data = data + } + b.data[idx] |= maskForBit(value) +} + +func (b *BitSet) clear(index int) { + idx := indexForBit(index) + if idx >= len(b.data) { + return + } + b.data[idx] &= ^maskForBit(index) +} + +func (b *BitSet) or(set *BitSet) { + // Get min size necessary to represent the bits in both sets. + bLen := b.minLen() + setLen := set.minLen() + maxLen := intMax(bLen, setLen) + if maxLen > len(b.data) { + // Increase the size of len(b.data) to repesent the bits in both sets. + data := make([]uint64, maxLen) + copy(data, b.data) + b.data = data + } + // len(b.data) is at least setLen. + for i := 0; i < setLen; i++ { + b.data[i] |= set.data[i] + } +} + +func (b *BitSet) remove(value int) { + b.clear(value) +} + +func (b *BitSet) contains(value int) bool { + idx := indexForBit(value) + if idx >= len(b.data) { + return false + } + return (b.data[idx] & maskForBit(value)) != 0 +} + +func (b *BitSet) minValue() int { + for i, v := range b.data { + if v == 0 { + continue + } + return i*bitsPerWord + bits.TrailingZeros64(v) + } + return 2147483647 +} + +func (b *BitSet) equals(other interface{}) bool { + otherBitSet, ok := other.(*BitSet) + if !ok { + return false + } + + if b == otherBitSet { + return true + } + + // We only compare set bits, so we cannot rely on the two slices having the same size. Its + // possible for two BitSets to have different slice lengths but the same set bits. So we only + // compare the relevant words and ignore the trailing zeros. + bLen := b.minLen() + otherLen := otherBitSet.minLen() + + if bLen != otherLen { + return false + } + + for i := 0; i < bLen; i++ { + if b.data[i] != otherBitSet.data[i] { + return false + } + } + + return true +} + +func (b *BitSet) minLen() int { + for i := len(b.data); i > 0; i-- { + if b.data[i-1] != 0 { + return i + } + } + return 0 +} + +func (b *BitSet) length() int { + cnt := 0 + for _, val := range b.data { + cnt += bits.OnesCount64(val) + } + return cnt +} + +func (b *BitSet) String() string { + vals := make([]string, 0, b.length()) + + for i, v := range b.data { + for v != 0 { + n := bits.TrailingZeros64(v) + vals = append(vals, strconv.Itoa(i*bitsPerWord+n)) + v &= ^(uint64(1) << n) + } + } + + return "{" + strings.Join(vals, ", ") + "}" +} + +type AltDict struct { + data map[string]interface{} +} + +func NewAltDict() *AltDict { + d := new(AltDict) + d.data = make(map[string]interface{}) + return d +} + +func (a *AltDict) Get(key string) interface{} { + key = "k-" + key + return a.data[key] +} + +func (a *AltDict) put(key string, value interface{}) { + key = "k-" + key + a.data[key] = value +} + +func (a *AltDict) values() []interface{} { + vs := make([]interface{}, len(a.data)) + i := 0 + for _, v := range a.data { + vs[i] = v + i++ + } + return vs +} + +type DoubleDict struct { + data map[int]map[int]interface{} +} + +func NewDoubleDict() *DoubleDict { + dd := new(DoubleDict) + dd.data = make(map[int]map[int]interface{}) + return dd +} + +func (d *DoubleDict) Get(a, b int) interface{} { + data := d.data[a] + + if data == nil { + return nil + } + + return data[b] +} + +func (d *DoubleDict) set(a, b int, o interface{}) { + data := d.data[a] + + if data == nil { + data = make(map[int]interface{}) + d.data[a] = data + } + + data[b] = o +} + +func EscapeWhitespace(s string, escapeSpaces bool) string { + + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + if escapeSpaces { + s = strings.Replace(s, " ", "\u00B7", -1) + } + return s +} + +func TerminalNodeToStringArray(sa []TerminalNode) []string { + st := make([]string, len(sa)) + + for i, s := range sa { + st[i] = fmt.Sprintf("%v", s) + } + + return st +} + +func PrintArrayJavaStyle(sa []string) string { + var buffer bytes.Buffer + + buffer.WriteString("[") + + for i, s := range sa { + buffer.WriteString(s) + if i != len(sa)-1 { + buffer.WriteString(", ") + } + } + + buffer.WriteString("]") + + return buffer.String() +} + +// murmur hash +func murmurInit(seed int) int { + return seed +} + +func murmurUpdate(h int, value int) int { + const c1 uint32 = 0xCC9E2D51 + const c2 uint32 = 0x1B873593 + const r1 uint32 = 15 + const r2 uint32 = 13 + const m uint32 = 5 + const n uint32 = 0xE6546B64 + + k := uint32(value) + k *= c1 + k = (k << r1) | (k >> (32 - r1)) + k *= c2 + + hash := uint32(h) ^ k + hash = (hash << r2) | (hash >> (32 - r2)) + hash = hash*m + n + return int(hash) +} + +func murmurFinish(h int, numberOfWords int) int { + var hash = uint32(h) + hash ^= uint32(numberOfWords) << 2 + hash ^= hash >> 16 + hash *= 0x85ebca6b + hash ^= hash >> 13 + hash *= 0xc2b2ae35 + hash ^= hash >> 16 + + return int(hash) +} diff --git a/runtime/Go/antlr/v4/utils_set.go b/runtime/Go/antlr/v4/utils_set.go new file mode 100644 index 0000000000..c9bd6751e3 --- /dev/null +++ b/runtime/Go/antlr/v4/utils_set.go @@ -0,0 +1,235 @@ +package antlr + +import "math" + +const ( + _initalCapacity = 16 + _initalBucketCapacity = 8 + _loadFactor = 0.75 +) + +type Set interface { + Add(value interface{}) (added interface{}) + Len() int + Get(value interface{}) (found interface{}) + Contains(value interface{}) bool + Values() []interface{} + Each(f func(interface{}) bool) +} + +type array2DHashSet struct { + buckets [][]Collectable[any] + hashcodeFunction func(interface{}) int + equalsFunction func(Collectable[any], Collectable[any]) bool + + n int // How many elements in set + threshold int // when to expand + + currentPrime int // jump by 4 primes each expand or whatever + initialBucketCapacity int +} + +func (as *array2DHashSet) Each(f func(interface{}) bool) { + if as.Len() < 1 { + return + } + + for _, bucket := range as.buckets { + for _, o := range bucket { + if o == nil { + break + } + if !f(o) { + return + } + } + } +} + +func (as *array2DHashSet) Values() []interface{} { + if as.Len() < 1 { + return nil + } + + values := make([]interface{}, 0, as.Len()) + as.Each(func(i interface{}) bool { + values = append(values, i) + return true + }) + return values +} + +func (as *array2DHashSet) Contains(value Collectable[any]) bool { + return as.Get(value) != nil +} + +func (as *array2DHashSet) Add(value Collectable[any]) interface{} { + if as.n > as.threshold { + as.expand() + } + return as.innerAdd(value) +} + +func (as *array2DHashSet) expand() { + old := as.buckets + + as.currentPrime += 4 + + var ( + newCapacity = len(as.buckets) << 1 + newTable = as.createBuckets(newCapacity) + newBucketLengths = make([]int, len(newTable)) + ) + + as.buckets = newTable + as.threshold = int(float64(newCapacity) * _loadFactor) + + for _, bucket := range old { + if bucket == nil { + continue + } + + for _, o := range bucket { + if o == nil { + break + } + + b := as.getBuckets(o) + bucketLength := newBucketLengths[b] + var newBucket []Collectable[any] + if bucketLength == 0 { + // new bucket + newBucket = as.createBucket(as.initialBucketCapacity) + newTable[b] = newBucket + } else { + newBucket = newTable[b] + if bucketLength == len(newBucket) { + // expand + newBucketCopy := make([]Collectable[any], len(newBucket)<<1) + copy(newBucketCopy[:bucketLength], newBucket) + newBucket = newBucketCopy + newTable[b] = newBucket + } + } + + newBucket[bucketLength] = o + newBucketLengths[b]++ + } + } +} + +func (as *array2DHashSet) Len() int { + return as.n +} + +func (as *array2DHashSet) Get(o Collectable[any]) interface{} { + if o == nil { + return nil + } + + b := as.getBuckets(o) + bucket := as.buckets[b] + if bucket == nil { // no bucket + return nil + } + + for _, e := range bucket { + if e == nil { + return nil // empty slot; not there + } + if as.equalsFunction(e, o) { + return e + } + } + + return nil +} + +func (as *array2DHashSet) innerAdd(o Collectable[any]) interface{} { + b := as.getBuckets(o) + + bucket := as.buckets[b] + + // new bucket + if bucket == nil { + bucket = as.createBucket(as.initialBucketCapacity) + bucket[0] = o + + as.buckets[b] = bucket + as.n++ + return o + } + + // look for it in bucket + for i := 0; i < len(bucket); i++ { + existing := bucket[i] + if existing == nil { // empty slot; not there, add. + bucket[i] = o + as.n++ + return o + } + + if as.equalsFunction(existing, o) { // found existing, quit + return existing + } + } + + // full bucket, expand and add to end + oldLength := len(bucket) + bucketCopy := make([]Collectable[any], oldLength<<1) + copy(bucketCopy[:oldLength], bucket) + bucket = bucketCopy + as.buckets[b] = bucket + bucket[oldLength] = o + as.n++ + return o +} + +func (as *array2DHashSet) getBuckets(value Collectable[any]) int { + hash := as.hashcodeFunction(value) + return hash & (len(as.buckets) - 1) +} + +func (as *array2DHashSet) createBuckets(cap int) [][]Collectable[any] { + return make([][]Collectable[any], cap) +} + +func (as *array2DHashSet) createBucket(cap int) []Collectable[any] { + return make([]Collectable[any], cap) +} + +func newArray2DHashSetWithCap( + hashcodeFunction func(interface{}) int, + equalsFunction func(Collectable[any], Collectable[any]) bool, + initCap int, + initBucketCap int, +) *array2DHashSet { + if hashcodeFunction == nil { + hashcodeFunction = standardHashFunction + } + + if equalsFunction == nil { + equalsFunction = standardEqualsFunction + } + + ret := &array2DHashSet{ + hashcodeFunction: hashcodeFunction, + equalsFunction: equalsFunction, + + n: 0, + threshold: int(math.Floor(_initalCapacity * _loadFactor)), + + currentPrime: 1, + initialBucketCapacity: initBucketCap, + } + + ret.buckets = ret.createBuckets(initCap) + return ret +} + +func newArray2DHashSet( + hashcodeFunction func(interface{}) int, + equalsFunction func(Collectable[any], Collectable[any]) bool, +) *array2DHashSet { + return newArray2DHashSetWithCap(hashcodeFunction, equalsFunction, _initalCapacity, _initalBucketCapacity) +} diff --git a/runtime/Go/antlr/v4/utils_test.go b/runtime/Go/antlr/v4/utils_test.go new file mode 100644 index 0000000000..ed274ef339 --- /dev/null +++ b/runtime/Go/antlr/v4/utils_test.go @@ -0,0 +1,62 @@ +package antlr + +import "testing" + +func testBitSet(t *testing.T, bs *BitSet, str string, length int, contains []int, minValue int, minLen int) { + t.Helper() + if got, want := bs.String(), str; got != want { + t.Errorf("%+v.String() = %q, want %q", bs, got, want) + } + if got, want := bs.length(), length; got != want { + t.Errorf("%+v.length() = %q, want %q", bs, got, want) + } + for i := 0; i < len(bs.data)*bitsPerWord; i++ { + var want bool + for _, val := range contains { + if i == val { + want = true + break + } + } + if got := bs.contains(i); got != want { + t.Errorf("%+v.contains(%v) = %v, want %v", bs, i, got, want) + } + } + if got, want := bs.minValue(), minValue; got != want { + t.Errorf("%+v.minValue() = %v, want %v", bs, got, want) + } + if got, want := bs.minLen(), minLen; got != want { + t.Errorf("%+v.minLen() = %v, want %v", bs, got, want) + } +} + +func TestBitSet(t *testing.T) { + bs1 := NewBitSet() + testBitSet(t, bs1, "{}", 0, []int{}, 2147483647, 0) + bs1.add(0) + testBitSet(t, bs1, "{0}", 1, []int{0}, 0, 1) + bs1.add(63) + testBitSet(t, bs1, "{0, 63}", 2, []int{0, 63}, 0, 1) + bs1.remove(0) + testBitSet(t, bs1, "{63}", 1, []int{63}, 63, 1) + bs1.add(20) + testBitSet(t, bs1, "{20, 63}", 2, []int{20, 63}, 20, 1) + bs1.clear(63) + testBitSet(t, bs1, "{20}", 1, []int{20}, 20, 1) + bs2 := NewBitSet() + bs2.add(64) + bs1.or(bs2) + testBitSet(t, bs1, "{20, 64}", 2, []int{20, 64}, 20, 2) + bs1.remove(20) + testBitSet(t, bs1, "{64}", 1, []int{64}, 64, 2) + bs3 := NewBitSet() + bs3.add(63) + bs1.or(bs3) + testBitSet(t, bs1, "{63, 64}", 2, []int{63, 64}, 63, 2) + bs1.clear(64) + bs4 := NewBitSet() + bs4.or(bs1) + if got, want := bs4.equals(bs1), true; got != want { + t.Errorf("%+v.equals(%+v) = %v, want %v", bs4, bs1, got, want) + } +} diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg index d23802d0ca..2426add0aa 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg @@ -16,7 +16,7 @@ import ( "strconv" "sync" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" ) @@ -45,7 +45,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" // Listener is a complete listener for a parse tree produced by . type Listener interface { @@ -69,7 +69,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" // BaseListener is a complete listener for a parse tree produced by . type BaseListener struct{} @@ -105,7 +105,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4"
@@ -131,7 +131,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" type BaseVisitor struct { *antlr.BaseParseTreeVisitor @@ -1399,7 +1399,7 @@ import ( "sync" "unicode" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" )