Skip to content

Commit

Permalink
Merge pull request #3243 from jjeffcaii/fix_hashset_and_hash
Browse files Browse the repository at this point in the history
fix: use 32bit murmur3 same with java, implement array2dhashset which…
  • Loading branch information
parrt authored Sep 24, 2021
2 parents ce3c483 + 860620a commit 1f7fee2
Show file tree
Hide file tree
Showing 16 changed files with 566 additions and 343 deletions.
1 change: 1 addition & 0 deletions contributors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,5 +307,6 @@ YYYY/MM/DD, github id, Full name, email
2021/07/29, ksyx, Qixing Xue, qixingxue@outlook.com
2021/07/29, rachidlamouri, Rachid Lamouri, rachidlamouri@gmail.com
2021/08/02, minjoosur, Minjoo Sur, msur@salesforce.com
2021/08/05, jjeffcaii, Jeff Tsai, caiweiwei.cww@alibaba-inc.com
2021/08/08, ansiemens, Yi-Hong Lin, ansiemens@gmail.com
2021/09/08, jmcken8, Joel McKenzie, joel.b.mckenzie@gmail.com
2 changes: 1 addition & 1 deletion runtime/Go/antlr/atn_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ func (l *LexerATNConfig) hash() int {
f = 0
}
h := murmurInit(7)
h = murmurUpdate(h, l.state.hash())
h = murmurUpdate(h, l.state.GetStateNumber())
h = murmurUpdate(h, l.alt)
h = murmurUpdate(h, l.context.hash())
h = murmurUpdate(h, l.semanticContext.hash())
Expand Down
70 changes: 45 additions & 25 deletions runtime/Go/antlr/atn_config_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type ATNConfigSet interface {
Add(ATNConfig, *DoubleDict) bool
AddAll([]ATNConfig) bool

GetStates() *Set
GetStates() Set
GetPredicates() []SemanticContext
GetItems() []ATNConfig

Expand All @@ -35,6 +35,8 @@ type ATNConfigSet interface {
GetConflictingAlts() *BitSet
SetConflictingAlts(*BitSet)

Alts() *BitSet

FullContext() bool

GetUniqueAlt() int
Expand All @@ -55,7 +57,7 @@ type BaseATNConfigSet struct {
// effectively doubles the number of objects associated with ATNConfigs. All
// keys are hashed by (s, i, _, pi), not including the context. Wiped out when
// read-only because a set becomes a DFA state.
configLookup *Set
configLookup Set

// configs is the added elements.
configs []ATNConfig
Expand Down Expand Up @@ -91,11 +93,19 @@ type BaseATNConfigSet struct {
uniqueAlt int
}

// Alts returns a new BitSet containing the alternative number (GetAlt) of
// every config currently stored in b.configs.
func (b *BaseATNConfigSet) Alts() *BitSet {
	result := NewBitSet()
	for idx := 0; idx < len(b.configs); idx++ {
		result.add(b.configs[idx].GetAlt())
	}
	return result
}

func NewBaseATNConfigSet(fullCtx bool) *BaseATNConfigSet {
return &BaseATNConfigSet{
cachedHash: -1,
configLookup: NewSet(nil, equalATNConfigs),
fullCtx: fullCtx,
cachedHash: -1,
configLookup: NewArray2DHashSetWithCap(hashATNConfig, equalATNConfigs, 16, 2),
fullCtx: fullCtx,
}
}

Expand All @@ -116,12 +126,11 @@ func (b *BaseATNConfigSet) Add(config ATNConfig, mergeCache *DoubleDict) bool {
b.dipsIntoOuterContext = true
}

existing := b.configLookup.add(config).(ATNConfig)
existing := b.configLookup.Add(config).(ATNConfig)

if existing == config {
b.cachedHash = -1
b.configs = append(b.configs, config) // Track order here

return true
}

Expand All @@ -145,11 +154,11 @@ func (b *BaseATNConfigSet) Add(config ATNConfig, mergeCache *DoubleDict) bool {
return true
}

func (b *BaseATNConfigSet) GetStates() *Set {
states := NewSet(nil, nil)
func (b *BaseATNConfigSet) GetStates() Set {
states := NewArray2DHashSet(nil, nil)

for i := 0; i < len(b.configs); i++ {
states.add(b.configs[i].GetState())
states.Add(b.configs[i].GetState())
}

return states
Expand Down Expand Up @@ -186,7 +195,7 @@ func (b *BaseATNConfigSet) OptimizeConfigs(interpreter *BaseATNSimulator) {
panic("set is read-only")
}

if b.configLookup.length() == 0 {
if b.configLookup.Len() == 0 {
return
}

Expand Down Expand Up @@ -236,13 +245,11 @@ func (b *BaseATNConfigSet) hash() int {
}

func (b *BaseATNConfigSet) hashCodeConfigs() int {
h := murmurInit(1)
for _, c := range b.configs {
if c != nil {
h = murmurUpdate(h, c.hash())
}
h := 1
for _, config := range b.configs {
h = 31*h + config.hash()
}
return murmurFinish(h, len(b.configs))
return h
}

func (b *BaseATNConfigSet) Length() int {
Expand All @@ -258,15 +265,15 @@ func (b *BaseATNConfigSet) Contains(item ATNConfig) bool {
panic("not implemented for read-only sets")
}

return b.configLookup.contains(item)
return b.configLookup.Contains(item)
}

func (b *BaseATNConfigSet) ContainsFast(item ATNConfig) bool {
if b.configLookup == nil {
panic("not implemented for read-only sets")
}

return b.configLookup.contains(item) // TODO: containsFast is not implemented for Set
return b.configLookup.Contains(item) // TODO: containsFast is not implemented for Set
}

func (b *BaseATNConfigSet) Clear() {
Expand All @@ -276,7 +283,7 @@ func (b *BaseATNConfigSet) Clear() {

b.configs = make([]ATNConfig, 0)
b.cachedHash = -1
b.configLookup = NewSet(nil, equalATNConfigs)
b.configLookup = NewArray2DHashSet(nil, equalATNConfigs)
}

func (b *BaseATNConfigSet) FullContext() bool {
Expand Down Expand Up @@ -358,11 +365,20 @@ type OrderedATNConfigSet struct {
func NewOrderedATNConfigSet() *OrderedATNConfigSet {
b := NewBaseATNConfigSet(false)

b.configLookup = NewSet(nil, nil)
b.configLookup = NewArray2DHashSet(nil, nil)

return &OrderedATNConfigSet{BaseATNConfigSet: b}
}

// hashATNConfig computes the lookup hash of an ATNConfig from its state
// number, its alt and the hash of its semantic context, combined with a
// seed of 7 and a multiplier of 31. The argument must hold an ATNConfig;
// any other dynamic type makes the type assertion panic.
func hashATNConfig(i interface{}) int {
	cfg := i.(ATNConfig)

	// Components are evaluated in the order they are folded into the hash.
	parts := [...]int{
		cfg.GetState().GetStateNumber(),
		cfg.GetAlt(),
		cfg.GetSemanticContext().hash(),
	}

	h := 7
	for _, p := range parts {
		h = 31*h + p
	}
	return h
}

func equalATNConfigs(a, b interface{}) bool {
if a == nil || b == nil {
return false
Expand All @@ -379,9 +395,13 @@ func equalATNConfigs(a, b interface{}) bool {
return false
}

nums := ai.GetState().GetStateNumber() == bi.GetState().GetStateNumber()
alts := ai.GetAlt() == bi.GetAlt()
cons := ai.GetSemanticContext().equals(bi.GetSemanticContext())
if ai.GetState().GetStateNumber() != bi.GetState().GetStateNumber() {
return false
}

if ai.GetAlt() != bi.GetAlt() {
return false
}

return nums && alts && cons
return ai.GetSemanticContext().equals(bi.GetSemanticContext())
}
8 changes: 7 additions & 1 deletion runtime/Go/antlr/dfa_serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package antlr
import (
"fmt"
"strconv"
"strings"
)

// DFASerializer is a DFA walker that knows how to dump them to serialized
Expand Down Expand Up @@ -112,7 +113,12 @@ func NewLexerDFASerializer(dfa *DFA) *LexerDFASerializer {
}

func (l *LexerDFASerializer) getEdgeLabel(i int) string {
return "'" + string(i) + "'"
var sb strings.Builder
sb.Grow(6)
sb.WriteByte('\'')
sb.WriteRune(rune(i))
sb.WriteByte('\'')
return sb.String()
}

func (l *LexerDFASerializer) String() string {
Expand Down
35 changes: 10 additions & 25 deletions runtime/Go/antlr/dfa_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ type DFAState struct {

// edges elements point to the target of the symbol. Shift up by 1 so (-1)
// Token.EOF maps to the first element.
edges []*DFAState
edgesMu sync.RWMutex
edges []*DFAState
edgesMu sync.RWMutex

isAcceptState bool

Expand Down Expand Up @@ -92,16 +92,16 @@ func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState {
}

// GetAltSet gets the set of all alts mentioned by all ATN configurations in d.
func (d *DFAState) GetAltSet() *Set {
alts := NewSet(nil, nil)
func (d *DFAState) GetAltSet() Set {
alts := NewArray2DHashSet(nil, nil)

if d.configs != nil {
for _, c := range d.configs.GetItems() {
alts.add(c.GetAlt())
alts.Add(c.GetAlt())
}
}

if alts.length() == 0 {
if alts.Len() == 0 {
return nil
}

Expand Down Expand Up @@ -173,26 +173,11 @@ func (d *DFAState) String() string {
}
}

return fmt.Sprintf("%d:%s%s", fmt.Sprint(d.configs), s)
return fmt.Sprintf("%d:%s%s", d.stateNumber, fmt.Sprint(d.configs), s)
}

func (d *DFAState) hash() int {
h := murmurInit(11)

c := 1
if d.isAcceptState {
if d.predicates != nil {
for _, p := range d.predicates {
h = murmurUpdate(h, p.alt)
h = murmurUpdate(h, p.pred.hash())
c += 2
}
} else {
h = murmurUpdate(h, d.prediction)
c += 1
}
}

h := murmurInit(7)
h = murmurUpdate(h, d.configs.hash())
return murmurFinish(h, c)
}
return murmurFinish(h, 1)
}
16 changes: 14 additions & 2 deletions runtime/Go/antlr/interval_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,16 +226,28 @@ func (i *IntervalSet) StringVerbose(literalNames []string, symbolicNames []strin
func (i *IntervalSet) toCharString() string {
names := make([]string, len(i.intervals))

var sb strings.Builder

for j := 0; j < len(i.intervals); j++ {
v := i.intervals[j]
if v.Stop == v.Start+1 {
if v.Start == TokenEOF {
names = append(names, "<EOF>")
} else {
names = append(names, ("'" + string(v.Start) + "'"))
sb.WriteByte('\'')
sb.WriteRune(rune(v.Start))
sb.WriteByte('\'')
names = append(names, sb.String())
sb.Reset()
}
} else {
names = append(names, "'"+string(v.Start)+"'..'"+string(v.Stop-1)+"'")
sb.WriteByte('\'')
sb.WriteRune(rune(v.Start))
sb.WriteString("'..'")
sb.WriteRune(rune(v.Stop - 1))
sb.WriteByte('\'')
names = append(names, sb.String())
sb.Reset()
}
}
if len(names) > 1 {
Expand Down
3 changes: 1 addition & 2 deletions runtime/Go/antlr/lexer_action.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,9 @@ func (l *LexerIndexedCustomAction) execute(lexer Lexer) {

func (l *LexerIndexedCustomAction) hash() int {
h := murmurInit(0)
h = murmurUpdate(h, l.actionType)
h = murmurUpdate(h, l.offset)
h = murmurUpdate(h, l.lexerAction.hash())
return murmurFinish(h, 3)
return murmurFinish(h, 2)
}

func (l *LexerIndexedCustomAction) equals(other LexerAction) bool {
Expand Down
13 changes: 10 additions & 3 deletions runtime/Go/antlr/lexer_atn_simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package antlr
import (
"fmt"
"strconv"
"strings"
)

var (
Expand Down Expand Up @@ -206,7 +207,7 @@ func (l *LexerATNSimulator) getExistingTargetState(s *DFAState, t int) *DFAState
return nil
}

target := s.getIthEdge(t-LexerATNSimulatorMinDFAEdge)
target := s.getIthEdge(t - LexerATNSimulatorMinDFAEdge)
if LexerATNSimulatorDebug && target != nil {
fmt.Println("reuse state " + strconv.Itoa(s.stateNumber) + " edge to " + strconv.Itoa(target.stateNumber))
}
Expand Down Expand Up @@ -299,7 +300,7 @@ func (l *LexerATNSimulator) getReachableConfigSet(input CharStream, closure ATNC

func (l *LexerATNSimulator) accept(input CharStream, lexerActionExecutor *LexerActionExecutor, startIndex, index, line, charPos int) {
if LexerATNSimulatorDebug {
fmt.Printf("ACTION %s\n", lexerActionExecutor)
fmt.Printf("ACTION %v\n", lexerActionExecutor)
}
// seek to after last char in token
input.Seek(index)
Expand Down Expand Up @@ -630,7 +631,13 @@ func (l *LexerATNSimulator) GetTokenName(tt int) string {
return "EOF"
}

return "'" + string(tt) + "'"
var sb strings.Builder
sb.Grow(6)
sb.WriteByte('\'')
sb.WriteRune(rune(tt))
sb.WriteByte('\'')

return sb.String()
}

func resetSimState(sim *SimState) {
Expand Down
Loading

0 comments on commit 1f7fee2

Please sign in to comment.