Skip to content

Commit

Permalink
Merge pull request #22 from jakopako:jakopako/issue10
Browse files: browse the repository at this point in the history.
Parallelize crawling
  • Loading branch information
jakopako authored Jan 17, 2022
2 parents ffdf66e + d4e6d2e commit f365217
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions main.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"regexp"
"sort"
"strings"
"sync"
"time"

"github.com/PuerkitoBio/goquery"
Expand Down Expand Up @@ -319,7 +320,9 @@ func extractStringRegex(rc *RegexConfig, s string) (string, error) {
return extractedString, nil
}

func writeEventsToAPI(c Crawler) {
func writeEventsToAPI(wg *sync.WaitGroup, c Crawler) {
log.Printf("Crawling %s\n", c.Name)
defer wg.Done()
apiUrl := os.Getenv("EVENT_API")
client := &http.Client{
Timeout: time.Second * 10,
Expand Down Expand Up @@ -372,9 +375,11 @@ func writeEventsToAPI(c Crawler) {

}
}
log.Printf("Done crawling and writing %s data to API.\n", c.Name)
}

func prettyPrintEvents(c Crawler) {
func prettyPrintEvents(wg *sync.WaitGroup, c Crawler) {
defer wg.Done()
events, err := c.getEvents()
if err != nil {
log.Fatal(err)
Expand Down Expand Up @@ -472,22 +477,27 @@ func main() {
log.Fatal(err)
}

var wg sync.WaitGroup

for _, c := range config.Crawlers {
if *singleCrawler != "" {
if *singleCrawler == c.Name {
wg.Add(1)
if *storeData {
writeEventsToAPI(c)
writeEventsToAPI(&wg, c)
} else {
prettyPrintEvents(c)
prettyPrintEvents(&wg, c)
}
break
}
} else {
wg.Add(1)
if *storeData {
writeEventsToAPI(c)
go writeEventsToAPI(&wg, c)
} else {
prettyPrintEvents(c)
go prettyPrintEvents(&wg, c)
}
}
}
wg.Wait()
}

0 comments on commit f365217

Please sign in to comment.