From dd41e20140f5a015edd53c39278d5f8620724c33 Mon Sep 17 00:00:00 2001 From: jakopako Date: Tue, 4 Jan 2022 19:39:15 +0100 Subject: [PATCH] Make url attribute customizable Fixes #7 --- config.yml | 30 +++++++++++++++++++++++++++++- main.go | 18 +++++++++++++----- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/config.yml b/config.yml index a528d0c..23d8302 100644 --- a/config.yml +++ b/config.yml @@ -217,4 +217,32 @@ crawlers: loc: ".commingupEventsList_block4" layout: "15Uhr04" location: "Europe/Berlin" - language: "de_DE" \ No newline at end of file + language: "de_DE" + + - name: Mascotte + type: concert + city: Zurich + url: "https://www.mascotte.ch/nu/events/event_list_type/2" + event: ".nu-e-concert" + fields: + title: + loc: ".screen-only h2" + comment: + loc: ".nu-e-subt" + url: + loc: ".nu-e-link-share" + attr: "data-a2a-url" + date: + day_month_year: + loc: ".screen-only .nu-e-date" + layout: "2.1.06" + regex: + exp: "([0-9]{1,2}\\.){2}[0-9]{2}" + index: 0 + time: + loc: ".nu-e-time-age" + layout: "15.04" + regex: + exp: "[0-9]{2}\\.[0-9]{2}" + index: 1 + location: "Europe/Berlin" diff --git a/main.go b/main.go index fd28bb8..d70d0ad 100644 --- a/main.go +++ b/main.go @@ -204,10 +204,14 @@ func extractField(item string, s *goquery.Selection, crawler *Crawler, event *Ev event.Comment = getFieldString(&crawler.Fields.Comment, s) case "url": var url string + attr := "href" + if crawler.Fields.URL.Attr != "" { + attr = crawler.Fields.URL.Attr + } if crawler.Fields.URL.Loc == "" { - url = s.AttrOr("href", crawler.URL) + url = s.AttrOr(attr, crawler.URL) } else { - url = s.Find(crawler.Fields.URL.Loc).AttrOr("href", crawler.URL) + url = s.Find(crawler.Fields.URL.Loc).AttrOr(attr, crawler.URL) } if crawler.Fields.URL.Relative { @@ -257,9 +261,12 @@ func getFieldString(f *Field, s *goquery.Selection) string { var fieldString string fieldSelection := s.Find(f.Loc) if len(fieldSelection.Nodes) > 0 { - fieldString = fieldSelection.Get(f.NodeIndex).FirstChild.Data - if f.MaxLength > 0 && f.MaxLength < len(fieldString) { - return fieldString[:f.MaxLength] + "..." + fieldNode := fieldSelection.Get(f.NodeIndex).FirstChild + if fieldNode.Type == html.TextNode { + fieldString = fieldSelection.Get(f.NodeIndex).FirstChild.Data + if f.MaxLength > 0 && f.MaxLength < len(fieldString) { + return fieldString[:f.MaxLength] + "..." + } } } return fieldString @@ -366,6 +373,7 @@ type Crawler struct { Loc string `yaml:"loc"` Relative bool `yaml:"relative"` OnSubpage []string `yaml:"on_subpage"` + Attr string `yaml:"attr"` } `yaml:"url"` Date struct { Day DateLocator `yaml:"day"`