Skip to content

Commit

Permalink
Merge pull request #74 from jakopako:jakopako/issue61
Browse files Browse the repository at this point in the history
Add Volkshaus
  • Loading branch information
jakopako authored Mar 11, 2022
2 parents d0b8c54 + ca79c7e commit 6fff327
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 14 deletions.
57 changes: 56 additions & 1 deletion concerts-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1178,4 +1178,59 @@ scrapers:
exp: "[0-9]{2}:[0-9]{2}"
index: 0
layout: "15:04"
date_location: "Europe/Berlin"
date_location: "Europe/Berlin"

# Scraper entry for the Volkshaus event listing page.
# NOTE(review): presumably every element matching `item` yields one scraped event — confirm against the scraper.
- name: Volkshaus
url: https://www.volkshaus.ch/aktuell
item: ".event"
fields:
# Fixed name/value pairs; no selector is involved for these.
static:
- name: "location"
value: "Volkshaus"
- name: "city"
value: "Zurich"
- name: "type"
value: "concert"
# Fields whose values are located on the page via CSS selectors.
dynamic:
- name: "title"
location:
selector: ".event__title span"
# Referenced by the first filter below ("(?i)konzert").
- name: "event-type"
location:
selector: ".event_type"
# Unlike event-status, `hide` is left commented out for this field.
# hide: true
# Referenced by the last two filters below; may be empty and is marked hidden.
- name: "event-status"
location:
selector: ".event_links span.inbetweenSpace"
can_be_empty: true
hide: true
- name: "url"
type: "url"
location:
selector: ".event_links a"
# A single component covers day, month, year and time at once.
- name: "date"
type: "date"
components:
- covers:
day: true
month: true
year: true
time: true
location:
selector: ".date"
# The value is read from the element's `content` attribute rather than its text.
attr: "content"
# "[^+]*" matches the leading run of characters up to (not including) the first '+'.
# NOTE(review): presumably this strips a "+HH:MM" offset so the value fits the zone-less layout — confirm.
# The scraper.go change in this same commit passes attribute values through extractStringRegex.
regex_extract:
exp: "[^+]*"
index: 0
# Go reference-time layout: date and time separated by 'T', no zone part.
layout: "2006-01-02T15:04:05"
date_location: "Europe/Berlin"
# NOTE(review): judging by the filterItem loop in this commit, the running result stays true only
# while every `match: true` filter matches its field and every `match: false` filter does not;
# the function's return statement is not visible here — confirm.
filters:
# Case-insensitive search for "konzert" (German: concert) in event-type.
- field: "event-type"
regex: "(?i)konzert"
match: true
# "VERSCHOBEN" is German for "postponed".
- field: "event-status"
regex: "VERSCHOBEN.*"
match: false
# "ABGESAGT" is German for "cancelled".
- field: "event-status"
regex: "ABGESAGT"
match: false
23 changes: 10 additions & 13 deletions scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,27 +246,20 @@ func (c Scraper) GetItems() ([]map[string]interface{}, error) {
}

func (c *Scraper) filterItem(item map[string]interface{}) (bool, error) {
if len(c.Filters) == 0 {
return true, nil
}
filterBool := false
// if len(c.Filters) == 0 {
// return true, nil
// }
filterBool := true
for _, filter := range c.Filters {
regex, err := regexp.Compile(filter.Regex)
if err != nil {
return false, err
}
if fieldValue, found := item[filter.Field]; found {
if regex.MatchString(fmt.Sprint(fieldValue)) {
if !filter.Match {
// as soon as one filter says 'remove item' we return false
// and hence the item doesn't make it into the result list
return false, nil
}
filterBool = true
filterBool = filterBool && filter.Match
} else {
if !filter.Match {
filterBool = true
}
filterBool = filterBool && !filter.Match
}
}
}
Expand Down Expand Up @@ -484,6 +477,10 @@ func getTextString(t *ElementLocation, s *goquery.Selection) (string, error) {
}
} else {
fieldString = fieldSelection.AttrOr(t.Attr, "")
fieldString, err = extractStringRegex(&t.RegexExtract, fieldString)
if err != nil {
return fieldString, err
}
}
}
// automatically trimming whitespace might be confusing in some cases...
Expand Down

0 comments on commit 6fff327

Please sign in to comment.