Skip to content

Commit

Permalink
Merge pull request #1 from jakopako/config-crawler
Browse files Browse the repository at this point in the history
Config crawler
  • Loading branch information
jakopako authored Dec 26, 2021
2 parents 12ef22f + cb55ddb commit 2b72c47
Show file tree
Hide file tree
Showing 18 changed files with 33,587 additions and 909 deletions.
204 changes: 204 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
crawlers:
# Helsinki (Zurich): the date is assembled from separate day and month nodes;
# no year or time component is configured for this site.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; confirm against the crawler config struct.
- name: Helsinki
  type: concert
  url: "https://www.helsinkiklub.ch"
  city: "Zurich"
  # Selector for one event node; the field selectors below are listed per event.
  event: ".event .header"
  fields:
    title:
      # Only the first option that results in a non-empty title is taken
      # into account.
      locs:
      - ".agenda .top"
      - ".agenda .support"
    comment:
      # Only the first option that results in a non-empty comment is taken
      # into account.
      locs:
      - ".agenda .top .addition"
      - ".agenda .support .addition"
    date:
      day:
        loc: ".day"
        layout: "2"
      month:
        loc: ".month"
        # The layout has to be in English, even though the site language
        # configured below is German.
        layout: "January"
      location: "Europe/Berlin"
      language: "de_DE"

# Mehrspur (Zurich): title and event link come from the same anchor node; the
# date is split into a combined day/month node and a separate time node.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; confirm against the crawler config struct.
- name: Mehrspur
  type: concert
  url: "https://www.mehrspur.ch/veranstaltungen"
  city: "Zurich"
  event: ".post-holder"
  fields:
    title:
      locs:
      - ".block_under_title a"
    comment:
      locs:
      - ".event-excerpt-fluid"
    url:
      loc: ".block_under_title a"
    date:
      day_month:
        loc: ".event-date"
        layout: "Mon 2.Jan."
      time:
        loc: ".event-time"
        layout: "15:04"
      location: "Europe/Berlin"
      language: "de_DE"

# Umbo (Zurich): a single node carries the full date and time; no `language`
# is configured for this entry.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; confirm against the crawler config struct.
- name: Umbo
  type: concert
  url: "https://www.umbo.wtf"
  city: "Zurich"
  event: ".w-dyn-item"
  fields:
    title:
      locs:
      - ".text-block-21"
    comment:
      locs:
      - ".text-block-28"
    url:
      loc: ".w-inline-block"
      # Presumably marks the extracted href as relative to the site url above;
      # TODO confirm.
      relative: true
    date:
      day_month_year_time:
        loc: ".text-block-26"
        layout: "2.1.2006 15:04"
      location: "Europe/Berlin"

# Sender (Zurich): the comment is listed in `on_subpage`, and the date uses a
# 12-hour English layout with location "GMT".
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; in particular confirm that `max_length`
# belongs to `comment` and `on_subpage` belongs to `url`.
- name: Sender
  type: concert
  url: "https://gds.fm/SENDER"
  city: "Zurich"
  event: ".event-list__item"
  fields:
    title:
      locs:
      - ".event-preview__title"
    comment:
      locs:
      - ".event-detail__content div p"
      max_length: 200
    url:
      # An empty string means that we look in the event node itself for an href.
      loc: ""
      relative: true
      on_subpage: ["comment"]
    date:
      day_month_year_time:
        loc: ".event-preview__date-long"
        layout: "January 02, 2006, 03:04 PM"
      location: "GMT"
      language: "en_US"

# Unterfahrt (Munich): date and time are both read from the same node and
# separated from the surrounding text with one regex per component.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; confirm against the crawler config struct.
- name: Unterfahrt
  type: concert
  url: "https://www.unterfahrt.de/programm.php"
  city: "Munich"
  event: ".b-events__el"
  fields:
    title:
      locs:
      - ".b-events__el-title"
    comment:
      locs:
      - ".b-events__el-text"
    url:
      loc: ".o-btn-hex__link"
      relative: true
    date:
      day_month_year:
        loc: ".b-events__el-date"
        layout: "Mon•2•1•2006"
        # In case the node contains more than just the desired string the
        # substring can be determined with a regex.
        regex:
          exp: "[A-Za-z]{2}•[0-9]{1,2}•[0-9]{1,2}•[0-9]{4}"
          index: 0
      time:
        loc: ".b-events__el-date"
        layout: "15••04"
        regex:
          exp: "[0-9]{2}••[0-9]{2}"
          # Presumably -1 selects the last match; TODO confirm.
          index: -1
      location: "Europe/Berlin"
      language: "de_DE"

# Moods (Zurich): comment and date are listed in `on_subpage`; the start time
# is taken from the last node matching its selector.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; in particular confirm that `on_subpage`
# belongs to `url` and `max_length` belongs to `comment`.
- name: Moods
  type: concert
  url: "https://www.moods.club/en/?a=1"
  city: "Zurich"
  event: ".event"
  fields:
    url:
      loc: "a"
      relative: true
      on_subpage: ["comment", "date"]
    title:
      locs:
      - "h2"
    comment:
      locs:
      - ".left .content"
      max_length: 200
    date:
      day:
        loc: ".date .day"
        layout: "2"
      month:
        loc: ".date .month_name"
        layout: "Jan"
      time:
        loc: ".right .time"
        layout: "Start: 15:04"
        # -1 means the last node.
        node_index: -1
      location: "Europe/Berlin"
      language: "en_US"

# ImportExport (Munich): the date is listed in `on_subpage`; date and time
# share one selector and are told apart by `node_index`.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; in particular confirm that `on_subpage`
# belongs to `url`.
- name: ImportExport
  type: concert
  url: "https://import-export.cc/"
  city: "Munich"
  event: ".events a"
  # Events that have a node with this class will be excluded from the results.
  exclude: ".old"
  fields:
    url:
      loc: ""
      on_subpage: ["date"]
    title:
      locs:
      - ".io-title"
    date:
      day_month_year:
        loc: ".event .event-info .io-big"
        layout: "Mon. 2.1.06"
        node_index: 0
      time:
        loc: ".event .event-info .io-big"
        # We need this in case there are multiple nodes that would be selected
        # with the given loc.
        node_index: 3
        layout: "Beginn: 15.04"
      location: "Europe/Berlin"
      language: "de_DE"

# Milla (Munich): day, month and year are each read from their own node; no
# time component is configured. Title and link come from the same anchor.
# NOTE(review): the nesting below was reconstructed from key order because the
# indentation was lost in this copy; confirm against the crawler config struct.
- name: Milla
  type: concert
  city: "Munich"
  url: "https://www.milla-club.de/category/event/"
  event: ".preview-box-outline"
  fields:
    url:
      loc: ".post-title a"
    title:
      locs:
      - ".post-title a"
    date:
      day:
        loc: ".post-date-day"
        layout: "02"
      month:
        loc: ".post-date-month"
        layout: "Jan"
      year:
        loc: ".post-date-year"
        layout: "2006"
      location: "Europe/Berlin"
      language: "de_DE"
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ module github.com/jakopako/croncert
go 1.16

// Module requirements. Entries tagged `// indirect` are not imported directly
// by this module's own packages; the untagged entries are direct dependencies.
require (
github.com/MontFerret/cli v1.1.0 // indirect
github.com/PuerkitoBio/goquery v1.8.0
github.com/goodsign/monday v1.0.0
github.com/kr/text v0.2.0 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.4.0
)
Loading

0 comments on commit 2b72c47

Please sign in to comment.