Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump sidekiq from 7.1.2 to 7.1.3 #894

Closed
wants to merge 10 commits into from
4 changes: 2 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ GEM
redcarpet (3.6.0)
redis (5.0.7)
redis-client (>= 0.9.0)
redis-client (0.16.0)
redis-client (0.17.0)
connection_pool
regexp_parser (2.8.1)
responders (3.1.0)
Expand Down Expand Up @@ -628,7 +628,7 @@ GEM
rdf-xsd (~> 3.2)
sparql (~> 3.2)
sxp (~> 1.2)
sidekiq (7.1.2)
sidekiq (7.1.3)
concurrent-ruby (< 2)
connection_pool (>= 2.3.0)
rack (>= 2.2.4)
Expand Down
52 changes: 52 additions & 0 deletions lib/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Scrapes an agenda page (HTML) and registers one event per listed card.
  #
  # Relies on helpers that are not defined in this class (+open_url+,
  # +add_event+) and on the +@messages+ collection; these are presumably
  # provided by the +Ingestor+ superclass.
  class DccIngestor < Ingestor
    # Static description of this ingestor.
    #
    # @return [Hash] the +:key+, +:title+ and +:category+ of this ingestor
    def self.config
      {
        key: 'dcc_event',
        title: 'DCC Events API',
        category: :events
      }
    end

    # Scrapes the page at +url+ and collects the events found on it.
    # Errors are not raised to the caller; they are appended to +@messages+.
    #
    # @param url [#to_s] address of the agenda page
    # @return [nil] always
    def read(url)
      process_dcc(url)
      nil
    rescue StandardError => e
      # StandardError rather than Exception, so that interrupts and
      # SystemExit are not swallowed by the scraper.
      @messages << "#{self.class.name} failed with: #{e.message}"
      nil
    end

    private

    # Fetches and parses the agenda page, then handles every event card.
    # A failure on one card is logged and does not stop the remaining cards.
    #
    # @param url [#to_s] address of the agenda page
    def process_dcc(url)
      page = Nokogiri::HTML5.parse(open_url(url.to_s, raise: true))
      cards = page.css("div[class='archive__content grid']")[0]
                  .css("div[class='column span-4-sm span-8-md span-6-lg']")

      cards.each do |event_data|
        add_event(build_event(event_data))
      rescue StandardError => e
        @messages << "Extract event fields failed with: #{e.message}"
      end
    end

    # Builds the event described by a single card of the agenda page.
    #
    # @param event_data [Nokogiri::XML::Node] one event card
    # @return [OpenStruct] the populated event
    def build_event(event_data)
      event = OpenStruct.new

      link = event_data.css("h2[class='post-item__title h5']")[0].css('a')[0]
      event.url = link.get_attribute('href')
      event.title = link.text.strip

      meta_items = event_data.css("ul[class='post-item__meta']")[0].css('li')

      # The first meta item holds the start and the end, separated by an em dash.
      start_str = meta_items[0].text.strip.split('—')
      start_time = Time.zone.parse(start_str[0])
      event.start = start_time
      # Only the time of day of the second part is used; the end is placed on
      # the same calendar day as the start.
      event.end = start_time.beginning_of_day + Time.zone.parse(start_str[1]).seconds_since_midnight.seconds

      event.venue = meta_items[1].text.strip

      event.source = 'DCC'
      event.timezone = 'Amsterdam'
      event.set_default_times

      event
    end
  end
end
3 changes: 3 additions & 0 deletions lib/ingestors/ingestor_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def self.ingestors
Ingestors::UhasseltIngestor,
Ingestors::OdisseiIngestor,
Ingestors::RstIngestor,
Ingestors::OsciIngestor,
Ingestors::DccIngestor,
Ingestors::SenseIngestor
]
end

Expand Down
79 changes: 79 additions & 0 deletions lib/ingestors/osci_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Scrapes twelve monthly calendar pages of osc-international.com and
  # registers every event found in them.
  #
  # Relies on helpers that are not defined in this class (+open_url+,
  # +add_event+) and on the +@messages+ collection; these are presumably
  # provided by the +Ingestor+ superclass.
  class OsciIngestor < Ingestor
    # Static description of this ingestor.
    #
    # @return [Hash] the +:key+, +:title+ and +:category+ of this ingestor
    def self.config
      {
        key: 'osci',
        title: 'OSCI Events API',
        category: :events
      }
    end

    # Scrapes the calendar and collects the events found.
    # Errors are not raised to the caller; they are appended to +@messages+.
    #
    # @param url [Object] passed on to +process_osci+, which does not use it
    # @return [nil] always
    def read(url)
      process_osci(url)
      nil
    rescue StandardError => e
      # StandardError rather than Exception, so that interrupts and
      # SystemExit are not swallowed by the scraper.
      @messages << "#{self.class.name} failed with: #{e.message}"
      nil
    end

    private

    # Walks over months 1..12. Months before the current one are requested
    # for next year, so the window always covers the coming twelve months.
    #
    # @param url [Object] currently unused; the calendar address is hard-coded
    def process_osci(url)
      now = Time.zone.now
      (1..12).each do |month|
        # Wait between requests, except in the test environment when the
        # cassette file exists.
        replaying_cassette = Rails.env.test? && File.exist?('test/vcr_cassettes/ingestors/osci.yml')
        sleep(1) unless replaying_cassette

        year = month >= now.month ? now.year : now.year + 1
        scrape_url = "https://osc-international.com/my-calendar/?format=calendar&month=#{month}&yr=#{year}"
        calendar = Nokogiri::HTML5.parse(open_url(scrape_url.to_s, raise: true))
        day_cells = calendar.css("div[id='my-calendar']")[0].css('tbody')[0].css('td')

        day_cells.each do |day_cell|
          next if day_cell.get_attribute('class').include?('no-events')

          day_cell.css('div[id*=calendar-my-calendar]').each do |entry|
            add_event(build_event(entry, scrape_url))
          end
        rescue StandardError => e
          # A failure anywhere in a day cell skips the rest of that cell only.
          @messages << "Extract event fields failed with: #{e.message}"
        end
      end
    end

    # Builds the event described by one calendar entry.
    #
    # @param entry [Nokogiri::XML::Node] the entry inside a day cell
    # @param scrape_url [String] address of the month page the entry came from
    # @return [OpenStruct] the populated event
    def build_event(entry, scrape_url)
      event = OpenStruct.new

      heading = entry.css("h3[class='event-title summary']")[0]
      anchor = heading.css('a')[0].get_attribute('href')
      event.url = scrape_url + anchor

      # The anchor (minus '#') is the id of the element holding the details.
      details = entry.css("div[id='#{anchor.delete('#')}']")[0]
      event.title = details.css("h4[class='mc-title']")[0].text.strip
      event.venue = details.css("div[class='mc-location']")[0].css("strong[class='location-link']")[0].text.strip

      starts_at, ends_at = event_times(details.css("div[class='time-block']")[0])

      # parsed datetimes are always 2 hours off
      # NOTE(review): a fixed offset looks like a time zone correction that
      # would not hold all year round - confirm.
      event.start = starts_at + 2.hours
      event.end = ends_at + 2.hours

      event.source = 'OSCI'
      event.timezone = 'Amsterdam'
      event.set_default_times

      event
    end

    # Reads the start and end of an event from its time block.
    #
    # @param time_block [Nokogiri::XML::Node] the "time-block" element
    # @return [Array] start and end, in that order
    def event_times(time_block)
      timed_start = time_block.css("span[class='event-time dtstart']")
      if timed_start.count.positive?
        starts_at = Time.zone.parse(timed_start[0].css('time')[0].get_attribute('datetime'))
        ends_at = Time.zone.parse(
          time_block.css("span[class='end-time dtend']")[0].css('time')[0].get_attribute('datetime')
        )
        return [starts_at, ends_at]
      end

      starts_at = Time.zone.parse(time_block.css("span[class='mc-start-date dtstart']")[0].get_attribute('content'))
      timed_end = time_block.css("span[class='event-time dtend']")
      # Without an end element the event ends when it starts.
      ends_at = timed_end.count.positive? ? Time.zone.parse(timed_end[0].text.strip) : starts_at
      [starts_at, ends_at]
    end
  end
end
106 changes: 106 additions & 0 deletions lib/ingestors/sense_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
require 'open-uri'
require 'csv'
require 'nokogiri'

module Ingestors
  # Scrapes the first two event listing pages of sense.nl, follows the link
  # of every listed event and registers the event described on that page.
  #
  # Relies on helpers that are not defined in this class: +open_url+ and
  # +add_event+ (presumably provided by the +Ingestor+ superclass) and
  # +date_parsing+, which is defined at the top level of this file.
  class SenseIngestor < Ingestor
    # Static description of this ingestor.
    #
    # @return [Hash] the +:key+, +:title+ and +:category+ of this ingestor
    def self.config
      {
        key: 'sense_event',
        title: 'Sense Events API',
        category: :events
      }
    end

    # Scrapes the listing pages and collects the events found.
    # Errors are not raised to the caller; they are appended to +@messages+.
    #
    # @param url [Object] passed on to +process_sense+, which does not use it
    # @return [nil] always
    def read(url)
      process_sense(url)
      nil
    rescue StandardError => e
      # StandardError rather than Exception, so that interrupts and
      # SystemExit are not swallowed by the scraper.
      @messages << "#{self.class.name} failed with: #{e.message}"
      nil
    end

    private

    # Walks over listing pages 1 and 2 and handles every event box on them.
    # A failure on one event is logged and does not stop the remaining events.
    #
    # @param url [Object] currently unused; the listing address is hard-coded
    def process_sense(url)
      (1..2).each do |page_number|
        page_url = "https://sense.nl/event/page/#{page_number}"

        # Wait between requests, except in the test environment when the
        # cassette file exists.
        replaying_cassette = Rails.env.test? && File.exist?('test/vcr_cassettes/ingestors/sense.yml')
        sleep(1) unless replaying_cassette

        listing = Nokogiri::HTML5.parse(open_url(page_url.to_s, raise: true))
        boxes = listing.css("div[class='event-list-part']")[0].css("div[class='upcoming-event-box']")

        boxes.each do |event_data|
          add_event(build_event(event_data))
        rescue StandardError => e
          @messages << "Extract event fields failed with: #{e.message}"
        end
      end
    end

    # Builds an event from a listing box by fetching the linked detail page.
    #
    # @param event_data [Nokogiri::XML::Node] one event box of a listing page
    # @return [OpenStruct] the populated event
    def build_event(event_data)
      event = OpenStruct.new
      event.url = event_data.css('a')[0].get_attribute('href')

      banner = Nokogiri::HTML5.parse(open_url(event.url.to_s, raise: true))
                               .css("div[class='news-banner-content']")[0]
      event.title = banner.css('h1')[0].text.strip

      # Collect the labelled values; labels other than these three are ignored.
      meta = {}
      banner.css("ul[class='dissertation-meta-info']")[0].css('li').each do |li|
        label = li.css('label').text.strip
        meta[label] = li.css('span').text.strip if %w[Date Time Location].include?(label)
      end

      event.venue = meta['Location']
      # meta['Time'] is nil when the page lists no time.
      times = date_parsing(meta['Date'], meta['Time'])
      event.start = times[0]
      event.end = times[1]

      event.source = 'Sense'
      event.timezone = 'Amsterdam'
      event.set_default_times

      event
    end
  end
end

# Turns a scraped date string and an optional time string into a start/end pair.
#
# @param date [String] the date text
# @param time [String, nil] the time text, or nil when no time is known
# @return [Array] whatever the selected helper returns (start and end)
def date_parsing(date, time)
  # Without a time only the date text can be interpreted.
  return date_parsing_without_time(date) if time.nil?

  date_parsing_with_time(date, time)
end

# Combines one calendar date with a "start-end" time range.
#
# Only the hour and minute of each parsed time are used; the year, month and
# day always come from +date+. When +time+ holds a single time (no '-'), the
# end equals the start instead of failing.
#
# @param date [String] text accepted by Date.parse
# @param time [String] one time, or two times separated by '-'
# @return [Array<DateTime>] start and end, in that order
def date_parsing_with_time(date, time)
  day = Date.parse(date)
  clock_times = time.split('-').map { |part| Time.zone.parse(part) }
  start_clock = clock_times[0]
  # No end time given: treat the event as ending when it starts.
  end_clock = clock_times[1] || start_clock

  [start_clock, end_clock].map do |clock|
    DateTime.new(day.year, day.month, day.day, clock.hour, clock.min)
  end
end

# Interprets a date or date range that comes without a time of day.
#
# The start is placed at 09:00 and the end at 17:00. In a range such as
# "12 - 14 June 2024" the start may omit trailing parts; each missing part is
# taken from the same position of the end date. A single date (no '-') is
# treated as a one-day event instead of failing.
#
# @param date [String] one date, or two dates separated by '-'
# @return [Array<DateTime>] start and end, in that order
def date_parsing_without_time(date)
  parts = date.split('-')
  end_part = parts[1] || parts[0]
  # Raises early when the end date cannot be read as a date.
  Date.parse(end_part)

  end_list = end_part.strip.split(' ')
  start_tokens = parts[0].strip.split(' ')
  # At least three positions, each falling back to the end date's token.
  start_list = Array.new([start_tokens.size, 3].max) { |i| start_tokens[i] || end_list[i] }

  first_day = Time.zone.parse(start_list.join(' '))
  last_day = Time.zone.parse(end_list.join(' '))

  [
    DateTime.new(first_day.year, first_day.month, first_day.day, 9),
    DateTime.new(last_day.year, last_day.month, last_day.day, 17)
  ]
end
58 changes: 58 additions & 0 deletions test/unit/ingestors/dcc_ingestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
require 'test_helper'

# Checks that DccIngestor reads one event from the recorded "ingestors/dcc"
# cassette and writes it to the database with the expected field values.
# NOTE(review): this file is added as dcc_ingestor.rb, without the _test
# suffix used by osci_ingestor_test.rb - confirm the test runner picks it up.
class DccIngestorTest < ActiveSupport::TestCase
setup do
@user = users(:regular_user)
@content_provider = content_providers(:another_portal_provider)
mock_ingestions
mock_timezone # System time zone should not affect test result
end

teardown do
reset_timezone
end

test 'can ingest events from dcc' do
# The source is only built, not saved; just its url is used below.
# NOTE(review): DccIngestor.config declares the key 'dcc_event' - confirm
# that 'dcc' is the intended method value here.
source = @content_provider.sources.build(
url: 'https://dcc-po.nl/agenda/',
method: 'dcc',
enabled: true
)

ingestor = Ingestors::DccIngestor.new

# check that the event doesn't exist yet
new_title = "DCC-PO dag"
new_url = 'https://dcc-po.nl/agenda/dcc-po-dag/'
refute Event.where(title: new_title, url: new_url).any?

# run task: read from the cassette, then write; exactly one Event is created
assert_difference 'Event.count', 1 do
freeze_time(2019) do
VCR.use_cassette("ingestors/dcc") do
ingestor.read(source.url)
ingestor.write(@user, @content_provider)
end
end
end

assert_equal 1, ingestor.events.count
assert ingestor.materials.empty?
assert_equal 1, ingestor.stats[:events][:added]
assert_equal 0, ingestor.stats[:events][:updated]
assert_equal 0, ingestor.stats[:events][:rejected]

# check event does exist
event = Event.where(title: new_title, url: new_url).first
assert event
assert_equal new_title, event.title
assert_equal new_url, event.url

# check other fields
assert_equal 'DCC', event.source
assert_equal 'Amsterdam', event.timezone
# NOTE(review): 9 Oct 2019 was a Wednesday, not a Monday; the weekday in
# these strings is presumably ignored when parsing - confirm.
assert_equal Time.zone.parse('Mon, 09 Oct 2019 10:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Mon, 09 Oct 2019 16:30:00.000000000 UTC +00:00'), event.end
assert_equal 'Domstad, Utrecht', event.venue
end
end
59 changes: 59 additions & 0 deletions test/unit/ingestors/osci_ingestor_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
require 'test_helper'

class OsciIngestorTest < ActiveSupport::TestCase
setup do
@user = users(:regular_user)
@content_provider = content_providers(:another_portal_provider)
mock_ingestions
mock_timezone # System time zone should not affect test result
end

teardown do
reset_timezone
end

test 'can ingest events from osci' do
source = @content_provider.sources.build(
url: 'https://osc-international.com/my-calendar/',
method: 'osci',
enabled: true
)

ingestor = Ingestors::OsciIngestor.new

# check event doesn't
new_title = "14:00: Open Science Coffee: Assessing robustness through multiverse analysis – Applications in research and education"
new_url = 'https://osc-international.com/my-calendar/?format=calendar&month=9&yr=2023#mc_calendar_03_2-calendar-details-my-calendar'

refute Event.where(title: new_title, url: new_url).any?

# run task
assert_difference 'Event.count', 12 do
freeze_time(2023) do
VCR.use_cassette("ingestors/osci") do
ingestor.read(source.url)
ingestor.write(@user, @content_provider)
end
end
end

assert_equal 18, ingestor.events.count
assert ingestor.materials.empty?
assert_equal 12, ingestor.stats[:events][:added]
assert_equal 6, ingestor.stats[:events][:updated]
assert_equal 0, ingestor.stats[:events][:rejected]

# check event does exist
event = Event.where(title: new_title, url: new_url).first
assert event
assert_equal new_title, event.title
assert_equal new_url, event.url

# check other fields
assert_equal 'OSCI', event.source
assert_equal 'Amsterdam', event.timezone
assert_equal Time.zone.parse('Sun, 03 Sep 2023 14:00:00.000000000 UTC +00:00'), event.start
assert_equal Time.zone.parse('Sun, 03 Sep 2023 15:00:00.000000000 UTC +00:00'), event.end
assert_equal 'OSC Leiden', event.venue
end
end
Loading