blob: a2a213ed37a3acac3f3a0b482752904181f27a89 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
require 'open-uri'
require 'nokogiri'
require 'csv'
# Scrapes every page linked from the Steven Universe wiki's "Transcripts"
# category page and writes the extracted table rows to dialogue.csv.
#
# For each transcript page, the rows matched by ".bgrevo tr" are read, the
# first and last row are discarded, and each remaining row becomes one CSV
# line with two columns:
#   * rows with 2 child nodes -> ["", text of child 1]
#   * rows with 3 child nodes -> [text of child 1, text of child 2]
# Rows with any other number of child nodes are ignored.
# NOTE(review): the two columns are presumably speaker and spoken line, and
# the dropped rows presumably header/footer -- confirm against the page markup.
#
# URI.open is used instead of Kernel#open: open-uri no longer patches
# Kernel#open on Ruby 3.0+, and Kernel#open would treat a string starting
# with "|" as a command to run, which is unsafe for scraped hrefs.
WIKI_ROOT = 'https://steven-universe.fandom.com'.freeze
OUTPUT_PATH = 'dialogue.csv'.freeze

result = []

# Block form closes the underlying IO as soon as the body has been read.
index_html = URI.open("#{WIKI_ROOT}/wiki/Category:Transcripts", &:read)

Nokogiri::HTML(index_html).css('.category-page__member-link').each do |link|
  href = link['href']
  next unless href # a link without an href cannot be followed

  puts href # progress output: one line per transcript page fetched
  page_html = URI.open("#{WIKI_ROOT}#{href}", &:read)

  rows = Nokogiri::HTML(page_html).css('.bgrevo tr')
  # Discard the first and last matched rows.
  rows.shift
  rows.pop

  rows.each do |row|
    # `children` is every child node of the row; index 0 is never used.
    cells = row.children
    case cells.length
    when 2
      result << ['', cells[1].content.strip.tr("\n", ' ')]
    when 3
      result << [cells[1].content.strip, cells[2].content.strip.tr("\n", ' ')]
    end
  end
end

# Written only after every page has been fetched, so a failed fetch leaves no
# partially written file behind.
CSV.open(OUTPUT_PATH, 'w') do |csv|
  result.each { |line| csv << line }
end
|