summary refs log tree commit diff stats
path: root/get.rb
diff options
context:
space:
mode:
Diffstat (limited to 'get.rb')
-rw-r--r--get.rb28
1 files changed, 28 insertions, 0 deletions
diff --git a/get.rb b/get.rb new file mode 100644 index 0000000..a2a213e --- /dev/null +++ b/get.rb
@@ -0,0 +1,28 @@
1require 'open-uri'
2require 'nokogiri'
3require 'csv'
4
# Scrape every page linked from the wiki's transcript category listing and
# collect the table rows found on each page as [speaker, text] pairs in
# `result` (presumably one pair per line of dialogue -- the column meaning is
# inferred from the output file name, confirm against the wiki markup).
base_url = 'https://steven-universe.fandom.com'
result = []

# URI.open is used instead of bare Kernel#open: open-uri stopped patching
# Kernel#open for URLs in Ruby 3.0, so `open(url)` would be treated as a local
# file path there. The block form closes the underlying IO once it is read.
transcripts = URI.open("#{base_url}/wiki/Category:Transcripts", &:read)
docTrans = Nokogiri::HTML(transcripts)

docTrans.css('.category-page__member-link').each do |node|
  puts node['href'] # progress output: one line per linked page fetched
  subpage = URI.open(base_url + node['href'], &:read)
  subpagedoc = Nokogiri::HTML(subpage)

  rows = subpagedoc.css('.bgrevo tr')
  rows.shift # discard the first matched row
  rows.pop   # discard the last matched row

  rows.each do |row|
    cells = row.children
    case cells.length
    when 2
      # Only one content node after the leading child: store it with an
      # empty first column.
      result << ['', cells[1].content.strip.gsub(/\n/, ' ')]
    when 3
      result << [cells[1].content.strip, cells[2].content.strip.gsub(/\n/, ' ')]
    end
    # Rows with any other number of children are skipped.
  end
end
23
# Write every collected row to dialogue.csv, one CSV record per entry of
# `result`. Mode "w" truncates any existing file.
CSV.open("dialogue.csv", "w") do |out|
  result.each { |record| out << record }
end