diff options
Diffstat (limited to 'get.rb')
-rw-r--r-- | get.rb | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/get.rb b/get.rb new file mode 100644 index 0000000..a2a213e --- /dev/null +++ b/get.rb | |||
@@ -0,0 +1,28 @@ | |||
# Scrapes the pages linked from the Steven Universe wiki's "Transcripts"
# category page and writes the collected rows to dialogue.csv, one CSV row
# per transcript table row: [speaker, line].
require 'open-uri'
require 'nokogiri'
require 'csv'

BASE_URL = 'https://steven-universe.fandom.com'

# Downloads +url+ and parses the response body as HTML.
#
# URI.open is used instead of the bare Kernel#open: open-uri's patch of
# Kernel#open was deprecated in Ruby 2.7 and removed in 3.0, where a bare
# `open("https://...")` tries to open a local file. The block form closes
# the underlying IO once the body has been read.
#
# @param url [String] absolute URL to fetch
# @return [Nokogiri::HTML::Document]
def fetch_document(url)
  Nokogiri::HTML(URI.open(url, &:read))
end

# Collapses a node's text onto a single line.
#
# @param node [Nokogiri::XML::Node]
# @return [String] stripped text with newlines replaced by spaces
def single_line(node)
  node.content.strip.tr("\n", ' ')
end

# Extracts [speaker, line] pairs from the ".bgrevo" table rows of a page.
#
# The first and last rows are discarded (presumably header/footer rows --
# TODO confirm against the page markup). A row with two child nodes is
# recorded with an empty speaker; a row with three child nodes is recorded
# as speaker + line; rows with any other child count are skipped.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<Array(String, String)>]
def dialogue_rows(doc)
  rows = doc.css('.bgrevo tr')
  rows.shift
  rows.pop

  rows.each_with_object([]) do |row, lines|
    cells = row.children
    case cells.length
    when 2 then lines << ['', single_line(cells[1])]
    when 3 then lines << [cells[1].content.strip, single_line(cells[2])]
    end
  end
end

index = fetch_document("#{BASE_URL}/wiki/Category:Transcripts")

result = index.css('.category-page__member-link').flat_map do |link|
  puts link['href'] # progress output: the page about to be fetched
  dialogue_rows(fetch_document("#{BASE_URL}#{link['href']}"))
end

CSV.open('dialogue.csv', 'w') do |csv|
  result.each { |line| csv << line }
end