diff options
Diffstat (limited to 'get.rb')
| -rw-r--r-- | get.rb | 28 |
1 files changed, 28 insertions, 0 deletions
| diff --git a/get.rb b/get.rb new file mode 100644 index 0000000..a2a213e --- /dev/null +++ b/get.rb | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | require 'open-uri' | ||
| 2 | require 'nokogiri' | ||
| 3 | require 'csv' | ||
| 4 | |||
# Scrapes the transcript pages linked from the wiki's "Category:Transcripts"
# page and writes every extracted table row to dialogue.csv as a two-column
# record. The first column is filled from the row's second child node (or left
# empty for two-child rows); the second column holds the row's text with
# newlines flattened to spaces.

WIKI_BASE_URL = 'https://steven-universe.fandom.com'
TRANSCRIPTS_INDEX_URL = "#{WIKI_BASE_URL}/wiki/Category:Transcripts"
OUTPUT_PATH = 'dialogue.csv'

# Downloads +url+ and returns it parsed as a Nokogiri HTML document.
#
# Kernel#open stopped accepting URLs in Ruby 3.0 (deprecated since 2.7), so
# URI.open is used instead. The block form closes the underlying stream as
# soon as the body has been read.
def fetch_document(url)
  Nokogiri::HTML(URI.open(url, &:read))
end

# Returns the stripped text of +node+ with newlines replaced by spaces, so
# each CSV record stays on one physical line.
def flatten_text(node)
  node.content.strip.tr("\n", ' ')
end

# Converts one table row into a [first_column, text] pair, or nil when the
# row has an unexpected number of child nodes.
#
# children[0] is never read. NOTE(review): presumably it is a whitespace text
# node and two-child rows are lines without a speaker - confirm against the
# live markup.
def dialogue_record(row)
  cells = row.children
  case cells.length
  when 2 then ['', flatten_text(cells[1])]
  when 3 then [cells[1].content.strip, flatten_text(cells[2])]
  end
end

# Extracts all records from the ".bgrevo" table rows of a transcript page.
# The first and last rows are always discarded (presumably header and footer
# rows - TODO confirm).
def transcript_records(document)
  rows = document.css('.bgrevo tr').to_a
  rows.shift
  rows.pop
  rows.map { |row| dialogue_record(row) }.compact
end

index = fetch_document(TRANSCRIPTS_INDEX_URL)

result = index.css('.category-page__member-link').flat_map do |link|
  href = link['href']
  next [] unless href # a link without a target cannot be followed

  puts href # progress output, one line per fetched transcript
  transcript_records(fetch_document("#{WIKI_BASE_URL}#{href}"))
end

CSV.open(OUTPUT_PATH, 'w') do |csv|
  result.each { |record| csv << record }
end
