# frozen_string_literal: true

# get.rb
#
# Provenance: commit 2b152d09881559a0330b3ff923e03e715777c6c3
# Author:  Kelly Rauchenberger
# Date:    Wed, 27 Feb 2019 20:45:17 -0500
# Subject: Initial commit (by Pink!)
#
# Downloads the "Category:Transcripts" listing from the Steven Universe
# Fandom wiki, follows every member link on that page, pulls the rows out of
# each transcript table and writes them to dialogue.csv as two columns
# (first column may be empty, second column is the line text).
#
# NOTE(review): only the single listing page at CATEGORY_URL is fetched; if
# the category is paginated, later pages are not visited.

require 'open-uri'
require 'nokogiri'
require 'csv'

BASE_URL     = 'https://steven-universe.fandom.com'
CATEGORY_URL = "#{BASE_URL}/wiki/Category:Transcripts"
OUTPUT_FILE  = 'dialogue.csv'

# Fetches +url+ and parses the response body as HTML.
#
# Uses URI.open rather than Kernel#open: open-uri's URL support through
# Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0, where a
# bare open("https://...") is treated as a local path and fails.
#
# @param url [String] absolute URL to download
# @return [Nokogiri::HTML::Document]
def fetch_document(url)
  Nokogiri::HTML(URI.open(url).read)
end

# Returns the stripped text of +node+ with every newline replaced by a space,
# so that one table cell becomes one single-line CSV field.
#
# @param node [#content]
# @return [String]
def single_line_text(node)
  node.content.strip.tr("\n", ' ')
end

# Converts one table row into a two-element CSV record.
#
# The decision is made on the row's child-node count:
# * 2 children -> ["", text of child 1]
# * 3 children -> [stripped text of child 1, text of child 2]
# * anything else -> nil (row is ignored)
#
# NOTE(review): `children` counts every child node, not just cells, so the
# 2/3 thresholds presumably rely on the wiki's exact markup — confirm
# against a live page before changing them.
#
# @param row [#children]
# @return [Array(String, String), nil]
def dialogue_record(row)
  cells = row.children
  case cells.length
  when 2 then ['', single_line_text(cells[1])]
  when 3 then [cells[1].content.strip, single_line_text(cells[2])]
  end
end

# Extracts every usable record from one transcript page.
#
# The first and last ".bgrevo tr" rows are discarded, the rest are converted
# with #dialogue_record and unrecognised rows are dropped.
#
# @param doc [Nokogiri::HTML::Document]
# @return [Array<Array(String, String)>]
def transcript_records(doc)
  rows = doc.css('.bgrevo tr').to_a
  # Array#[] with a start index past the end returns nil (e.g. no rows at
  # all), hence the fallback to an empty list.
  inner_rows = rows[1...-1] || []
  inner_rows.map { |row| dialogue_record(row) }.compact
end

# Walks every transcript linked from the category page and collects all
# records, printing each member href as it is visited.
#
# @return [Array<Array(String, String)>]
def collect_dialogue
  listing = fetch_document(CATEGORY_URL)
  listing.css('.category-page__member-link').flat_map do |link|
    href = link['href']
    puts href
    transcript_records(fetch_document("#{BASE_URL}#{href}"))
  end
end

# Writes +records+ to +path+, one CSV row per record.
#
# @param records [Array<Array(String, String)>]
# @param path [String]
# @return [void]
def write_csv(records, path)
  CSV.open(path, 'w') do |csv|
    records.each { |record| csv << record }
  end
end

# Scrape everything first, then write: a failure part-way through the crawl
# leaves no partially written output file behind.
write_csv(collect_dialogue, OUTPUT_FILE)