summary refs log tree commit diff stats
path: root/get.rb
blob: a2a213ed37a3acac3f3a0b482752904181f27a89 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
require 'open-uri'
require 'nokogiri'
require 'csv'

# Scrape the transcript pages linked from the wiki's "Category:Transcripts"
# listing and accumulate the dialogue in `result` as [speaker, text] pairs.
# NOTE(review): only the single listing URL below is fetched; any further
# pages of the category are not followed.
wiki_root = "https://steven-universe.fandom.com"

# Download a URL and parse it as HTML. URI.open is used explicitly because
# open-uri's patch of Kernel#open was deprecated in Ruby 2.7 and removed in
# 3.0; the block form closes the underlying IO once the body has been read.
fetch_doc = ->(url) { Nokogiri::HTML(URI.open(url, &:read)) }

# Trim a cell's text and replace embedded newlines with spaces so each
# utterance stays on a single line.
cell_text = ->(cell) { cell.content.strip.tr("\n", " ") }

result = []
listing = fetch_doc.call("#{wiki_root}/wiki/Category:Transcripts")
listing.css(".category-page__member-link").each do |link|
  puts link['href'] # progress output: path of the page being fetched
  rows = fetch_doc.call(wiki_root + link['href']).css(".bgrevo tr")
  # Discard the first and last table rows (presumably header/footer rows --
  # TODO confirm against the page markup).
  rows.shift
  rows.pop
  rows.each do |row|
    cells = row.children
    # Index 0 is skipped in both shapes (presumably a leading whitespace text
    # node -- verify). Rows with any other child count are ignored.
    case cells.length
    when 2
      # Single content cell: no speaker, text only.
      result << ["", cell_text.call(cells[1])]
    when 3
      # Speaker cell followed by the spoken text.
      result << [cells[1].content.strip, cell_text.call(cells[2])]
    end
  end
end

# Dump every collected entry of `result` to dialogue.csv, one CSV row per
# entry; opening with "w" replaces any existing file.
CSV.open("dialogue.csv", "w") { |out| result.each { |entry| out << entry } }