about summary refs log tree commit diff stats
path: root/scrape.rb
diff options
context:
space:
mode:
authorKelly Rauchenberger <fefferburbia@gmail.com>2018-03-01 16:03:16 -0500
committerKelly Rauchenberger <fefferburbia@gmail.com>2018-03-01 16:03:16 -0500
commit473b327ceed3afb5e5683002b39fd9c1947cb25a (patch)
tree0dd2eac68605ad8cf34a2e2e54c6a44ad3ab14c1 /scrape.rb
parentd85fed8541a9580e820a907d83a2184b020572ba (diff)
downloadlunatic-473b327ceed3afb5e5683002b39fd9c1947cb25a.tar.gz
lunatic-473b327ceed3afb5e5683002b39fd9c1947cb25a.tar.bz2
lunatic-473b327ceed3afb5e5683002b39fd9c1947cb25a.zip
Redesigned persistent data format
This is the start of a project to add imagery to the bot's output. We began by rewriting the scraper to use a SQLite datafile instead of dumping achievement names to a text file. This allows storage of additional information about each achievement, and allows for more sophisticated scraping.

Profiles to be scraped can be added on the command line using the scraper script, instead of being specified in a config file. The scraper can conduct full or delta scrapes; in a delta scrape, only each profile's recent games are scraped, whereas in a full scrape all of each profile's games are scraped.

When a game is scraped for the first time, images from the store page of that game are saved locally to be used by the bot. The bot has been altered to not use Twitter and to instead generate a pixelated image based on an image from the game of the chosen achievement. This is just for development purposes. The bot also crashes occasionally, because it can pick an achievement from a game that does not have any images saved.

Sprites of the moons from Odyssey have been included in the repository. A short message denoting their copyright is included.
Diffstat (limited to 'scrape.rb')
-rw-r--r--scrape.rb181
1 files changed, 157 insertions, 24 deletions
diff --git a/scrape.rb b/scrape.rb index a28f4c5..6f3a8e4 100644 --- a/scrape.rb +++ b/scrape.rb
@@ -1,42 +1,175 @@
1require 'json' 1require 'json'
2require 'nokogiri'
3require 'open-uri' 2require 'open-uri'
4require 'yaml' 3require 'yaml'
5 4
6config = YAML.load(open(ARGV[0])) 5require 'rubygems'
7usernames = config["usernames"] 6require 'bundler/setup'
7Bundler.require :default
8 8
9achieves = usernames.map do |username| 9@config = YAML.load(open(ARGV[0]))
10 page = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/games/?tab=all")) 10db_existed = File.exists?(@config["database"])
11db = Sequel.connect("sqlite://" + @config["database"])
12
13if ARGV[1] == "init"
14 if db_existed
15 raise "Datafile already exists"
16 end
17
18 schema = File.read("schema.sql")
19
20 db.run schema
21
22 puts "Initialized datafile"
23
24 exit
25end
26
27class Profile < Sequel::Model
28 many_to_many :achievements, join_table: :dids
29end
30
31class Game < Sequel::Model
32 one_to_many :achievements
33 one_to_many :images
34end
35
36class Achievement < Sequel::Model
37 many_to_one :game
38 many_to_many :profiles, join_table: :dids
39end
40
41class Image < Sequel::Model
42 many_to_one :game
43end
44
45class Did < Sequel::Model
46 many_to_one :profile
47 many_to_one :achievement
48end
49
50@moonimgs = Dir.entries(@config["moon_images"]).select do |img|
51 img.end_with? ".png"
52end
53
54def scrape_profile(profile, full)
55 if full
56 url = "https://steamcommunity.com/#{profile.profile_path}/games/?tab=all"
57 else
58 url = "https://steamcommunity.com/#{profile.profile_path}/games/"
59 end
60
61 page = Nokogiri::HTML(open(url))
11 script = page.css(".responsive_page_template_content script").text[18..-1] 62 script = page.css(".responsive_page_template_content script").text[18..-1]
12 data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) 63 data = JSON.parse(script[0..script.index(";\r\n\t\t")-1])
13 ids = data.map { |d| d["appid"] } 64 ids = data.map { |d| d["appid"] }
14 65
15 index = 0 66 index = 0
16 ids.map do |id| 67 ids.each do |id|
17 index += 1 68 index += 1
18 puts "#{username} - #{index}/#{ids.count}" 69 puts "#{profile.profile_path} - #{index}/#{ids.count}"
19 70
20 achsp = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/stats/#{id}/")) 71 achsp = Nokogiri::HTML(
21 achsp.css(".achieveTxt .achieveUnlockTime + h3").map { |d| d.text } 72 open("https://steamcommunity.com/#{profile.profile_path}/stats/#{id}/"))
22 end
23end.flatten
24 73
25if File.exists?(config["achievements"]) 74 achsp.css(".achieveTxt").each do |node|
26 already = File.read(config["achievements"]).split("\n") 75 unless node.css(".achieveUnlockTime").empty?
27 all_achieves = achieves + already 76 if Game.where(steam_appid: id).count > 0
28else 77 game = Game.where(steam_appid: id).first
29 all_achieves = achieves 78 else
30end 79 moon_index = Random.rand(@moonimgs.size)
80
81 game = Game.new(steam_appid: id, moon_image: @moonimgs[moon_index])
82 game.save
83
84 storepage = Nokogiri::HTML(
85 open("http://store.steampowered.com/app/#{id}"))
86
87 img_id = 0
88 storepage.css(".highlight_screenshot_link").each do |node|
89 begin
90 imagepage = open(node["href"]).read
91
92 img_id += 1
93 img_filename = "#{id}-#{img_id}.jpg"
94 img_filepath = File.join(@config["images"], img_filename)
95
96 img_file = File.open(img_filepath, "w")
97 img_file.write(imagepage)
98 img_file.close
99
100 image = Image.new(game: game, filename: img_filename)
101 image.save
102 rescue OpenURI::HTTPError
103 puts "Error downloading an image"
104 end
105
106 sleep 2
107 end
108 end
109
110 title = node.at_css("h3").text
31 111
32all_achieves.sort! 112 if game.achievements_dataset.where(title: title).count > 0
33all_achieves.uniq! 113 achievement = game.achievements_dataset.where(title: title).first
114 else
115 achievement = Achievement.new(game: game, title: title)
116 achievement.save
117 end
34 118
35if config.key? "blacklist" 119 unless Did.where(profile: profile, achievement: achievement).count > 0
36 blacklist = File.read(config["blacklist"]).split("\n") 120 begin
37 all_achieves.reject! { |l| blacklist.include? l } 121 unlock = DateTime.strptime(
122 node.css(".achieveUnlockTime").text.lstrip[9..-1],
123 "%b %d, %Y @ %l:%M%P")
124 rescue ArgumentError
125 unlock = DateTime.strptime(
126 node.css(".achieveUnlockTime").text.lstrip[9..-1],
127 "%b %d @ %l:%M%P")
128 end
129
130 join = Did.new(
131 profile: profile,
132 achievement: achievement,
133 achieved_at: unlock)
134 join.save
135 end
136 end
137 end
138 end
38end 139end
39 140
40File.open(config["achievements"], "w") do |f| 141if ARGV[1] == "add"
41 f << all_achieves.join("\n") 142 userpath = ARGV[2]
143
144 if Profile.where(profile_path: userpath).count > 0
145 raise "Profile " + userpath + " already exists"
146 end
147
148 profile = Profile.new(profile_path: userpath)
149 profile.save
150
151 scrape_profile profile, true
152elsif ARGV[1] == "update"
153 if ARGV.size == 3
154 scrape_profile Profile.where(profile_path: ARGV[2]).first, false
155 else
156 Profile.all.each do |profile|
157 scrape_profile profile, false
158 end
159 end
160elsif ARGV[1] == "full"
161 if ARGV.size == 3
162 scrape_profile Profile.where(profile_path: ARGV[2]).first, true
163 else
164 Profile.all.each do |profile|
165 scrape_profile profile, true
166 end
167 end
168elsif ARGV[1] == "recolor"
169 Game.all.each do |game|
170 moon_index = Random.rand(@moonimgs.size)
171
172 game.moon_image = @moonimgs[moon_index]
173 game.save
174 end
42end 175end