about summary refs log tree commit diff stats
path: root/scrape.rb
diff options
context:
space:
mode:
Diffstat (limited to 'scrape.rb')
-rw-r--r--scrape.rb181
1 files changed, 157 insertions, 24 deletions
diff --git a/scrape.rb b/scrape.rb index a28f4c5..6f3a8e4 100644 --- a/scrape.rb +++ b/scrape.rb
@@ -1,42 +1,175 @@
1require 'json' 1require 'json'
2require 'nokogiri'
3require 'open-uri' 2require 'open-uri'
4require 'yaml' 3require 'yaml'
5 4
6config = YAML.load(open(ARGV[0])) 5require 'rubygems'
7usernames = config["usernames"] 6require 'bundler/setup'
7Bundler.require :default
8 8
9achieves = usernames.map do |username| 9@config = YAML.load(open(ARGV[0]))
10 page = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/games/?tab=all")) 10db_existed = File.exists?(@config["database"])
11db = Sequel.connect("sqlite://" + @config["database"])
12
13if ARGV[1] == "init"
14 if db_existed
15 raise "Datafile already exists"
16 end
17
18 schema = File.read("schema.sql")
19
20 db.run schema
21
22 puts "Initialized datafile"
23
24 exit
25end
26
27class Profile < Sequel::Model
28 many_to_many :achievements, join_table: :dids
29end
30
31class Game < Sequel::Model
32 one_to_many :achievements
33 one_to_many :images
34end
35
36class Achievement < Sequel::Model
37 many_to_one :game
38 many_to_many :profiles, join_table: :dids
39end
40
41class Image < Sequel::Model
42 many_to_one :game
43end
44
45class Did < Sequel::Model
46 many_to_one :profile
47 many_to_one :achievement
48end
49
50@moonimgs = Dir.entries(@config["moon_images"]).select do |img|
51 img.end_with? ".png"
52end
53
54def scrape_profile(profile, full)
55 if full
56 url = "https://steamcommunity.com/#{profile.profile_path}/games/?tab=all"
57 else
58 url = "https://steamcommunity.com/#{profile.profile_path}/games/"
59 end
60
61 page = Nokogiri::HTML(open(url))
11 script = page.css(".responsive_page_template_content script").text[18..-1] 62 script = page.css(".responsive_page_template_content script").text[18..-1]
12 data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) 63 data = JSON.parse(script[0..script.index(";\r\n\t\t")-1])
13 ids = data.map { |d| d["appid"] } 64 ids = data.map { |d| d["appid"] }
14 65
15 index = 0 66 index = 0
16 ids.map do |id| 67 ids.each do |id|
17 index += 1 68 index += 1
18 puts "#{username} - #{index}/#{ids.count}" 69 puts "#{profile.profile_path} - #{index}/#{ids.count}"
19 70
20 achsp = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/stats/#{id}/")) 71 achsp = Nokogiri::HTML(
21 achsp.css(".achieveTxt .achieveUnlockTime + h3").map { |d| d.text } 72 open("https://steamcommunity.com/#{profile.profile_path}/stats/#{id}/"))
22 end
23end.flatten
24 73
25if File.exists?(config["achievements"]) 74 achsp.css(".achieveTxt").each do |node|
26 already = File.read(config["achievements"]).split("\n") 75 unless node.css(".achieveUnlockTime").empty?
27 all_achieves = achieves + already 76 if Game.where(steam_appid: id).count > 0
28else 77 game = Game.where(steam_appid: id).first
29 all_achieves = achieves 78 else
30end 79 moon_index = Random.rand(@moonimgs.size)
80
81 game = Game.new(steam_appid: id, moon_image: @moonimgs[moon_index])
82 game.save
83
84 storepage = Nokogiri::HTML(
85 open("http://store.steampowered.com/app/#{id}"))
86
87 img_id = 0
88 storepage.css(".highlight_screenshot_link").each do |node|
89 begin
90 imagepage = open(node["href"]).read
91
92 img_id += 1
93 img_filename = "#{id}-#{img_id}.jpg"
94 img_filepath = File.join(@config["images"], img_filename)
95
96 img_file = File.open(img_filepath, "w")
97 img_file.write(imagepage)
98 img_file.close
99
100 image = Image.new(game: game, filename: img_filename)
101 image.save
102 rescue OpenURI::HTTPError
103 puts "Error downloading an image"
104 end
105
106 sleep 2
107 end
108 end
109
110 title = node.at_css("h3").text
31 111
32all_achieves.sort! 112 if game.achievements_dataset.where(title: title).count > 0
33all_achieves.uniq! 113 achievement = game.achievements_dataset.where(title: title).first
114 else
115 achievement = Achievement.new(game: game, title: title)
116 achievement.save
117 end
34 118
35if config.key? "blacklist" 119 unless Did.where(profile: profile, achievement: achievement).count > 0
36 blacklist = File.read(config["blacklist"]).split("\n") 120 begin
37 all_achieves.reject! { |l| blacklist.include? l } 121 unlock = DateTime.strptime(
122 node.css(".achieveUnlockTime").text.lstrip[9..-1],
123 "%b %d, %Y @ %l:%M%P")
124 rescue ArgumentError
125 unlock = DateTime.strptime(
126 node.css(".achieveUnlockTime").text.lstrip[9..-1],
127 "%b %d @ %l:%M%P")
128 end
129
130 join = Did.new(
131 profile: profile,
132 achievement: achievement,
133 achieved_at: unlock)
134 join.save
135 end
136 end
137 end
138 end
38end 139end
39 140
40File.open(config["achievements"], "w") do |f| 141if ARGV[1] == "add"
41 f << all_achieves.join("\n") 142 userpath = ARGV[2]
143
144 if Profile.where(profile_path: userpath).count > 0
145 raise "Profile " + userpath + " already exists"
146 end
147
148 profile = Profile.new(profile_path: userpath)
149 profile.save
150
151 scrape_profile profile, true
152elsif ARGV[1] == "update"
153 if ARGV.size == 3
154 scrape_profile Profile.where(profile_path: ARGV[2]).first, false
155 else
156 Profile.all.each do |profile|
157 scrape_profile profile, false
158 end
159 end
160elsif ARGV[1] == "full"
161 if ARGV.size == 3
162 scrape_profile Profile.where(profile_path: ARGV[2]).first, true
163 else
164 Profile.all.each do |profile|
165 scrape_profile profile, true
166 end
167 end
168elsif ARGV[1] == "recolor"
169 Game.all.each do |game|
170 moon_index = Random.rand(@moonimgs.size)
171
172 game.moon_image = @moonimgs[moon_index]
173 game.save
174 end
42end 175end