diff options
Diffstat (limited to 'scrape.rb')
-rw-r--r-- | scrape.rb | 181 |
1 file changed, 157 insertions, 24 deletions
diff --git a/scrape.rb b/scrape.rb
index a28f4c5..6f3a8e4 100644
--- a/scrape.rb
+++ b/scrape.rb
@@ -1,42 +1,175 @@ | |||
1 | require 'json' | 1 | require 'json' |
2 | require 'nokogiri' | ||
3 | require 'open-uri' | 2 | require 'open-uri' |
4 | require 'yaml' | 3 | require 'yaml' |
5 | 4 | ||
6 | config = YAML.load(open(ARGV[0])) | 5 | require 'rubygems' |
7 | usernames = config["usernames"] | 6 | require 'bundler/setup' |
7 | Bundler.require :default | ||
8 | 8 | ||
9 | achieves = usernames.map do |username| | 9 | @config = YAML.load(open(ARGV[0])) |
10 | page = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/games/?tab=all")) | 10 | db_existed = File.exists?(@config["database"]) |
11 | db = Sequel.connect("sqlite://" + @config["database"]) | ||
12 | |||
13 | if ARGV[1] == "init" | ||
14 | if db_existed | ||
15 | raise "Datafile already exists" | ||
16 | end | ||
17 | |||
18 | schema = File.read("schema.sql") | ||
19 | |||
20 | db.run schema | ||
21 | |||
22 | puts "Initialized datafile" | ||
23 | |||
24 | exit | ||
25 | end | ||
26 | |||
27 | class Profile < Sequel::Model | ||
28 | many_to_many :achievements, join_table: :dids | ||
29 | end | ||
30 | |||
31 | class Game < Sequel::Model | ||
32 | one_to_many :achievements | ||
33 | one_to_many :images | ||
34 | end | ||
35 | |||
36 | class Achievement < Sequel::Model | ||
37 | many_to_one :game | ||
38 | many_to_many :profiles, join_table: :dids | ||
39 | end | ||
40 | |||
41 | class Image < Sequel::Model | ||
42 | many_to_one :game | ||
43 | end | ||
44 | |||
45 | class Did < Sequel::Model | ||
46 | many_to_one :profile | ||
47 | many_to_one :achievement | ||
48 | end | ||
49 | |||
50 | @moonimgs = Dir.entries(@config["moon_images"]).select do |img| | ||
51 | img.end_with? ".png" | ||
52 | end | ||
53 | |||
54 | def scrape_profile(profile, full) | ||
55 | if full | ||
56 | url = "https://steamcommunity.com/#{profile.profile_path}/games/?tab=all" | ||
57 | else | ||
58 | url = "https://steamcommunity.com/#{profile.profile_path}/games/" | ||
59 | end | ||
60 | |||
61 | page = Nokogiri::HTML(open(url)) | ||
11 | script = page.css(".responsive_page_template_content script").text[18..-1] | 62 | script = page.css(".responsive_page_template_content script").text[18..-1] |
12 | data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) | 63 | data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) |
13 | ids = data.map { |d| d["appid"] } | 64 | ids = data.map { |d| d["appid"] } |
14 | 65 | ||
15 | index = 0 | 66 | index = 0 |
16 | ids.map do |id| | 67 | ids.each do |id| |
17 | index += 1 | 68 | index += 1 |
18 | puts "#{username} - #{index}/#{ids.count}" | 69 | puts "#{profile.profile_path} - #{index}/#{ids.count}" |
19 | 70 | ||
20 | achsp = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/stats/#{id}/")) | 71 | achsp = Nokogiri::HTML( |
21 | achsp.css(".achieveTxt .achieveUnlockTime + h3").map { |d| d.text } | 72 | open("https://steamcommunity.com/#{profile.profile_path}/stats/#{id}/")) |
22 | end | ||
23 | end.flatten | ||
24 | 73 | ||
25 | if File.exists?(config["achievements"]) | 74 | achsp.css(".achieveTxt").each do |node| |
26 | already = File.read(config["achievements"]).split("\n") | 75 | unless node.css(".achieveUnlockTime").empty? |
27 | all_achieves = achieves + already | 76 | if Game.where(steam_appid: id).count > 0 |
28 | else | 77 | game = Game.where(steam_appid: id).first |
29 | all_achieves = achieves | 78 | else |
30 | end | 79 | moon_index = Random.rand(@moonimgs.size) |
80 | |||
81 | game = Game.new(steam_appid: id, moon_image: @moonimgs[moon_index]) | ||
82 | game.save | ||
83 | |||
84 | storepage = Nokogiri::HTML( | ||
85 | open("http://store.steampowered.com/app/#{id}")) | ||
86 | |||
87 | img_id = 0 | ||
88 | storepage.css(".highlight_screenshot_link").each do |node| | ||
89 | begin | ||
90 | imagepage = open(node["href"]).read | ||
91 | |||
92 | img_id += 1 | ||
93 | img_filename = "#{id}-#{img_id}.jpg" | ||
94 | img_filepath = File.join(@config["images"], img_filename) | ||
95 | |||
96 | img_file = File.open(img_filepath, "w") | ||
97 | img_file.write(imagepage) | ||
98 | img_file.close | ||
99 | |||
100 | image = Image.new(game: game, filename: img_filename) | ||
101 | image.save | ||
102 | rescue OpenURI::HTTPError | ||
103 | puts "Error downloading an image" | ||
104 | end | ||
105 | |||
106 | sleep 2 | ||
107 | end | ||
108 | end | ||
109 | |||
110 | title = node.at_css("h3").text | ||
31 | 111 | ||
32 | all_achieves.sort! | 112 | if game.achievements_dataset.where(title: title).count > 0 |
33 | all_achieves.uniq! | 113 | achievement = game.achievements_dataset.where(title: title).first |
114 | else | ||
115 | achievement = Achievement.new(game: game, title: title) | ||
116 | achievement.save | ||
117 | end | ||
34 | 118 | ||
35 | if config.key? "blacklist" | 119 | unless Did.where(profile: profile, achievement: achievement).count > 0 |
36 | blacklist = File.read(config["blacklist"]).split("\n") | 120 | begin |
37 | all_achieves.reject! { |l| blacklist.include? l } | 121 | unlock = DateTime.strptime( |
122 | node.css(".achieveUnlockTime").text.lstrip[9..-1], | ||
123 | "%b %d, %Y @ %l:%M%P") | ||
124 | rescue ArgumentError | ||
125 | unlock = DateTime.strptime( | ||
126 | node.css(".achieveUnlockTime").text.lstrip[9..-1], | ||
127 | "%b %d @ %l:%M%P") | ||
128 | end | ||
129 | |||
130 | join = Did.new( | ||
131 | profile: profile, | ||
132 | achievement: achievement, | ||
133 | achieved_at: unlock) | ||
134 | join.save | ||
135 | end | ||
136 | end | ||
137 | end | ||
138 | end | ||
38 | end | 139 | end |
39 | 140 | ||
40 | File.open(config["achievements"], "w") do |f| | 141 | if ARGV[1] == "add" |
41 | f << all_achieves.join("\n") | 142 | userpath = ARGV[2] |
143 | |||
144 | if Profile.where(profile_path: userpath).count > 0 | ||
145 | raise "Profile " + userpath + " already exists" | ||
146 | end | ||
147 | |||
148 | profile = Profile.new(profile_path: userpath) | ||
149 | profile.save | ||
150 | |||
151 | scrape_profile profile, true | ||
152 | elsif ARGV[1] == "update" | ||
153 | if ARGV.size == 3 | ||
154 | scrape_profile Profile.where(profile_path: ARGV[2]).first, false | ||
155 | else | ||
156 | Profile.all.each do |profile| | ||
157 | scrape_profile profile, false | ||
158 | end | ||
159 | end | ||
160 | elsif ARGV[1] == "full" | ||
161 | if ARGV.size == 3 | ||
162 | scrape_profile Profile.where(profile_path: ARGV[2]).first, true | ||
163 | else | ||
164 | Profile.all.each do |profile| | ||
165 | scrape_profile profile, true | ||
166 | end | ||
167 | end | ||
168 | elsif ARGV[1] == "recolor" | ||
169 | Game.all.each do |game| | ||
170 | moon_index = Random.rand(@moonimgs.size) | ||
171 | |||
172 | game.moon_image = @moonimgs[moon_index] | ||
173 | game.save | ||
174 | end | ||
42 | end | 175 | end |