diff options
Diffstat (limited to 'scrape.rb')
| -rw-r--r-- | scrape.rb | 181 |
1 files changed, 157 insertions, 24 deletions
| diff --git a/scrape.rb b/scrape.rb index a28f4c5..6f3a8e4 100644 --- a/scrape.rb +++ b/scrape.rb | |||
| @@ -1,42 +1,175 @@ | |||
| 1 | require 'json' | 1 | require 'json' |
| 2 | require 'nokogiri' | ||
| 3 | require 'open-uri' | 2 | require 'open-uri' |
| 4 | require 'yaml' | 3 | require 'yaml' |
| 5 | 4 | ||
| 6 | config = YAML.load(open(ARGV[0])) | 5 | require 'rubygems' |
| 7 | usernames = config["usernames"] | 6 | require 'bundler/setup' |
| 7 | Bundler.require :default | ||
| 8 | 8 | ||
| 9 | achieves = usernames.map do |username| | 9 | @config = YAML.load(open(ARGV[0])) |
| 10 | page = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/games/?tab=all")) | 10 | db_existed = File.exists?(@config["database"]) |
| 11 | db = Sequel.connect("sqlite://" + @config["database"]) | ||
| 12 | |||
| 13 | if ARGV[1] == "init" | ||
| 14 | if db_existed | ||
| 15 | raise "Datafile already exists" | ||
| 16 | end | ||
| 17 | |||
| 18 | schema = File.read("schema.sql") | ||
| 19 | |||
| 20 | db.run schema | ||
| 21 | |||
| 22 | puts "Initialized datafile" | ||
| 23 | |||
| 24 | exit | ||
| 25 | end | ||
| 26 | |||
| 27 | class Profile < Sequel::Model | ||
| 28 | many_to_many :achievements, join_table: :dids | ||
| 29 | end | ||
| 30 | |||
| 31 | class Game < Sequel::Model | ||
| 32 | one_to_many :achievements | ||
| 33 | one_to_many :images | ||
| 34 | end | ||
| 35 | |||
| 36 | class Achievement < Sequel::Model | ||
| 37 | many_to_one :game | ||
| 38 | many_to_many :profiles, join_table: :dids | ||
| 39 | end | ||
| 40 | |||
| 41 | class Image < Sequel::Model | ||
| 42 | many_to_one :game | ||
| 43 | end | ||
| 44 | |||
| 45 | class Did < Sequel::Model | ||
| 46 | many_to_one :profile | ||
| 47 | many_to_one :achievement | ||
| 48 | end | ||
| 49 | |||
| 50 | @moonimgs = Dir.entries(@config["moon_images"]).select do |img| | ||
| 51 | img.end_with? ".png" | ||
| 52 | end | ||
| 53 | |||
| 54 | def scrape_profile(profile, full) | ||
| 55 | if full | ||
| 56 | url = "https://steamcommunity.com/#{profile.profile_path}/games/?tab=all" | ||
| 57 | else | ||
| 58 | url = "https://steamcommunity.com/#{profile.profile_path}/games/" | ||
| 59 | end | ||
| 60 | |||
| 61 | page = Nokogiri::HTML(open(url)) | ||
| 11 | script = page.css(".responsive_page_template_content script").text[18..-1] | 62 | script = page.css(".responsive_page_template_content script").text[18..-1] |
| 12 | data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) | 63 | data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) |
| 13 | ids = data.map { |d| d["appid"] } | 64 | ids = data.map { |d| d["appid"] } |
| 14 | 65 | ||
| 15 | index = 0 | 66 | index = 0 |
| 16 | ids.map do |id| | 67 | ids.each do |id| |
| 17 | index += 1 | 68 | index += 1 |
| 18 | puts "#{username} - #{index}/#{ids.count}" | 69 | puts "#{profile.profile_path} - #{index}/#{ids.count}" |
| 19 | 70 | ||
| 20 | achsp = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/stats/#{id}/")) | 71 | achsp = Nokogiri::HTML( |
| 21 | achsp.css(".achieveTxt .achieveUnlockTime + h3").map { |d| d.text } | 72 | open("https://steamcommunity.com/#{profile.profile_path}/stats/#{id}/")) |
| 22 | end | ||
| 23 | end.flatten | ||
| 24 | 73 | ||
| 25 | if File.exists?(config["achievements"]) | 74 | achsp.css(".achieveTxt").each do |node| |
| 26 | already = File.read(config["achievements"]).split("\n") | 75 | unless node.css(".achieveUnlockTime").empty? |
| 27 | all_achieves = achieves + already | 76 | if Game.where(steam_appid: id).count > 0 |
| 28 | else | 77 | game = Game.where(steam_appid: id).first |
| 29 | all_achieves = achieves | 78 | else |
| 30 | end | 79 | moon_index = Random.rand(@moonimgs.size) |
| 80 | |||
| 81 | game = Game.new(steam_appid: id, moon_image: @moonimgs[moon_index]) | ||
| 82 | game.save | ||
| 83 | |||
| 84 | storepage = Nokogiri::HTML( | ||
| 85 | open("http://store.steampowered.com/app/#{id}")) | ||
| 86 | |||
| 87 | img_id = 0 | ||
| 88 | storepage.css(".highlight_screenshot_link").each do |node| | ||
| 89 | begin | ||
| 90 | imagepage = open(node["href"]).read | ||
| 91 | |||
| 92 | img_id += 1 | ||
| 93 | img_filename = "#{id}-#{img_id}.jpg" | ||
| 94 | img_filepath = File.join(@config["images"], img_filename) | ||
| 95 | |||
| 96 | img_file = File.open(img_filepath, "w") | ||
| 97 | img_file.write(imagepage) | ||
| 98 | img_file.close | ||
| 99 | |||
| 100 | image = Image.new(game: game, filename: img_filename) | ||
| 101 | image.save | ||
| 102 | rescue OpenURI::HTTPError | ||
| 103 | puts "Error downloading an image" | ||
| 104 | end | ||
| 105 | |||
| 106 | sleep 2 | ||
| 107 | end | ||
| 108 | end | ||
| 109 | |||
| 110 | title = node.at_css("h3").text | ||
| 31 | 111 | ||
| 32 | all_achieves.sort! | 112 | if game.achievements_dataset.where(title: title).count > 0 |
| 33 | all_achieves.uniq! | 113 | achievement = game.achievements_dataset.where(title: title).first |
| 114 | else | ||
| 115 | achievement = Achievement.new(game: game, title: title) | ||
| 116 | achievement.save | ||
| 117 | end | ||
| 34 | 118 | ||
| 35 | if config.key? "blacklist" | 119 | unless Did.where(profile: profile, achievement: achievement).count > 0 |
| 36 | blacklist = File.read(config["blacklist"]).split("\n") | 120 | begin |
| 37 | all_achieves.reject! { |l| blacklist.include? l } | 121 | unlock = DateTime.strptime( |
| 122 | node.css(".achieveUnlockTime").text.lstrip[9..-1], | ||
| 123 | "%b %d, %Y @ %l:%M%P") | ||
| 124 | rescue ArgumentError | ||
| 125 | unlock = DateTime.strptime( | ||
| 126 | node.css(".achieveUnlockTime").text.lstrip[9..-1], | ||
| 127 | "%b %d @ %l:%M%P") | ||
| 128 | end | ||
| 129 | |||
| 130 | join = Did.new( | ||
| 131 | profile: profile, | ||
| 132 | achievement: achievement, | ||
| 133 | achieved_at: unlock) | ||
| 134 | join.save | ||
| 135 | end | ||
| 136 | end | ||
| 137 | end | ||
| 138 | end | ||
| 38 | end | 139 | end |
| 39 | 140 | ||
| 40 | File.open(config["achievements"], "w") do |f| | 141 | if ARGV[1] == "add" |
| 41 | f << all_achieves.join("\n") | 142 | userpath = ARGV[2] |
| 143 | |||
| 144 | if Profile.where(profile_path: userpath).count > 0 | ||
| 145 | raise "Profile " + userpath + " already exists" | ||
| 146 | end | ||
| 147 | |||
| 148 | profile = Profile.new(profile_path: userpath) | ||
| 149 | profile.save | ||
| 150 | |||
| 151 | scrape_profile profile, true | ||
| 152 | elsif ARGV[1] == "update" | ||
| 153 | if ARGV.size == 3 | ||
| 154 | scrape_profile Profile.where(profile_path: ARGV[2]).first, false | ||
| 155 | else | ||
| 156 | Profile.all.each do |profile| | ||
| 157 | scrape_profile profile, false | ||
| 158 | end | ||
| 159 | end | ||
| 160 | elsif ARGV[1] == "full" | ||
| 161 | if ARGV.size == 3 | ||
| 162 | scrape_profile Profile.where(profile_path: ARGV[2]).first, true | ||
| 163 | else | ||
| 164 | Profile.all.each do |profile| | ||
| 165 | scrape_profile profile, true | ||
| 166 | end | ||
| 167 | end | ||
| 168 | elsif ARGV[1] == "recolor" | ||
| 169 | Game.all.each do |game| | ||
| 170 | moon_index = Random.rand(@moonimgs.size) | ||
| 171 | |||
| 172 | game.moon_image = @moonimgs[moon_index] | ||
| 173 | game.save | ||
| 174 | end | ||
| 42 | end | 175 | end |
