From 473b327ceed3afb5e5683002b39fd9c1947cb25a Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Thu, 1 Mar 2018 16:03:16 -0500 Subject: Redesigned persistent data formta This is the start of a project to add imagery to the bot's output. We began by rewriting the scraper to use a SQLite datafile instead of dumping achievement names to a text file. This allows storage of additional information about each achievement, and allows for more sophisticated scraping. Profiles to be scraped can be added on the command line using the scraper script, instead of being specified in a config file. The scraper can conduct full or delta scrapes; in a delta scrape, only each profile's recent games are scraped, whereas they are all scraped in a full scrape. When a game is scraped for the first time, images from the store page of that game are saved locally to be used by the bot. The bot has been altered to not use Twitter, and instead generate a pixelated image based on an image from the game of the chosen achievement. This is just for development purposes. It also crashes occasionally due to picking an achievement from a game that does not have any images saved. Sprites of the moons from Odyssey have been included in the repository. A short message denoting their copyright is included. --- CMakeLists.txt | 11 +++- Gemfile | 5 ++ Gemfile.lock | 19 ++++++ database.cpp | 122 +++++++++++++++++++++++++++++++++++++ database.h | 53 ++++++++++++++++ lunatic.cpp | 50 +++++++++++++-- moons/README.md | 1 + moons/blue.png | Bin 0 -> 37618 bytes moons/brown.png | Bin 0 -> 38150 bytes moons/cyan.png | Bin 0 -> 33139 bytes moons/green.png | Bin 0 -> 36760 bytes moons/orange.png | Bin 0 -> 36465 bytes moons/pink.png | Bin 0 -> 30838 bytes moons/purple.png | Bin 0 -> 32331 bytes moons/red.png | Bin 0 -> 37165 bytes moons/star.png | Bin 0 -> 27829 bytes moons/white.png | Bin 0 -> 29878 bytes moons/yellow.png | Bin 0 -> 32939 bytes schema.sql | 32 ++++++++++ scrape.rb | 181 +++++++++++++++++++++++++++++++++++++++++++++++-------- 20 files changed, 442 insertions(+), 32 deletions(-) create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 database.cpp create mode 100644 database.h create mode 100644 moons/README.md create mode 100644 moons/blue.png create mode 100644 moons/brown.png create mode 100644 moons/cyan.png create mode 100644 moons/green.png create mode 100644 moons/orange.png create mode 100644 moons/pink.png create mode 100644 moons/purple.png create mode 100644 moons/red.png create mode 100644 moons/star.png create mode 100644 moons/white.png create mode 100644 moons/yellow.png create mode 100644 schema.sql diff --git a/CMakeLists.txt b/CMakeLists.txt index 55a671b..00c0ad9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,12 +5,17 @@ add_subdirectory(vendor/libtwittercpp) find_package(PkgConfig) pkg_check_modules(yaml-cpp yaml-cpp REQUIRED) +pkg_check_modules(GraphicsMagick GraphicsMagick++ REQUIRED) +pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) include_directories( vendor/libtwittercpp/src - ${yaml-cpp_INCLUDE_DIRS}) + ${yaml-cpp_INCLUDE_DIRS} + ${GraphicsMagick_INCLUDE_DIRS}) -add_executable(lunatic lunatic.cpp) +link_directories(${GraphicsMagick_LIBRARY_DIRS}) + +add_executable(lunatic lunatic.cpp database.cpp) set_property(TARGET lunatic PROPERTY CXX_STANDARD 11) set_property(TARGET lunatic PROPERTY CXX_STANDARD_REQUIRED ON) -target_link_libraries(lunatic ${yaml-cpp_LIBRARIES} twitter++) \ No newline at end of file +target_link_libraries(lunatic ${yaml-cpp_LIBRARIES} ${GraphicsMagick_LIBRARIES} ${sqlite3_LIBRARIES} twitter++) \ No newline at end of file diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..57a02a2 --- /dev/null +++ b/Gemfile @@ -0,0 +1,5 @@ +source 'https://rubygems.org' + +gem 'sqlite3' +gem 'sequel' +gem 'nokogiri' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..58ec121 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,19 @@ +GEM + remote: https://rubygems.org/ + specs: + mini_portile2 (2.3.0) + nokogiri (1.8.2) + mini_portile2 (~> 2.3.0) + sequel (5.5.0) + sqlite3 (1.3.13) + +PLATFORMS + ruby + +DEPENDENCIES + nokogiri + sequel + sqlite3 + +BUNDLED WITH + 1.16.1 diff --git a/database.cpp b/database.cpp new file mode 100644 index 0000000..f8b5016 --- /dev/null +++ b/database.cpp @@ -0,0 +1,122 @@ +#include "database.h" +#include +#include + +database::database(std::string path) +{ + if (sqlite3_open_v2( + path.c_str(), + &ppdb_, + SQLITE_OPEN_READONLY, + NULL) != SQLITE_OK) + { + // We still have to free the resources allocated. In the event that + // allocation failed, ppdb will be null and sqlite3_close_v2 will just + // ignore it. + std::string errmsg(sqlite3_errmsg(ppdb_)); + sqlite3_close_v2(ppdb_); + + throw std::logic_error(errmsg); + } +} + +database::database(database&& other) : database() +{ + swap(*this, other); +} + +database& database::operator=(database&& other) +{ + swap(*this, other); + + return *this; +} + +void swap(database& first, database& second) +{ + std::swap(first.ppdb_, second.ppdb_); +} + +database::~database() +{ + sqlite3_close_v2(ppdb_); +} + +achievement database::getRandomAchievement() const +{ + std::string queryString = "SELECT achievements.achievement_id, achievements.game_id, achievements.title, games.moon_image FROM achievements INNER JOIN games ON games.game_id = achievements.game_id ORDER BY RANDOM() LIMIT 1"; + + sqlite3_stmt* ppstmt; + if (sqlite3_prepare_v2( + ppdb_, + queryString.c_str(), + queryString.length(), + &ppstmt, + NULL) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw std::logic_error(errorMsg); + } + + if (sqlite3_step(ppstmt) != SQLITE_ROW) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw std::logic_error(errorMsg); + } + + achievement result; + + result.achievementId = sqlite3_column_int(ppstmt, 0); + result.gameId = sqlite3_column_int(ppstmt, 1); + result.title = reinterpret_cast(sqlite3_column_text(ppstmt, 2)); + result.moonImage = reinterpret_cast(sqlite3_column_text(ppstmt, 3)); + + sqlite3_finalize(ppstmt); + + return result; +} + +std::string database::getRandomImageForGame(int gameId) const +{ + std::string queryString = "SELECT filename FROM images WHERE game_id = ? ORDER BY RANDOM() LIMIT 1"; + + sqlite3_stmt* ppstmt; + if (sqlite3_prepare_v2( + ppdb_, + queryString.c_str(), + queryString.length(), + &ppstmt, + NULL) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw std::logic_error(errorMsg); + } + + if (sqlite3_bind_int(ppstmt, 1, gameId) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw std::logic_error(errorMsg); + } + + if (sqlite3_step(ppstmt) != SQLITE_ROW) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw std::logic_error(errorMsg); + } + + std::string result = reinterpret_cast(sqlite3_column_text(ppstmt, 0)); + + sqlite3_finalize(ppstmt); + + return result; +} diff --git a/database.h b/database.h new file mode 100644 index 0000000..560eeda --- /dev/null +++ b/database.h @@ -0,0 +1,53 @@ +#ifndef DATABASE_H_75C3CE0F +#define DATABASE_H_75C3CE0F + +#include + +struct sqlite3; + +struct achievement { + int achievementId; + int gameId; + std::string title; + std::string moonImage; +}; + +class database { +public: + + // Constructor + + explicit database(std::string path); + + // Disable copying + + database(const database& other) = delete; + database& operator=(const database& other) = delete; + + // Move constructor and move assignment + + database(database&& other); + database& operator=(database&& other); + + // Swap + + friend void swap(database& first, database& second); + + // Destructor + + ~database(); + + // Accessors + + achievement getRandomAchievement() const; + + std::string getRandomImageForGame(int gameId) const; + +private: + + database() = default; + + sqlite3* ppdb_ = nullptr; +}; + +#endif /* end of include guard: DATABASE_H_75C3CE0F */ diff --git a/lunatic.cpp b/lunatic.cpp index 09dcc41..ca3140a 100644 --- a/lunatic.cpp +++ b/lunatic.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include "database.h" int main(int argc, char** argv) { @@ -14,6 +16,8 @@ int main(int argc, char** argv) return -1; } + Magick::InitializeMagick(nullptr); + std::string configfile(argv[1]); YAML::Node config = YAML::LoadFile(configfile); @@ -23,7 +27,9 @@ int main(int argc, char** argv) auth.setAccessKey(config["access_key"].as()); auth.setAccessSecret(config["access_secret"].as()); - twitter::client client(auth); + //twitter::client client(auth); + + database db(config["database"].as()); std::random_device randomDevice; std::mt19937 rng(randomDevice()); @@ -32,6 +38,40 @@ int main(int argc, char** argv) { std::cout << "Generating tweet" << std::endl; + achievement ach = db.getRandomAchievement(); + std::string imageName = db.getRandomImageForGame(ach.gameId); + std::string imagePath = config["images"].as() + + "/" + imageName; + + + try + { + Magick::Image image; + image.read(imagePath); + image.transform("1600x900"); + image.scale("160x90"); + image.scale("1600x900"); + image.magick("png"); + image.write("output.png"); + } catch (const Magick::WarningCoder& ex) + { + // Ok + } + + + + + + + + + + + + + + /* + // Reload achievements list every time in case it has been updated std::vector achievements; std::ifstream datafile(config["achievements"].as()); @@ -53,7 +93,7 @@ int main(int argc, char** argv) } std::uniform_int_distribution dist(0, achievements.size() - 1); - std::string achievement = achievements[dist(rng)]; + std::string achievement = achievements[dist(rng)];*/ std::string header; if (std::bernoulli_distribution(1.0 / 50.0)(rng)) @@ -63,16 +103,16 @@ int main(int argc, char** argv) header = "YOU GOT A MOON!"; } - std::string action = header + "\n" + achievement; + std::string action = header + "\n" + ach.title; action.resize(140); - try + /*try { client.updateStatus(action); } catch (const twitter::twitter_error& e) { std::cout << "Twitter error: " << e.what() << std::endl; - } + }*/ std::cout << action << std::endl; std::cout << "Waiting" << std::endl; diff --git a/moons/README.md b/moons/README.md new file mode 100644 index 0000000..876f0db --- /dev/null +++ b/moons/README.md @@ -0,0 +1 @@ +The images in this directory are originally from the game Super Mario Odyssey, which is copyrighted by Nintendo. \ No newline at end of file diff --git a/moons/blue.png b/moons/blue.png new file mode 100644 index 0000000..5e4ca02 Binary files /dev/null and b/moons/blue.png differ diff --git a/moons/brown.png b/moons/brown.png new file mode 100644 index 0000000..6a31b16 Binary files /dev/null and b/moons/brown.png differ diff --git a/moons/cyan.png b/moons/cyan.png new file mode 100644 index 0000000..8984126 Binary files /dev/null and b/moons/cyan.png differ diff --git a/moons/green.png b/moons/green.png new file mode 100644 index 0000000..5e7d311 Binary files /dev/null and b/moons/green.png differ diff --git a/moons/orange.png b/moons/orange.png new file mode 100644 index 0000000..06ca595 Binary files /dev/null and b/moons/orange.png differ diff --git a/moons/pink.png b/moons/pink.png new file mode 100644 index 0000000..3b1521b Binary files /dev/null and b/moons/pink.png differ diff --git a/moons/purple.png b/moons/purple.png new file mode 100644 index 0000000..9eeacf9 Binary files /dev/null and b/moons/purple.png differ diff --git a/moons/red.png b/moons/red.png new file mode 100644 index 0000000..2c0ef77 Binary files /dev/null and b/moons/red.png differ diff --git a/moons/star.png b/moons/star.png new file mode 100644 index 0000000..29263d5 Binary files /dev/null and b/moons/star.png differ diff --git a/moons/white.png b/moons/white.png new file mode 100644 index 0000000..fae666b Binary files /dev/null and b/moons/white.png differ diff --git a/moons/yellow.png b/moons/yellow.png new file mode 100644 index 0000000..8c012df Binary files /dev/null and b/moons/yellow.png differ diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..61fdc45 --- /dev/null +++ b/schema.sql @@ -0,0 +1,32 @@ +CREATE TABLE `profiles` ( + `profile_id` INTEGER PRIMARY KEY, + `profile_path` VARCHAR(255) NOT NULL +); + +CREATE UNIQUE INDEX `profile_by_path` ON `profiles`(`profile_path`); + +CREATE TABLE `games` ( + `game_id` INTEGER PRIMARY KEY, + `steam_appid` INTEGER NOT NULL, + `moon_image` VARCHAR(255) NOT NULL +); + +CREATE UNIQUE INDEX `game_by_appid` ON `games`(`steam_appid`); + +CREATE TABLE `achievements` ( + `achievement_id` INTEGER PRIMARY KEY, + `game_id` INTEGER NOT NULL, + `title` VARCHAR(255) NOT NULL +); + +CREATE TABLE `dids` ( + `profile_id` INTEGER NOT NULL, + `achievement_id` INTEGER NOT NULL, + `achieved_at` DATETIME NOT NULL +); + +CREATE TABLE `images` ( + `image_id` INTEGER PRIMARY KEY, + `game_id` INTEGER NOT NULL, + `filename` VARCHAR(255) NOT NULL +); diff --git a/scrape.rb b/scrape.rb index a28f4c5..6f3a8e4 100644 --- a/scrape.rb +++ b/scrape.rb @@ -1,42 +1,175 @@ require 'json' -require 'nokogiri' require 'open-uri' require 'yaml' -config = YAML.load(open(ARGV[0])) -usernames = config["usernames"] +require 'rubygems' +require 'bundler/setup' +Bundler.require :default -achieves = usernames.map do |username| - page = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/games/?tab=all")) +@config = YAML.load(open(ARGV[0])) +db_existed = File.exists?(@config["database"]) +db = Sequel.connect("sqlite://" + @config["database"]) + +if ARGV[1] == "init" + if db_existed + raise "Datafile already exists" + end + + schema = File.read("schema.sql") + + db.run schema + + puts "Initialized datafile" + + exit +end + +class Profile < Sequel::Model + many_to_many :achievements, join_table: :dids +end + +class Game < Sequel::Model + one_to_many :achievements + one_to_many :images +end + +class Achievement < Sequel::Model + many_to_one :game + many_to_many :profiles, join_table: :dids +end + +class Image < Sequel::Model + many_to_one :game +end + +class Did < Sequel::Model + many_to_one :profile + many_to_one :achievement +end + +@moonimgs = Dir.entries(@config["moon_images"]).select do |img| + img.end_with? ".png" +end + +def scrape_profile(profile, full) + if full + url = "https://steamcommunity.com/#{profile.profile_path}/games/?tab=all" + else + url = "https://steamcommunity.com/#{profile.profile_path}/games/" + end + + page = Nokogiri::HTML(open(url)) script = page.css(".responsive_page_template_content script").text[18..-1] data = JSON.parse(script[0..script.index(";\r\n\t\t")-1]) ids = data.map { |d| d["appid"] } index = 0 - ids.map do |id| + ids.each do |id| index += 1 - puts "#{username} - #{index}/#{ids.count}" + puts "#{profile.profile_path} - #{index}/#{ids.count}" - achsp = Nokogiri::HTML(open("https://steamcommunity.com/#{username}/stats/#{id}/")) - achsp.css(".achieveTxt .achieveUnlockTime + h3").map { |d| d.text } - end -end.flatten + achsp = Nokogiri::HTML( + open("https://steamcommunity.com/#{profile.profile_path}/stats/#{id}/")) -if File.exists?(config["achievements"]) - already = File.read(config["achievements"]).split("\n") - all_achieves = achieves + already -else - all_achieves = achieves -end + achsp.css(".achieveTxt").each do |node| + unless node.css(".achieveUnlockTime").empty? + if Game.where(steam_appid: id).count > 0 + game = Game.where(steam_appid: id).first + else + moon_index = Random.rand(@moonimgs.size) + + game = Game.new(steam_appid: id, moon_image: @moonimgs[moon_index]) + game.save + + storepage = Nokogiri::HTML( + open("http://store.steampowered.com/app/#{id}")) + + img_id = 0 + storepage.css(".highlight_screenshot_link").each do |node| + begin + imagepage = open(node["href"]).read + + img_id += 1 + img_filename = "#{id}-#{img_id}.jpg" + img_filepath = File.join(@config["images"], img_filename) + + img_file = File.open(img_filepath, "w") + img_file.write(imagepage) + img_file.close + + image = Image.new(game: game, filename: img_filename) + image.save + rescue OpenURI::HTTPError + puts "Error downloading an image" + end + + sleep 2 + end + end + + title = node.at_css("h3").text -all_achieves.sort! -all_achieves.uniq! + if game.achievements_dataset.where(title: title).count > 0 + achievement = game.achievements_dataset.where(title: title).first + else + achievement = Achievement.new(game: game, title: title) + achievement.save + end -if config.key? "blacklist" - blacklist = File.read(config["blacklist"]).split("\n") - all_achieves.reject! { |l| blacklist.include? l } + unless Did.where(profile: profile, achievement: achievement).count > 0 + begin + unlock = DateTime.strptime( + node.css(".achieveUnlockTime").text.lstrip[9..-1], + "%b %d, %Y @ %l:%M%P") + rescue ArgumentError + unlock = DateTime.strptime( + node.css(".achieveUnlockTime").text.lstrip[9..-1], + "%b %d @ %l:%M%P") + end + + join = Did.new( + profile: profile, + achievement: achievement, + achieved_at: unlock) + join.save + end + end + end + end end -File.open(config["achievements"], "w") do |f| - f << all_achieves.join("\n") +if ARGV[1] == "add" + userpath = ARGV[2] + + if Profile.where(profile_path: userpath).count > 0 + raise "Profile " + userpath + " already exists" + end + + profile = Profile.new(profile_path: userpath) + profile.save + + scrape_profile profile, true +elsif ARGV[1] == "update" + if ARGV.size == 3 + scrape_profile Profile.where(profile_path: ARGV[2]).first, false + else + Profile.all.each do |profile| + scrape_profile profile, false + end + end +elsif ARGV[1] == "full" + if ARGV.size == 3 + scrape_profile Profile.where(profile_path: ARGV[2]).first, true + else + Profile.all.each do |profile| + scrape_profile profile, true + end + end +elsif ARGV[1] == "recolor" + Game.all.each do |game| + moon_index = Random.rand(@moonimgs.size) + + game.moon_image = @moonimgs[moon_index] + game.save + end end -- cgit 1.4.1