From a76e81ee8bcbad87c9eb58cdb25452cae65bd0c7 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Fri, 23 Dec 2022 11:09:58 +0000 Subject: Bot is now a Mastodon bot --- imagenet.cpp | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 imagenet.cpp (limited to 'imagenet.cpp') diff --git a/imagenet.cpp b/imagenet.cpp new file mode 100644 index 0000000..d5aeada --- /dev/null +++ b/imagenet.cpp @@ -0,0 +1,93 @@ +#include "imagenet.h" +#include +#include +#include +#include +#include +#include + +imagenet::imagenet(std::string path) : path_(path) {} + +std::tuple imagenet::getImageForNotion(int notion_id, std::mt19937& rng) const +{ + auto result = getImagesForNotion(notion_id, rng, 1); + return result[0]; +} + +std::vector> imagenet::getImagesForNotion(int notion_id, std::mt19937& rng, int num) const +{ + std::filesystem::path filename = path_ / std::to_string(notion_id); + if (!std::filesystem::exists(filename)) + { + throw std::invalid_argument(std::string("File does not exist: ") + std::string(filename)); + } + + std::ifstream file(filename); + std::string line; + std::vector urls; + while (std::getline(file, line)) + { + if (!line.empty()) + { + urls.push_back(line); + } + } + + // output, extension + std::vector> results; + while (!urls.empty() && results.size() < num) + { + int index = std::uniform_int_distribution(0, urls.size()-1)(rng); + std::string url = urls.at(index); + urls.erase(std::begin(urls) + index); + + // willyfogg.com is a thumbnail generator known to return 200 even if the target image no longer exists + if (url.find("willyfogg.com/thumb.php") != std::string::npos) + { + continue; + } + + // Accept string from Google Chrome + std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; + curl::curl_header headers; + headers.add(accept); + + std::ostringstream imgbuf; + curl::curl_ios imgios(imgbuf); + curl::curl_easy imghandle(imgios); + + imghandle.add(headers.get()); + imghandle.add(url.c_str()); + imghandle.add(30); + imghandle.add(300); + + try + { + imghandle.perform(); + } catch (const curl::curl_easy_exception& error) { + error.print_traceback(); + + continue; + } + + if (imghandle.get_info().get() != 200) + { + continue; + } + + std::string content_type = imghandle.get_info().get(); + if (content_type.substr(0, 6) != "image/") + { + continue; + } + + results.emplace_back(imgbuf.str(), url.substr(url.rfind(".") + 1)); + } + + if (results.size() < num) + { + throw std::invalid_argument(std::string("Not enough valid urls found for ") + std::string(filename)); + } + + return results; +} -- cgit 1.4.1