From be86554f2325427bb9421aa7274d135becab443c Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Fri, 9 Dec 2022 18:32:34 -0500 Subject: Added green hints --- imagenet.cpp | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 imagenet.cpp (limited to 'imagenet.cpp') diff --git a/imagenet.cpp b/imagenet.cpp new file mode 100644 index 0000000..3a107bd --- /dev/null +++ b/imagenet.cpp @@ -0,0 +1,89 @@ +#include "imagenet.h" +#include +#include +#include +#include +#include +#include + +imagenet::imagenet(std::string path) : path_(path) {} + +std::tuple imagenet::getImageForNotion(int notion_id, std::mt19937& rng) const +{ + std::filesystem::path filename = path_ / std::to_string(notion_id); + if (!std::filesystem::exists(filename)) + { + throw std::invalid_argument(std::string("File does not exist: ") + std::string(filename)); + } + + std::ifstream file(filename); + std::string line; + std::vector urls; + while (std::getline(file, line)) + { + if (!line.empty()) + { + urls.push_back(line); + } + } + + std::string output; + std::string extension; + while (!urls.empty()) + { + int index = std::uniform_int_distribution(0, urls.size()-1)(rng); + std::string url = urls.at(index); + urls.erase(std::begin(urls) + index); + + // willyfogg.com is a thumbnail generator known to return 200 even if the target image no longer exists + if (url.find("willyfogg.com/thumb.php") != std::string::npos) + { + continue; + } + + // Accept string from Google Chrome + std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; + curl::curl_header headers; + headers.add(accept); + + std::ostringstream imgbuf; + curl::curl_ios imgios(imgbuf); + curl::curl_easy imghandle(imgios); + + imghandle.add(headers.get()); + imghandle.add(url.c_str()); + imghandle.add(30); + imghandle.add(300); + + try + { + imghandle.perform(); + } catch (const curl::curl_easy_exception& error) { + error.print_traceback(); + + continue; + } + + if (imghandle.get_info().get() != 200) + { + continue; + } + + std::string content_type = imghandle.get_info().get(); + if (content_type.substr(0, 6) != "image/") + { + continue; + } + + output = imgbuf.str(); + extension = url.substr(url.rfind(".") + 1); + break; + } + + if (output.empty()) + { + throw std::invalid_argument(std::string("No valid urls found for ") + std::string(filename)); + } + + return {output, extension}; +} -- cgit 1.4.1