diff options
Diffstat (limited to 'imagenet.cpp')
| -rw-r--r-- | imagenet.cpp | 93 |
1 files changed, 93 insertions, 0 deletions
| diff --git a/imagenet.cpp b/imagenet.cpp new file mode 100644 index 0000000..d5aeada --- /dev/null +++ b/imagenet.cpp | |||
| @@ -0,0 +1,93 @@ | |||
#include "imagenet.h"

#include <cstddef>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>

#include <curl_easy.h>
#include <curl_header.h>
| 8 | |||
| 9 | imagenet::imagenet(std::string path) : path_(path) {} | ||
| 10 | |||
| 11 | std::tuple<std::string, std::string> imagenet::getImageForNotion(int notion_id, std::mt19937& rng) const | ||
| 12 | { | ||
| 13 | auto result = getImagesForNotion(notion_id, rng, 1); | ||
| 14 | return result[0]; | ||
| 15 | } | ||
| 16 | |||
| 17 | std::vector<std::tuple<std::string, std::string>> imagenet::getImagesForNotion(int notion_id, std::mt19937& rng, int num) const | ||
| 18 | { | ||
| 19 | std::filesystem::path filename = path_ / std::to_string(notion_id); | ||
| 20 | if (!std::filesystem::exists(filename)) | ||
| 21 | { | ||
| 22 | throw std::invalid_argument(std::string("File does not exist: ") + std::string(filename)); | ||
| 23 | } | ||
| 24 | |||
| 25 | std::ifstream file(filename); | ||
| 26 | std::string line; | ||
| 27 | std::vector<std::string> urls; | ||
| 28 | while (std::getline(file, line)) | ||
| 29 | { | ||
| 30 | if (!line.empty()) | ||
| 31 | { | ||
| 32 | urls.push_back(line); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | // output, extension | ||
| 37 | std::vector<std::tuple<std::string, std::string>> results; | ||
| 38 | while (!urls.empty() && results.size() < num) | ||
| 39 | { | ||
| 40 | int index = std::uniform_int_distribution<int>(0, urls.size()-1)(rng); | ||
| 41 | std::string url = urls.at(index); | ||
| 42 | urls.erase(std::begin(urls) + index); | ||
| 43 | |||
| 44 | // willyfogg.com is a thumbnail generator known to return 200 even if the target image no longer exists | ||
| 45 | if (url.find("willyfogg.com/thumb.php") != std::string::npos) | ||
| 46 | { | ||
| 47 | continue; | ||
| 48 | } | ||
| 49 | |||
| 50 | // Accept string from Google Chrome | ||
| 51 | std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; | ||
| 52 | curl::curl_header headers; | ||
| 53 | headers.add(accept); | ||
| 54 | |||
| 55 | std::ostringstream imgbuf; | ||
| 56 | curl::curl_ios<std::ostringstream> imgios(imgbuf); | ||
| 57 | curl::curl_easy imghandle(imgios); | ||
| 58 | |||
| 59 | imghandle.add<CURLOPT_HTTPHEADER>(headers.get()); | ||
| 60 | imghandle.add<CURLOPT_URL>(url.c_str()); | ||
| 61 | imghandle.add<CURLOPT_CONNECTTIMEOUT>(30); | ||
| 62 | imghandle.add<CURLOPT_TIMEOUT>(300); | ||
| 63 | |||
| 64 | try | ||
| 65 | { | ||
| 66 | imghandle.perform(); | ||
| 67 | } catch (const curl::curl_easy_exception& error) { | ||
| 68 | error.print_traceback(); | ||
| 69 | |||
| 70 | continue; | ||
| 71 | } | ||
| 72 | |||
| 73 | if (imghandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200) | ||
| 74 | { | ||
| 75 | continue; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string content_type = imghandle.get_info<CURLINFO_CONTENT_TYPE>().get(); | ||
| 79 | if (content_type.substr(0, 6) != "image/") | ||
| 80 | { | ||
| 81 | continue; | ||
| 82 | } | ||
| 83 | |||
| 84 | results.emplace_back(imgbuf.str(), url.substr(url.rfind(".") + 1)); | ||
| 85 | } | ||
| 86 | |||
| 87 | if (results.size() < num) | ||
| 88 | { | ||
| 89 | throw std::invalid_argument(std::string("Not enough valid urls found for ") + std::string(filename)); | ||
| 90 | } | ||
| 91 | |||
| 92 | return results; | ||
| 93 | } | ||
