diff options
Diffstat (limited to 'wizard.cpp')
-rw-r--r-- | wizard.cpp | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/wizard.cpp b/wizard.cpp new file mode 100644 index 0000000..ef4941f --- /dev/null +++ b/wizard.cpp | |||
@@ -0,0 +1,561 @@ | |||
1 | |||
2 | |||
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <memory>
#include <random>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <json.hpp>
#include <hkutil/string.h>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
#include <curl_easy.h>
#include <Magick++.h>
#include "prefix_search.h"
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
// Returns a copy of the input with every whitespace character removed.
//
// The argument is taken by value so the caller's string is never modified;
// the erase-remove pass runs on the local copy, which is then returned.
std::string stripSpaces(std::string in)
{
  in.erase(
    std::remove_if(
      std::begin(in),
      std::end(in),
      // std::isspace has undefined behaviour for negative arguments other
      // than EOF, so each char is converted to unsigned char first. This
      // matters for any byte >= 0x80 (e.g. UTF-8 sequences) on platforms
      // where plain char is signed.
      [] (unsigned char ch) {
        return std::isspace(ch) != 0;
      }),
    std::end(in));

  return in;
}
35 | |||
36 | |||
37 | |||
38 | |||
39 | |||
40 | using ps_type = prefix_search<std::tuple<size_t, size_t>>; | ||
41 | |||
// The card frame designs this program knows how to crop a title from.
enum class card_frame {
  // Chosen when the card data's "frame" field is "2015".
  m2015,

  // Chosen when the card data's "frame" field is "2003".
  modern
};
46 | |||
47 | struct card { | ||
48 | size_t id; | ||
49 | std::string name; | ||
50 | std::string imageUri; | ||
51 | card_frame frame; | ||
52 | |||
53 | card( | ||
54 | size_t id, | ||
55 | std::string name, | ||
56 | std::string imageUri, | ||
57 | card_frame frame) : | ||
58 | id(id), | ||
59 | name(std::move(name)), | ||
60 | imageUri(std::move(imageUri)), | ||
61 | frame(frame) | ||
62 | { | ||
63 | } | ||
64 | }; | ||
65 | |||
// A slice of one card's title that spells out part of the target text.
struct usage {
  // Index of the card the slice is taken from.
  size_t cardId;

  // Offset of the slice's first character within the card title.
  size_t strIndex;

  // Number of characters in the slice.
  size_t strLen;

  usage(size_t card, size_t index, size_t length) :
    cardId{card},
    strIndex{index},
    strLen{length}
  {
  }
};
81 | |||
82 | struct solution { | ||
83 | const ps_type& prefix; | ||
84 | std::vector<size_t> lengths; | ||
85 | size_t score; | ||
86 | }; | ||
87 | |||
88 | class designer { | ||
89 | public: | ||
90 | |||
91 | designer( | ||
92 | std::string text, | ||
93 | const ps_type& titles) : | ||
94 | text_(std::move(text)), | ||
95 | titles_(titles), | ||
96 | solutions_(text_.length() + 1) | ||
97 | { | ||
98 | } | ||
99 | |||
100 | std::list<usage> generate(std::mt19937& rng) const; | ||
101 | |||
102 | |||
103 | private: | ||
104 | |||
105 | const solution& get(size_t i) const; | ||
106 | |||
107 | solution calculate(size_t i) const; | ||
108 | |||
109 | const std::string text_; | ||
110 | const ps_type& titles_; | ||
111 | |||
112 | mutable std::vector<std::unique_ptr<solution>> solutions_; | ||
113 | }; | ||
114 | |||
115 | |||
116 | |||
117 | |||
118 | |||
119 | |||
120 | |||
121 | |||
122 | |||
123 | std::list<usage> designer::generate(std::mt19937& rng) const | ||
124 | { | ||
125 | std::list<usage> result; | ||
126 | size_t cur = 0; | ||
127 | |||
128 | while (cur < text_.length()) | ||
129 | { | ||
130 | const solution& curSol = get(cur); | ||
131 | const std::vector<size_t>& posLens = curSol.lengths; | ||
132 | |||
133 | std::uniform_int_distribution<size_t> lenDist(0, posLens.size() - 1); | ||
134 | size_t len = posLens.at(lenDist(rng)); | ||
135 | |||
136 | const ps_type& prefix = curSol.prefix; | ||
137 | std::uniform_int_distribution<size_t> cardDist(0, prefix.getCount() - 1); | ||
138 | size_t cardIndex = cardDist(rng); | ||
139 | std::tuple<size_t, size_t> pd = prefix.at(cardIndex); | ||
140 | |||
141 | result.emplace_back(std::get<0>(pd), std::get<1>(pd), len); | ||
142 | |||
143 | cur += len; | ||
144 | } | ||
145 | |||
146 | return result; | ||
147 | } | ||
148 | |||
149 | |||
150 | |||
151 | |||
152 | solution designer::calculate(size_t i) const | ||
153 | { | ||
154 | if (i == text_.length()) | ||
155 | { | ||
156 | return { | ||
157 | titles_, | ||
158 | {}, | ||
159 | 0 | ||
160 | }; | ||
161 | } | ||
162 | |||
163 | const ps_type& prefix = titles_.find(text_, i); | ||
164 | |||
165 | bool foundScore = false; | ||
166 | size_t bestScore; | ||
167 | std::vector<size_t> bestLens; | ||
168 | |||
169 | for (int j = 1; | ||
170 | (j <= prefix.getDepth()) && (i + j <= text_.length()); | ||
171 | j++) | ||
172 | { | ||
173 | const solution& subSol = get(i + j); | ||
174 | |||
175 | if (subSol.score > 0 || (i + j == text_.length())) | ||
176 | { | ||
177 | size_t tempScore = subSol.score + 1; | ||
178 | |||
179 | if (!foundScore || tempScore < bestScore) | ||
180 | { | ||
181 | foundScore = true; | ||
182 | bestScore = tempScore; | ||
183 | |||
184 | bestLens.clear(); | ||
185 | bestLens.push_back(j); | ||
186 | } else if (tempScore == bestScore) | ||
187 | { | ||
188 | bestLens.push_back(j); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | |||
193 | if (!foundScore) | ||
194 | { | ||
195 | return { | ||
196 | titles_, | ||
197 | {}, | ||
198 | 0 | ||
199 | }; | ||
200 | } else { | ||
201 | return { | ||
202 | prefix, | ||
203 | std::move(bestLens), | ||
204 | bestScore | ||
205 | }; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | const solution& designer::get(size_t i) const | ||
210 | { | ||
211 | if (!solutions_.at(i)) | ||
212 | { | ||
213 | solutions_[i] = std::make_unique<solution>(calculate(i)); | ||
214 | } | ||
215 | |||
216 | return *solutions_.at(i); | ||
217 | } | ||
218 | |||
219 | |||
220 | |||
221 | |||
222 | Magick::Image downloadImage(const std::string& url) | ||
223 | { | ||
224 | std::ostringstream imgbuf; | ||
225 | curl::curl_ios<std::ostringstream> imgios(imgbuf); | ||
226 | curl::curl_easy imghandle(imgios); | ||
227 | |||
228 | imghandle.add<CURLOPT_URL>(url.c_str()); | ||
229 | imghandle.add<CURLOPT_CONNECTTIMEOUT>(30); | ||
230 | imghandle.add<CURLOPT_TIMEOUT>(300); | ||
231 | |||
232 | imghandle.perform(); | ||
233 | |||
234 | if (imghandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200) | ||
235 | { | ||
236 | throw std::runtime_error("Could not download image"); | ||
237 | } | ||
238 | |||
239 | std::string content_type = imghandle.get_info<CURLINFO_CONTENT_TYPE>().get(); | ||
240 | if (content_type.substr(0, 6) != "image/") | ||
241 | { | ||
242 | throw std::runtime_error("Could not download image"); | ||
243 | } | ||
244 | |||
245 | std::string imgstr = imgbuf.str(); | ||
246 | Magick::Blob img(imgstr.c_str(), imgstr.length()); | ||
247 | |||
248 | Magick::Image pic; | ||
249 | |||
250 | try | ||
251 | { | ||
252 | pic.read(img); | ||
253 | } catch (const Magick::ErrorOption& e) | ||
254 | { | ||
255 | // Occurs when the the data downloaded from the server is malformed | ||
256 | std::cout << "Magick: " << e.what() << std::endl; | ||
257 | |||
258 | throw std::runtime_error("Could not download image"); | ||
259 | } | ||
260 | |||
261 | return pic; | ||
262 | } | ||
263 | |||
264 | |||
265 | |||
266 | |||
267 | |||
268 | |||
269 | |||
270 | int main(int argc, char** argv) | ||
271 | { | ||
272 | Magick::InitializeMagick(nullptr); | ||
273 | |||
274 | std::random_device randomDevice; | ||
275 | std::mt19937 rng(randomDevice()); | ||
276 | |||
277 | std::cout << "Compiling prefix search..." << std::endl; | ||
278 | |||
279 | std::vector<card> cards; | ||
280 | ps_type titles; | ||
281 | |||
282 | std::set<char> chars; | ||
283 | |||
284 | { | ||
285 | std::ifstream in( | ||
286 | "/Users/hatkirby/Downloads/scryfall-default-cards.json", | ||
287 | std::ios::in | std::ios::binary); | ||
288 | std::ostringstream contents; | ||
289 | contents << in.rdbuf(); | ||
290 | |||
291 | |||
292 | nlohmann::json cardsJson = nlohmann::json::parse(contents.str()); | ||
293 | |||
294 | |||
295 | |||
296 | for (const auto& cardJson : cardsJson) | ||
297 | { | ||
298 | if ( | ||
299 | // The object needs to be a card | ||
300 | cardJson["object"] == "card" && | ||
301 | // It needs to have a downloadable image | ||
302 | cardJson.count("image_uris") && | ||
303 | // Make sure we can support the card layout | ||
304 | ( | ||
305 | cardJson["layout"] == "normal" || | ||
306 | cardJson["layout"] == "leveler" || | ||
307 | cardJson["layout"] == "saga" | ||
308 | ) && | ||
309 | // Digital cards look slightly different so ignore them | ||
310 | !cardJson["digital"] && | ||
311 | // Only use english printings | ||
312 | cardJson["lang"] == "en" && | ||
313 | // Currently not supporting silver bordered cards | ||
314 | cardJson["border_color"] != "silver" && | ||
315 | // It is hard to read the name of a planeswalker | ||
316 | cardJson["type_line"].get<std::string>() | ||
317 | .find("Planeswalker") == std::string::npos && | ||
318 | // This cuts out checklists and special tokens | ||
319 | cardJson["type_line"] != "Card" && | ||
320 | // Amonkhet invocations are impossible | ||
321 | cardJson["set"] != "mp2") | ||
322 | { | ||
323 | card_frame frame; | ||
324 | |||
325 | if (cardJson["frame"] == "2015") | ||
326 | { | ||
327 | frame = card_frame::m2015; | ||
328 | } else if (cardJson["frame"] == "2003") | ||
329 | { | ||
330 | frame = card_frame::modern; | ||
331 | } else { | ||
332 | continue; | ||
333 | } | ||
334 | |||
335 | size_t cardId = cards.size(); | ||
336 | cards.emplace_back( | ||
337 | cardId, | ||
338 | cardJson["name"], | ||
339 | cardJson["image_uris"]["png"], | ||
340 | frame); | ||
341 | |||
342 | std::string canon = hatkirby::lowercase(cardJson["name"]); | ||
343 | |||
344 | for (int i = 0; i < canon.length(); i++) | ||
345 | { | ||
346 | titles.add(canon, {cardId, i}, i); | ||
347 | |||
348 | chars.insert(canon.at(i)); | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | } | ||
353 | |||
354 | std::cout << "Characters: "; | ||
355 | for (char ch : chars) | ||
356 | { | ||
357 | std::cout << ch; | ||
358 | } | ||
359 | std::cout << std::endl; | ||
360 | |||
361 | std::cout << "Calculating card list..." << std::endl; | ||
362 | |||
363 | std::string text = "is it pretentious that i'm sending someone over 100 common and uncommon magic cards in an iphone six box"; | ||
364 | std::string canonText = hatkirby::lowercase(text); | ||
365 | designer des(canonText, titles); | ||
366 | std::list<usage> res = des.generate(rng); | ||
367 | |||
368 | Magick::Image endImage; | ||
369 | bool firstSlice = false; | ||
370 | |||
371 | for (const usage& u : res) | ||
372 | { | ||
373 | const card& theCard = cards.at(u.cardId); | ||
374 | const std::string& cardName = theCard.name; | ||
375 | |||
376 | std::cout << cardName.substr(0, u.strIndex) | ||
377 | << "[" << cardName.substr(u.strIndex, u.strLen) | ||
378 | << "]" << cardName.substr(u.strIndex + u.strLen) | ||
379 | << std::endl; | ||
380 | |||
381 | std::cout << "Downloading image..." << std::endl; | ||
382 | |||
383 | Magick::Image cardImg = downloadImage(theCard.imageUri); | ||
384 | |||
385 | std::cout << "Reading text..." << std::endl; | ||
386 | |||
387 | Magick::Image titleImg = cardImg; | ||
388 | titleImg.magick("TIFF"); | ||
389 | |||
390 | //titleImg.threshold(MaxRGB / 2); | ||
391 | titleImg.write("pre.tif"); | ||
392 | |||
393 | |||
394 | Magick::Geometry margin; | ||
395 | |||
396 | if (theCard.frame == card_frame::m2015) | ||
397 | { | ||
398 | margin = Magick::Geometry { 595, 46, 57, 54 }; | ||
399 | } else if (theCard.frame == card_frame::modern) | ||
400 | { | ||
401 | margin = Magick::Geometry { 581, 50, 63, 57 }; | ||
402 | } | ||
403 | |||
404 | titleImg.crop(margin); | ||
405 | titleImg.zoom({ margin.width() * 5, margin.height() * 5 }); | ||
406 | |||
407 | //titleImg.quantizeColorSpace(Magick::GRAYColorspace); | ||
408 | //titleImg.quantizeColors(2); | ||
409 | //titleImg.quantize(); | ||
410 | titleImg.backgroundColor("white"); | ||
411 | titleImg.matte(false); | ||
412 | titleImg.resolutionUnits(Magick::PixelsPerInchResolution); | ||
413 | titleImg.density({ 300, 300 }); | ||
414 | titleImg.type(Magick::GrayscaleType); | ||
415 | |||
416 | titleImg.write("title.tif"); | ||
417 | |||
418 | Magick::Blob titleBlob; | ||
419 | titleImg.write(&titleBlob); | ||
420 | |||
421 | Pix* titlePix = pixReadMemTiff( | ||
422 | reinterpret_cast<const unsigned char*>(titleBlob.data()), | ||
423 | titleBlob.length(), | ||
424 | 0); | ||
425 | |||
426 | tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); | ||
427 | |||
428 | if (api->Init(nullptr, "eng")) | ||
429 | { | ||
430 | throw std::runtime_error("Could not initialize tesseract"); | ||
431 | } | ||
432 | |||
433 | api->SetImage(titlePix); | ||
434 | api->Recognize(nullptr); | ||
435 | |||
436 | tesseract::ResultIterator* ri = api->GetIterator(); | ||
437 | tesseract::PageIteratorLevel level = tesseract::RIL_TEXTLINE; | ||
438 | bool foundName = false; | ||
439 | |||
440 | if (ri) | ||
441 | { | ||
442 | do | ||
443 | { | ||
444 | const char* line = ri->GetUTF8Text(level); | ||
445 | |||
446 | if (line) | ||
447 | { | ||
448 | std::string lineStr(line); | ||
449 | |||
450 | //if (stripSpaces(hatkirby::lowercase(lineStr)).find(stripSpaces(hatkirby::lowercase((cardName)))) == 0) | ||
451 | { | ||
452 | foundName = true; | ||
453 | |||
454 | break; | ||
455 | }/* else { | ||
456 | std::cout << "WRONG: " << lineStr << std::endl; | ||
457 | }*/ | ||
458 | } | ||
459 | } while (ri->Next(level)); | ||
460 | } | ||
461 | |||
462 | if (foundName) | ||
463 | { | ||
464 | level = tesseract::RIL_SYMBOL; | ||
465 | |||
466 | std::vector<std::tuple<unsigned int, unsigned int>> characters; | ||
467 | size_t cur = 0; | ||
468 | |||
469 | do | ||
470 | { | ||
471 | int x1, y1, x2, y2; | ||
472 | ri->BoundingBox(level, &x1, &y1, &x2, &y2); | ||
473 | |||
474 | x1 /= 5; | ||
475 | x2 /= 5; | ||
476 | |||
477 | if (cardName.at(cur) == ' ') | ||
478 | { | ||
479 | if (cur == 0) | ||
480 | { | ||
481 | characters.emplace_back(0, x1); | ||
482 | } else { | ||
483 | const auto& prev = characters.back(); | ||
484 | characters.emplace_back(std::get<1>(characters.back()), x1); | ||
485 | } | ||
486 | |||
487 | cur++; | ||
488 | } | ||
489 | |||
490 | characters.emplace_back(x1, x2); | ||
491 | |||
492 | cur++; | ||
493 | } while (ri->Next(level) && (cur < cardName.length())); | ||
494 | |||
495 | if (cur != cardName.length()) | ||
496 | { | ||
497 | throw std::runtime_error("Error detecting character bounds"); | ||
498 | } | ||
499 | |||
500 | cardImg.crop({ | ||
501 | std::get<1>(characters[u.strIndex + u.strLen - 1]) | ||
502 | - std::get<0>(characters[u.strIndex]), | ||
503 | cardImg.rows(), | ||
504 | margin.xOff() + std::get<0>(characters[u.strIndex]), | ||
505 | 0 | ||
506 | }); | ||
507 | |||
508 | cardImg.magick("PNG"); | ||
509 | cardImg.write("slice.png"); | ||
510 | } else { | ||
511 | std::cout << "Didn't find name" << std::endl; | ||
512 | } | ||
513 | |||
514 | |||
515 | |||
516 | |||
517 | |||
518 | |||
519 | |||
520 | |||
521 | |||
522 | |||
523 | api->End(); | ||
524 | pixDestroy(&titlePix); | ||
525 | |||
526 | if (!firstSlice) | ||
527 | { | ||
528 | firstSlice = true; | ||
529 | endImage = cardImg; | ||
530 | } else { | ||
531 | |||
532 | int xoff = endImage.columns(); | ||
533 | |||
534 | endImage.backgroundColor("black"); | ||
535 | |||
536 | endImage.extent( | ||
537 | {endImage.columns() + cardImg.columns(), cardImg.rows()}, | ||
538 | Magick::WestGravity); | ||
539 | |||
540 | endImage.composite( | ||
541 | cardImg, | ||
542 | xoff, | ||
543 | (theCard.frame == card_frame::m2015) ? 6 : 0); | ||
544 | } | ||
545 | |||
546 | |||
547 | //break; | ||
548 | } | ||
549 | |||
550 | |||
551 | endImage.magick("PNG"); | ||
552 | endImage.write("output.png"); | ||
553 | |||
554 | |||
555 | |||
556 | |||
557 | |||
558 | |||
559 | } | ||
560 | |||
561 | |||