From 46b7119fab32b05923dbf9cda7cd6ce62f572846 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 9 Jan 2017 14:01:51 -0500 Subject: Initial commit --- generator/generator.cpp | 180 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 generator/generator.cpp (limited to 'generator/generator.cpp') diff --git a/generator/generator.cpp b/generator/generator.cpp new file mode 100644 index 0000000..eb2750c --- /dev/null +++ b/generator/generator.cpp @@ -0,0 +1,180 @@ +#include "generator.h" +#include +#include +#include +#include +#include +#include "progress.h" +#include "field.h" +#include "../util.h" +#include "mood.h" + +namespace cadence { + namespace generator { + + generator::generator( + std::string inputpath, + std::string outputpath) : + inputpath_(inputpath), + db_(outputpath) + { + // Add directory separator to input path + if ((inputpath_.back() != '/') && (inputpath_.back() != '\\')) + { + inputpath_ += '/'; + } + + inputpath_ += "highlevel/"; + } + + void generator::run() + { + // Creates the datafile. + writeSchema(); + + // Scans the AcousticBrainz data dump and generates a list of all of the + // files in the dump. + scanDirectories(); + + // Parses each data file and enters it into the database. + parseData(); + } + + void generator::writeSchema() + { + std::ifstream file("schema.sql"); + if (!file) + { + throw std::invalid_argument("Could not find database schema"); + } + + std::ostringstream schemaBuilder; + std::string line; + while (std::getline(file, line)) + { + if (line.back() == '\r') + { + line.pop_back(); + } + + schemaBuilder << line; + } + + std::string schema = schemaBuilder.str(); + auto queries = split>(schema, ";"); + progress ppgs("Writing database schema...", queries.size()); + for (std::string query : queries) + { + if (!queries.empty()) + { + db_.runQuery(query); + } + + ppgs.update(); + } + } + + void generator::scanDirectories() + { + std::cout << "Scanning AcousticBrainz dump..." << std::endl; + + DIR* topdir; + if ((topdir = opendir(inputpath_.c_str())) == nullptr) + { + throw std::invalid_argument("Invalid AcousticBrainz data directory"); + } + + struct dirent* topent; + while ((topent = readdir(topdir)) != nullptr) + { + if (topent->d_name[0] != '.') + { + std::string directory = inputpath_ + topent->d_name + "/"; + + DIR* subdir; + if ((subdir = opendir(directory.c_str())) == nullptr) + { + throw std::invalid_argument("Invalid AcousticBrainz data directory"); + } + + struct dirent* subent; + while ((subent = readdir(subdir)) != nullptr) + { + if (subent->d_name[0] != '.') + { + std::string subdirectory = directory + subent->d_name + "/"; + + DIR* subsubdir; + if ((subsubdir = opendir(subdirectory.c_str())) == nullptr) + { + throw std::invalid_argument("Invalid AcousticBrainz data directory"); + } + + struct dirent* subsubent; + while ((subsubent = readdir(subsubdir)) != nullptr) + { + if (subsubent->d_name[0] != '.') + { + std::string datafile = subdirectory + subsubent->d_name; + + datafiles_.push_back(datafile); + } + } + + closedir(subsubdir); + } + } + + closedir(subdir); + } + } + + closedir(topdir); + } + + void generator::parseData() + { + progress ppgs("Parsing AcousticBrainz data files...", datafiles_.size()); + + for (std::string datafile : datafiles_) + { + ppgs.update(); + + nlohmann::json jsonData; + { + std::ifstream dataStream(datafile); + dataStream >> jsonData; + } + + try + { + std::vector moods; + moods.emplace_back(mood::type::danceable, jsonData["highlevel"]["danceability"]["all"]["danceable"]); + moods.emplace_back(mood::type::acoustic, jsonData["highlevel"]["mood_acoustic"]["all"]["acoustic"]); + moods.emplace_back(mood::type::aggressive, jsonData["highlevel"]["mood_aggressive"]["all"]["aggressive"]); + moods.emplace_back(mood::type::electronic, jsonData["highlevel"]["mood_electronic"]["all"]["electronic"]); + moods.emplace_back(mood::type::happy, jsonData["highlevel"]["mood_happy"]["all"]["happy"]); + moods.emplace_back(mood::type::party, jsonData["highlevel"]["mood_party"]["all"]["party"]); + moods.emplace_back(mood::type::relaxed, jsonData["highlevel"]["mood_relaxed"]["all"]["relaxed"]); + moods.emplace_back(mood::type::sad, jsonData["highlevel"]["mood_sad"]["all"]["sad"]); + moods.emplace_back(mood::type::instrumental, jsonData["highlevel"]["voice_instrumental"]["all"]["instrumental"]); + + std::sort(std::begin(moods), std::end(moods), [] (const mood& left, const mood& right) -> bool { + return left.getProbability() > right.getProbability(); + }); + + std::list fields; + fields.emplace_back("title", jsonData["metadata"]["tags"]["title"][0].get()); + fields.emplace_back("artist", jsonData["metadata"]["tags"]["artist"][0].get()); + fields.emplace_back("category", moods.front().getCategory()); + + db_.insertIntoTable("songs", std::move(fields)); + } catch (const std::domain_error& ex) + { + // Weird data. Ignore silently. + } + } + } + + }; +}; -- cgit 1.4.1