about summary refs log tree commit diff stats
path: root/generator/generator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r--generator/generator.cpp180
1 files changed, 180 insertions, 0 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp new file mode 100644 index 0000000..eb2750c --- /dev/null +++ b/generator/generator.cpp
@@ -0,0 +1,180 @@
#include "generator.h"
#include <dirent.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <list>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <json.hpp>
#include "progress.h"
#include "field.h"
#include "../util.h"
#include "mood.h"
11
12namespace cadence {
13 namespace generator {
14
15 generator::generator(
16 std::string inputpath,
17 std::string outputpath) :
18 inputpath_(inputpath),
19 db_(outputpath)
20 {
21 // Add directory separator to input path
22 if ((inputpath_.back() != '/') && (inputpath_.back() != '\\'))
23 {
24 inputpath_ += '/';
25 }
26
27 inputpath_ += "highlevel/";
28 }
29
30 void generator::run()
31 {
32 // Creates the datafile.
33 writeSchema();
34
35 // Scans the AcousticBrainz data dump and generates a list of all of the
36 // files in the dump.
37 scanDirectories();
38
39 // Parses each data file and enters it into the database.
40 parseData();
41 }
42
43 void generator::writeSchema()
44 {
45 std::ifstream file("schema.sql");
46 if (!file)
47 {
48 throw std::invalid_argument("Could not find database schema");
49 }
50
51 std::ostringstream schemaBuilder;
52 std::string line;
53 while (std::getline(file, line))
54 {
55 if (line.back() == '\r')
56 {
57 line.pop_back();
58 }
59
60 schemaBuilder << line;
61 }
62
63 std::string schema = schemaBuilder.str();
64 auto queries = split<std::list<std::string>>(schema, ";");
65 progress ppgs("Writing database schema...", queries.size());
66 for (std::string query : queries)
67 {
68 if (!queries.empty())
69 {
70 db_.runQuery(query);
71 }
72
73 ppgs.update();
74 }
75 }
76
77 void generator::scanDirectories()
78 {
79 std::cout << "Scanning AcousticBrainz dump..." << std::endl;
80
81 DIR* topdir;
82 if ((topdir = opendir(inputpath_.c_str())) == nullptr)
83 {
84 throw std::invalid_argument("Invalid AcousticBrainz data directory");
85 }
86
87 struct dirent* topent;
88 while ((topent = readdir(topdir)) != nullptr)
89 {
90 if (topent->d_name[0] != '.')
91 {
92 std::string directory = inputpath_ + topent->d_name + "/";
93
94 DIR* subdir;
95 if ((subdir = opendir(directory.c_str())) == nullptr)
96 {
97 throw std::invalid_argument("Invalid AcousticBrainz data directory");
98 }
99
100 struct dirent* subent;
101 while ((subent = readdir(subdir)) != nullptr)
102 {
103 if (subent->d_name[0] != '.')
104 {
105 std::string subdirectory = directory + subent->d_name + "/";
106
107 DIR* subsubdir;
108 if ((subsubdir = opendir(subdirectory.c_str())) == nullptr)
109 {
110 throw std::invalid_argument("Invalid AcousticBrainz data directory");
111 }
112
113 struct dirent* subsubent;
114 while ((subsubent = readdir(subsubdir)) != nullptr)
115 {
116 if (subsubent->d_name[0] != '.')
117 {
118 std::string datafile = subdirectory + subsubent->d_name;
119
120 datafiles_.push_back(datafile);
121 }
122 }
123
124 closedir(subsubdir);
125 }
126 }
127
128 closedir(subdir);
129 }
130 }
131
132 closedir(topdir);
133 }
134
135 void generator::parseData()
136 {
137 progress ppgs("Parsing AcousticBrainz data files...", datafiles_.size());
138
139 for (std::string datafile : datafiles_)
140 {
141 ppgs.update();
142
143 nlohmann::json jsonData;
144 {
145 std::ifstream dataStream(datafile);
146 dataStream >> jsonData;
147 }
148
149 try
150 {
151 std::vector<mood> moods;
152 moods.emplace_back(mood::type::danceable, jsonData["highlevel"]["danceability"]["all"]["danceable"]);
153 moods.emplace_back(mood::type::acoustic, jsonData["highlevel"]["mood_acoustic"]["all"]["acoustic"]);
154 moods.emplace_back(mood::type::aggressive, jsonData["highlevel"]["mood_aggressive"]["all"]["aggressive"]);
155 moods.emplace_back(mood::type::electronic, jsonData["highlevel"]["mood_electronic"]["all"]["electronic"]);
156 moods.emplace_back(mood::type::happy, jsonData["highlevel"]["mood_happy"]["all"]["happy"]);
157 moods.emplace_back(mood::type::party, jsonData["highlevel"]["mood_party"]["all"]["party"]);
158 moods.emplace_back(mood::type::relaxed, jsonData["highlevel"]["mood_relaxed"]["all"]["relaxed"]);
159 moods.emplace_back(mood::type::sad, jsonData["highlevel"]["mood_sad"]["all"]["sad"]);
160 moods.emplace_back(mood::type::instrumental, jsonData["highlevel"]["voice_instrumental"]["all"]["instrumental"]);
161
162 std::sort(std::begin(moods), std::end(moods), [] (const mood& left, const mood& right) -> bool {
163 return left.getProbability() > right.getProbability();
164 });
165
166 std::list<field> fields;
167 fields.emplace_back("title", jsonData["metadata"]["tags"]["title"][0].get<std::string>());
168 fields.emplace_back("artist", jsonData["metadata"]["tags"]["artist"][0].get<std::string>());
169 fields.emplace_back("category", moods.front().getCategory());
170
171 db_.insertIntoTable("songs", std::move(fields));
172 } catch (const std::domain_error& ex)
173 {
174 // Weird data. Ignore silently.
175 }
176 }
177 }
178
179 };
180};