From 6746da6edd7d9d50efe374eabbb79a3cac882d81 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Jan 2017 18:02:50 -0500 Subject: Started structural rewrite The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. Source for these have been left in the repository for now. --- generator/schema.sql | 352 ++++++++++++++++++++------------------------------- 1 file changed, 135 insertions(+), 217 deletions(-) (limited to 'generator/schema.sql') diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -1,286 +1,204 @@ -DROP TABLE IF EXISTS `verbs`; -CREATE TABLE `verbs` ( - `verb_id` INTEGER PRIMARY KEY, - `infinitive` VARCHAR(32) NOT NULL, - `past_tense` VARCHAR(32) NOT NULL, - `past_participle` VARCHAR(32) NOT NULL, - `ing_form` VARCHAR(32) NOT NULL, - `s_form` VARCHAR(32) NOT NULL +CREATE TABLE `notions` ( + `notion_id` INTEGER PRIMARY KEY, + `part_of_speech` SMALLINT NOT NULL, + `wnid` INTEGER, + `images` INTEGER ); -DROP TABLE IF EXISTS `groups`; -CREATE TABLE `groups` ( - `group_id` INTEGER PRIMARY KEY, - `data` BLOB NOT NULL -); - -DROP TABLE IF EXISTS `frames`; -CREATE TABLE `frames` ( - `frame_id` INTEGER PRIMARY KEY, - `group_id` INTEGER NOT NULL, - `data` BLOB NOT NULL, - FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) -); +CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); -DROP TABLE IF EXISTS `verb_groups`; -CREATE TABLE `verb_groups` ( - `verb_id` INTEGER NOT NULL, - `group_id` INTEGER NOT NULL, - FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), - FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) -); - -DROP TABLE IF EXISTS `adjectives`; -CREATE TABLE `adjectives` ( - `adjective_id` INTEGER PRIMARY KEY, - `base_form` VARCHAR(32) NOT NULL, - `comparative` VARCHAR(32), - `superlative` VARCHAR(32), - `position` CHAR(1), - `complexity` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `adverbs`; -CREATE TABLE `adverbs` ( - `adverb_id` INTEGER PRIMARY KEY, - `base_form` VARCHAR(32) NOT NULL, - `comparative` VARCHAR(32), - `superlative` VARCHAR(32), - `complexity` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `nouns`; -CREATE TABLE `nouns` ( - `noun_id` INTEGER PRIMARY KEY, - `singular` VARCHAR(32) NOT NULL, - `plural` VARCHAR(32), - `proper` INTEGER(1) NOT NULL, - `complexity` INTEGER NOT NULL, - `images` INTEGER NOT NULL, - `wnid` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `hypernymy`; CREATE TABLE `hypernymy` ( `hypernym_id` INTEGER NOT NULL, - `hyponym_id` INTEGER NOT NULL, - FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) + `hyponym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `instantiation`; +CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); +CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); + CREATE TABLE `instantiation` ( `class_id` INTEGER NOT NULL, - `instance_id` INTEGER NOT NULL, - FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) + `instance_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `member_meronymy`; +CREATE INDEX `instance_of` ON `instantiation`(`class_id`); +CREATE INDEX `class_of` ON `instantiation`(`instance_id`); + CREATE TABLE `member_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `part_meronymy`; +CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); +CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); + CREATE TABLE `part_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `substance_meronymy`; +CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); +CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); + CREATE TABLE `substance_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `variation`; +CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); +CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); + CREATE TABLE `variation` ( `noun_id` INTEGER NOT NULL, - `adjective_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) + `adjective_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `noun_antonymy`; -CREATE TABLE `noun_antonymy` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) -); +CREATE INDEX `variant_of` ON `variation`(`noun_id`); +CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); -DROP TABLE IF EXISTS `adjective_antonymy`; -CREATE TABLE `adjective_antonymy` ( +CREATE TABLE `similarity` ( `adjective_1_id` INTEGER NOT NULL, - `adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) + `adjective_2_id` INTEGER NOT NULL +); + +CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); + +CREATE TABLE `is_a` ( + `notion_id` INTEGER NOT NULL, + `groupname` VARCHAR(32) NOT NULL ); -DROP TABLE IF EXISTS `adverb_antonymy`; -CREATE TABLE `adverb_antonymy` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE TABLE `entailment` ( + `given_id` INTEGER NOT NULL, + `entailment_id` INTEGER NOT NULL +); + +CREATE INDEX `entailment_of` ON `entailment`(`given_id`); +CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); + +CREATE TABLE `causality` ( + `cause_id` INTEGER NOT NULL, + `effect_id` INTEGER NOT NULL +); + +CREATE INDEX `effect_of` ON `causality`(`cause_id`); +CREATE INDEX `cause_of` ON `causality`(`effect_id`); + +CREATE TABLE `words` ( + `word_id` INTEGER PRIMARY KEY, + `notion_id` INTEGER NOT NULL, + `lemma_id` INTEGER NOT NULL, + `tag_count` INTEGER, + `position` SMALLINT, + `group_id` INTEGER +); + +CREATE INDEX `notion_words` ON `words`(`notion_id`); +CREATE INDEX `lemma_words` ON `words`(`lemma_id`); +CREATE INDEX `group_words` ON `words`(`group_id`); + +CREATE TABLE `antonymy` ( + `antonym_1_id` INTEGER NOT NULL, + `antonym_2_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `specification`; +CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); + CREATE TABLE `specification` ( `general_id` INTEGER NOT NULL, - `specific_id` INTEGER NOT NULL, - FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) + `specific_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `pertainymy`; +CREATE INDEX `specification_of` ON `specification`(`general_id`); +CREATE INDEX `generalization_of` ON `specification`(`specific_id`); + CREATE TABLE `pertainymy` ( `noun_id` INTEGER NOT NULL, - `pertainym_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) + `pertainym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `mannernymy`; +CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); +CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); + CREATE TABLE `mannernymy` ( `adjective_id` INTEGER NOT NULL, - `mannernym_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) + `mannernym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `noun_synonymy`; -CREATE TABLE `noun_synonymy` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) -); +CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); +CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); -DROP TABLE IF EXISTS `adjective_synonymy`; -CREATE TABLE `adjective_synonymy` ( - `adjective_1_id` INTEGER NOT NULL, - `adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE TABLE `usage` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `adverb_synonymy`; -CREATE TABLE `adverb_synonymy` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) -); +CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); +CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); -DROP TABLE IF EXISTS `noun_pronunciations`; -CREATE TABLE `noun_pronunciations` ( - `noun_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) +CREATE TABLE `topicality` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `verb_pronunciations`; -CREATE TABLE `verb_pronunciations` ( - `verb_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) -); +CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); +CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); -DROP TABLE IF EXISTS `adjective_pronunciations`; -CREATE TABLE `adjective_pronunciations` ( - `adjective_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE TABLE `regionality` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `adverb_pronunciations`; -CREATE TABLE `adverb_pronunciations` ( - `adverb_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) -); +CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); +CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); -DROP TABLE IF EXISTS `noun_noun_derivation`; -CREATE TABLE `noun_noun_derivation` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) +CREATE TABLE `forms` ( + `form_id` INTEGER PRIMARY KEY, + `form` VARCHAR(32) NOT NULL, + `complexity` SMALLINT NOT NULL, + `proper` SMALLINT NOT NULL ); -DROP TABLE IF EXISTS `noun_adjective_derivation`; -CREATE TABLE `noun_adjective_derivation` ( - `noun_id` INTEGER NOT NULL, - `adjective_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) -); +CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); -DROP TABLE IF EXISTS `noun_adverb_derivation`; -CREATE TABLE `noun_adverb_derivation` ( - `noun_id` INTEGER NOT NULL, - `adverb_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE TABLE `lemmas_forms` ( + `lemma_id` INTEGER NOT NULL, + `form_id` INTEGER NOT NULL, + `category` SMALLINT NOT NULL ); -DROP TABLE IF EXISTS `adjective_adjective_derivation`; -CREATE TABLE `adjective_adjective_derivation` ( - `adjective_1_id` INTEGER NOT NULL, - `adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); +CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); + +CREATE TABLE `pronunciations` ( + `pronunciation_id` INTEGER PRIMARY KEY, + `phonemes` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), + `syllables` INTEGER NOT NULL, + `stress` VARCHAR(64) NOT NULL ); -DROP TABLE IF EXISTS `adjective_adverb_derivation`; -CREATE TABLE `adjective_adverb_derivation` ( - `adjective_id` INTEGER NOT NULL, - `adverb_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) +CREATE TABLE `forms_pronunciations` ( + `form_id` INTEGER NOT NULL, + `pronunciation_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `adverb_adverb_derivation`; -CREATE TABLE `adverb_adverb_derivation` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); +CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); + +CREATE TABLE `groups` ( + `group_id` INTEGER PRIMARY KEY, + `data` BLOB NOT NULL ); -DROP TABLE IF EXISTS `prepositions`; -CREATE TABLE `prepositions` ( - `preposition_id` INTEGER PRIMARY KEY, - `form` VARCHAR(32) NOT NULL +CREATE TABLE `frames` ( + `frame_id` INTEGER PRIMARY KEY, + `data` BLOB NOT NULL ); -DROP TABLE IF EXISTS `preposition_groups`; -CREATE TABLE `preposition_groups` ( - `preposition_id` INTEGER NOT NULL, - `groupname` VARCHAR(32) NOT NULL, - FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) +CREATE TABLE `groups_frames` ( + `group_id` INTEGER NOT NULL, + `frame_id` INTEGER NOT NULL ); + +CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); -- cgit 1.4.1