Files
babbler/tools/mapgen.c
2026-01-28 22:20:52 +01:00

120 lines
5.0 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* tools/mapgen.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
// Import physics directly to access PHONEME_DB and f6_t
// Assumes compilation from the main directory
#include "../src/articulator.h"
// External declaration of the database (located in articulator_db.c)
extern const phoneme_t PHONEME_DB[];
// --- ANCHOR DEFINITIONS ---
/*
 * An anchor ties one written form to one reference phoneme. Each language
 * table below is an array of anchors closed by a {NULL, NULL} entry.
 */
typedef struct {
    const char *spelling; /* text emitted for phonemes matched to this anchor */
    const char *ipa_ref;  /* IPA key looked up in PHONEME_DB to obtain the anchor's vector */
} anchor_t;
// Hungarian anchors: {spelling, ipa_ref} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must be present in PHONEME_DB; get_vector() exits otherwise.
// Several ipa_refs appear twice ("o" for o/ó, "ø" for ö/ő, "y" for ü/ű). Both
// entries resolve to the same PHONEME_DB vector, and generate_table() only
// replaces the best match on a strictly smaller distance, so (assuming
// f6_dist_sq is deterministic) the later spelling of each pair is never emitted.
// NOTE(review): "c\u035Ch" places U+035C (combining double breve below) between
// 'c' and 'h', unlike the plain "ch" in the Polish and German tables - confirm
// this is intentional.
const anchor_t ANCHORS_HUNGARIAN[] = {
{"a", "ɒ"}, {"a", "ɑ"}, {"á", "a"}, {"e", "ɛ"}, {"é", "e"}, {"i", "i"}, {"i", "ɨ"}, {"o", "o"},
{"ó", "o"}, {"u", "ʊ"}, {"ú", "u"}, {"ö", "ø"}, {"ő", "ø"}, {"ü", "y"}, {"ű", "y"}, {"p", "p"},
{"b", "b"}, {"t", "t"}, {"d", "d"}, {"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ny", "ɲ"},
{"f", "f"}, {"v", "v"}, {"s", "ʃ"}, {"sz", "s"}, {"z", "z"}, {"zs", "ʒ"}, {"ty", "c"}, {"gy", "ɟ"},
{"l", "l"}, {"ly", "ʎ"}, {"r", "r"}, {"h", "h"}, {"c\u035Ch", "x"}, {"j", "j"}, {NULL, NULL}};
// Polish anchors: {spelling, ipa_ref} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must be present in PHONEME_DB; get_vector() exits otherwise.
// One spelling may anchor several phonemes (e.g. "a" for both "a" and "ɑ").
const anchor_t ANCHORS_POLISH[] = {
{"a", "a"}, {"a", "ɑ"}, {"e", "ɛ"}, {"i", "i"}, {"o", "o"}, {"u", "u"}, {"y", "ɨ"}, {"p", "p"},
{"b", "b"}, {"t", "t"}, {"d", "d"}, {"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ń", "ɲ"},
{"f", "f"}, {"w", "v"}, {"s", "s"}, {"z", "z"}, {"sz", "ʂ"}, {"ż", "ʐ"}, {"ś", "ɕ"}, {"ź", "ʑ"},
{"ch", "x"}, {"h", "h"}, {"l", "l"}, {"ł", "w"}, {"r", "r"}, {"j", "j"}, {NULL, NULL}};
// German anchors: {spelling, ipa_ref} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must be present in PHONEME_DB; get_vector() exits otherwise.
// NOTE(review): {"ä", "ɛ"} repeats the ipa_ref of the earlier {"e", "ɛ"}, and
// {"ts", "z"} repeats that of {"s", "z"}. generate_table() only replaces the
// best match on a strictly smaller distance, so (assuming f6_dist_sq is
// deterministic) "ä" and "ts" are never emitted. The {"ts", "z"} entry may have
// its fields swapped - confirm the intent.
const anchor_t ANCHORS_GERMAN[] = {
{"a", "a"}, {"a", "ɑ"}, {"e", "e"}, {"e", "ɛ"}, {"e", "ə"}, {"i", "i"}, {"i", "ɪ"}, {"o", "o"}, {"o", "ɔ"},
{"u", "u"}, {"u", "ʊ"}, {"ä", "ɛ"}, {"ö", "ø"}, {"ü", "y"}, {"p", "p"}, {"b", "b"}, {"t", "t"}, {"d", "d"},
{"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ng", "ŋ"}, {"f", "f"}, {"w", "v"}, {"s", "z"}, {"ss", "s"},
{"sch", "ʃ"}, {"j", "j"}, {"r", "r"}, {"ch", "x"}, {"ch", "ç"}, {"ts", "z"}, {NULL, NULL}};
// Cyrillic anchors: {spelling, ipa_ref} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must be present in PHONEME_DB; get_vector() exits otherwise.
// The spelling field holds Cyrillic letters while ipa_ref holds Latin/IPA
// characters; several pairs look identical on screen but are different code
// points, so edit this table with care.
const anchor_t ANCHORS_CYRILLIC[] = {
{"а", "a"}, {"а", "ɑ"}, {"б", "b"}, {"в", "v"}, {"г", "g"}, {"д", "d"}, {"е", "ɛ"}, {"ж", "ʒ"}, {"з", "z"},
{"и", "i"}, {"й", "j"}, {"к", "k"}, {"л", "l"}, {"м", "m"}, {"н", "n"}, {"о", "o"}, {"п", "p"}, {"р", "r"},
{"с", "s"}, {"т", "t"}, {"у", "u"}, {"ф", "f"}, {"х", "x"}, {"ш", "ʃ"}, {"ы", "ɨ"}, {NULL, NULL}};
// Orcish anchors: {spelling, ipa_ref} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must be present in PHONEME_DB; get_vector() exits otherwise.
// Many phonemes deliberately share one spelling here (e.g. "u" anchors "y",
// "u" and "ʊ"; "kh" anchors "x", "χ" and "ħ"), so the generated table maps
// several sounds onto the same written form.
const anchor_t ANCHORS_ORCISH[] = {
{"u", "y"}, {"u", "u"}, {"u", "ʊ"}, {"o", "ø"}, {"o", "o"}, {"o", "ɔ"}, {"a", "a"}, {"a", "ɑ"}, {"a", "ʌ"},
{"e", "e"}, {"i", "i"}, {"b", "b"}, {"d", "d"}, {"g", "g"}, {"p", "p"}, {"t", "t"}, {"k", "k"}, {"kh", "x"},
{"kh", "χ"}, {"gh", "ɣ"}, {"gh", "ʁ"}, {"q", "q"}, {"k", "ɢ"}, {"h", "h"}, {"hh", "ɦ"}, {"kh", "ħ"}, {"m", "m"},
{"n", "n"}, {"ng", "ŋ"}, {"ny", "ɲ"}, {"l", "l"}, {"r", "r"}, {"rr", "ʀ"}, {"v", "v"}, {"w", "w"}, {"z", "z"},
{"zg", "ʒ"}, {"sh", "ʃ"}, {"th", "θ"}, {"dh", "ð"}, {"y", "j"}, {NULL, NULL}};
// --- GENERATOR LOGIC ---
// Helper to retrieve vector from DB
f6_t get_vector(const char* ipa) {
for (int i = 0; PHONEME_DB[i].ipa; i++) {
if (strcmp(PHONEME_DB[i].ipa, ipa) == 0) return PHONEME_DB[i].target;
}
fprintf(stderr, "ERROR: Anchor refers to unknown IPA: %s\n", ipa);
exit(1);
}
void generate_ipa_identity_table(const char* array_name) {
printf("const ortho_rule_t %s[] = {\n", array_name);
for (int i = 0; PHONEME_DB[i].ipa != NULL; i++) {
printf(" {\"%s\", \"%s\"},\n", PHONEME_DB[i].ipa, PHONEME_DB[i].ipa);
}
printf(" {NULL, NULL}\n};\n\n");
}
/*
 * Print to stdout the C source of an ortho_rule_t array called `array_name`
 * that maps every phoneme in PHONEME_DB to the spelling of its nearest anchor.
 *
 * array_name: identifier used for the emitted array.
 * anchors:    anchor list terminated by an entry whose spelling is NULL.
 *             Every ipa_ref must exist in PHONEME_DB (get_vector() exits
 *             the process otherwise).
 *
 * "Nearest" means the smallest f6_dist_sq() between the phoneme's target and
 * the anchor's vector, evaluated with the phoneme's own weights. The
 * comparison is strict, so on equal distances the earliest anchor wins.
 *
 * The process exits with status 1 if the anchor list is empty (there would
 * be no spelling to emit) or if the scratch buffer cannot be allocated.
 */
void generate_table(const char* array_name, const anchor_t* anchors) {
    size_t anchor_count = 0;
    while (anchors[anchor_count].spelling) {
        anchor_count++;
    }

    /* Without this guard the loop below would print anchors[0].spelling,
     * which is the NULL terminator of an empty list. */
    if (anchor_count == 0) {
        fprintf(stderr, "ERROR: No anchors defined for table %s\n", array_name);
        exit(1);
    }

    /* Resolve every anchor once up front instead of once per phoneme. */
    f6_t *anchor_vecs = malloc(anchor_count * sizeof *anchor_vecs);
    if (anchor_vecs == NULL) {
        fprintf(stderr, "ERROR: Out of memory while building table %s\n", array_name);
        exit(1);
    }
    for (size_t j = 0; j < anchor_count; j++) {
        anchor_vecs[j] = get_vector(anchors[j].ipa_ref);
    }

    printf("const ortho_rule_t %s[] = {\n", array_name);
    for (int i = 0; PHONEME_DB[i].ipa != NULL; i++) {
        f6_t source = PHONEME_DB[i].target;
        f6_t weights = PHONEME_DB[i].weights;
        float min_dist = FLT_MAX;
        size_t best = 0;

        for (size_t j = 0; j < anchor_count; j++) {
            float d = f6_dist_sq(source, anchor_vecs[j], weights);
            if (d < min_dist) {
                min_dist = d;
                best = j;
            }
        }
        printf(" {\"%s\", \"%s\"},\n", PHONEME_DB[i].ipa, anchors[best].spelling);
    }
    printf(" {NULL, NULL}\n};\n\n");

    free(anchor_vecs);
}
/*
 * Entry point: writes the complete generated header to stdout.
 *
 * The output is an include-guarded header (GENERATED_ORTHO_H) that includes
 * "transcriber.h" and then defines one ortho_rule_t table per language,
 * followed by the IPA identity table.
 *
 * Returns 0 on success. Returns EXIT_FAILURE if stdout reports a write
 * error, so that a build step redirecting this output does not silently
 * accept a truncated header.
 */
int main(void) {
    printf("// Automatically generated by mapgen\n");
    printf("#ifndef GENERATED_ORTHO_H\n");
    printf("#define GENERATED_ORTHO_H\n\n");
    printf("#include \"transcriber.h\"\n\n");

    generate_table("ORTHO_POLISH", ANCHORS_POLISH);
    generate_table("ORTHO_HUNGARIAN", ANCHORS_HUNGARIAN);
    generate_table("ORTHO_GERMAN", ANCHORS_GERMAN);
    generate_table("ORTHO_CYRILLIC", ANCHORS_CYRILLIC);
    generate_table("ORTHO_ORCISH", ANCHORS_ORCISH);
    generate_ipa_identity_table("ORTHO_IPA");

    printf("#endif\n");

    /* Buffered output may only fail at flush time, so flush explicitly and
     * inspect the stream's error flag before reporting success. */
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "ERROR: Failed to write generated header to stdout\n");
        return EXIT_FAILURE;
    }
    return 0;
}