first commit

This commit is contained in:
David Ali
2026-01-28 22:20:52 +01:00
commit c913cacf8d
13 changed files with 1172 additions and 0 deletions

120
tools/mapgen.c Normal file
View File

@@ -0,0 +1,120 @@
/* tools/mapgen.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
// Import physics directly to access PHONEME_DB and f6_t
// Assumes compilation from the main directory
#include "../src/articulator.h"
// External declaration of the database (located in articulator_db.c)
extern const phoneme_t PHONEME_DB[];
// --- ANCHOR DEFINITIONS ---
// Anchors serve as reference points for how a specific language writes specific sounds
// One orthography anchor: a spelling paired with the IPA symbol it stands for.
// Anchor arrays are terminated by an entry whose spelling is NULL.
typedef struct {
    const char *spelling; // text written into the generated table for this anchor
    const char *ipa_ref;  // IPA symbol looked up in PHONEME_DB to get the anchor's vector
} anchor_t;
// Hungarian anchors: {spelling, IPA symbol} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must exist in PHONEME_DB, otherwise get_vector() aborts the tool.
// Entry order matters: generate_table() keeps the FIRST anchor on a distance tie.
// NOTE(review): "ó", "ő" and "ű" reuse the ipa_ref of the earlier "o", "ö" and "ü"
// entries, so (assuming f6_dist_sq is deterministic) they can never be selected.
// NOTE(review): "c\u035Ch" embeds U+035C (a combining mark) between 'c' and 'h' —
// confirm this is intended rather than a plain "ch".
const anchor_t ANCHORS_HUNGARIAN[] = {
{"a", "ɒ"}, {"a", "ɑ"}, {"á", "a"}, {"e", "ɛ"}, {"é", "e"}, {"i", "i"}, {"i", "ɨ"}, {"o", "o"},
{"ó", "o"}, {"u", "ʊ"}, {"ú", "u"}, {"ö", "ø"}, {"ő", "ø"}, {"ü", "y"}, {"ű", "y"}, {"p", "p"},
{"b", "b"}, {"t", "t"}, {"d", "d"}, {"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ny", "ɲ"},
{"f", "f"}, {"v", "v"}, {"s", "ʃ"}, {"sz", "s"}, {"z", "z"}, {"zs", "ʒ"}, {"ty", "c"}, {"gy", "ɟ"},
{"l", "l"}, {"ly", "ʎ"}, {"r", "r"}, {"h", "h"}, {"c\u035Ch", "x"}, {"j", "j"}, {NULL, NULL}};
// Polish anchors: {spelling, IPA symbol} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must exist in PHONEME_DB, otherwise get_vector() aborts the tool.
// Entry order matters: generate_table() keeps the FIRST anchor on a distance tie.
// A spelling may appear more than once (e.g. "a") to cover several IPA symbols.
const anchor_t ANCHORS_POLISH[] = {
{"a", "a"}, {"a", "ɑ"}, {"e", "ɛ"}, {"i", "i"}, {"o", "o"}, {"u", "u"}, {"y", "ɨ"}, {"p", "p"},
{"b", "b"}, {"t", "t"}, {"d", "d"}, {"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ń", "ɲ"},
{"f", "f"}, {"w", "v"}, {"s", "s"}, {"z", "z"}, {"sz", "ʂ"}, {"ż", "ʐ"}, {"ś", "ɕ"}, {"ź", "ʑ"},
{"ch", "x"}, {"h", "h"}, {"l", "l"}, {"ł", "w"}, {"r", "r"}, {"j", "j"}, {NULL, NULL}};
// German anchors: {spelling, IPA symbol} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must exist in PHONEME_DB, otherwise get_vector() aborts the tool.
// Entry order matters: generate_table() keeps the FIRST anchor on a distance tie.
// NOTE(review): "ä" reuses ipa_ref "ɛ" (already bound to "e") and "ts" reuses
// ipa_ref "z" (already bound to "s"), so (assuming f6_dist_sq is deterministic)
// neither can ever be selected. The {"ts", "z"} pair also looks like it may have
// spelling and sound swapped — confirm the intent.
const anchor_t ANCHORS_GERMAN[] = {
{"a", "a"}, {"a", "ɑ"}, {"e", "e"}, {"e", "ɛ"}, {"e", "ə"}, {"i", "i"}, {"i", "ɪ"}, {"o", "o"}, {"o", "ɔ"},
{"u", "u"}, {"u", "ʊ"}, {"ä", "ɛ"}, {"ö", "ø"}, {"ü", "y"}, {"p", "p"}, {"b", "b"}, {"t", "t"}, {"d", "d"},
{"k", "k"}, {"g", "g"}, {"m", "m"}, {"n", "n"}, {"ng", "ŋ"}, {"f", "f"}, {"w", "v"}, {"s", "z"}, {"ss", "s"},
{"sch", "ʃ"}, {"j", "j"}, {"r", "r"}, {"ch", "x"}, {"ch", "ç"}, {"ts", "z"}, {NULL, NULL}};
// Cyrillic anchors: {spelling, IPA symbol} pairs, terminated by {NULL, NULL}.
// Spellings are Cyrillic letters; ipa_ref values are IPA symbols that must exist
// in PHONEME_DB, otherwise get_vector() aborts the tool.
// Entry order matters: generate_table() keeps the FIRST anchor on a distance tie.
const anchor_t ANCHORS_CYRILLIC[] = {
{"а", "a"}, {"а", "ɑ"}, {"б", "b"}, {"в", "v"}, {"г", "g"}, {"д", "d"}, {"е", "ɛ"}, {"ж", "ʒ"}, {"з", "z"},
{"и", "i"}, {"й", "j"}, {"к", "k"}, {"л", "l"}, {"м", "m"}, {"н", "n"}, {"о", "o"}, {"п", "p"}, {"р", "r"},
{"с", "s"}, {"т", "t"}, {"у", "u"}, {"ф", "f"}, {"х", "x"}, {"ш", "ʃ"}, {"ы", "ɨ"}, {NULL, NULL}};
// Orcish anchors: {spelling, IPA symbol} pairs, terminated by {NULL, NULL}.
// Every ipa_ref must exist in PHONEME_DB, otherwise get_vector() aborts the tool.
// Entry order matters: generate_table() keeps the FIRST anchor on a distance tie.
// Several spellings ("u", "o", "a", "kh", "gh", "k") are listed more than once so
// that multiple IPA symbols collapse onto the same written form.
const anchor_t ANCHORS_ORCISH[] = {
{"u", "y"}, {"u", "u"}, {"u", "ʊ"}, {"o", "ø"}, {"o", "o"}, {"o", "ɔ"}, {"a", "a"}, {"a", "ɑ"}, {"a", "ʌ"},
{"e", "e"}, {"i", "i"}, {"b", "b"}, {"d", "d"}, {"g", "g"}, {"p", "p"}, {"t", "t"}, {"k", "k"}, {"kh", "x"},
{"kh", "χ"}, {"gh", "ɣ"}, {"gh", "ʁ"}, {"q", "q"}, {"k", "ɢ"}, {"h", "h"}, {"hh", "ɦ"}, {"kh", "ħ"}, {"m", "m"},
{"n", "n"}, {"ng", "ŋ"}, {"ny", "ɲ"}, {"l", "l"}, {"r", "r"}, {"rr", "ʀ"}, {"v", "v"}, {"w", "w"}, {"z", "z"},
{"zg", "ʒ"}, {"sh", "ʃ"}, {"th", "θ"}, {"dh", "ð"}, {"y", "j"}, {NULL, NULL}};
// --- GENERATOR LOGIC ---
// Helper to retrieve vector from DB
f6_t get_vector(const char* ipa) {
for (int i = 0; PHONEME_DB[i].ipa; i++) {
if (strcmp(PHONEME_DB[i].ipa, ipa) == 0) return PHONEME_DB[i].target;
}
fprintf(stderr, "ERROR: Anchor refers to unknown IPA: %s\n", ipa);
exit(1);
}
void generate_ipa_identity_table(const char* array_name) {
printf("const ortho_rule_t %s[] = {\n", array_name);
for (int i = 0; PHONEME_DB[i].ipa != NULL; i++) {
printf(" {\"%s\", \"%s\"},\n", PHONEME_DB[i].ipa, PHONEME_DB[i].ipa);
}
printf(" {NULL, NULL}\n};\n\n");
}
// Writes to stdout the C source of an ortho_rule_t array called `array_name`
// that maps every PHONEME_DB symbol to the spelling of its nearest anchor.
//
// array_name: identifier used for the generated array.
// anchors:    anchor list terminated by an entry with a NULL spelling; it must
//             contain at least one anchor.
//
// "Nearest" is the smallest f6_dist_sq() between the phoneme's target and the
// anchor's vector, evaluated with the phoneme's own weights. On a tie the
// earliest anchor in the list wins, so anchor order is significant.
//
// Exits with status 1 (after a message on stderr) if the anchor list is empty,
// if memory cannot be allocated, or — via get_vector() — if an anchor names an
// IPA symbol that is missing from PHONEME_DB.
void generate_table(const char* array_name, const anchor_t* anchors) {
    size_t anchor_count = 0;
    while (anchors[anchor_count].spelling) anchor_count++;

    // With no anchors, anchors[0].spelling is NULL and would be handed to
    // printf("%s"), which is undefined behaviour; fail loudly instead.
    if (anchor_count == 0) {
        fprintf(stderr, "ERROR: Anchor table for %s is empty\n", array_name);
        exit(1);
    }

    f6_t *anchor_vecs = malloc(anchor_count * sizeof *anchor_vecs);
    if (anchor_vecs == NULL) {
        fprintf(stderr, "ERROR: Out of memory while building %s (%zu anchors)\n",
                array_name, anchor_count);
        exit(1);
    }

    // Resolve every anchor up front so an unknown IPA symbol aborts the tool
    // before any part of this table has been written.
    for (size_t j = 0; j < anchor_count; j++) {
        anchor_vecs[j] = get_vector(anchors[j].ipa_ref);
    }

    printf("const ortho_rule_t %s[] = {\n", array_name);
    for (int i = 0; PHONEME_DB[i].ipa != NULL; i++) {
        f6_t source = PHONEME_DB[i].target;
        f6_t weights = PHONEME_DB[i].weights;
        float min_dist = FLT_MAX;
        size_t best = 0;
        for (size_t j = 0; j < anchor_count; j++) {
            float d = f6_dist_sq(source, anchor_vecs[j], weights);
            // Strict '<' keeps the earliest anchor when distances are equal.
            if (d < min_dist) {
                min_dist = d;
                best = j;
            }
        }
        printf(" {\"%s\", \"%s\"},", PHONEME_DB[i].ipa, anchors[best].spelling);
        printf("\n");
    }
    printf(" {NULL, NULL}\n};\n\n");
    free(anchor_vecs);
}
// Entry point: writes the complete generated header to stdout — include guard,
// the transcriber.h include, one nearest-anchor table per supported orthography,
// and finally the IPA identity table.
//
// Returns 0 on success. Returns 1 if the output stream reports an error (for
// example a full disk when stdout is redirected), so that a truncated header is
// not mistaken for a successful run by the build.
int main(void) {
    printf("// Automatically generated by mapgen\n");
    printf("#ifndef GENERATED_ORTHO_H\n");
    printf("#define GENERATED_ORTHO_H\n\n");
    printf("#include \"transcriber.h\"\n\n");
    generate_table("ORTHO_POLISH", ANCHORS_POLISH);
    generate_table("ORTHO_HUNGARIAN", ANCHORS_HUNGARIAN);
    generate_table("ORTHO_GERMAN", ANCHORS_GERMAN);
    generate_table("ORTHO_CYRILLIC", ANCHORS_CYRILLIC);
    generate_table("ORTHO_ORCISH", ANCHORS_ORCISH);
    generate_ipa_identity_table("ORTHO_IPA");
    printf("#endif\n");

    // Force buffered output out now so that write failures become visible
    // before the exit status is decided.
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "ERROR: Failed to write generated header to stdout\n");
        return 1;
    }
    return 0;
}