summary | refs | log | tree | commit | diff
path: root/source/luametatex/source/utilities/auxunistring.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/utilities/auxunistring.c')
-rw-r--r-- source/luametatex/source/utilities/auxunistring.c 100
1 files changed, 82 insertions, 18 deletions
diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c
index 9fe5531d6..746fde4ad 100644
--- a/source/luametatex/source/utilities/auxunistring.c
+++ b/source/luametatex/source/utilities/auxunistring.c
@@ -11,36 +11,100 @@
*/
-unsigned aux_str2uni(const unsigned char *k)
+// unsigned xaux_str2uni(const unsigned char *k)
+// {
+// const unsigned char *text = k;
+// int ch = *text++;
+// if (ch < 0x80) {
+// return (unsigned) ch;
+// } else if (ch <= 0xbf) {
+// return 0xFFFD;
+// } else if (ch <= 0xdf) {
+// if (text[0] >= 0x80 && text[0] < 0xc0) {
+// return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f));
+// }
+// } else if (ch <= 0xef) {
+// if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) {
+// return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f));
+// }
+// } else if (ch <= 0xf7) {
+// if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 ||
+// text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) {
+// return 0xFFFD;
+// } else {
+// int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1;
+// int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f);
+// w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4);
+// return (unsigned) (w1 * 0x400 + w2 + 0x10000);
+// }
+// }
+// return 0xFFFD;
+// }
+
/*
    Decode the UTF-8 sequence that starts at |text| and return its code point.

    text    : pointer to the first byte of the sequence. Every continuation
              byte is validated before the next one is read, so a zero
              terminated string is never read past its terminator.
    returns : the decoded code point, or 0xFFFD (replacement character) when
              the lead byte is a stray continuation byte (0x80..0xBF), is
              0xF8 or higher, or is not followed by the required number of
              continuation bytes.

    Only the structure of the sequence is checked: overlong forms, surrogates
    and four byte values above 0x10FFFF are decoded as they come.

    The four byte case assembles the value directly from the payload bits in
    unsigned arithmetic. The earlier surrogate style computation subtracted
    one from the top bits and then left shifted the result, which is undefined
    behaviour when that intermediate is negative (lead byte 0xF0 followed by
    0x80..0x8F).
*/
unsigned aux_str2uni(const unsigned char *text)
{
    const unsigned lead = text[0];
    if (lead < 0x80) {
        /* Plain ASCII. */
        return lead;
    } else if (lead <= 0xbf) {
        /* A continuation byte can't start a sequence. */
        return 0xFFFD;
    } else if (lead <= 0xdf) {
        if (text[1] >= 0x80 && text[1] < 0xc0) {
            return ((lead & 0x1fu) << 6) | (text[1] & 0x3fu);
        }
    } else if (lead <= 0xef) {
        if (text[1] >= 0x80 && text[1] < 0xc0 &&
            text[2] >= 0x80 && text[2] < 0xc0) {
            return ((lead & 0x0fu) << 12) | ((text[1] & 0x3fu) << 6) | (text[2] & 0x3fu);
        }
    } else if (lead <= 0xf7) {
        if (text[1] >= 0x80 && text[1] < 0xc0 &&
            text[2] >= 0x80 && text[2] < 0xc0 &&
            text[3] >= 0x80 && text[3] < 0xc0) {
            return ((lead & 0x07u) << 18) | ((text[1] & 0x3fu) << 12) |
                   ((text[2] & 0x3fu) << 6) | (text[3] & 0x3fu);
        }
    }
    /* Lead byte 0xF8..0xFF or a missing/invalid continuation byte. */
    return 0xFFFD;
}
+
/*
    Decode the UTF-8 sequence that starts at |text|, return its code point and
    store the number of bytes it occupies in |*len|.

    text    : pointer to the first byte of the sequence. Every continuation
              byte is validated before the next one is read, so a zero
              terminated string is never read past its terminator.
    len     : receives 1 for an ASCII byte, 2, 3 or 4 for a well formed
              multi byte sequence, and 1 for anything malformed. Must not be
              a null pointer.
    returns : the decoded code point, or 0xFFFD (replacement character) when
              the lead byte is a stray continuation byte (0x80..0xBF), is
              0xF8 or higher, or is not followed by the required number of
              continuation bytes.

    A malformed sequence always reports a length of one, so a caller that
    advances by |*len| only steps over the offending lead byte and resumes at
    the next byte. Reporting four for a broken four byte sequence (as was done
    before) made such a caller skip bytes that were never validated, including
    a string terminator that follows a truncated sequence.

    Only the structure of the sequence is checked: overlong forms, surrogates
    and four byte values above 0x10FFFF are decoded as they come. The four
    byte value is assembled directly from the payload bits in unsigned
    arithmetic, which avoids left shifting a negative intermediate.
*/
unsigned aux_str2uni_len(const unsigned char *text, int *len)
{
    const unsigned lead = text[0];
    /* Pessimistic defaults; only a fully validated sequence overrides them. */
    unsigned code = 0xFFFD;
    int size = 1;
    if (lead < 0x80) {
        /* Plain ASCII. */
        code = lead;
    } else if (lead <= 0xbf) {
        /* A continuation byte can't start a sequence. */
    } else if (lead <= 0xdf) {
        if (text[1] >= 0x80 && text[1] < 0xc0) {
            code = ((lead & 0x1fu) << 6) | (text[1] & 0x3fu);
            size = 2;
        }
    } else if (lead <= 0xef) {
        if (text[1] >= 0x80 && text[1] < 0xc0 &&
            text[2] >= 0x80 && text[2] < 0xc0) {
            code = ((lead & 0x0fu) << 12) | ((text[1] & 0x3fu) << 6) | (text[2] & 0x3fu);
            size = 3;
        }
    } else if (lead <= 0xf7) {
        if (text[1] >= 0x80 && text[1] < 0xc0 &&
            text[2] >= 0x80 && text[2] < 0xc0 &&
            text[3] >= 0x80 && text[3] < 0xc0) {
            code = ((lead & 0x07u) << 18) | ((text[1] & 0x3fu) << 12) |
                   ((text[2] & 0x3fu) << 6) | (text[3] & 0x3fu);
            size = 4;
        }
    }
    *len = size;
    return code;
}
+
unsigned char *aux_uni2str(unsigned unic)
{
unsigned char *buf = lmt_memory_malloc(5);