kernel_optimize_test/fs/nls/nls_utf8.c
Alan Stern 74675a5850 NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode.  The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in
lots of places.  This shouldn't cause any new problems: if a
conversion now results in an invalid 16-bit code, then previously it
must have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediately after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2009-06-15 21:44:43 -07:00

69 lines
1.2 KiB
C

/*
* Module for handling utf8 just like any other charset.
* By Urban Widmark 2000
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/nls.h>
#include <linux/errno.h>
/*
 * Identity byte map: entry i is set to i by init_nls_utf8() and the array is
 * used as both the charset2lower and charset2upper table.
 */
static unsigned char identity[256];
/*
 * Convert a single wchar_t into UTF-8 by way of utf32_to_utf8().
 *
 * @uni:      character to convert
 * @out:      destination buffer
 * @boundlen: length bound for @out, handed on to utf32_to_utf8()
 *
 * Returns -ENAMETOOLONG when @boundlen is not positive.  When
 * utf32_to_utf8() reports a negative result, '?' is stored in the first
 * byte of @out and -EINVAL is returned.  Otherwise the non-negative result
 * of utf32_to_utf8() is returned unchanged (presumably the number of bytes
 * produced -- confirm against that helper).
 */
static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
	int len;

	if (boundlen < 1)
		return -ENAMETOOLONG;

	len = utf32_to_utf8(uni, out, boundlen);
	if (len >= 0)
		return len;

	/* Conversion failed: leave a placeholder for the caller. */
	*out = '?';
	return -EINVAL;
}
/*
 * Convert the start of a UTF-8 byte sequence into a single wchar_t by way
 * of utf8_to_utf32().
 *
 * @rawstring: input bytes
 * @boundlen:  length bound for @rawstring, handed on to utf8_to_utf32()
 * @uni:       receives the converted character, or '?' (0x003f) on failure
 *
 * Fails with -EINVAL when utf8_to_utf32() reports a negative result or when
 * the decoded value is larger than MAX_WCHAR_T.  Otherwise returns the
 * non-negative result of utf8_to_utf32() unchanged (presumably the number
 * of input bytes consumed -- confirm against that helper).
 */
static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
{
	unicode_t code;
	int len = utf8_to_utf32(rawstring, boundlen, &code);

	/* code is only inspected once the conversion is known to have succeeded. */
	if (len >= 0 && code <= MAX_WCHAR_T) {
		*uni = (wchar_t) code;
		return len;
	}

	*uni = 0x003f;	/* ? */
	return -EINVAL;
}
/* NLS table for the "utf8" charset; passed to register_nls() at module init. */
static struct nls_table table = {
	.owner		= THIS_MODULE,
	.charset	= "utf8",
	.char2uni	= char2uni,
	.uni2char	= uni2char,
	/* Both case tables point at the identity map, i.e. no conversion. */
	.charset2upper	= identity,
	.charset2lower	= identity,
};
/*
 * Module init: fill the identity map so that identity[i] == i for every
 * entry, then register the utf8 table.
 *
 * Returns whatever register_nls() returns.
 */
static int __init init_nls_utf8(void)
{
	unsigned int ch;

	/* identity holds one byte per entry, so sizeof() is its entry count. */
	for (ch = 0; ch < sizeof(identity); ch++)
		identity[ch] = (unsigned char) ch;

	return register_nls(&table);
}
/* Module exit: unregister the utf8 table that init_nls_utf8() registered. */
static void __exit exit_nls_utf8(void)
{
	unregister_nls(&table);
}
/* Module entry/exit hooks and license declaration. */
module_init(init_nls_utf8)
module_exit(exit_nls_utf8)
MODULE_LICENSE("Dual BSD/GPL");