summaryrefslogtreecommitdiff
path: root/ucs2_to_utf8.c
blob: ab56bf2b41dcc8dd66a1837bdf0d098db420b522 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
 * vim:ts=8:expandtab
 *
 * i3 - an improved dynamic tiling window manager
 *
 * © 2009-2012 Michael Stapelberg and contributors
 *
 * See file LICENSE for license information.
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <iconv.h>

static iconv_t conversion_descriptor = 0;
static iconv_t conversion_descriptor2 = 0;

/* isutf, u8_dec © 2005 Jeff Bezanson, public domain */
#define isutf(c) (((c) & 0xC0) != 0x80)

/*
 * Decrements i to point to the previous unicode glyph
 *
 */
void u8_dec(char *s, int *i) {
    (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || isutf(s[--(*i)]) || --(*i));
}

/*
 * Returns the input string, but converted from UCS-2 to UTF-8. Memory will be
 * allocated, thus the caller has to free the output.
 *
 */
int convert_ucs_to_utf8(char *input, char *output) {
	size_t input_size = 2;

	/* UTF-8 may consume up to 4 byte */
	size_t output_size = 8;
	/* We need to use an additional pointer, because iconv() modifies it */
	char *outptr = output;

	/* We convert the input into UCS-2 big endian */
        if (conversion_descriptor == 0) {
                conversion_descriptor = iconv_open("UTF-8", "UCS-2BE");
                if (conversion_descriptor == 0) {
                        fprintf(stderr, "error opening the conversion context\n");
                        exit(1);
                }
        }

	/* Get the conversion descriptor back to original state */
	iconv(conversion_descriptor, NULL, NULL, NULL, NULL);

	/* Convert our text */
	int rc = iconv(conversion_descriptor, (void*)&input, &input_size, &outptr, &output_size);
        if (rc == (size_t)-1) {
                perror("Converting to UCS-2 failed");
                return 0;
	}

	return (8 - output_size);
}

/*
 * Converts the given string to UCS-2 big endian for use with
 * xcb_image_text_16(). The amount of real glyphs is stored in real_strlen,
 * a buffer containing the UCS-2 encoded string (16 bit per glyph) is
 * returned. It has to be freed when done.
 *
 */
char *convert_utf8_to_ucs2(char *input, int *real_strlen) {
	size_t input_size = strlen(input) + 1;
	/* UCS-2 consumes exactly two bytes for each glyph */
	int buffer_size = input_size * 2;

	char *buffer = malloc(buffer_size);
        if (buffer == NULL)
                err(EXIT_FAILURE, "malloc() failed\n");
	size_t output_size = buffer_size;
	/* We need to use an additional pointer, because iconv() modifies it */
	char *output = buffer;

	/* We convert the input into UCS-2 big endian */
        if (conversion_descriptor2 == 0) {
                conversion_descriptor2 = iconv_open("UCS-2BE", "UTF-8");
                if (conversion_descriptor2 == 0) {
                        fprintf(stderr, "error opening the conversion context\n");
                        exit(1);
                }
        }

	/* Get the conversion descriptor back to original state */
	iconv(conversion_descriptor2, NULL, NULL, NULL, NULL);

	/* Convert our text */
	int rc = iconv(conversion_descriptor2, (void*)&input, &input_size, &output, &output_size);
        if (rc == (size_t)-1) {
                perror("Converting to UCS-2 failed");
                if (real_strlen != NULL)
		        *real_strlen = 0;
                return NULL;
	}

        if (real_strlen != NULL)
	        *real_strlen = ((buffer_size - output_size) / 2) - 1;

	return buffer;
}