1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
|
#include<stdio.h>
#include<wchar.h>
#include<unistr.h>
/* Input streams shared by the whole file: main() opens foreground from
 * argv[1] and background from argv[2], then reads both through fget32uc().
 */
FILE * foreground;
FILE * background;
/*
 * Reads one UTF-8 encoded character from file and returns it as a UTF-32
 * code point.
 *
 * Returns (uint32_t)EOF when the stream ends (or fgetc fails) before a
 * complete character has been read, or when the next four bytes do not start
 * with a well-formed UTF-8 sequence. Callers may compare the result against
 * EOF directly.
 */
uint32_t fget32uc(FILE * file) {
    /* bytes collected so far; a UTF-8 sequence is at most 4 bytes long */
    uint8_t bytes[4] = {0};
    size_t count = 0;

    while (count < sizeof bytes) {
        /* keep fgetc's result in an int so that EOF stays distinguishable
         * from a legitimate 0xFF data byte
         */
        int c = fgetc(file);
        if (c == EOF) {
            return (uint32_t)EOF;
        }
        bytes[count++] = (uint8_t)c;

        /* u8_check returns NULL once the bytes read so far are well-formed;
         * until then the sequence is either incomplete or invalid
         */
        if (u8_check(bytes, count) == NULL) {
            uint32_t codepoint = 0;
            /* capacity of the result buffer, counted in UTF-32 units
             * (not bytes): exactly one code point
             */
            size_t codepointlen = 1;
            if (u8_to_u32(bytes, count, &codepoint, &codepointlen) != &codepoint
                || codepointlen != 1) {
                /* conversion failed or did not land in our buffer */
                return (uint32_t)EOF;
            }
            return codepoint;
        }
    }
    /* four bytes read without ever forming a valid character */
    return (uint32_t)EOF;
}
/*
 * Writes a single UTF-32 code point to stdout, encoded as UTF-8.
 *
 * Nothing is written when ch cannot be encoded as UTF-8. The encoded bytes
 * are emitted as a C string, so U+0000 produces no output either.
 */
void print32t8uc(uint32_t ch) {
    /* up to 4 UTF-8 bytes for one code point, plus the NUL terminator */
    uint8_t utf8string[5] = {0};
    /* capacity offered to the converter, in UTF-8 units */
    size_t len = sizeof utf8string - 1;

    /* The source length is a count of UTF-32 units, not bytes: convert
     * exactly the one code point held in ch. A result other than our own
     * buffer means the conversion failed.
     */
    if (u32_to_u8(&ch, 1, utf8string, &len) != utf8string) {
        return;
    }
    /* len now holds the number of bytes produced; terminate right after */
    utf8string[len] = '\0';
    fputs((const char *)utf8string, stdout);
}
/*
 * Consumes characters from file with fget32uc() until a newline has been
 * read. Returns '\n' once a newline was consumed, or EOF if fget32uc()
 * reported EOF first.
 */
int gotonewl(FILE * file) {
    for (;;) {
        /* hold the full 32-bit code point: narrowing it to char would make
         * code points whose low byte is 0x0A or 0xFF look like '\n' or EOF
         */
        uint32_t readchar = fget32uc(file);
        if (readchar == (uint32_t)EOF) {
            return EOF;
        }
        if (readchar == '\n') {
            return '\n';
        }
    }
}
/*
 * Overlays the foreground file on the background file and writes the result
 * to stdout: for each character position the foreground character is
 * printed, unless it is a space, a newline or past the end of the foreground
 * input, in which case the background character shows through. Both inputs
 * advance line by line together; output ends when the background ends.
 *
 * argv[1]  foreground input file
 * argv[2]  background input file
 * argv[3]  optional; when any third argument is present, foreground
 *          characters are wrapped in ANSI colour escape sequences
 *
 * Returns 0 on success, -1 on a usage error or when an input cannot be
 * opened.
 */
int main(int argc, char * argv[]) {
    /* most recent character read from each input; both start as '\n' so
     * that the foreground is advanced on the first iteration
     */
    uint32_t readforechar = '\n';
    uint32_t readbackchar = '\n';
    int color;

    /* program name plus the two input files are required */
    if (argc < 3) {
        fprintf(stderr, "usage: repl [foreground input] [background input] [color]\n");
        return -1;
    }
    /* colour is opt-in: only enabled when an extra argument is given */
    color = (argc > 3);

    foreground = fopen(argv[1], "r");
    if (foreground == NULL) {
        fprintf(stderr, "error opening foreground input\n");
        return -1;
    }
    background = fopen(argv[2], "r");
    if (background == NULL) {
        fprintf(stderr, "error opening background input\n");
        /* do not leak the stream that was already opened */
        fclose(foreground);
        return -1;
    }

    for (;;) {
        /* Advance the foreground unless it is parked on a newline while
         * the background is still in the middle of its line.
         */
        if (readforechar != '\n' || readbackchar == '\n') {
            readforechar = fget32uc(foreground);
        }

        readbackchar = fget32uc(background);
        /* the background drives the output: once it ends, we are done */
        if (readbackchar == (uint32_t)EOF) {
            break;
        }

        /* The background line ended first: discard the rest of the
         * foreground line so both inputs start the next line together.
         * gotonewl yields '\n' at end of line or EOF at end of file.
         */
        if (readbackchar == '\n' && readforechar != '\n') {
            readforechar = (uint32_t)gotonewl(foreground);
        }

        /* choose which character reaches stdout */
        if (readforechar == ' ' || readforechar == '\n'
            || readforechar == (uint32_t)EOF) {
            /* nothing visible in the foreground here */
            print32t8uc(readbackchar);
        } else {
            if (color) {
                /* ESC-introduced SGR codes 33 and 47 */
                printf("\033[33m\033[47m");
            }
            print32t8uc(readforechar);
            if (color) {
                /* SGR codes 39 and 49 */
                printf("\033[39m\033[49m");
            }
        }
    }

    fclose(foreground);
    fclose(background);
    return 0;
}
|