1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.healthmarketscience.jackcess.impl;
18
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.InputStreamReader;
22 import java.util.Arrays;
23 import java.util.HashMap;
24 import java.util.Map;
25
26 import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream;
27
28
29
30
31
32
33
34 public class GeneralLegacyIndexCodes {
35
/**
 * Maximum number of characters of a text value which take part in an index
 * entry; {@code toIndexCharSequence} truncates longer values to this length.
 */
static final int MAX_TEXT_INDEX_CHAR_LENGTH =
  (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);

/**
 * Marker byte written directly after the inline character codes, and reused
 * as a separator in front of the crazy/unprintable code sections.
 */
static final byte END_TEXT = (byte)0x01;
/**
 * Marker byte written at the very end of a text index entry (descending
 * entries get one additional copy before their bytes are flipped).
 */
static final byte END_EXTRA_TEXT = (byte)0x00;
41
42
43
44
45
46
// Constants used by writeUnprintableCodes to build the 2 byte offset which
// precedes every unprintable code:
//   offset = (UNPRINTABLE_COUNT_START +
//             (UNPRINTABLE_COUNT_MULTIPLIER * charOffset))
//            | UNPRINTABLE_OFFSET_FLAGS

/** base value of the unprintable code offset */
static final int UNPRINTABLE_COUNT_START = 7;
/** amount the offset grows per character position */
static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
/** flag bits which are always or-ed into the offset */
static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
/** byte written between the offset and the unprintable code bytes */
static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
51
52
53
54
/**
 * Filler byte written to the extra codes stream for character positions
 * which have no extra codes of their own; also the upper bound of the byte
 * range trimmed from the end of the extra codes.
 */
static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;

// "crazy" codes: per character flags which writeCrazyCodes packs three to a
// byte

/** initial value of every packed crazy code byte */
static final byte CRAZY_CODE_START = (byte)0x80;
/** crazy flag selected when the flag field of a code line is "1" */
static final byte CRAZY_CODE_1 = (byte)0x02;
/** crazy flag selected for any other flag field value */
static final byte CRAZY_CODE_2 = (byte)0x03;
/** bytes which are always written after the packed crazy codes */
static final byte[] CRAZY_CODES_SUFFIX =
  new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
/**
 * extra byte written after the crazy codes when unprintable codes follow
 * them
 */
static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
64
65
/**
 * resource holding the code lines for the chars FIRST_CHAR through
 * LAST_CHAR
 */
private static final String CODES_FILE =
  DatabaseImpl.RESOURCE_PATH + "index_codes_genleg.txt";
/**
 * resource holding the code lines for the chars FIRST_EXT_CHAR through
 * LAST_EXT_CHAR
 */
private static final String EXT_CODES_FILE =
  DatabaseImpl.RESOURCE_PATH + "index_codes_ext_genleg.txt";
70
71
72
73
74
75 enum Type {
76 SIMPLE("S") {
77 @Override public CharHandler parseCodes(String[] codeStrings) {
78 return parseSimpleCodes(codeStrings);
79 }
80 },
81 INTERNATIONAL("I") {
82 @Override public CharHandler parseCodes(String[] codeStrings) {
83 return parseInternationalCodes(codeStrings);
84 }
85 },
86 UNPRINTABLE("U") {
87 @Override public CharHandler parseCodes(String[] codeStrings) {
88 return parseUnprintableCodes(codeStrings);
89 }
90 },
91 UNPRINTABLE_EXT("P") {
92 @Override public CharHandler parseCodes(String[] codeStrings) {
93 return parseUnprintableExtCodes(codeStrings);
94 }
95 },
96 INTERNATIONAL_EXT("Z") {
97 @Override public CharHandler parseCodes(String[] codeStrings) {
98 return parseInternationalExtCodes(codeStrings);
99 }
100 },
101 SIGNIFICANT("G") {
102 @Override public CharHandler parseCodes(String[] codeStrings) {
103 return parseSignificantCodes(codeStrings);
104 }
105 },
106 SURROGATE("Q") {
107 @Override public CharHandler parseCodes(String[] codeStrings) {
108
109 throw new UnsupportedOperationException();
110 }
111 },
112 IGNORED("X") {
113 @Override public CharHandler parseCodes(String[] codeStrings) {
114 return IGNORED_CHAR_HANDLER;
115 }
116 };
117
118 private final String _prefixCode;
119
120 private Type(String prefixCode) {
121 _prefixCode = prefixCode;
122 }
123
124 public String getPrefixCode() {
125 return _prefixCode;
126 }
127
128 public abstract CharHandler parseCodes(String[] codeStrings);
129 }
130
131
132
133
134
135 abstract static class CharHandler {
136 public abstract Type getType();
137 public byte[] getInlineBytes(char c) {
138 return null;
139 }
140 public byte[] getExtraBytes() {
141 return null;
142 }
143 public byte[] getUnprintableBytes() {
144 return null;
145 }
146 public byte getExtraByteModifier() {
147 return 0;
148 }
149 public byte getCrazyFlag() {
150 return 0;
151 }
152 public boolean isSignificantChar() {
153 return false;
154 }
155 }
156
157
158
159
160 private static final class SimpleCharHandler extends CharHandler {
161 private final byte[] _bytes;
162 private SimpleCharHandler(byte[] bytes) {
163 _bytes = bytes;
164 }
165 @Override public Type getType() {
166 return Type.SIMPLE;
167 }
168 @Override public byte[] getInlineBytes(char c) {
169 return _bytes;
170 }
171 }
172
173
174
175
176 private static final class InternationalCharHandler extends CharHandler {
177 private final byte[] _bytes;
178 private final byte[] _extraBytes;
179 private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
180 _bytes = bytes;
181 _extraBytes = extraBytes;
182 }
183 @Override public Type getType() {
184 return Type.INTERNATIONAL;
185 }
186 @Override public byte[] getInlineBytes(char c) {
187 return _bytes;
188 }
189 @Override public byte[] getExtraBytes() {
190 return _extraBytes;
191 }
192 }
193
194
195
196
197 private static final class UnprintableCharHandler extends CharHandler {
198 private final byte[] _unprintBytes;
199 private UnprintableCharHandler(byte[] unprintBytes) {
200 _unprintBytes = unprintBytes;
201 }
202 @Override public Type getType() {
203 return Type.UNPRINTABLE;
204 }
205 @Override public byte[] getUnprintableBytes() {
206 return _unprintBytes;
207 }
208 }
209
210
211
212
213 private static final class UnprintableExtCharHandler extends CharHandler {
214 private final byte _extraByteMod;
215 private UnprintableExtCharHandler(Byte extraByteMod) {
216 _extraByteMod = extraByteMod;
217 }
218 @Override public Type getType() {
219 return Type.UNPRINTABLE_EXT;
220 }
221 @Override public byte getExtraByteModifier() {
222 return _extraByteMod;
223 }
224 }
225
226
227
228
229 private static final class InternationalExtCharHandler extends CharHandler {
230 private final byte[] _bytes;
231 private final byte[] _extraBytes;
232 private final byte _crazyFlag;
233 private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
234 byte crazyFlag) {
235 _bytes = bytes;
236 _extraBytes = extraBytes;
237 _crazyFlag = crazyFlag;
238 }
239 @Override public Type getType() {
240 return Type.INTERNATIONAL_EXT;
241 }
242 @Override public byte[] getInlineBytes(char c) {
243 return _bytes;
244 }
245 @Override public byte[] getExtraBytes() {
246 return _extraBytes;
247 }
248 @Override public byte getCrazyFlag() {
249 return _crazyFlag;
250 }
251 }
252
253
254
255
256 private static final class SignificantCharHandler extends CharHandler {
257 private final byte[] _bytes;
258 private SignificantCharHandler(byte[] bytes) {
259 _bytes = bytes;
260 }
261 @Override public Type getType() {
262 return Type.SIGNIFICANT;
263 }
264 @Override public byte[] getInlineBytes(char c) {
265 return _bytes;
266 }
267 @Override public boolean isSignificantChar() {
268 return true;
269 }
270 }
271
272
/**
 * Shared handler for characters of type IGNORED.  It overrides nothing but
 * the type, so it contributes no inline, extra, unprintable or crazy codes.
 */
static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
  @Override public Type getType() {
    return Type.IGNORED;
  }
};
278
279
280
281 private static final ThreadLocal<byte[]> SURROGATE_CHAR_BUF =
282 ThreadLocal.withInitial(() -> new byte[2]);
283 private static final byte[] SURROGATE_EXTRA_BYTES = {0x3f};
284
285 private static abstract class SurrogateCharHandler extends CharHandler {
286 @Override public Type getType() {
287 return Type.SURROGATE;
288 }
289 @Override public byte[] getExtraBytes() {
290 return SURROGATE_EXTRA_BYTES;
291 }
292 protected static byte[] toInlineBytes(int idxC) {
293 byte[] bytes = SURROGATE_CHAR_BUF.get();
294 bytes[0] = (byte)((idxC >>> 8) & 0xFF);
295 bytes[1] = (byte)(idxC & 0xFF);
296 return bytes;
297 }
298 }
299
300
301
302 static final CharHandler HIGH_SURROGATE_CHAR_HANDLER =
303 new SurrogateCharHandler() {
304 @Override public byte[] getInlineBytes(char c) {
305
306 int idxC = asUnsignedChar(c) - 10238;
307 return toInlineBytes(idxC);
308 }
309 };
310
311
312
313 static final CharHandler LOW_SURROGATE_CHAR_HANDLER =
314 new SurrogateCharHandler() {
315 @Override public byte[] getInlineBytes(char c) {
316
317
318 int charOffset = (asUnsignedChar(c) - 0xdc00) % 1024;
319
320 int idxOffset = 0;
321 if(charOffset < 8) {
322 idxOffset = 9992;
323 } else if(charOffset < (8 + 254)) {
324 idxOffset = 9990;
325 } else if(charOffset < (8 + 254 + 254)) {
326 idxOffset = 9988;
327 } else if(charOffset < (8 + 254 + 254 + 254)) {
328 idxOffset = 9986;
329 } else {
330 idxOffset = 9984;
331 }
332 int idxC = asUnsignedChar(c) - idxOffset;
333 return toInlineBytes(idxC);
334 }
335 };
336
/** first char covered by the CODES_FILE resource */
static final char FIRST_CHAR = (char)0x0000;
/** last char covered by the CODES_FILE resource */
static final char LAST_CHAR = (char)0x00FF;
/** first char covered by the EXT_CODES_FILE resource */
static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
/** last char covered by the EXT_CODES_FILE resource */
static final char LAST_EXT_CHAR = (char)0xFFFF;
341
/**
 * Holder for the handlers of the chars FIRST_CHAR through LAST_CHAR.  The
 * codes are loaded by the static initializer of this nested class, i.e. not
 * before {@code _values} is first accessed.
 */
private static final class Codes
{
  /** handlers indexed by (char - FIRST_CHAR) */
  private static final CharHandler[] _values = loadCodes(
      CODES_FILE, FIRST_CHAR, LAST_CHAR);
}

/**
 * Holder for the handlers of the chars FIRST_EXT_CHAR through
 * LAST_EXT_CHAR.  The codes are loaded by the static initializer of this
 * nested class, i.e. not before {@code _values} is first accessed.
 */
private static final class ExtCodes
{
  /** handlers indexed by (char - FIRST_EXT_CHAR) */
  private static final CharHandler[] _values = loadCodes(
      EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
}
357
/** shared instance of this class */
static final GeneralLegacyIndexCodes GEN_LEG_INSTANCE =
  new GeneralLegacyIndexCodes();

/** Package-private constructor; instances hold no state of their own. */
GeneralLegacyIndexCodes() {
}
363
364
365
366
367 CharHandler getCharHandler(char c)
368 {
369 if(c <= LAST_CHAR) {
370 return Codes._values[c];
371 }
372
373 int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
374 return ExtCodes._values[extOffset];
375 }
376
377
378
379
380
381 static CharHandler[] loadCodes(String codesFilePath,
382 char firstChar, char lastChar)
383 {
384 int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
385 CharHandler[] values = new CharHandler[numCodes];
386
387 Map<String,Type> prefixMap = new HashMap<String,Type>();
388 for(Type type : Type.values()) {
389 prefixMap.put(type.getPrefixCode(), type);
390 }
391
392 BufferedReader reader = null;
393 try {
394
395 reader = new BufferedReader(
396 new InputStreamReader(
397 DatabaseImpl.getResourceAsStream(codesFilePath), "US-ASCII"));
398
399 int start = asUnsignedChar(firstChar);
400 int end = asUnsignedChar(lastChar);
401 for(int i = start; i <= end; ++i) {
402 char c = (char)i;
403 CharHandler ch = null;
404 if(Character.isHighSurrogate(c)) {
405
406 ch = HIGH_SURROGATE_CHAR_HANDLER;
407 } else if(Character.isLowSurrogate(c)) {
408
409 ch = LOW_SURROGATE_CHAR_HANDLER;
410 } else {
411 String codeLine = reader.readLine();
412 ch = parseCodes(prefixMap, codeLine);
413 }
414 values[(i - start)] = ch;
415 }
416
417 } catch(IOException e) {
418 throw new RuntimeException("failed loading index codes file " +
419 codesFilePath, e);
420 } finally {
421 ByteUtil.closeQuietly(reader);
422 }
423
424 return values;
425 }
426
427
428
429
430
431 private static CharHandler parseCodes(Map<String,Type> prefixMap,
432 String codeLine)
433 {
434 String prefix = codeLine.substring(0, 1);
435 String suffix = ((codeLine.length() > 1) ? codeLine.substring(1) : "");
436 return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
437 }
438
439
440
441
442 private static CharHandler parseSimpleCodes(String[] codeStrings)
443 {
444 if(codeStrings.length != 1) {
445 throw new IllegalStateException("Unexpected code strings " +
446 Arrays.asList(codeStrings));
447 }
448 return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
449 }
450
451
452
453
454
455 private static CharHandler parseInternationalCodes(String[] codeStrings)
456 {
457 if(codeStrings.length != 2) {
458 throw new IllegalStateException("Unexpected code strings " +
459 Arrays.asList(codeStrings));
460 }
461 return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
462 codesToBytes(codeStrings[1], true));
463 }
464
465
466
467
468
469 private static CharHandler parseUnprintableCodes(String[] codeStrings)
470 {
471 if(codeStrings.length != 1) {
472 throw new IllegalStateException("Unexpected code strings " +
473 Arrays.asList(codeStrings));
474 }
475 return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
476 }
477
478
479
480
481
482 private static CharHandler parseUnprintableExtCodes(String[] codeStrings)
483 {
484 if(codeStrings.length != 1) {
485 throw new IllegalStateException("Unexpected code strings " +
486 Arrays.asList(codeStrings));
487 }
488 byte[] bytes = codesToBytes(codeStrings[0], true);
489 if(bytes.length != 1) {
490 throw new IllegalStateException("Unexpected code strings " +
491 Arrays.asList(codeStrings));
492 }
493 return new UnprintableExtCharHandler(bytes[0]);
494 }
495
496
497
498
499
500 private static CharHandler parseInternationalExtCodes(String[] codeStrings)
501 {
502 if(codeStrings.length != 3) {
503 throw new IllegalStateException("Unexpected code strings " +
504 Arrays.asList(codeStrings));
505 }
506
507 byte crazyFlag = ("1".equals(codeStrings[2]) ?
508 CRAZY_CODE_1 : CRAZY_CODE_2);
509 return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
510 codesToBytes(codeStrings[1], false),
511 crazyFlag);
512 }
513
514
515
516
517 private static CharHandler parseSignificantCodes(String[] codeStrings)
518 {
519 if(codeStrings.length != 1) {
520 throw new IllegalStateException("Unexpected code strings " +
521 Arrays.asList(codeStrings));
522 }
523 return new SignificantCharHandler(codesToBytes(codeStrings[0], true));
524 }
525
526
527
528
529
530 private static byte[] codesToBytes(String codes, boolean required)
531 {
532 if(codes.length() == 0) {
533 if(required) {
534 throw new IllegalStateException("empty code bytes");
535 }
536 return null;
537 }
538 if((codes.length() % 2) != 0) {
539
540 codes = "0" + codes;
541 }
542 byte[] bytes = new byte[codes.length() / 2];
543 for(int i = 0; i < bytes.length; ++i) {
544 int charIdx = i*2;
545 bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
546 16));
547 }
548 return bytes;
549 }
550
551
552
553
554
555
556 static int asUnsignedChar(char c)
557 {
558 return c & 0xFFFF;
559 }
560
561
562
563
564
/**
 * Writes the index entry for a non-null text value to the given stream.
 * The entry consists of the inline bytes of every character followed by
 * END_TEXT and then, as far as present, the extra codes, the crazy codes
 * and the unprintable codes; it is always terminated by END_EXTRA_TEXT.
 *
 * @param value the text value to index
 * @param bout the stream to which the entry is appended
 * @param isAscending whether the index column is sorted ascending;
 *                    descending entries are byte-flipped after writing
 * @throws IOException if the value cannot be converted to text or written
 */
void writeNonNullIndexTextValue(
    Object value, ByteStream bout, boolean isAscending)
  throws IOException
{
  // first, convert to the (truncated, right-trimmed) string to be indexed
  String str = toIndexCharSequence(value);

  // remember where this entry starts so that exactly the bytes written
  // here can be flipped for a descending index
  int prevLength = bout.getLength();

  // the secondary streams are only created once a character needs them
  ExtraCodesStream extraCodes = null;
  ByteStream unprintableCodes = null;
  ByteStream crazyCodes = null;
  // number of characters which have produced inline bytes so far
  int charOffset = 0;
  for(int i = 0; i < str.length(); ++i) {

    char c = str.charAt(i);
    CharHandler ch = getCharHandler(c);

    // the secondary codes are positioned relative to the offset *before*
    // this character's inline bytes are counted
    int curCharOffset = charOffset;
    byte[] bytes = ch.getInlineBytes(c);
    if(bytes != null) {
      // write the "inline" codes immediately
      bout.write(bytes);

      // only characters with inline bytes advance the offset
      ++charOffset;
    }

    if(ch.getType() == Type.SIMPLE) {
      // simple characters have nothing but inline bytes
      continue;
    }

    bytes = ch.getExtraBytes();
    byte extraCodeModifier = ch.getExtraByteModifier();
    if((bytes != null) || (extraCodeModifier != 0)) {
      if(extraCodes == null) {
        extraCodes = new ExtraCodesStream(str.length());
      }

      // keep track of the extra codes (or the modifier) for later
      writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
    }

    bytes = ch.getUnprintableBytes();
    if(bytes != null) {
      if(unprintableCodes == null) {
        unprintableCodes = new ByteStream();
      }

      // keep track of the unprintable codes for later; their offset
      // depends on the current state of the extra codes
      writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
                            extraCodes);
    }

    byte crazyFlag = ch.getCrazyFlag();
    if(crazyFlag != 0) {
      if(crazyCodes == null) {
        crazyCodes = new ByteStream();
      }

      // keep track of the crazy flags for later, they are packed when
      // written
      crazyCodes.write(crazyFlag);
    }
  }

  // end of the inline codes
  bout.write(END_TEXT);

  // trailing extra codes in the range [0, INTERNATIONAL_EXTRA_PLACEHOLDER]
  // are handed to trimTrailing (presumably dropped - confirm against
  // ByteStream); extra codes only count if something remains afterwards
  boolean hasExtraCodes = trimExtraCodes(
      extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
  boolean hasUnprintableCodes = (unprintableCodes != null);
  boolean hasCrazyCodes = (crazyCodes != null);
  if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {

    // the extra codes directly follow the first END_TEXT
    if(hasExtraCodes) {
      extraCodes.writeTo(bout);
    }

    if(hasCrazyCodes || hasUnprintableCodes) {

      // both remaining sections are introduced by two more END_TEXT bytes
      bout.write(END_TEXT);
      bout.write(END_TEXT);

      // the crazy codes come before the unprintable codes
      if(hasCrazyCodes) {

        writeCrazyCodes(crazyCodes, bout);

        // one more suffix byte is needed if unprintable codes follow the
        // crazy codes
        if(hasUnprintableCodes) {
          bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
        }
      }

      // then the unprintable codes
      if(hasUnprintableCodes) {

        // the unprintable codes get their own END_TEXT separator
        bout.write(END_TEXT);

        unprintableCodes.writeTo(bout);
      }
    }
  }

  // handle descending order by inverting the bytes of this entry
  if(!isAscending) {

    // this extra terminator is written *before* the flip, so it is
    // flipped along with the rest of the entry
    bout.write(END_EXTRA_TEXT);

    // flip only the bytes written by this call (presumably a bitwise
    // inversion - confirm against IndexData.flipBytes)
    IndexData.flipBytes(bout.getBytes(), prevLength,
                        (bout.getLength() - prevLength));
  }

  // the final terminator is never flipped
  bout.write(END_EXTRA_TEXT);
}
692
693 protected static String toIndexCharSequence(Object value)
694 throws IOException {
695
696
697 String str = ColumnImpl.toCharSequence(value).toString();
698
699
700
701 int len = str.length();
702 if(len > MAX_TEXT_INDEX_CHAR_LENGTH) {
703 str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
704 len = MAX_TEXT_INDEX_CHAR_LENGTH;
705 }
706
707
708 if((len > 0) && (str.charAt(len - 1) == ' ')) {
709 do {
710 --len;
711 } while((len > 0) && (str.charAt(len - 1) == ' '));
712
713 str = str.substring(0, len);
714 }
715
716 return str;
717 }
718
719
720
721
722 private static void writeExtraCodes(
723 int charOffset, byte[] bytes, byte extraCodeModifier,
724 ExtraCodesStream extraCodes)
725 {
726
727 int numChars = extraCodes.getNumChars();
728 if(numChars < charOffset) {
729 int fillChars = charOffset - numChars;
730 extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
731 extraCodes.incrementNumChars(fillChars);
732 }
733
734 if(bytes != null) {
735
736
737 extraCodes.write(bytes);
738 extraCodes.incrementNumChars(1);
739
740 } else {
741
742
743
744 int lastIdx = extraCodes.getLength() - 1;
745 if(lastIdx >= 0) {
746
747
748 byte lastByte = extraCodes.get(lastIdx);
749 lastByte += extraCodeModifier;
750 extraCodes.set(lastIdx, lastByte);
751
752 } else {
753
754
755
756 extraCodes.write(extraCodeModifier);
757 extraCodes.setUnprintablePrefixLen(1);
758 }
759 }
760 }
761
762
763
764
765
766
767 private static boolean trimExtraCodes(ByteStream extraCodes,
768 byte minTrimCode, byte maxTrimCode)
769 {
770 if(extraCodes == null) {
771 return false;
772 }
773
774 extraCodes.trimTrailing(minTrimCode, maxTrimCode);
775
776
777 return (extraCodes.getLength() > 0);
778 }
779
780
781
782
783 private static void writeUnprintableCodes(
784 int charOffset, byte[] bytes, ByteStream unprintableCodes,
785 ExtraCodesStream extraCodes)
786 {
787
788
789
790 int unprintCharOffset = charOffset;
791 if(extraCodes != null) {
792
793
794
795 unprintCharOffset = extraCodes.getLength() +
796 (charOffset - extraCodes.getNumChars()) -
797 extraCodes.getUnprintablePrefixLen();
798 }
799
800
801
802 int offset =
803 (UNPRINTABLE_COUNT_START +
804 (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
805 | UNPRINTABLE_OFFSET_FLAGS;
806
807
808 unprintableCodes.write((offset >> 8) & 0xFF);
809 unprintableCodes.write(offset & 0xFF);
810
811 unprintableCodes.write(UNPRINTABLE_MIDFIX);
812 unprintableCodes.write(bytes);
813 }
814
815
816
817
818 private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
819 {
820
821 trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
822
823 if(crazyCodes.getLength() > 0) {
824
825
826
827 byte curByte = CRAZY_CODE_START;
828 int idx = 0;
829 for(int i = 0; i < crazyCodes.getLength(); ++i) {
830 byte nextByte = crazyCodes.get(i);
831 nextByte <<= ((2 - idx) * 2);
832 curByte |= nextByte;
833
834 ++idx;
835 if(idx == 3) {
836
837 bout.write(curByte);
838 curByte = CRAZY_CODE_START;
839 idx = 0;
840 }
841 }
842
843
844 if(idx > 0) {
845 bout.write(curByte);
846 }
847 }
848
849
850
851 bout.write(CRAZY_CODES_SUFFIX);
852 }
853
854
855
856
857
858 private static final class ExtraCodesStream extends ByteStream
859 {
860 private int _numChars;
861 private int _unprintablePrefixLen;
862
863 private ExtraCodesStream(int length) {
864 super(length);
865 }
866
867 public int getNumChars() {
868 return _numChars;
869 }
870
871 public void incrementNumChars(int inc) {
872 _numChars += inc;
873 }
874
875 public int getUnprintablePrefixLen() {
876 return _unprintablePrefixLen;
877 }
878
879 public void setUnprintablePrefixLen(int len) {
880 _unprintablePrefixLen = len;
881 }
882 }
883
884 }