General97IndexCodes.java

/*
Copyright (c) 2019 James Ahlborn

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.healthmarketscience.jackcess.impl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream;

/**
 * Various constants used for creating "general" (access 1997) sort order
 * text index entries.
 *
 * @author James Ahlborn
 */
public class General97IndexCodes extends GeneralLegacyIndexCodes
{
  // stash the codes in some resource files

  // resource holding the char handler definitions, read via loadCodes() the
  // first time the Codes holder class is used
  private static final String CODES_FILE =
    DatabaseImpl.RESOURCE_PATH + "index_codes_gen_97.txt";
  // resource holding the sparse "extended char -> base char" mappings, read
  // via loadMappings() the first time the ExtMappings holder class is used
  private static final String EXT_MAPPINGS_FILE =
    DatabaseImpl.RESOURCE_PATH + "index_mappings_ext_gen_97.txt";

  // we only have a small range of extended chars which can be mapped back
  // into the valid chars.  the bounds are inclusive: 338 is U+0152 (LATIN
  // CAPITAL LIGATURE OE) and 8482 is U+2122 (TRADE MARK SIGN)
  private static final char FIRST_MAP_CHAR = 338;
  private static final char LAST_MAP_CHAR = 8482;

  // nibble written as both the first and the last nibble of the "extra
  // codes" stream (see writeNonNullIndexTextValue)
  private static final byte EXT_CODES_BOUNDS_NIBBLE = (byte)0x00;

  /**
   * Holder for the handlers for the first 256 chars.  The table is kept in
   * a nested class so that the codes resource is only loaded when the table
   * is first referenced (lazy load).
   */
  private static final class Codes
  {
    /** char handlers loaded from {@link #CODES_FILE}, indexed by char
        value */
    private static final CharHandler[] _values;

    static {
      _values = loadCodes(CODES_FILE, FIRST_CHAR, LAST_CHAR);
    }
  }

  /**
   * Holder for the mappings of a small subset of the rest of the chars in
   * BMP 0.  The table is kept in a nested class so that the mappings
   * resource is only loaded when the table is first referenced (lazy load).
   * <p>
   * Since these codes are for single byte encodings, you would think no
   * extended codes are needed.  However, some chars in the extended range
   * have counterparts in the single byte range, and this table holds the
   * mapping from the extended char to that single byte char.
   */
  private static final class ExtMappings
  {
    /** mapped char values, indexed by (char - FIRST_MAP_CHAR).  chars
        without mappings go to 0 (ignored). */
    private static final short[] _values;

    static {
      _values = loadMappings(EXT_MAPPINGS_FILE, FIRST_MAP_CHAR, LAST_MAP_CHAR);
    }
  }

  /** shared, package-visible instance of these index codes.  the class
      declares no instance fields of its own (any state would have to live
      in the GeneralLegacyIndexCodes parent, not visible here) */
  static final General97IndexCodes GEN_97_INSTANCE = new General97IndexCodes();

  /** package-private constructor with no setup work; the code and mapping
      tables are loaded separately by the nested holder classes when they are
      first referenced */
  General97IndexCodes() {}

  /**
   * Returns the CharHandler for the given character.
   * <p>
   * Chars up to LAST_CHAR are looked up directly in the codes table.  Chars
   * within [FIRST_MAP_CHAR, LAST_MAP_CHAR] are first translated through the
   * extended mappings table and the resulting char is then looked up in the
   * codes table.  Every other char gets the "ignored" handler.
   *
   * @param c the character to look up
   * @return the handler to use for the given character
   */
  @Override
  CharHandler getCharHandler(char c)
  {
    if(c <= LAST_CHAR) {
      // directly handled char
      return Codes._values[c];
    }

    boolean inMappedRange = (c >= FIRST_MAP_CHAR) && (c <= LAST_MAP_CHAR);
    if(!inMappedRange) {
      // no mapping exists for this char, so it is ignored
      return IGNORED_CHAR_HANDLER;
    }

    // a few extended chars are equivalent to single byte chars.  the rest of
    // the chars in the mapped range have no equivalent and map to 0, which
    // is itself an "ignored" char, so the final lookup still does the right
    // thing
    CharHandler[] handlers = Codes._values;
    int mappingIdx = asUnsignedChar(c) - asUnsignedChar(FIRST_MAP_CHAR);
    short mappedChar = ExtMappings._values[mappingIdx];
    return handlers[mappedChar];
  }

  /**
   * Converts a 97 index value for a text column into the entry value (which
   * is based on a variety of nifty codes).
   * <p>
   * The "inline" bytes of every char are written to the output as they are
   * encountered.  "Extra" codes are collected as nibbles on the side and
   * appended after the inline bytes, bracketed by EXT_CODES_BOUNDS_NIBBLE.
   * If there are no extra codes, END_EXTRA_TEXT is written instead.
   *
   * @param value the (non-null) value to write
   * @param bout the stream to which the entry bytes are appended
   * @param isAscending if {@code false}, the bytes written for this value
   *                    are flipped in place once the value is complete
   * @throws IOException declared by the overridden method; may come from
   *                     the value conversion or the stream writes
   */
  @Override
  void writeNonNullIndexTextValue(
      Object value, ByteStream bout, boolean isAscending)
    throws IOException
  {
    String text = toIndexCharSequence(value);

    // remember where this entry starts in the output so that the descending
    // handling below only touches the bytes written for this value
    int startPos = bout.getLength();

    // extra codes are only allocated if some char actually needs them
    NibbleStream extras = null;
    // number of significant chars seen since the last extra code
    int pendingSigChars = 0;

    int numChars = text.length();
    for(int idx = 0; idx < numChars; ++idx) {

      char cur = text.charAt(idx);
      CharHandler handler = getCharHandler(cur);

      // "inline" codes go straight to the output
      byte[] inlineBytes = handler.getInlineBytes(cur);
      if(inlineBytes != null) {
        bout.write(inlineBytes);
      }

      // simple chars (the common case) need nothing more
      if(handler.getType() != Type.SIMPLE) {

        if(handler.isSignificantChar()) {

          // significant chars never have extra bytes, they just take up a
          // placeholder slot in front of the next extra code
          ++pendingSigChars;

        } else {

          byte[] extraBytes = handler.getExtraBytes();
          if(extraBytes != null) {

            if(extras == null) {
              // first extra code, start the stream with the bounds marker
              extras = new NibbleStream(numChars);
              extras.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
            }

            // save the extra code for later
            writeExtraCodes(pendingSigChars, extraBytes, extras);
            pendingSigChars = 0;
          }
        }
      }
    }

    if(extras == null) {

      // no extra codes, just terminate the text
      bout.write(END_EXTRA_TEXT);

    } else {

      // close off the extra codes and append them to the entry
      extras.writeNibble(EXT_CODES_BOUNDS_NIBBLE);
      extras.writeTo(bout);
    }

    if(isAscending) {
      return;
    }

    // descending order is handled by inverting all the bytes written for
    // this text value
    IndexData.flipBytes(bout.getBytes(), startPos,
                        (bout.getLength() - startPos));
  }

  /**
   * Appends one extra code to the given nibble stream, preceded by one
   * placeholder nibble for each "significant" char seen since the previous
   * extra code.
   *
   * @param numSigChars number of placeholder nibbles to write first (nothing
   *                    is written if this is not positive)
   * @param bytes the extra bytes of the current char, only the first byte
   *              is used
   * @param extraCodes the stream collecting the extra codes
   */
  private static void writeExtraCodes(int numSigChars, byte[] bytes,
                                      NibbleStream extraCodes)
  {
    boolean needsPlaceholders = (numSigChars > 0);
    if(needsPlaceholders) {
      // pad with placeholder nibbles for the preceding "significant" chars
      extraCodes.writeFillNibbles(numSigChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
    }

    // only a single "extra" byte is ever expected here
    byte extraCode = bytes[0];
    extraCodes.writeNibble(extraCode);
  }

  /**
   * Loads a sparse char mapping table from the given resource.
   * <p>
   * Each non-blank line of the resource has the form
   * {@code <fromCode>,<toCode>} (decimal values).  The returned array has
   * one slot for every char in [firstChar, lastChar] and is indexed by
   * (fromCode - firstChar).  Slots without an entry in the resource are left
   * at 0.
   *
   * @param mappingsFilePath path of the resource to load
   * @param firstChar first char covered by the table (inclusive)
   * @param lastChar last char covered by the table (inclusive)
   * @return the loaded mapping table
   * @throws RuntimeException if the resource cannot be read
   */
  static short[] loadMappings(String mappingsFilePath,
                              char firstChar, char lastChar)
  {
    int baseCode = asUnsignedChar(firstChar);
    short[] table = new short[(asUnsignedChar(lastChar) - baseCode) + 1];

    BufferedReader in = null;
    try {

      in = new BufferedReader(
          new InputStreamReader(
              DatabaseImpl.getResourceAsStream(mappingsFilePath), "US-ASCII"));

      // the file is sparse, only chars which have a mapping are listed
      for(String line = in.readLine(); line != null; line = in.readLine()) {

        String entry = line.trim();
        if(entry.length() == 0) {
          // skip blank lines
          continue;
        }

        String[] parts = entry.split(",");
        int srcCode = Integer.parseInt(parts[0]);
        int dstCode = Integer.parseInt(parts[1]);

        table[srcCode - baseCode] = (short)dstCode;
      }

    } catch(IOException e) {
      throw new RuntimeException("failed loading index mappings file " +
                                 mappingsFilePath, e);
    } finally {
      ByteUtil.closeQuietly(in);
    }

    return table;
  }

  /**
   * Extension of ByteStream which enables writing individual nibbles (4 bit
   * values).  Nibbles are packed two to a byte: the first nibble written
   * goes into the high 4 bits of a new byte, and the next one goes into the
   * low 4 bits of that same byte.
   */
  protected static final class NibbleStream extends ByteStream
  {
    /** number of nibbles written so far.  when this is even, the next
        nibble starts a new byte. */
    private int _nibbleLen;

    /**
     * @param length passed through to the ByteStream constructor
     *               (presumably the initial capacity in bytes, see
     *               ByteStream)
     */
    protected NibbleStream(int length) {
      super(length);
    }

    /**
     * Returns {@code true} if the next nibble written will be the high
     * nibble of a new byte, {@code false} if it will fill in the low nibble
     * of the last byte.
     */
    private boolean nextIsHi() {
      return (_nibbleLen % 2) == 0;
    }

    /** Returns the low 4 bits of the given value, positioned as a low
        nibble. */
    private static int asLowNibble(int b) {
      return (b & 0x0F);
    }

    /** Returns the low 4 bits of the given value, shifted into the high
        nibble position. */
    private static int asHiNibble(int b) {
      return ((b << 4) & 0xF0);
    }

    /**
     * Merges the given nibble into the low 4 bits of the byte which holds
     * the current (odd) nibble position.  Does not update the nibble count.
     */
    private void writeLowNibble(int b) {
      // when _nibbleLen is odd, this is the index of the last byte written
      int byteOff = _nibbleLen / 2;
      // NOTE(review): relies on setBits() combining the bits with the
      // existing byte, so that the high nibble is kept - confirm in
      // ByteStream
      setBits(byteOff, (byte)asLowNibble(b));
    }

    /**
     * Appends a single nibble to this stream.
     *
     * @param b value whose low 4 bits are written
     */
    public void writeNibble(int b) {

      if(nextIsHi()) {
        // start a new byte, the low nibble is left as 0 for now
        write(asHiNibble(b));
      } else {
        writeLowNibble(b);
      }

      ++_nibbleLen;
    }

    /**
     * Appends the given nibble {@code length} times to this stream.
     *
     * @param length number of nibbles to write.  values less than or equal
     *               to 0 write nothing.
     * @param b value whose low 4 bits are written
     */
    public void writeFillNibbles(int length, byte b) {

      if(length <= 0) {
        // nothing to write.  without this check, a zero length fill on a
        // stream with a pending low nibble would write into that nibble
        // without counting it (so the next nibble written would land on top
        // of it), and a negative length would move the nibble count
        // backwards
        return;
      }

      int newNibbleLen = _nibbleLen + length;
      // number of bytes required to hold all the nibbles, rounded up
      ensureCapacity((newNibbleLen + 1) / 2);

      if(!nextIsHi()) {
        // finish off the current, half filled byte
        writeLowNibble(b);
        --length;
      }

      if(length > 1) {
        // write whole bytes with the nibble in both halves
        byte doubleB = (byte)(asHiNibble(b) | asLowNibble(b));

        do {
          write(doubleB);
          length -= 2;
        } while(length > 1);
      }

      if(length == 1) {
        // one nibble left over, it starts a new half filled byte
        write(asHiNibble(b));
      }

      _nibbleLen = newNibbleLen;
    }

  }

}