OleUtil.java
/*
Copyright (c) 2013 James Ahlborn
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.healthmarketscience.jackcess.impl;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.sql.Blob;
import java.sql.SQLException;
import java.sql.SQLFeatureNotSupportedException;
import java.text.Normalizer;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;
import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.util.OleBlob;
import static com.healthmarketscience.jackcess.util.OleBlob.*;
import org.apache.commons.lang3.builder.ToStringBuilder;
/**
* Utility code for working with OLE data.
*
* @author James Ahlborn
* @usage _advanced_class_
*/
public class OleUtil
{
/**
* Interface used to allow optional inclusion of the poi library for working
* with compound ole data.
*/
interface CompoundPackageFactory
{
public ContentImpl createCompoundPackageContent(
OleBlobImpl blob, String prettyName, String className, String typeName,
ByteBuffer blobBb, int dataBlockLen);
}
private static final int PACKAGE_SIGNATURE = 0x1C15;
private static final Charset OLE_CHARSET = StandardCharsets.US_ASCII;
private static final Charset OLE_UTF_CHARSET = StandardCharsets.UTF_16LE;
private static final byte[] COMPOUND_STORAGE_SIGNATURE =
{(byte)0xd0,(byte)0xcf,(byte)0x11,(byte)0xe0,
(byte)0xa1,(byte)0xb1,(byte)0x1a,(byte)0xe1};
private static final String SIMPLE_PACKAGE_TYPE = "Package";
private static final int PACKAGE_OBJECT_TYPE = 0x02;
private static final int OLE_VERSION = 0x0501;
private static final int OLE_FORMAT = 0x02;
private static final int PACKAGE_STREAM_SIGNATURE = 0x02;
private static final int PS_EMBEDDED_FILE = 0x030000;
private static final int PS_LINKED_FILE = 0x010000;
private static final Set<ContentType> WRITEABLE_TYPES = EnumSet.of(
ContentType.LINK, ContentType.SIMPLE_PACKAGE, ContentType.OTHER);
private static final byte[] NO_DATA = new byte[0];
private static final int LINK_HEADER = 0x01;
private static final byte[] PACKAGE_FOOTER = {
0x01, 0x05, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, (byte)0xAD, 0x05, (byte)0xFE
};
// regex pattern which matches all the crazy extra stuff in unicode
private static final Pattern UNICODE_ACCENT_PATTERN =
Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
private static final CompoundPackageFactory COMPOUND_FACTORY;
static {
CompoundPackageFactory compoundFactory = null;
try {
compoundFactory = (CompoundPackageFactory)
Class.forName("com.healthmarketscience.jackcess.impl.CompoundOleUtil")
.newInstance();
} catch(Throwable t) {
// must not have poi, will load compound ole data as "other"
}
COMPOUND_FACTORY = compoundFactory;
}
/**
* Parses an access database blob structure and returns an appropriate
* OleBlob instance.
*/
public static OleBlob parseBlob(byte[] bytes) {
return new OleBlobImpl(bytes);
}
/**
* Creates a new OlBlob instance using the given information.
*/
public static OleBlob createBlob(Builder oleBuilder)
throws IOException
{
try {
if(!WRITEABLE_TYPES.contains(oleBuilder.getType())) {
throw new IllegalArgumentException(
"Cannot currently create ole values of type " +
oleBuilder.getType());
}
long contentLen = oleBuilder.getContentLength();
byte[] contentBytes = oleBuilder.getBytes();
InputStream contentStream = oleBuilder.getStream();
byte[] packageStreamHeader = NO_DATA;
byte[] packageStreamFooter = NO_DATA;
switch(oleBuilder.getType()) {
case LINK:
packageStreamHeader = writePackageStreamHeader(oleBuilder);
// link "content" is file path
contentBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
contentLen = contentBytes.length;
break;
case SIMPLE_PACKAGE:
packageStreamHeader = writePackageStreamHeader(oleBuilder);
packageStreamFooter = writePackageStreamFooter(oleBuilder);
break;
case OTHER:
// nothing more to do
break;
default:
throw new RuntimeException("unexpected type " + oleBuilder.getType());
}
long payloadLen = packageStreamHeader.length + packageStreamFooter.length +
contentLen;
byte[] packageHeader = writePackageHeader(oleBuilder, payloadLen);
long totalOleLen = packageHeader.length + PACKAGE_FOOTER.length +
payloadLen;
if(totalOleLen > DataType.OLE.getMaxSize()) {
throw new IllegalArgumentException("Content size of " + totalOleLen +
" is too large for ole column");
}
byte[] oleBytes = new byte[(int)totalOleLen];
ByteBuffer bb = PageChannel.wrap(oleBytes);
bb.put(packageHeader);
bb.put(packageStreamHeader);
if(contentLen > 0L) {
if(contentBytes != null) {
bb.put(contentBytes);
} else {
byte[] buf = new byte[8192];
int numBytes = 0;
while((numBytes = contentStream.read(buf)) >= 0) {
bb.put(buf, 0, numBytes);
}
}
}
bb.put(packageStreamFooter);
bb.put(PACKAGE_FOOTER);
return parseBlob(oleBytes);
} finally {
ByteUtil.closeQuietly(oleBuilder.getStream());
}
}
private static byte[] writePackageHeader(Builder oleBuilder,
long contentLen) {
byte[] prettyNameBytes = getZeroTermStrBytes(oleBuilder.getPrettyName());
String className = oleBuilder.getClassName();
String typeName = oleBuilder.getTypeName();
if(className == null) {
className = typeName;
} else if(typeName == null) {
typeName = className;
}
byte[] classNameBytes = getZeroTermStrBytes(className);
byte[] typeNameBytes = getZeroTermStrBytes(typeName);
int packageHeaderLen = 20 + prettyNameBytes.length + classNameBytes.length;
int oleHeaderLen = 24 + typeNameBytes.length;
byte[] headerBytes = new byte[packageHeaderLen + oleHeaderLen];
ByteBuffer bb = PageChannel.wrap(headerBytes);
// write outer package header
bb.putShort((short)PACKAGE_SIGNATURE);
bb.putShort((short)packageHeaderLen);
bb.putInt(PACKAGE_OBJECT_TYPE);
bb.putShort((short)prettyNameBytes.length);
bb.putShort((short)classNameBytes.length);
int prettyNameOff = bb.position() + 8;
bb.putShort((short)prettyNameOff);
bb.putShort((short)(prettyNameOff + prettyNameBytes.length));
bb.putInt(-1);
bb.put(prettyNameBytes);
bb.put(classNameBytes);
// put ole header
bb.putInt(OLE_VERSION);
bb.putInt(OLE_FORMAT);
bb.putInt(typeNameBytes.length);
bb.put(typeNameBytes);
bb.putLong(0L);
bb.putInt((int)contentLen);
return headerBytes;
}
private static byte[] writePackageStreamHeader(Builder oleBuilder) {
byte[] fileNameBytes = getZeroTermStrBytes(oleBuilder.getFileName());
byte[] filePathBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
int headerLen = 6 + fileNameBytes.length + filePathBytes.length;
if(oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
headerLen += 8 + filePathBytes.length;
} else {
headerLen += 2;
}
byte[] headerBytes = new byte[headerLen];
ByteBuffer bb = PageChannel.wrap(headerBytes);
bb.putShort((short)PACKAGE_STREAM_SIGNATURE);
bb.put(fileNameBytes);
bb.put(filePathBytes);
if(oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
bb.putInt(PS_EMBEDDED_FILE);
bb.putInt(filePathBytes.length);
bb.put(filePathBytes, 0, filePathBytes.length);
bb.putInt((int) oleBuilder.getContentLength());
} else {
bb.putInt(PS_LINKED_FILE);
bb.putShort((short)LINK_HEADER);
}
return headerBytes;
}
private static byte[] writePackageStreamFooter(Builder oleBuilder) {
// note, these are _not_ zero terminated
byte[] fileNameBytes = oleBuilder.getFileName().getBytes(OLE_UTF_CHARSET);
byte[] filePathBytes = oleBuilder.getFilePath().getBytes(OLE_UTF_CHARSET);
int footerLen = 12 + (filePathBytes.length * 2) + fileNameBytes.length;
byte[] footerBytes = new byte[footerLen];
ByteBuffer bb = PageChannel.wrap(footerBytes);
bb.putInt(filePathBytes.length/2);
bb.put(filePathBytes);
bb.putInt(fileNameBytes.length/2);
bb.put(fileNameBytes);
bb.putInt(filePathBytes.length/2);
bb.put(filePathBytes);
return footerBytes;
}
/**
* creates the appropriate ContentImpl for the given blob.
*/
private static ContentImpl parseContent(OleBlobImpl blob)
throws IOException
{
ByteBuffer bb = PageChannel.wrap(blob.getBytes());
if((bb.remaining() < 2) || (bb.getShort() != PACKAGE_SIGNATURE)) {
return new UnknownContentImpl(blob);
}
// read outer package header
int headerSize = bb.getShort();
/* int objType = */ bb.getInt();
int prettyNameLen = bb.getShort();
int classNameLen = bb.getShort();
int prettyNameOff = bb.getShort();
int classNameOff = bb.getShort();
/* int objSize = */ bb.getInt();
String prettyName = readStr(bb, prettyNameOff, prettyNameLen);
String className = readStr(bb, classNameOff, classNameLen);
bb.position(headerSize);
// read ole header
int oleVer = bb.getInt();
/* int format = */ bb.getInt();
if(oleVer != OLE_VERSION) {
return new UnknownContentImpl(blob);
}
int typeNameLen = bb.getInt();
String typeName = readStr(bb, bb.position(), typeNameLen);
bb.getLong(); // unused
int dataBlockLen = bb.getInt();
int dataBlockPos = bb.position();
if(SIMPLE_PACKAGE_TYPE.equalsIgnoreCase(typeName)) {
return createSimplePackageContent(
blob, prettyName, className, typeName, bb, dataBlockLen);
}
// if COMPOUND_FACTORY is null, the poi library isn't available, so just
// load compound data as "other"
if((COMPOUND_FACTORY != null) &&
(bb.remaining() >= COMPOUND_STORAGE_SIGNATURE.length) &&
ByteUtil.matchesRange(bb, bb.position(), COMPOUND_STORAGE_SIGNATURE)) {
return COMPOUND_FACTORY.createCompoundPackageContent(
blob, prettyName, className, typeName, bb, dataBlockLen);
}
// this is either some other "special" (as yet unhandled) format, or it is
// simply an embedded file (or it is compound data and poi isn't available)
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
private static ContentImpl createSimplePackageContent(
OleBlobImpl blob, String prettyName, String className, String typeName,
ByteBuffer blobBb, int dataBlockLen) {
int dataBlockPos = blobBb.position();
ByteBuffer bb = PageChannel.narrowBuffer(blobBb, dataBlockPos,
dataBlockPos + dataBlockLen);
int packageSig = bb.getShort();
if(packageSig != PACKAGE_STREAM_SIGNATURE) {
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
String fileName = readZeroTermStr(bb);
String filePath = readZeroTermStr(bb);
int packageType = bb.getInt();
if(packageType == PS_EMBEDDED_FILE) {
int localFilePathLen = bb.getInt();
String localFilePath = readStr(bb, bb.position(), localFilePathLen);
int dataLen = bb.getInt();
int dataPos = bb.position();
bb.position(dataLen + dataPos);
// remaining strings are in "reverse" order (local file path, file name,
// file path). these string usee a real utf charset, and therefore can
// "fix" problems with ascii based names (so we prefer these strings to
// the original strings we found)
int strNum = 0;
while(true) {
int rem = bb.remaining();
if(rem < 4) {
break;
}
int strLen = bb.getInt();
String remStr = readStr(bb, bb.position(), strLen * 2, OLE_UTF_CHARSET);
switch(strNum) {
case 0:
localFilePath = remStr;
break;
case 1:
fileName = remStr;
break;
case 2:
filePath = remStr;
break;
default:
// ignore
}
++strNum;
}
return new SimplePackageContentImpl(
blob, prettyName, className, typeName, dataPos, dataLen,
fileName, filePath, localFilePath);
}
if(packageType == PS_LINKED_FILE) {
bb.getShort(); //unknown
String linkStr = readZeroTermStr(bb);
return new LinkContentImpl(blob, prettyName, className, typeName,
fileName, linkStr, filePath);
}
return new OtherContentImpl(blob, prettyName, className,
typeName, dataBlockPos, dataBlockLen);
}
private static String readStr(ByteBuffer bb, int off, int len) {
return readStr(bb, off, len, OLE_CHARSET);
}
private static String readZeroTermStr(ByteBuffer bb) {
int off = bb.position();
while(bb.hasRemaining()) {
byte b = bb.get();
if(b == 0) {
break;
}
}
int len = bb.position() - off;
return readStr(bb, off, len);
}
private static String readStr(ByteBuffer bb, int off, int len,
Charset charset) {
String str = new String(bb.array(), off, len, charset);
bb.position(off + len);
if(str.charAt(str.length() - 1) == '\0') {
str = str.substring(0, str.length() - 1);
}
return str;
}
private static byte[] getZeroTermStrBytes(String str) {
// since we are converting to ascii, try to make "nicer" versions of crazy
// chars (e.g. convert "u with an umlaut" to just "u"). this may not
// ultimately help anything but it is what ms access does.
// decompose complex chars into combos of char and accent
str = Normalizer.normalize(str, Normalizer.Form.NFD);
// strip the accents
str = UNICODE_ACCENT_PATTERN.matcher(str).replaceAll("");
// (re)normalize what is left
str = Normalizer.normalize(str, Normalizer.Form.NFC);
return (str + '\0').getBytes(OLE_CHARSET);
}
static final class OleBlobImpl implements OleBlob, ColumnImpl.InMemoryBlob
{
private byte[] _bytes;
private ContentImpl _content;
private OleBlobImpl(byte[] bytes) {
_bytes = bytes;
}
@Override
public void writeTo(OutputStream out) throws IOException {
out.write(_bytes);
}
@Override
public Content getContent() throws IOException {
if(_content == null) {
_content = parseContent(this);
}
return _content;
}
@Override
public InputStream getBinaryStream() throws SQLException {
return new ByteArrayInputStream(_bytes);
}
@Override
public InputStream getBinaryStream(long pos, long len)
throws SQLException
{
return new ByteArrayInputStream(_bytes, fromJdbcOffset(pos), (int)len);
}
@Override
public long length() throws SQLException {
return _bytes.length;
}
@Override
public byte[] getBytes() throws IOException {
if(_bytes == null) {
throw new IOException("blob is closed");
}
return _bytes;
}
@Override
public byte[] getBytes(long pos, int len) throws SQLException {
return ByteUtil.copyOf(_bytes, fromJdbcOffset(pos), len);
}
@Override
public long position(byte[] pattern, long start) throws SQLException {
int pos = ByteUtil.findRange(PageChannel.wrap(_bytes),
fromJdbcOffset(start), pattern);
return((pos >= 0) ? toJdbcOffset(pos) : pos);
}
@Override
public long position(Blob pattern, long start) throws SQLException {
return position(pattern.getBytes(1L, (int)pattern.length()), start);
}
@Override
public OutputStream setBinaryStream(long position) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
@Override
public void truncate(long len) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
@Override
public int setBytes(long pos, byte[] bytes) throws SQLException {
throw new SQLFeatureNotSupportedException();
}
@Override
public int setBytes(long pos, byte[] bytes, int offset, int lesn)
throws SQLException {
throw new SQLFeatureNotSupportedException();
}
@Override
public void free() {
close();
}
@Override
public void close() {
_bytes = null;
ByteUtil.closeQuietly(_content);
_content = null;
}
private static int toJdbcOffset(int off) {
return off + 1;
}
private static int fromJdbcOffset(long off) {
return (int)off - 1;
}
@Override
public String toString() {
ToStringBuilder sb = CustomToStringStyle.builder(this);
if(_content != null) {
sb.append("content", _content);
} else {
sb.append("bytes", _bytes);
sb.append("content", "(uninitialized)");
}
return sb.toString();
}
}
static abstract class ContentImpl implements Content, Closeable
{
protected final OleBlobImpl _blob;
protected ContentImpl(OleBlobImpl blob) {
_blob = blob;
}
@Override
public OleBlobImpl getBlob() {
return _blob;
}
protected byte[] getBytes() throws IOException {
return getBlob().getBytes();
}
@Override
public void close() {
// base does nothing
}
protected ToStringBuilder toString(ToStringBuilder sb) {
sb.append("type", getType());
return sb;
}
}
static abstract class EmbeddedContentImpl extends ContentImpl
implements EmbeddedContent
{
private final int _position;
private final int _length;
protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length)
{
super(blob);
_position = position;
_length = length;
}
@Override
public long length() {
return _length;
}
@Override
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(getBytes(), _position, _length);
}
@Override
public void writeTo(OutputStream out) throws IOException {
out.write(getBytes(), _position, _length);
}
@Override
protected ToStringBuilder toString(ToStringBuilder sb) {
super.toString(sb);
if(_position >= 0) {
sb.append("content", ByteBuffer.wrap(_blob._bytes, _position, _length));
}
return sb;
}
}
static abstract class EmbeddedPackageContentImpl
extends EmbeddedContentImpl
implements PackageContent
{
private final String _prettyName;
private final String _className;
private final String _typeName;
protected EmbeddedPackageContentImpl(
OleBlobImpl blob, String prettyName, String className,
String typeName, int position, int length)
{
super(blob, position, length);
_prettyName = prettyName;
_className = className;
_typeName = typeName;
}
@Override
public String getPrettyName() {
return _prettyName;
}
@Override
public String getClassName() {
return _className;
}
@Override
public String getTypeName() {
return _typeName;
}
@Override
protected ToStringBuilder toString(ToStringBuilder sb) {
sb.append("prettyName", _prettyName)
.append("className", _className)
.append("typeName", _typeName);
super.toString(sb);
return sb;
}
}
private static final class LinkContentImpl
extends EmbeddedPackageContentImpl
implements LinkContent
{
private final String _fileName;
private final String _linkPath;
private final String _filePath;
private LinkContentImpl(OleBlobImpl blob, String prettyName,
String className, String typeName,
String fileName, String linkPath,
String filePath)
{
super(blob, prettyName, className, typeName, -1, -1);
_fileName = fileName;
_linkPath = linkPath;
_filePath = filePath;
}
@Override
public ContentType getType() {
return ContentType.LINK;
}
@Override
public String getFileName() {
return _fileName;
}
@Override
public String getLinkPath() {
return _linkPath;
}
@Override
public String getFilePath() {
return _filePath;
}
@Override
public InputStream getLinkStream() throws IOException {
return new FileInputStream(getLinkPath());
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("fileName", _fileName)
.append("linkPath", _linkPath)
.append("filePath", _filePath)
.toString();
}
}
private static final class SimplePackageContentImpl
extends EmbeddedPackageContentImpl
implements SimplePackageContent
{
private final String _fileName;
private final String _filePath;
private final String _localFilePath;
private SimplePackageContentImpl(OleBlobImpl blob, String prettyName,
String className, String typeName,
int position, int length,
String fileName, String filePath,
String localFilePath)
{
super(blob, prettyName, className, typeName, position, length);
_fileName = fileName;
_filePath = filePath;
_localFilePath = localFilePath;
}
@Override
public ContentType getType() {
return ContentType.SIMPLE_PACKAGE;
}
@Override
public String getFileName() {
return _fileName;
}
@Override
public String getFilePath() {
return _filePath;
}
@Override
public String getLocalFilePath() {
return _localFilePath;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("fileName", _fileName)
.append("filePath", _filePath)
.append("localFilePath", _localFilePath)
.toString();
}
}
private static final class OtherContentImpl
extends EmbeddedPackageContentImpl
implements OtherContent
{
private OtherContentImpl(
OleBlobImpl blob, String prettyName, String className,
String typeName, int position, int length)
{
super(blob, prettyName, className, typeName, position, length);
}
@Override
public ContentType getType() {
return ContentType.OTHER;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.toString();
}
}
private static final class UnknownContentImpl extends ContentImpl
{
private UnknownContentImpl(OleBlobImpl blob) {
super(blob);
}
@Override
public ContentType getType() {
return ContentType.UNKNOWN;
}
@Override
public String toString() {
return toString(CustomToStringStyle.builder(this))
.append("content", _blob._bytes)
.toString();
}
}
}