View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package ca.uhn.hl7v2.hoh.util.repackage;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.math.BigInteger;
22  import java.nio.charset.Charset;
23  import java.nio.charset.UnsupportedCharsetException;
24  
25  /**
26   * Provides Base64 encoding and decoding as defined by <a
27   * href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
28   * 
29   * <p>
30   * This class implements section <cite>6.8. Base64
31   * Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose Internet
32   * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by
33   * Freed and Borenstein.
34   * </p>
35   * <p>
36   * The class can be parameterized in the following manner with various
37   * constructors:
38   * <ul>
39   * <li>URL-safe mode: Default off.</li>
40   * <li>Line length: Default 76. Line length that aren't multiples of 4 will
41   * still essentially end up being multiples of 4 in the encoded data.
42   * <li>Line separator: Default is CRLF ("\r\n")</li>
43   * </ul>
44   * </p>
45   * <p>
46   * Since this class operates directly on byte streams, and not character
47   * streams, it is hard-coded to only encode/decode character encodings which are
48   * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8,
49   * etc).
50   * </p>
51   * <p>
52   * This class is thread-safe.
53   * </p>
54   * 
55   * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
56   * @author Note that this class has been repackaged from Apache Commons-Codec
57   *         and is distributed under the terms of the Apache Software License,
58   *         version 2.0
59   */
60  public class Base64 {
61  
62  	/**
63  	 * BASE32 characters are 6 bits in length. They are formed by taking a block
64  	 * of 3 octets to form a 24-bit string, which is converted into 4 BASE64
65  	 * characters.
66  	 */
67  	private static final int BITS_PER_ENCODED_BYTE = 6;
68  	private static final int BYTES_PER_UNENCODED_BLOCK = 3;
69  	private static final int BYTES_PER_ENCODED_BLOCK = 4;
70  
71  	/**
72  	 * Chunk separator per RFC 2045 section 2.1.
73  	 * 
74  	 * <p>
75  	 * N.B. The next major release may break compatibility and make this field
76  	 * private.
77  	 * </p>
78  	 * 
79  	 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
80  	 *      2.1</a>
81  	 */
82  	static final byte[] CHUNK_SEPARATOR = { '\r', '\n' };
83  
84  	/**
85  	 * This array is a lookup table that translates 6-bit positive integer index
86  	 * values into their "Base64 Alphabet" equivalents as specified in Table 1
87  	 * of RFC 2045.
88  	 * 
89  	 * Thanks to "commons" project in ws.apache.org for this code.
90  	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
91  	 */
92  	private static final byte[] STANDARD_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
93  			's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
94  
95  	/**
96  	 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and /
97  	 * changed to - and _ to make the encoded Base64 results more URL-SAFE. This
98  	 * table is only used when the Base64's mode is set to URL-SAFE.
99  	 */
100 	private static final byte[] URL_SAFE_ENCODE_TABLE = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
101 			's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' };
102 
103 	/**
104 	 * This array is a lookup table that translates Unicode characters drawn
105 	 * from the "Base64 Alphabet" (as specified in Table 1 of RFC 2045) into
106 	 * their 6-bit positive integer equivalents. Characters that are not in the
107 	 * Base64 alphabet but fall within the bounds of the array are translated to
108 	 * -1.
109 	 * 
110 	 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This
111 	 * means decoder seamlessly handles both URL_SAFE and STANDARD base64. (The
112 	 * encoder, on the other hand, needs to know ahead of time what to emit).
113 	 * 
114 	 * Thanks to "commons" project in ws.apache.org for this code.
115 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
116 	 */
117 	private static final byte[] DECODE_TABLE = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
118 			61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 };
119 
120 	/**
121 	 * Base64 uses 6-bit fields.
122 	 */
123 	/** Mask used to extract 6 bits, used when encoding */
124 	private static final int MASK_6BITS = 0x3f;
125 
126 	// The static final fields above are used for the original static byte[]
127 	// methods on Base64.
128 	// The private member fields below are used with the new streaming approach,
129 	// which requires
130 	// some state be preserved between calls of encode() and decode().
131 
132 	/**
133 	 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE
134 	 * above remains static because it is able to decode both STANDARD and
135 	 * URL_SAFE streams, but the encodeTable must be a member variable so we can
136 	 * switch between the two modes.
137 	 */
138 	private byte[] encodeTable;
139 
140 	// Only one decode table currently; keep for consistency with Base32 code
141 	private final byte[] decodeTable = DECODE_TABLE;
142 
143 	/**
144 	 * Line separator for encoding. Not used when decoding. Only used if
145 	 * lineLength > 0.
146 	 */
147 	private byte[] lineSeparator;
148 
149 	/**
150 	 * Convenience variable to help us determine when our buffer is going to run
151 	 * out of room and needs resizing.
152 	 * <code>decodeSize = 3 + lineSeparator.length;</code>
153 	 */
154 	private int decodeSize;
155 
156 	/**
157 	 * Convenience variable to help us determine when our buffer is going to run
158 	 * out of room and needs resizing.
159 	 * <code>encodeSize = 4 + lineSeparator.length;</code>
160 	 */
161 	private int encodeSize;
162 
163 	/**
164 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
165 	 * URL-unsafe mode.
166 	 * <p>
167 	 * When encoding the line length is 0 (no chunking), and the encoding table
168 	 * is STANDARD_ENCODE_TABLE.
169 	 * </p>
170 	 * 
171 	 * <p>
172 	 * When decoding all variants are supported.
173 	 * </p>
174 	 */
175 	public Base64() {
176 		this(0);
177 	}
178 
179 	/**
180 	 * Creates a Base64 codec used for decoding (all modes) and encoding in the
181 	 * given URL-safe mode.
182 	 * <p>
183 	 * When encoding the line length is 76, the line separator is CRLF, and the
184 	 * encoding table is STANDARD_ENCODE_TABLE.
185 	 * </p>
186 	 * 
187 	 * <p>
188 	 * When decoding all variants are supported.
189 	 * </p>
190 	 * 
191 	 * @param urlSafe
192 	 *            if {@code true}, URL-safe encoding is used. In most cases this
193 	 *            should be set to {@code false}.
194 	 * @since 1.4
195 	 */
196 	public Base64(boolean urlSafe) {
197 		this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
198 	}
199 
200 	/**
201 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
202 	 * URL-unsafe mode.
203 	 * <p>
204 	 * When encoding the line length is given in the constructor, the line
205 	 * separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
206 	 * </p>
207 	 * <p>
208 	 * Line lengths that aren't multiples of 4 will still essentially end up
209 	 * being multiples of 4 in the encoded data.
210 	 * </p>
211 	 * <p>
212 	 * When decoding all variants are supported.
213 	 * </p>
214 	 * 
215 	 * @param lineLength
216 	 *            Each line of encoded data will be at most of the given length
217 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
218 	 *            then the output will not be divided into lines (chunks).
219 	 *            Ignored when decoding.
220 	 * @since 1.4
221 	 */
222 	public Base64(int lineLength) {
223 		this(lineLength, CHUNK_SEPARATOR);
224 	}
225 
226 	/**
227 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
228 	 * URL-unsafe mode.
229 	 * <p>
230 	 * When encoding the line length and line separator are given in the
231 	 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
232 	 * </p>
233 	 * <p>
234 	 * Line lengths that aren't multiples of 4 will still essentially end up
235 	 * being multiples of 4 in the encoded data.
236 	 * </p>
237 	 * <p>
238 	 * When decoding all variants are supported.
239 	 * </p>
240 	 * 
241 	 * @param lineLength
242 	 *            Each line of encoded data will be at most of the given length
243 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
244 	 *            then the output will not be divided into lines (chunks).
245 	 *            Ignored when decoding.
246 	 * @param lineSeparator
247 	 *            Each line of encoded data will end with this sequence of
248 	 *            bytes.
249 	 * @throws IllegalArgumentException
250 	 *             Thrown when the provided lineSeparator included some base64
251 	 *             characters.
252 	 * @since 1.4
253 	 */
254 	public Base64(int lineLength, byte[] lineSeparator) {
255 		this(lineLength, lineSeparator, false);
256 	}
257 
258 	/**
259 	 * Creates a Base64 codec used for decoding (all modes) and encoding in
260 	 * URL-unsafe mode.
261 	 * <p>
262 	 * When encoding the line length and line separator are given in the
263 	 * constructor, and the encoding table is STANDARD_ENCODE_TABLE.
264 	 * </p>
265 	 * <p>
266 	 * Line lengths that aren't multiples of 4 will still essentially end up
267 	 * being multiples of 4 in the encoded data.
268 	 * </p>
269 	 * <p>
270 	 * When decoding all variants are supported.
271 	 * </p>
272 	 * 
273 	 * @param lineLength
274 	 *            Each line of encoded data will be at most of the given length
275 	 *            (rounded down to nearest multiple of 4). If lineLength <= 0,
276 	 *            then the output will not be divided into lines (chunks).
277 	 *            Ignored when decoding.
278 	 * @param lineSeparator
279 	 *            Each line of encoded data will end with this sequence of
280 	 *            bytes.
281 	 * @param urlSafe
282 	 *            Instead of emitting '+' and '/' we emit '-' and '_'
283 	 *            respectively. urlSafe is only applied to encode operations.
284 	 *            Decoding seamlessly handles both modes.
285 	 * @throws IllegalArgumentException
286 	 *             The provided lineSeparator included some base64 characters.
287 	 *             That's not going to work!
288 	 * @since 1.4
289 	 */
290 	public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
291 		this(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, lineSeparator == null ? 0 : lineSeparator.length);
292 		// TODO could be simplified if there is no requirement to reject invalid
293 		// line sep when length <=0
294 		// @see test case Base64Test.testConstructors()
295 		if (lineSeparator != null) {
296 			if (containsAlphabetOrPad(lineSeparator)) {
297 				String sep = StringUtils.newStringUtf8(lineSeparator);
298 				throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
299 			}
300 			if (lineLength > 0) { // null line-sep forces no chunking rather
301 									// than throwing IAE
302 				this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
303 				this.lineSeparator = new byte[lineSeparator.length];
304 				System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
305 			} else {
306 				this.encodeSize = BYTES_PER_ENCODED_BLOCK;
307 				this.lineSeparator = null;
308 			}
309 		} else {
310 			this.encodeSize = BYTES_PER_ENCODED_BLOCK;
311 			this.lineSeparator = null;
312 		}
313 		this.decodeSize = this.encodeSize - 1;
314 		this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
315 	}
316 
317 	/**
318 	 * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
319 	 * 
320 	 * @return true if we're in URL-SAFE mode, false otherwise.
321 	 * @since 1.4
322 	 */
323 	public boolean isUrlSafe() {
324 		return this.encodeTable == URL_SAFE_ENCODE_TABLE;
325 	}
326 
327 	/**
328 	 * <p>
329 	 * Encodes all of the provided data, starting at inPos, for inAvail bytes.
330 	 * Must be called at least twice: once with the data to encode, and once
331 	 * with inAvail set to "-1" to alert encoder that EOF has been reached, so
332 	 * flush last remaining bytes (if not multiple of 3).
333 	 * </p>
334 	 * <p>
335 	 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
336 	 * and general approach.
337 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
338 	 * </p>
339 	 * 
340 	 * @param in
341 	 *            byte[] array of binary data to base64 encode.
342 	 * @param inPos
343 	 *            Position to start reading data from.
344 	 * @param inAvail
345 	 *            Amount of bytes available from input for encoding.
346 	 * @param context
347 	 *            the context to be used
348 	 */
349 	void encode(byte[] in, int inPos, int inAvail, Context context) {
350 		if (context.eof) {
351 			return;
352 		}
353 		// inAvail < 0 is how we're informed of EOF in the underlying data we're
354 		// encoding.
355 		if (inAvail < 0) {
356 			context.eof = true;
357 			if (0 == context.modulus && lineLength == 0) {
358 				return; // no leftovers to process and not using chunking
359 			}
360 			ensureBufferSize(encodeSize, context);
361 			int savedPos = context.pos;
362 			switch (context.modulus) { // 0-2
363 			case 1: // 8 bits = 6 + 2
364 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; // top
365 																										// 6
366 																										// bits
367 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; // remaining
368 																										// 2
369 				// URL-SAFE skips the padding to further reduce size.
370 				if (encodeTable == STANDARD_ENCODE_TABLE) {
371 					context.buffer[context.pos++] = PAD;
372 					context.buffer[context.pos++] = PAD;
373 				}
374 				break;
375 
376 			case 2: // 16 bits = 6 + 6 + 4
377 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS];
378 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS];
379 				context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS];
380 				// URL-SAFE skips the padding to further reduce size.
381 				if (encodeTable == STANDARD_ENCODE_TABLE) {
382 					context.buffer[context.pos++] = PAD;
383 				}
384 				break;
385 			}
386 			context.currentLinePos += context.pos - savedPos; // keep track of
387 																// current line
388 																// position
389 			// if currentPos == 0 we are at the start of a line, so don't add
390 			// CRLF
391 			if (lineLength > 0 && context.currentLinePos > 0) {
392 				System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
393 				context.pos += lineSeparator.length;
394 			}
395 		} else {
396 			for (int i = 0; i < inAvail; i++) {
397 				ensureBufferSize(encodeSize, context);
398 				context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
399 				int b = in[inPos++];
400 				if (b < 0) {
401 					b += 256;
402 				}
403 				context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
404 				if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to
405 											// extract
406 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS];
407 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS];
408 					context.buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS];
409 					context.buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS];
410 					context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
411 					if (lineLength > 0 && lineLength <= context.currentLinePos) {
412 						System.arraycopy(lineSeparator, 0, context.buffer, context.pos, lineSeparator.length);
413 						context.pos += lineSeparator.length;
414 						context.currentLinePos = 0;
415 					}
416 				}
417 			}
418 		}
419 	}
420 
421 	/**
422 	 * <p>
423 	 * Decodes all of the provided data, starting at inPos, for inAvail bytes.
424 	 * Should be called at least twice: once with the data to decode, and once
425 	 * with inAvail set to "-1" to alert decoder that EOF has been reached. The
426 	 * "-1" call is not necessary when decoding, but it doesn't hurt, either.
427 	 * </p>
428 	 * <p>
429 	 * Ignores all non-base64 characters. This is how chunked (e.g. 76
430 	 * character) data is handled, since CR and LF are silently ignored, but has
431 	 * implications for other bytes, too. This method subscribes to the
432 	 * garbage-in, garbage-out philosophy: it will not check the provided data
433 	 * for validity.
434 	 * </p>
435 	 * <p>
436 	 * Thanks to "commons" project in ws.apache.org for the bitwise operations,
437 	 * and general approach.
438 	 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
439 	 * </p>
440 	 * 
441 	 * @param in
442 	 *            byte[] array of ascii data to base64 decode.
443 	 * @param inPos
444 	 *            Position to start reading data from.
445 	 * @param inAvail
446 	 *            Amount of bytes available from input for encoding.
447 	 * @param context
448 	 *            the context to be used
449 	 */
450 	void decode(byte[] in, int inPos, int inAvail, Context context) {
451 		if (context.eof) {
452 			return;
453 		}
454 		if (inAvail < 0) {
455 			context.eof = true;
456 		}
457 		for (int i = 0; i < inAvail; i++) {
458 			ensureBufferSize(decodeSize, context);
459 			byte b = in[inPos++];
460 			if (b == PAD) {
461 				// We're done.
462 				context.eof = true;
463 				break;
464 			} else {
465 				if (b >= 0 && b < DECODE_TABLE.length) {
466 					int result = DECODE_TABLE[b];
467 					if (result >= 0) {
468 						context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
469 						context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
470 						if (context.modulus == 0) {
471 							context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS);
472 							context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
473 							context.buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
474 						}
475 					}
476 				}
477 			}
478 		}
479 
480 		// Two forms of EOF as far as base64 decoder is concerned: actual
481 		// EOF (-1) and first time '=' character is encountered in stream.
482 		// This approach makes the '=' padding characters completely optional.
483 		if (context.eof && context.modulus != 0) {
484 			ensureBufferSize(decodeSize, context);
485 
486 			// We have some spare bits remaining
487 			// Output all whole multiples of 8 bits and ignore the rest
488 			switch (context.modulus) {
489 			// case 1: // 6 bits - ignore entirely
490 			// break;
491 			case 2: // 12 bits = 8 + 4
492 				context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the
493 																	// extra 4
494 																	// bits
495 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
496 				break;
497 			case 3: // 18 bits = 8 + 8 + 2
498 				context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
499 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS);
500 				context.buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS);
501 				break;
502 			}
503 		}
504 	}
505 
506 	/**
507 	 * Tests a given byte array to see if it contains only valid characters
508 	 * within the Base64 alphabet. Currently the method treats whitespace as
509 	 * valid.
510 	 * 
511 	 * @param arrayOctet
512 	 *            byte array to test
513 	 * @return {@code true} if all bytes are valid characters in the Base64
514 	 *         alphabet or if the byte array is empty; {@code false}, otherwise
515 	 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
516 	 */
517 	@Deprecated
518 	public static boolean isArrayByteBase64(byte[] arrayOctet) {
519 		return isBase64(arrayOctet);
520 	}
521 
522 	/**
523 	 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
524 	 * 
525 	 * @param octet
526 	 *            The value to test
527 	 * @return {@code true} if the value is defined in the the base 64 alphabet,
528 	 *         {@code false} otherwise.
529 	 * @since 1.4
530 	 */
531 	public static boolean isBase64(byte octet) {
532 		return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
533 	}
534 
535 	/**
536 	 * Tests a given String to see if it contains only valid characters within
537 	 * the Base64 alphabet. Currently the method treats whitespace as valid.
538 	 * 
539 	 * @param base64
540 	 *            String to test
541 	 * @return {@code true} if all characters in the String are valid characters
542 	 *         in the Base64 alphabet or if the String is empty; {@code false},
543 	 *         otherwise
544 	 * @since 1.5
545 	 */
546 	public static boolean isBase64(String base64) {
547 		return isBase64(StringUtils.getBytesUtf8(base64));
548 	}
549 
550 	/**
551 	 * Tests a given byte array to see if it contains only valid characters
552 	 * within the Base64 alphabet. Currently the method treats whitespace as
553 	 * valid.
554 	 * 
555 	 * @param arrayOctet
556 	 *            byte array to test
557 	 * @return {@code true} if all bytes are valid characters in the Base64
558 	 *         alphabet or if the byte array is empty; {@code false}, otherwise
559 	 * @since 1.5
560 	 */
561 	public static boolean isBase64(byte[] arrayOctet) {
562 		for (int i = 0; i < arrayOctet.length; i++) {
563 			if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
564 				return false;
565 			}
566 		}
567 		return true;
568 	}
569 
570 	/**
571 	 * Encodes binary data using the base64 algorithm but does not chunk the
572 	 * output.
573 	 * 
574 	 * @param binaryData
575 	 *            binary data to encode
576 	 * @return byte[] containing Base64 characters in their UTF-8
577 	 *         representation.
578 	 */
579 	public static byte[] encodeBase64(byte[] binaryData) {
580 		return encodeBase64(binaryData, false);
581 	}
582 
583 	/**
584 	 * Encodes binary data using the base64 algorithm but does not chunk the
585 	 * output.
586 	 * 
587 	 * NOTE: We changed the behaviour of this method from multi-line chunking
588 	 * (commons-codec-1.4) to single-line non-chunking (commons-codec-1.5).
589 	 * 
590 	 * @param binaryData
591 	 *            binary data to encode
592 	 * @return String containing Base64 characters.
593 	 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
594 	 */
595 	public static String encodeBase64String(byte[] binaryData) {
596 		return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
597 	}
598 
599 	/**
600 	 * Encodes binary data using a URL-safe variation of the base64 algorithm
601 	 * but does not chunk the output. The url-safe variation emits - and _
602 	 * instead of + and / characters.
603 	 * 
604 	 * @param binaryData
605 	 *            binary data to encode
606 	 * @return byte[] containing Base64 characters in their UTF-8
607 	 *         representation.
608 	 * @since 1.4
609 	 */
610 	public static byte[] encodeBase64URLSafe(byte[] binaryData) {
611 		return encodeBase64(binaryData, false, true);
612 	}
613 
614 	/**
615 	 * Encodes binary data using a URL-safe variation of the base64 algorithm
616 	 * but does not chunk the output. The url-safe variation emits - and _
617 	 * instead of + and / characters.
618 	 * 
619 	 * @param binaryData
620 	 *            binary data to encode
621 	 * @return String containing Base64 characters
622 	 * @since 1.4
623 	 */
624 	public static String encodeBase64URLSafeString(byte[] binaryData) {
625 		return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
626 	}
627 
628 	/**
629 	 * Encodes binary data using the base64 algorithm and chunks the encoded
630 	 * output into 76 character blocks
631 	 * 
632 	 * @param binaryData
633 	 *            binary data to encode
634 	 * @return Base64 characters chunked in 76 character blocks
635 	 */
636 	public static byte[] encodeBase64Chunked(byte[] binaryData) {
637 		return encodeBase64(binaryData, true);
638 	}
639 
640 	/**
641 	 * Encodes binary data using the base64 algorithm, optionally chunking the
642 	 * output into 76 character blocks.
643 	 * 
644 	 * @param binaryData
645 	 *            Array containing binary data to encode.
646 	 * @param isChunked
647 	 *            if {@code true} this encoder will chunk the base64 output into
648 	 *            76 character blocks
649 	 * @return Base64-encoded data.
650 	 * @throws IllegalArgumentException
651 	 *             Thrown when the input array needs an output array bigger than
652 	 *             {@link Integer#MAX_VALUE}
653 	 */
654 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
655 		return encodeBase64(binaryData, isChunked, false);
656 	}
657 
658 	/**
659 	 * Encodes binary data using the base64 algorithm, optionally chunking the
660 	 * output into 76 character blocks.
661 	 * 
662 	 * @param binaryData
663 	 *            Array containing binary data to encode.
664 	 * @param isChunked
665 	 *            if {@code true} this encoder will chunk the base64 output into
666 	 *            76 character blocks
667 	 * @param urlSafe
668 	 *            if {@code true} this encoder will emit - and _ instead of the
669 	 *            usual + and / characters.
670 	 * @return Base64-encoded data.
671 	 * @throws IllegalArgumentException
672 	 *             Thrown when the input array needs an output array bigger than
673 	 *             {@link Integer#MAX_VALUE}
674 	 * @since 1.4
675 	 */
676 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
677 		return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
678 	}
679 
680 	/**
681 	 * Encodes binary data using the base64 algorithm, optionally chunking the
682 	 * output into 76 character blocks.
683 	 * 
684 	 * @param binaryData
685 	 *            Array containing binary data to encode.
686 	 * @param isChunked
687 	 *            if {@code true} this encoder will chunk the base64 output into
688 	 *            76 character blocks
689 	 * @param urlSafe
690 	 *            if {@code true} this encoder will emit - and _ instead of the
691 	 *            usual + and / characters.
692 	 * @param maxResultSize
693 	 *            The maximum result size to accept.
694 	 * @return Base64-encoded data.
695 	 * @throws IllegalArgumentException
696 	 *             Thrown when the input array needs an output array bigger than
697 	 *             maxResultSize
698 	 * @since 1.4
699 	 */
700 	public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
701 		if (binaryData == null || binaryData.length == 0) {
702 			return binaryData;
703 		}
704 
705 		// Create this so can use the super-class method
706 		// Also ensures that the same roundings are performed by the ctor and
707 		// the code
708 		Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
709 		long len = b64.getEncodedLength(binaryData);
710 		if (len > maxResultSize) {
711 			throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
712 		}
713 
714 		return b64.encode(binaryData);
715 	}
716 
717 	/**
718 	 * Decodes a Base64 String into octets
719 	 * 
720 	 * @param base64String
721 	 *            String containing Base64 data
722 	 * @return Array containing decoded data.
723 	 * @since 1.4
724 	 */
725 	public static byte[] decodeBase64(String base64String) {
726 		return new Base64().decode(base64String);
727 	}
728 
729 	/**
730 	 * Decodes Base64 data into octets
731 	 * 
732 	 * @param base64Data
733 	 *            Byte array containing Base64 data
734 	 * @return Array containing decoded data.
735 	 */
736 	public static byte[] decodeBase64(byte[] base64Data) {
737 		return new Base64().decode(base64Data);
738 	}
739 
740 	// Implementation of the Encoder Interface
741 
742 	// Implementation of integer encoding used for crypto
743 	/**
744 	 * Decodes a byte64-encoded integer according to crypto standards such as
745 	 * W3C's XML-Signature
746 	 * 
747 	 * @param pArray
748 	 *            a byte array containing base64 character data
749 	 * @return A BigInteger
750 	 * @since 1.4
751 	 */
752 	public static BigInteger decodeInteger(byte[] pArray) {
753 		return new BigInteger(1, decodeBase64(pArray));
754 	}
755 
756 	/**
757 	 * Encodes to a byte64-encoded integer according to crypto standards such as
758 	 * W3C's XML-Signature
759 	 * 
760 	 * @param bigInt
761 	 *            a BigInteger
762 	 * @return A byte array containing base64 character data
763 	 * @throws NullPointerException
764 	 *             if null is passed in
765 	 * @since 1.4
766 	 */
767 	public static byte[] encodeInteger(BigInteger bigInt) {
768 		if (bigInt == null) {
769 			throw new NullPointerException("encodeInteger called with null parameter");
770 		}
771 		return encodeBase64(toIntegerBytes(bigInt), false);
772 	}
773 
774 	/**
775 	 * Returns a byte-array representation of a <code>BigInteger</code> without
776 	 * sign bit.
777 	 * 
778 	 * @param bigInt
779 	 *            <code>BigInteger</code> to be converted
780 	 * @return a byte array representation of the BigInteger parameter
781 	 */
782 	static byte[] toIntegerBytes(BigInteger bigInt) {
783 		int bitlen = bigInt.bitLength();
784 		// round bitlen
785 		bitlen = ((bitlen + 7) >> 3) << 3;
786 		byte[] bigBytes = bigInt.toByteArray();
787 
788 		if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
789 			return bigBytes;
790 		}
791 		// set up params for copying everything but sign bit
792 		int startSrc = 0;
793 		int len = bigBytes.length;
794 
795 		// if bigInt is exactly byte-aligned, just skip signbit in copy
796 		if ((bigInt.bitLength() % 8) == 0) {
797 			startSrc = 1;
798 			len--;
799 		}
800 		int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
801 		byte[] resizedBytes = new byte[bitlen / 8];
802 		System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
803 		return resizedBytes;
804 	}
805 
806 	/**
807 	 * Returns whether or not the <code>octet</code> is in the Base32 alphabet.
808 	 * 
809 	 * @param octet
810 	 *            The value to test
811 	 * @return {@code true} if the value is defined in the the Base32 alphabet
812 	 *         {@code false} otherwise.
813 	 */
814 	protected boolean isInAlphabet(byte octet) {
815 		return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
816 	}
817 
	/**
	 * Holds the per-operation state (work areas, output buffer and cursors) so
	 * that the codec itself does not have to keep it and can be thread-safe.
	 * 
	 * This class is not itself thread-safe; each thread must allocate its own
	 * copy.
	 * 
	 * @since 1.7
	 */
	static class Context {

		/**
		 * <code>int</code>-sized place holder for the bytes we're dealing with
		 * for our based logic. Bitwise operations store and extract the
		 * encoding or decoding from this variable.
		 */
		int ibitWorkArea;

		/**
		 * <code>long</code>-sized place holder for the bytes we're dealing with
		 * for our based logic. Bitwise operations store and extract the
		 * encoding or decoding from this variable.
		 */
		long lbitWorkArea;

		/**
		 * Buffer for streaming. It is <code>null</code> until the first
		 * allocation and is reset to <code>null</code> by
		 * <code>readResults</code> once all buffered data has been read.
		 */
		byte[] buffer;

		/**
		 * Position where next character should be written in the buffer.
		 */
		int pos;

		/**
		 * Position where next character should be read from the buffer.
		 */
		int readPos;

		/**
		 * Boolean flag to indicate the EOF has been reached. Once EOF has been
		 * reached, this object becomes useless, and must be thrown away.
		 */
		boolean eof;

		/**
		 * Variable tracks how many characters have been written to the current
		 * line. Only used when encoding. We use it to make sure each encoded
		 * line never goes beyond lineLength (if lineLength &gt; 0).
		 */
		int currentLinePos;

		/**
		 * Writes to the buffer only occur after every 3/5 reads when encoding,
		 * and every 4/8 reads when decoding. This variable helps track that.
		 */
		int modulus;

		/**
		 * Creates an empty context; every field starts at its Java default
		 * value.
		 */
		Context() {
		}
	}
879 
	/**
	 * End-of-input marker. It is passed as the length argument to signal that
	 * no more input follows, and returned by <code>readResults</code> when the
	 * context has reached EOF and holds no buffered data.
	 * 
	 * @since 1.7
	 */
	static final int EOF = -1;

	/**
	 * MIME chunk size per RFC 2045 section 6.8.
	 * 
	 * <p>
	 * The {@value} character limit does not count the trailing CRLF, but counts
	 * all other characters, including any equal signs.
	 * </p>
	 * 
	 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section
	 *      6.8</a>
	 */
	public static final int MIME_CHUNK_SIZE = 76;

	/**
	 * PEM chunk size per RFC 1421 section 4.3.2.4.
	 * 
	 * <p>
	 * The {@value} character limit does not count the trailing CRLF, but counts
	 * all other characters, including any equal signs.
	 * </p>
	 * 
	 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section
	 *      4.3.2.4</a>
	 */
	public static final int PEM_CHUNK_SIZE = 64;

	/**
	 * Factor by which an existing buffer's length is multiplied each time it
	 * has to grow.
	 */
	private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

	/**
	 * Defines the default buffer size - currently {@value} - must be large
	 * enough for at least one encoded block+separator
	 */
	private static final int DEFAULT_BUFFER_SIZE = 8192;

	/** Mask used to extract 8 bits, used in decoding bytes */
	protected static final int MASK_8BITS = 0xff;

	/**
	 * Byte used to pad output.
	 */
	protected static final byte PAD_DEFAULT = '='; // Allow static access to
													// default

	/**
	 * Pad byte used by this instance; currently always {@link #PAD_DEFAULT}.
	 */
	protected final byte PAD = PAD_DEFAULT; // instance variable just in case it
											// needs to vary later
932 
	/**
	 * Number of bytes in each full block of unencoded data, e.g. 3 for Base64
	 * and 5 for Base32
	 */
	private final int unencodedBlockSize;

	/**
	 * Number of bytes in each full block of encoded data, e.g. 4 for Base64 and
	 * 8 for Base32
	 */
	private final int encodedBlockSize;

	/**
	 * Chunksize for encoding. Not used when decoding. A value of zero or less
	 * implies no chunking of the encoded data. Rounded down to nearest multiple
	 * of encodedBlockSize.
	 */
	protected int lineLength;

	/**
	 * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
	 */
	private int chunkSeparatorLength;
956 
957 	/**
958 	 * Note <code>lineLength</code> is rounded down to the nearest multiple of
959 	 * {@link #encodedBlockSize} If <code>chunkSeparatorLength</code> is zero,
960 	 * then chunking is disabled.
961 	 * 
962 	 * @param unencodedBlockSize
963 	 *            the size of an unencoded block (e.g. Base64 = 3)
964 	 * @param encodedBlockSize
965 	 *            the size of an encoded block (e.g. Base64 = 4)
966 	 * @param lineLength
967 	 *            if &gt; 0, use chunking with a length <code>lineLength</code>
968 	 * @param chunkSeparatorLength
969 	 *            the chunk separator length, if relevant
970 	 */
971 	protected Base64(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength) {
972 		this.unencodedBlockSize = unencodedBlockSize;
973 		this.encodedBlockSize = encodedBlockSize;
974 		this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
975 		this.chunkSeparatorLength = chunkSeparatorLength;
976 	}
977 
978 	/**
979 	 * Returns true if this object has buffered data for reading.
980 	 * 
981 	 * @param context
982 	 *            the context to be used
983 	 * @return true if there is data still available for reading.
984 	 */
985 	boolean hasData(Context context) { // package protected for access from I/O
986 										// streams
987 		return context.buffer != null;
988 	}
989 
990 	/**
991 	 * Returns the amount of buffered data available for reading.
992 	 * 
993 	 * @param context
994 	 *            the context to be used
995 	 * @return The amount of buffered data available for reading.
996 	 */
997 	int available(Context context) { // package protected for access from I/O
998 										// streams
999 		return context.buffer != null ? context.pos - context.readPos : 0;
1000 	}
1001 
1002 	/**
1003 	 * Get the default buffer size. Can be overridden.
1004 	 * 
1005 	 * @return {@link #DEFAULT_BUFFER_SIZE}
1006 	 */
1007 	protected int getDefaultBufferSize() {
1008 		return DEFAULT_BUFFER_SIZE;
1009 	}
1010 
1011 	/**
1012 	 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
1013 	 * 
1014 	 * @param context
1015 	 *            the context to be used
1016 	 */
1017 	private void resizeBuffer(Context context) {
1018 		if (context.buffer == null) {
1019 			context.buffer = new byte[getDefaultBufferSize()];
1020 			context.pos = 0;
1021 			context.readPos = 0;
1022 		} else {
1023 			byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
1024 			System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
1025 			context.buffer = b;
1026 		}
1027 	}
1028 
1029 	/**
1030 	 * Ensure that the buffer has room for <code>size</code> bytes
1031 	 * 
1032 	 * @param size
1033 	 *            minimum spare space required
1034 	 * @param context
1035 	 *            the context to be used
1036 	 */
1037 	protected void ensureBufferSize(int size, Context context) {
1038 		if ((context.buffer == null) || (context.buffer.length < context.pos + size)) {
1039 			resizeBuffer(context);
1040 		}
1041 	}
1042 
1043 	/**
1044 	 * Extracts buffered data into the provided byte[] array, starting at
1045 	 * position bPos, up to a maximum of bAvail bytes. Returns how many bytes
1046 	 * were actually extracted.
1047 	 * 
1048 	 * @param b
1049 	 *            byte[] array to extract the buffered data into.
1050 	 * @param bPos
1051 	 *            position in byte[] array to start extraction at.
1052 	 * @param bAvail
1053 	 *            amount of bytes we're allowed to extract. We may extract fewer
1054 	 *            (if fewer are available).
1055 	 * @param context
1056 	 *            the context to be used
1057 	 * @return The number of bytes successfully extracted into the provided
1058 	 *         byte[] array.
1059 	 */
1060 	int readResults(byte[] b, int bPos, int bAvail, Context context) { // package
1061 																		// protected
1062 																		// for
1063 																		// access
1064 																		// from
1065 																		// I/O
1066 																		// streams
1067 		if (context.buffer != null) {
1068 			int len = Math.min(available(context), bAvail);
1069 			System.arraycopy(context.buffer, context.readPos, b, bPos, len);
1070 			context.readPos += len;
1071 			if (context.readPos >= context.pos) {
1072 				context.buffer = null; // so hasData() will return false, and
1073 										// this method can return -1
1074 			}
1075 			return len;
1076 		}
1077 		return context.eof ? EOF : 0;
1078 	}
1079 
1080 	/**
1081 	 * Checks if a byte value is whitespace or not. Whitespace is taken to mean:
1082 	 * space, tab, CR, LF
1083 	 * 
1084 	 * @param byteToCheck
1085 	 *            the byte to check
1086 	 * @return true if byte is whitespace, false otherwise
1087 	 */
1088 	protected static boolean isWhiteSpace(byte byteToCheck) {
1089 		switch (byteToCheck) {
1090 		case ' ':
1091 		case '\n':
1092 		case '\r':
1093 		case '\t':
1094 			return true;
1095 		default:
1096 			return false;
1097 		}
1098 	}
1099 
1100 	/**
1101 	 * Encodes an Object using the Base-N algorithm. This method is provided in
1102 	 * order to satisfy the requirements of the Encoder interface, and will
1103 	 * throw an EncoderException if the supplied object is not of type byte[].
1104 	 * 
1105 	 * @param obj
1106 	 *            Object to encode
1107 	 * @return An object (of type byte[]) containing the Base-N encoded data
1108 	 *         which corresponds to the byte[] supplied.
1109 	 * @throws EncoderException
1110 	 *             if the parameter supplied is not of type byte[]
1111 	 */
1112 	public Object encode(Object obj) throws Exception {
1113 		if (!(obj instanceof byte[])) {
1114 			throw new Exception("Parameter supplied to Base-N encode is not a byte[]");
1115 		}
1116 		return encode((byte[]) obj);
1117 	}
1118 
1119 	/**
1120 	 * Encodes a byte[] containing binary data, into a String containing
1121 	 * characters in the Base-N alphabet. Uses UTF8 encoding.
1122 	 * 
1123 	 * @param pArray
1124 	 *            a byte array containing binary data
1125 	 * @return A String containing only Base-N character data
1126 	 */
1127 	public String encodeToString(byte[] pArray) {
1128 		return StringUtils.newStringUtf8(encode(pArray));
1129 	}
1130 
1131 	/**
1132 	 * Encodes a byte[] containing binary data, into a String containing
1133 	 * characters in the appropriate alphabet. Uses UTF8 encoding.
1134 	 * 
1135 	 * @param pArray
1136 	 *            a byte array containing binary data
1137 	 * @return String containing only character data in the appropriate
1138 	 *         alphabet.
1139 	 */
1140 	public String encodeAsString(byte[] pArray) {
1141 		return StringUtils.newStringUtf8(encode(pArray));
1142 	}
1143 
1144 	/**
1145 	 * Decodes an Object using the Base-N algorithm. This method is provided in
1146 	 * order to satisfy the requirements of the Decoder interface, and will
1147 	 * throw a DecoderException if the supplied object is not of type byte[] or
1148 	 * String.
1149 	 * 
1150 	 * @param obj
1151 	 *            Object to decode
1152 	 * @return An object (of type byte[]) containing the binary data which
1153 	 *         corresponds to the byte[] or String supplied.
1154 	 * @throws DecoderException
1155 	 *             if the parameter supplied is not of type byte[]
1156 	 */
1157 	public Object decode(Object obj) throws Exception {
1158 		if (obj instanceof byte[]) {
1159 			return decode((byte[]) obj);
1160 		} else if (obj instanceof String) {
1161 			return decode((String) obj);
1162 		} else {
1163 			throw new Exception("Parameter supplied to Base-N decode is not a byte[] or a String");
1164 		}
1165 	}
1166 
1167 	/**
1168 	 * Decodes a String containing characters in the Base-N alphabet.
1169 	 * 
1170 	 * @param pArray
1171 	 *            A String containing Base-N character data
1172 	 * @return a byte array containing binary data
1173 	 */
1174 	public byte[] decode(String pArray) {
1175 		return decode(StringUtils.getBytesUtf8(pArray));
1176 	}
1177 
1178 	/**
1179 	 * Decodes a byte[] containing characters in the Base-N alphabet.
1180 	 * 
1181 	 * @param pArray
1182 	 *            A byte array containing Base-N character data
1183 	 * @return a byte array containing binary data
1184 	 */
1185 	public byte[] decode(byte[] pArray) {
1186 		Context context = new Context();
1187 		if (pArray == null || pArray.length == 0) {
1188 			return pArray;
1189 		}
1190 		decode(pArray, 0, pArray.length, context);
1191 		decode(pArray, 0, EOF, context); // Notify decoder of EOF.
1192 		byte[] result = new byte[context.pos];
1193 		readResults(result, 0, result.length, context);
1194 		return result;
1195 	}
1196 
1197 	/**
1198 	 * Encodes a byte[] containing binary data, into a byte[] containing
1199 	 * characters in the alphabet.
1200 	 * 
1201 	 * @param pArray
1202 	 *            a byte array containing binary data
1203 	 * @return A byte array containing only the basen alphabetic character data
1204 	 */
1205 	public byte[] encode(byte[] pArray) {
1206 		Context context = new Context();
1207 		if (pArray == null || pArray.length == 0) {
1208 			return pArray;
1209 		}
1210 		encode(pArray, 0, pArray.length, context);
1211 		encode(pArray, 0, EOF, context); // Notify encoder of EOF.
1212 		byte[] buf = new byte[context.pos - context.readPos];
1213 		readResults(buf, 0, buf.length, context);
1214 		return buf;
1215 	}
1216 
1217 	/**
1218 	 * Tests a given byte array to see if it contains only valid characters
1219 	 * within the alphabet. The method optionally treats whitespace and pad as
1220 	 * valid.
1221 	 * 
1222 	 * @param arrayOctet
1223 	 *            byte array to test
1224 	 * @param allowWSPad
1225 	 *            if {@code true}, then whitespace and PAD are also allowed
1226 	 * 
1227 	 * @return {@code true} if all bytes are valid characters in the alphabet or
1228 	 *         if the byte array is empty; {@code false}, otherwise
1229 	 */
1230 	public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
1231 		for (int i = 0; i < arrayOctet.length; i++) {
1232 			if (!isInAlphabet(arrayOctet[i]) && (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
1233 				return false;
1234 			}
1235 		}
1236 		return true;
1237 	}
1238 
1239 	/**
1240 	 * Tests a given String to see if it contains only valid characters within
1241 	 * the alphabet. The method treats whitespace and PAD as valid.
1242 	 * 
1243 	 * @param basen
1244 	 *            String to test
1245 	 * @return {@code true} if all characters in the String are valid characters
1246 	 *         in the alphabet or if the String is empty; {@code false},
1247 	 *         otherwise
1248 	 * @see #isInAlphabet(byte[], boolean)
1249 	 */
1250 	public boolean isInAlphabet(String basen) {
1251 		return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
1252 	}
1253 
1254 	/**
1255 	 * Tests a given byte array to see if it contains any characters within the
1256 	 * alphabet or PAD.
1257 	 * 
1258 	 * Intended for use in checking line-ending arrays
1259 	 * 
1260 	 * @param arrayOctet
1261 	 *            byte array to test
1262 	 * @return {@code true} if any byte is a valid character in the alphabet or
1263 	 *         PAD; {@code false} otherwise
1264 	 */
1265 	protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
1266 		if (arrayOctet == null) {
1267 			return false;
1268 		}
1269 		for (byte element : arrayOctet) {
1270 			if (PAD == element || isInAlphabet(element)) {
1271 				return true;
1272 			}
1273 		}
1274 		return false;
1275 	}
1276 
1277 	/**
1278 	 * Calculates the amount of space needed to encode the supplied array.
1279 	 * 
1280 	 * @param pArray
1281 	 *            byte[] array which will later be encoded
1282 	 * 
1283 	 * @return amount of space needed to encoded the supplied array. Returns a
1284 	 *         long since a max-len array will require > Integer.MAX_VALUE
1285 	 */
1286 	public long getEncodedLength(byte[] pArray) {
1287 		// Calculate non-chunked size - rounded up to allow for padding
1288 		// cast to long is needed to avoid possibility of overflow
1289 		long len = ((pArray.length + unencodedBlockSize - 1) / unencodedBlockSize) * (long) encodedBlockSize;
1290 		if (lineLength > 0) { // We're using chunking
1291 			// Round up to nearest multiple
1292 			len += ((len + lineLength - 1) / lineLength) * chunkSeparatorLength;
1293 		}
1294 		return len;
1295 	}
1296 
	/**
	 * Character encoding names required of every implementation of the Java
	 * platform.
	 * 
	 * From the Java documentation <a href=
	 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
	 * >Standard charsets</a>:
	 * <p>
	 * <cite>Every implementation of the Java platform is required to support
	 * the following character encodings. Consult the release documentation for
	 * your implementation to see if any other encodings are supported.</cite>
	 * </p>
	 * 
	 * <ul>
	 * <li><code>US-ASCII</code><br/>
	 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
	 * Unicode character set.</li>
	 * <li><code>ISO-8859-1</code><br/>
	 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
	 * <li><code>UTF-8</code><br/>
	 * Eight-bit Unicode Transformation Format.</li>
	 * <li><code>UTF-16BE</code><br/>
	 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
	 * <li><code>UTF-16LE</code><br/>
	 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
	 * <li><code>UTF-16</code><br/>
	 * Sixteen-bit Unicode Transformation Format, byte order specified by a
	 * mandatory initial byte-order mark (either order accepted on input,
	 * big-endian used on output.)</li>
	 * </ul>
	 * 
	 * This perhaps would best belong in the [lang] project. Even if a similar
	 * interface is defined in [lang], it is not foreseen that [codec] would be
	 * made to depend on [lang].
	 * 
	 * <p>
	 * This class only declares constants; it is immutable and thread-safe.
	 * </p>
	 * 
	 * <p>
	 * NOTE(review): unlike the sibling helper classes <code>Charsets</code> and
	 * <code>StringUtils</code>, this class is declared without
	 * <code>static</code> and is therefore an inner class tied to an enclosing
	 * instance. Its constants are still accessible statically; consider making
	 * it a static nested class.
	 * </p>
	 * 
	 * @see <a
	 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
	 *      charsets</a>
	 * @since 1.4
	 * @version $Id$
	 */
	public class CharEncoding {
		/**
		 * <p>
		 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String ISO_8859_1 = "ISO-8859-1";

		/**
		 * <p>
		 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
		 * Latin block of the Unicode character set.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String US_ASCII = "US-ASCII";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, with the byte order
		 * specified by a mandatory initial byte-order mark (either order
		 * accepted on input, big-endian used on output).
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16 = "UTF-16";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16BE = "UTF-16BE";

		/**
		 * <p>
		 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_16LE = "UTF-16LE";

		/**
		 * <p>
		 * Eight-bit Unicode Transformation Format.
		 * </p>
		 * <p>
		 * Every implementation of the Java platform is required to support this
		 * character encoding.
		 * </p>
		 * 
		 * @see <a
		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
		 *      charsets</a>
		 */
		public static final String UTF_8 = "UTF-8";
	}
1436 
1437 	/**
1438 	 * Charsets required of every implementation of the Java platform.
1439 	 * 
1440 	 * From the Java documentation <a href=
1441 	 * "http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1442 	 * >Standard charsets</a>:
1443 	 * <p>
1444 	 * <cite>Every implementation of the Java platform is required to support
1445 	 * the following character encodings. Consult the release documentation for
1446 	 * your implementation to see if any other encodings are supported. Consult
1447 	 * the release documentation for your implementation to see if any other
1448 	 * encodings are supported. </cite>
1449 	 * </p>
1450 	 * 
1451 	 * <ul>
1452 	 * <li><code>US-ASCII</code><br/>
1453 	 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the
1454 	 * Unicode character set.</li>
1455 	 * <li><code>ISO-8859-1</code><br/>
1456 	 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
1457 	 * <li><code>UTF-8</code><br/>
1458 	 * Eight-bit Unicode Transformation Format.</li>
1459 	 * <li><code>UTF-16BE</code><br/>
1460 	 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
1461 	 * <li><code>UTF-16LE</code><br/>
1462 	 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
1463 	 * <li><code>UTF-16</code><br/>
1464 	 * Sixteen-bit Unicode Transformation Format, byte order specified by a
1465 	 * mandatory initial byte-order mark (either order accepted on input,
1466 	 * big-endian used on output.)</li>
1467 	 * </ul>
1468 	 * 
1469 	 * This perhaps would best belong in the Commons Lang project. Even if a
1470 	 * similar class is defined in Commons Lang, it is not foreseen that Commons
1471 	 * Codec would be made to depend on Commons Lang.
1472 	 * 
1473 	 * <p>
1474 	 * This class is immutable and thread-safe.
1475 	 * </p>
1476 	 * 
1477 	 * @see <a
1478 	 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1479 	 *      charsets</a>
1480 	 * @since 1.7
1481 	 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
1482 	 */
1483 	public static class Charsets {
1484 
1485 		//
1486 		// This class should only contain Charset instances for required
1487 		// encodings. This guarantees that it will load correctly and
1488 		// without delay on all Java platforms.
1489 		//
1490 
1491 		/**
1492 		 * Returns the given Charset or the default Charset if the given Charset
1493 		 * is null.
1494 		 * 
1495 		 * @param charset
1496 		 *            A charset or null.
1497 		 * @return the given Charset or the default Charset if the given Charset
1498 		 *         is null
1499 		 */
1500 		public static Charset toCharset(Charset charset) {
1501 			return charset == null ? Charset.defaultCharset() : charset;
1502 		}
1503 
1504 		/**
1505 		 * Returns a Charset for the named charset. If the name is null, return
1506 		 * the default Charset.
1507 		 * 
1508 		 * @param charset
1509 		 *            The name of the requested charset, may be null.
1510 		 * @return a Charset for the named charset
1511 		 * @throws UnsupportedCharsetException
1512 		 *             If the named charset is unavailable
1513 		 */
1514 		public static Charset toCharset(String charset) {
1515 			return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
1516 		}
1517 
1518 		/**
1519 		 * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p>
1520 		 * <p>
1521 		 * Every implementation of the Java platform is required to support this
1522 		 * character encoding.
1523 		 * </p>
1524 		 * 
1525 		 * @see <a
1526 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1527 		 *      charsets</a>
1528 		 */
1529 		public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);
1530 
1531 		/**
1532 		 * <p>
1533 		 * Seven-bit ASCII, also known as ISO646-US, also known as the Basic
1534 		 * Latin block of the Unicode character set.
1535 		 * </p>
1536 		 * <p>
1537 		 * Every implementation of the Java platform is required to support this
1538 		 * character encoding.
1539 		 * </p>
1540 		 * 
1541 		 * @see <a
1542 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1543 		 *      charsets</a>
1544 		 */
1545 		public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);
1546 
1547 		/**
1548 		 * <p>
1549 		 * Sixteen-bit Unicode Transformation Format, The byte order specified
1550 		 * by a mandatory initial byte-order mark (either order accepted on
1551 		 * input, big-endian used on output)
1552 		 * </p>
1553 		 * <p>
1554 		 * Every implementation of the Java platform is required to support this
1555 		 * character encoding.
1556 		 * </p>
1557 		 * 
1558 		 * @see <a
1559 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1560 		 *      charsets</a>
1561 		 */
1562 		public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);
1563 
1564 		/**
1565 		 * <p>
1566 		 * Sixteen-bit Unicode Transformation Format, big-endian byte order.
1567 		 * </p>
1568 		 * <p>
1569 		 * Every implementation of the Java platform is required to support this
1570 		 * character encoding.
1571 		 * </p>
1572 		 * 
1573 		 * @see <a
1574 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1575 		 *      charsets</a>
1576 		 */
1577 		public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);
1578 
1579 		/**
1580 		 * <p>
1581 		 * Sixteen-bit Unicode Transformation Format, little-endian byte order.
1582 		 * </p>
1583 		 * <p>
1584 		 * Every implementation of the Java platform is required to support this
1585 		 * character encoding.
1586 		 * </p>
1587 		 * 
1588 		 * @see <a
1589 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1590 		 *      charsets</a>
1591 		 */
1592 		public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);
1593 
1594 		/**
1595 		 * <p>
1596 		 * Eight-bit Unicode Transformation Format.
1597 		 * </p>
1598 		 * <p>
1599 		 * Every implementation of the Java platform is required to support this
1600 		 * character encoding.
1601 		 * </p>
1602 		 * 
1603 		 * @see <a
1604 		 *      href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1605 		 *      charsets</a>
1606 		 */
1607 		public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
1608 	}
1609 
1610 	/**
1611 	 * Converts String to and from bytes using the encodings required by the
1612 	 * Java specification. These encodings are specified in <a href=
1613 	 * "http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"
1614 	 * >Standard charsets</a>
1615 	 * 
1616 	 * <p>
1617 	 * This class is immutable and thread-safe.
1618 	 * </p>
1619 	 * 
1620 	 * @see CharEncoding
1621 	 * @see <a
1622 	 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1623 	 *      charsets</a>
1624 	 * @version $Id$
1625 	 * @since 1.4
1626 	 */
1627 	public static class StringUtils {
1628 
1629 		/**
1630 		 * Calls {@link String#getBytes(Charset)}
1631 		 * 
1632 		 * @param string
1633 		 *            The string to encode (if null, return null).
1634 		 * @param charset
1635 		 *            The {@link Charset} to encode the {@code String}
1636 		 * @return the encoded bytes
1637 		 */
1638 		private static byte[] getBytes(String string, Charset charset) {
1639 			if (string == null) {
1640 				return null;
1641 			}
1642 			return string.getBytes(charset);
1643 		}
1644 
1645 		/**
1646 		 * Encodes the given string into a sequence of bytes using the
1647 		 * ISO-8859-1 charset, storing the result into a new byte array.
1648 		 * 
1649 		 * @param string
1650 		 *            the String to encode, may be {@code null}
1651 		 * @return encoded bytes, or {@code null} if the input string was
1652 		 *         {@code null}
1653 		 * @throws NullPointerException
1654 		 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1655 		 *             which should never happen since it is required by the
1656 		 *             Java platform specification.
1657 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1658 		 *        UnsupportedEncodingException
1659 		 * @see <a
1660 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1661 		 *      charsets</a>
1662 		 * @see #getBytesUnchecked(String, String)
1663 		 */
1664 		public static byte[] getBytesIso8859_1(String string) {
1665 			return getBytes(string, Charsets.ISO_8859_1);
1666 		}
1667 
1668 		/**
1669 		 * Encodes the given string into a sequence of bytes using the named
1670 		 * charset, storing the result into a new byte array.
1671 		 * <p>
1672 		 * This method catches {@link UnsupportedEncodingException} and rethrows
1673 		 * it as {@link IllegalStateException}, which should never happen for a
1674 		 * required charset name. Use this method when the encoding is required
1675 		 * to be in the JRE.
1676 		 * </p>
1677 		 * 
1678 		 * @param string
1679 		 *            the String to encode, may be {@code null}
1680 		 * @param charsetName
1681 		 *            The name of a required {@link java.nio.charset.Charset}
1682 		 * @return encoded bytes, or {@code null} if the input string was
1683 		 *         {@code null}
1684 		 * @throws IllegalStateException
1685 		 *             Thrown when a {@link UnsupportedEncodingException} is
1686 		 *             caught, which should never happen for a required charset
1687 		 *             name.
1688 		 * @see CharEncoding
1689 		 * @see String#getBytes(String)
1690 		 */
1691 		public static byte[] getBytesUnchecked(String string, String charsetName) {
1692 			if (string == null) {
1693 				return null;
1694 			}
1695 			try {
1696 				return string.getBytes(charsetName);
1697 			} catch (UnsupportedEncodingException e) {
1698 				throw StringUtils.newIllegalStateException(charsetName, e);
1699 			}
1700 		}
1701 
1702 		/**
1703 		 * Encodes the given string into a sequence of bytes using the US-ASCII
1704 		 * charset, storing the result into a new byte array.
1705 		 * 
1706 		 * @param string
1707 		 *            the String to encode, may be {@code null}
1708 		 * @return encoded bytes, or {@code null} if the input string was
1709 		 *         {@code null}
1710 		 * @throws NullPointerException
1711 		 *             Thrown if {@link Charsets#US_ASCII} is not initialized,
1712 		 *             which should never happen since it is required by the
1713 		 *             Java platform specification.
1714 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1715 		 *        UnsupportedEncodingException
1716 		 * @see <a
1717 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1718 		 *      charsets</a>
1719 		 * @see #getBytesUnchecked(String, String)
1720 		 */
1721 		public static byte[] getBytesUsAscii(String string) {
1722 			return getBytes(string, Charsets.US_ASCII);
1723 		}
1724 
1725 		/**
1726 		 * Encodes the given string into a sequence of bytes using the UTF-16
1727 		 * charset, storing the result into a new byte array.
1728 		 * 
1729 		 * @param string
1730 		 *            the String to encode, may be {@code null}
1731 		 * @return encoded bytes, or {@code null} if the input string was
1732 		 *         {@code null}
1733 		 * @throws NullPointerException
1734 		 *             Thrown if {@link Charsets#UTF_16} is not initialized,
1735 		 *             which should never happen since it is required by the
1736 		 *             Java platform specification.
1737 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1738 		 *        UnsupportedEncodingException
1739 		 * @see <a
1740 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1741 		 *      charsets</a>
1742 		 * @see #getBytesUnchecked(String, String)
1743 		 */
1744 		public static byte[] getBytesUtf16(String string) {
1745 			return getBytes(string, Charsets.UTF_16);
1746 		}
1747 
1748 		/**
1749 		 * Encodes the given string into a sequence of bytes using the UTF-16BE
1750 		 * charset, storing the result into a new byte array.
1751 		 * 
1752 		 * @param string
1753 		 *            the String to encode, may be {@code null}
1754 		 * @return encoded bytes, or {@code null} if the input string was
1755 		 *         {@code null}
1756 		 * @throws NullPointerException
1757 		 *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
1758 		 *             which should never happen since it is required by the
1759 		 *             Java platform specification.
1760 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1761 		 *        UnsupportedEncodingException
1762 		 * @see <a
1763 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1764 		 *      charsets</a>
1765 		 * @see #getBytesUnchecked(String, String)
1766 		 */
1767 		public static byte[] getBytesUtf16Be(String string) {
1768 			return getBytes(string, Charsets.UTF_16BE);
1769 		}
1770 
1771 		/**
1772 		 * Encodes the given string into a sequence of bytes using the UTF-16LE
1773 		 * charset, storing the result into a new byte array.
1774 		 * 
1775 		 * @param string
1776 		 *            the String to encode, may be {@code null}
1777 		 * @return encoded bytes, or {@code null} if the input string was
1778 		 *         {@code null}
1779 		 * @throws NullPointerException
1780 		 *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
1781 		 *             which should never happen since it is required by the
1782 		 *             Java platform specification.
1783 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1784 		 *        UnsupportedEncodingException
1785 		 * @see <a
1786 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1787 		 *      charsets</a>
1788 		 * @see #getBytesUnchecked(String, String)
1789 		 */
1790 		public static byte[] getBytesUtf16Le(String string) {
1791 			return getBytes(string, Charsets.UTF_16LE);
1792 		}
1793 
1794 		/**
1795 		 * Encodes the given string into a sequence of bytes using the UTF-8
1796 		 * charset, storing the result into a new byte array.
1797 		 * 
1798 		 * @param string
1799 		 *            the String to encode, may be {@code null}
1800 		 * @return encoded bytes, or {@code null} if the input string was
1801 		 *         {@code null}
1802 		 * @throws NullPointerException
1803 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1804 		 *             which should never happen since it is required by the
1805 		 *             Java platform specification.
1806 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1807 		 *        UnsupportedEncodingException
1808 		 * @see <a
1809 		 *      href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
1810 		 *      charsets</a>
1811 		 * @see #getBytesUnchecked(String, String)
1812 		 */
1813 		public static byte[] getBytesUtf8(String string) {
1814 			return getBytes(string, Charsets.UTF_8);
1815 		}
1816 
1817 		private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
1818 			return new IllegalStateException(charsetName + ": " + e);
1819 		}
1820 
1821 		/**
1822 		 * Constructs a new <code>String</code> by decoding the specified array
1823 		 * of bytes using the given charset.
1824 		 * 
1825 		 * @param bytes
1826 		 *            The bytes to be decoded into characters
1827 		 * @param charset
1828 		 *            The {@link Charset} to encode the {@code String}
1829 		 * @return A new <code>String</code> decoded from the specified array of
1830 		 *         bytes using the given charset, or {@code null} if the input
1831 		 *         byte array was {@code null}.
1832 		 * @throws NullPointerException
1833 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1834 		 *             which should never happen since it is required by the
1835 		 *             Java platform specification.
1836 		 */
1837 		private static String newString(byte[] bytes, Charset charset) {
1838 			return bytes == null ? null : new String(bytes, charset);
1839 		}
1840 
1841 		/**
1842 		 * Constructs a new <code>String</code> by decoding the specified array
1843 		 * of bytes using the given charset.
1844 		 * <p>
1845 		 * This method catches {@link UnsupportedEncodingException} and
1846 		 * re-throws it as {@link IllegalStateException}, which should never
1847 		 * happen for a required charset name. Use this method when the encoding
1848 		 * is required to be in the JRE.
1849 		 * </p>
1850 		 * 
1851 		 * @param bytes
1852 		 *            The bytes to be decoded into characters, may be
1853 		 *            {@code null}
1854 		 * @param charsetName
1855 		 *            The name of a required {@link java.nio.charset.Charset}
1856 		 * @return A new <code>String</code> decoded from the specified array of
1857 		 *         bytes using the given charset, or {@code null} if the input
1858 		 *         byte array was {@code null}.
1859 		 * @throws IllegalStateException
1860 		 *             Thrown when a {@link UnsupportedEncodingException} is
1861 		 *             caught, which should never happen for a required charset
1862 		 *             name.
1863 		 * @see CharEncoding
1864 		 * @see String#String(byte[], String)
1865 		 */
1866 		public static String newString(byte[] bytes, String charsetName) {
1867 			if (bytes == null) {
1868 				return null;
1869 			}
1870 			try {
1871 				return new String(bytes, charsetName);
1872 			} catch (UnsupportedEncodingException e) {
1873 				throw StringUtils.newIllegalStateException(charsetName, e);
1874 			}
1875 		}
1876 
1877 		/**
1878 		 * Constructs a new <code>String</code> by decoding the specified array
1879 		 * of bytes using the ISO-8859-1 charset.
1880 		 * 
1881 		 * @param bytes
1882 		 *            The bytes to be decoded into characters, may be
1883 		 *            {@code null}
1884 		 * @return A new <code>String</code> decoded from the specified array of
1885 		 *         bytes using the ISO-8859-1 charset, or {@code null} if the
1886 		 *         input byte array was {@code null}.
1887 		 * @throws NullPointerException
1888 		 *             Thrown if {@link Charsets#ISO_8859_1} is not initialized,
1889 		 *             which should never happen since it is required by the
1890 		 *             Java platform specification.
1891 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1892 		 *        UnsupportedEncodingException
1893 		 */
1894 		public static String newStringIso8859_1(byte[] bytes) {
1895 			return new String(bytes, Charsets.ISO_8859_1);
1896 		}
1897 
1898 		/**
1899 		 * Constructs a new <code>String</code> by decoding the specified array
1900 		 * of bytes using the US-ASCII charset.
1901 		 * 
1902 		 * @param bytes
1903 		 *            The bytes to be decoded into characters
1904 		 * @return A new <code>String</code> decoded from the specified array of
1905 		 *         bytes using the US-ASCII charset, or {@code null} if the
1906 		 *         input byte array was {@code null}.
1907 		 * @throws NullPointerException
1908 		 *             Thrown if {@link Charsets#US_ASCII} is not initialized,
1909 		 *             which should never happen since it is required by the
1910 		 *             Java platform specification.
1911 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1912 		 *        UnsupportedEncodingException
1913 		 */
1914 		public static String newStringUsAscii(byte[] bytes) {
1915 			return new String(bytes, Charsets.US_ASCII);
1916 		}
1917 
1918 		/**
1919 		 * Constructs a new <code>String</code> by decoding the specified array
1920 		 * of bytes using the UTF-16 charset.
1921 		 * 
1922 		 * @param bytes
1923 		 *            The bytes to be decoded into characters
1924 		 * @return A new <code>String</code> decoded from the specified array of
1925 		 *         bytes using the UTF-16 charset or {@code null} if the input
1926 		 *         byte array was {@code null}.
1927 		 * @throws NullPointerException
1928 		 *             Thrown if {@link Charsets#UTF_16} is not initialized,
1929 		 *             which should never happen since it is required by the
1930 		 *             Java platform specification.
1931 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1932 		 *        UnsupportedEncodingException
1933 		 */
1934 		public static String newStringUtf16(byte[] bytes) {
1935 			return new String(bytes, Charsets.UTF_16);
1936 		}
1937 
1938 		/**
1939 		 * Constructs a new <code>String</code> by decoding the specified array
1940 		 * of bytes using the UTF-16BE charset.
1941 		 * 
1942 		 * @param bytes
1943 		 *            The bytes to be decoded into characters
1944 		 * @return A new <code>String</code> decoded from the specified array of
1945 		 *         bytes using the UTF-16BE charset, or {@code null} if the
1946 		 *         input byte array was {@code null}.
1947 		 * @throws NullPointerException
1948 		 *             Thrown if {@link Charsets#UTF_16BE} is not initialized,
1949 		 *             which should never happen since it is required by the
1950 		 *             Java platform specification.
1951 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1952 		 *        UnsupportedEncodingException
1953 		 */
1954 		public static String newStringUtf16Be(byte[] bytes) {
1955 			return new String(bytes, Charsets.UTF_16BE);
1956 		}
1957 
1958 		/**
1959 		 * Constructs a new <code>String</code> by decoding the specified array
1960 		 * of bytes using the UTF-16LE charset.
1961 		 * 
1962 		 * @param bytes
1963 		 *            The bytes to be decoded into characters
1964 		 * @return A new <code>String</code> decoded from the specified array of
1965 		 *         bytes using the UTF-16LE charset, or {@code null} if the
1966 		 *         input byte array was {@code null}.
1967 		 * @throws NullPointerException
1968 		 *             Thrown if {@link Charsets#UTF_16LE} is not initialized,
1969 		 *             which should never happen since it is required by the
1970 		 *             Java platform specification.
1971 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1972 		 *        UnsupportedEncodingException
1973 		 */
1974 		public static String newStringUtf16Le(byte[] bytes) {
1975 			return new String(bytes, Charsets.UTF_16LE);
1976 		}
1977 
1978 		/**
1979 		 * Constructs a new <code>String</code> by decoding the specified array
1980 		 * of bytes using the UTF-8 charset.
1981 		 * 
1982 		 * @param bytes
1983 		 *            The bytes to be decoded into characters
1984 		 * @return A new <code>String</code> decoded from the specified array of
1985 		 *         bytes using the UTF-8 charset, or {@code null} if the input
1986 		 *         byte array was {@code null}.
1987 		 * @throws NullPointerException
1988 		 *             Thrown if {@link Charsets#UTF_8} is not initialized,
1989 		 *             which should never happen since it is required by the
1990 		 *             Java platform specification.
1991 		 * @since As of 1.7, throws {@link NullPointerException} instead of
1992 		 *        UnsupportedEncodingException
1993 		 */
1994 		public static String newStringUtf8(byte[] bytes) {
1995 			return newString(bytes, Charsets.UTF_8);
1996 		}
1997 
1998 	}
1999 
2000 }