001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.math.BigInteger;
028import java.nio.Buffer;
029import java.nio.ByteBuffer;
030import java.util.Arrays;
031import java.util.zip.CRC32;
032import java.util.zip.DataFormatException;
033import java.util.zip.Inflater;
034import java.util.zip.ZipEntry;
035import java.util.zip.ZipException;
036
037import org.apache.commons.compress.archivers.ArchiveEntry;
038import org.apache.commons.compress.archivers.ArchiveInputStream;
039import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
040import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
041import org.apache.commons.compress.utils.ArchiveUtils;
042import org.apache.commons.compress.utils.IOUtils;
043import org.apache.commons.compress.utils.InputStreamStatistics;
044
045import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
046import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
047import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
048import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
049
050/**
051 * Implements an input stream that can read Zip archives.
052 *
 * <p>This class transparently supports Zip64
054 * extensions and thus individual entries and archives larger than 4
055 * GB or with more than 65536 entries.</p>
056 *
057 * <p>The {@link ZipFile} class is preferred when reading from files
058 * as {@link ZipArchiveInputStream} is limited by not being able to
059 * read the central directory header before returning entries.  In
060 * particular {@link ZipArchiveInputStream}</p>
061 *
062 * <ul>
063 *
064 *  <li>may return entries that are not part of the central directory
065 *  at all and shouldn't be considered part of the archive.</li>
066 *
067 *  <li>may return several entries with the same name.</li>
068 *
069 *  <li>will not return internal or external attributes.</li>
070 *
071 *  <li>may return incomplete extra field data.</li>
072 *
073 *  <li>may return unknown sizes and CRC values for entries until the
074 *  next entry has been reached if the archive uses the data
075 *  descriptor feature.</li>
076 *
077 * </ul>
078 *
079 * @see ZipFile
080 * @NotThreadSafe
081 */
082public class ZipArchiveInputStream extends ArchiveInputStream implements InputStreamStatistics {
083
    /** The zip encoding to use for file names and the file comment. */
    private final ZipEncoding zipEncoding;

    // The encoding name exactly as it was passed to the constructor;
    // package-private so that unit tests can inspect it.
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /**
     * Inflater used for all deflated entries, created in "nowrap"
     * mode and reset whenever an entry is closed.
     */
    private final Inflater inf = new Inflater(true);

    /** Buffer used to read from the wrapped stream. */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read, {@code null} if there is none. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.  It is discarded when the entry is closed.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Number of decompressed bytes handed out for the current entry so far. */
    private long uncompressedCount = 0;

    /** Length in bytes of the fixed part of a local file header, see the layout below. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length in bytes of the fixed part of a central file header, see the layout below. */
    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    // One more than ZIP64_MAGIC - presumably 2^32 as the name says
    // (TODO confirm against ZipConstants).  Used to correct the
    // Inflater's byte counter for very large entries.
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] lfhBuf = new byte[LFH_LEN];
    private final byte[] skipBuf = new byte[1024];
    private final byte[] shortBuf = new byte[SHORT];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] twoDwordBuf = new byte[2 * DWORD];

    // number of entries handed out by getNextZipEntry so far
    private int entriesRead = 0;
170
171    /**
172     * Create an instance using UTF-8 encoding
173     * @param inputStream the stream to wrap
174     */
175    public ZipArchiveInputStream(final InputStream inputStream) {
176        this(inputStream, ZipEncodingHelper.UTF8);
177    }
178
179    /**
180     * Create an instance using the specified encoding
181     * @param inputStream the stream to wrap
182     * @param encoding the encoding to use for file names, use null
183     * for the platform's default encoding
184     * @since 1.5
185     */
186    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
187        this(inputStream, encoding, true);
188    }
189
190    /**
191     * Create an instance using the specified encoding
192     * @param inputStream the stream to wrap
193     * @param encoding the encoding to use for file names, use null
194     * for the platform's default encoding
195     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
196     * Extra Fields (if present) to set the file names.
197     */
198    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
199        this(inputStream, encoding, useUnicodeExtraFields, false);
200    }
201
202    /**
203     * Create an instance using the specified encoding
204     * @param inputStream the stream to wrap
205     * @param encoding the encoding to use for file names, use null
206     * for the platform's default encoding
207     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
208     * Extra Fields (if present) to set the file names.
209     * @param allowStoredEntriesWithDataDescriptor whether the stream
210     * will try to read STORED entries that use a data descriptor
211     * @since 1.1
212     */
213    public ZipArchiveInputStream(final InputStream inputStream,
214                                 final String encoding,
215                                 final boolean useUnicodeExtraFields,
216                                 final boolean allowStoredEntriesWithDataDescriptor) {
217        this.encoding = encoding;
218        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
219        this.useUnicodeExtraFields = useUnicodeExtraFields;
220        in = new PushbackInputStream(inputStream, buf.capacity());
221        this.allowStoredEntriesWithDataDescriptor =
222            allowStoredEntriesWithDataDescriptor;
223        // haven't read anything so far
224        ((Buffer)buf).limit(0);
225    }
226
227    public ZipArchiveEntry getNextZipEntry() throws IOException {
228        uncompressedCount = 0;
229
230        boolean firstEntry = true;
231        if (closed || hitCentralDirectory) {
232            return null;
233        }
234        if (current != null) {
235            closeEntry();
236            firstEntry = false;
237        }
238
239        long currentHeaderOffset = getBytesRead();
240        try {
241            if (firstEntry) {
242                // split archives have a special signature before the
243                // first local file header - look for it and fail with
244                // the appropriate error message if this is a split
245                // archive.
246                readFirstLocalFileHeader(lfhBuf);
247            } else {
248                readFully(lfhBuf);
249            }
250        } catch (final EOFException e) { //NOSONAR
251            return null;
252        }
253
254        final ZipLong sig = new ZipLong(lfhBuf);
255        if (!sig.equals(ZipLong.LFH_SIG)) {
256            if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG) || isApkSigningBlock(lfhBuf)) {
257                hitCentralDirectory = true;
258                skipRemainderOfArchive();
259                return null;
260            }
261            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
262        }
263
264        int off = WORD;
265        current = new CurrentEntry();
266
267        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
268        off += SHORT;
269        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
270
271        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
272        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
273        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
274        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
275        current.entry.setGeneralPurposeBit(gpFlag);
276
277        off += SHORT;
278
279        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
280        off += SHORT;
281
282        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
283        current.entry.setTime(time);
284        off += WORD;
285
286        ZipLong size = null, cSize = null;
287        if (!current.hasDataDescriptor) {
288            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
289            off += WORD;
290
291            cSize = new ZipLong(lfhBuf, off);
292            off += WORD;
293
294            size = new ZipLong(lfhBuf, off);
295            off += WORD;
296        } else {
297            off += 3 * WORD;
298        }
299
300        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
301
302        off += SHORT;
303
304        final int extraLen = ZipShort.getValue(lfhBuf, off);
305        off += SHORT; // NOSONAR - assignment as documentation
306
307        final byte[] fileName = new byte[fileNameLen];
308        readFully(fileName);
309        current.entry.setName(entryEncoding.decode(fileName), fileName);
310        if (hasUTF8Flag) {
311            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
312        }
313
314        final byte[] extraData = new byte[extraLen];
315        readFully(extraData);
316        current.entry.setExtra(extraData);
317
318        if (!hasUTF8Flag && useUnicodeExtraFields) {
319            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
320        }
321
322        processZip64Extra(size, cSize);
323
324        current.entry.setLocalHeaderOffset(currentHeaderOffset);
325        current.entry.setDataOffset(getBytesRead());
326        current.entry.setStreamContiguous(true);
327
328        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
329        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
330            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
331                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
332                switch (m) {
333                case UNSHRINKING:
334                    current.in = new UnshrinkingInputStream(bis);
335                    break;
336                case IMPLODING:
337                    current.in = new ExplodingInputStream(
338                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
339                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
340                        bis);
341                    break;
342                case BZIP2:
343                    current.in = new BZip2CompressorInputStream(bis);
344                    break;
345                case ENHANCED_DEFLATED:
346                    current.in = new Deflate64CompressorInputStream(bis);
347                    break;
348                default:
349                    // we should never get here as all supported methods have been covered
350                    // will cause an error when read is invoked, don't throw an exception here so people can
351                    // skip unsupported entries
352                    break;
353                }
354            }
355        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
356            current.in = new Deflate64CompressorInputStream(in);
357        }
358
359        entriesRead++;
360        return current.entry;
361    }
362
363    /**
364     * Fills the given array with the first local file header and
365     * deals with splitting/spanning markers that may prefix the first
366     * LFH.
367     */
368    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
369        readFully(lfh);
370        final ZipLong sig = new ZipLong(lfh);
371        if (sig.equals(ZipLong.DD_SIG)) {
372            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
373        }
374
375        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
376            // The archive is not really split as only one segment was
377            // needed in the end.  Just skip over the marker.
378            final byte[] missedLfhBytes = new byte[4];
379            readFully(missedLfhBytes);
380            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
381            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
382        }
383    }
384
385    /**
386     * Records whether a Zip64 extra is present and sets the size
387     * information from it if sizes are 0xFFFFFFFF and the entry
388     * doesn't use a data descriptor.
389     */
390    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
391        final Zip64ExtendedInformationExtraField z64 =
392            (Zip64ExtendedInformationExtraField)
393            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
394        current.usesZip64 = z64 != null;
395        if (!current.hasDataDescriptor) {
396            if (z64 != null // same as current.usesZip64 but avoids NPE warning
397                    && (ZipLong.ZIP64_MAGIC.equals(cSize) || ZipLong.ZIP64_MAGIC.equals(size)) ) {
398                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
399                current.entry.setSize(z64.getSize().getLongValue());
400            } else if (cSize != null && size != null) {
401                current.entry.setCompressedSize(cSize.getValue());
402                current.entry.setSize(size.getValue());
403            }
404        }
405    }
406
407    @Override
408    public ArchiveEntry getNextEntry() throws IOException {
409        return getNextZipEntry();
410    }
411
412    /**
413     * Whether this class is able to read the given entry.
414     *
415     * <p>May return false if it is set up to use encryption or a
416     * compression method that hasn't been implemented yet.</p>
417     * @since 1.1
418     */
419    @Override
420    public boolean canReadEntryData(final ArchiveEntry ae) {
421        if (ae instanceof ZipArchiveEntry) {
422            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
423            return ZipUtil.canHandleEntryData(ze)
424                && supportsDataDescriptorFor(ze)
425                && supportsCompressedSizeFor(ze);
426        }
427        return false;
428    }
429
430    @Override
431    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
432        if (closed) {
433            throw new IOException("The stream is closed");
434        }
435
436        if (current == null) {
437            return -1;
438        }
439
440        // avoid int overflow, check null buffer
441        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
442            throw new ArrayIndexOutOfBoundsException();
443        }
444
445        ZipUtil.checkRequestedFeatures(current.entry);
446        if (!supportsDataDescriptorFor(current.entry)) {
447            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
448                    current.entry);
449        }
450        if (!supportsCompressedSizeFor(current.entry)) {
451            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
452                    current.entry);
453        }
454
455        int read;
456        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
457            read = readStored(buffer, offset, length);
458        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
459            read = readDeflated(buffer, offset, length);
460        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
461                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
462                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
463                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
464            read = current.in.read(buffer, offset, length);
465        } else {
466            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
467                    current.entry);
468        }
469
470        if (read >= 0) {
471            current.crc.update(buffer, offset, read);
472            uncompressedCount += read;
473        }
474
475        return read;
476    }
477
478    /**
479     * @since 1.17
480     */
481    @Override
482    public long getCompressedCount() {
483        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
484            return current.bytesRead;
485        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
486            return getBytesInflated();
487        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
488            return ((UnshrinkingInputStream) current.in).getCompressedCount();
489        } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
490            return ((ExplodingInputStream) current.in).getCompressedCount();
491        } else if (current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()) {
492            return ((Deflate64CompressorInputStream) current.in).getCompressedCount();
493        } else if (current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
494            return ((BZip2CompressorInputStream) current.in).getCompressedCount();
495        } else {
496            return -1;
497        }
498    }
499
500    /**
501     * @since 1.17
502     */
503    @Override
504    public long getUncompressedCount() {
505        return uncompressedCount;
506    }
507
508    /**
509     * Implementation of read for STORED entries.
510     */
511    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
512
513        if (current.hasDataDescriptor) {
514            if (lastStoredEntry == null) {
515                readStoredEntry();
516            }
517            return lastStoredEntry.read(buffer, offset, length);
518        }
519
520        final long csize = current.entry.getSize();
521        if (current.bytesRead >= csize) {
522            return -1;
523        }
524
525        if (buf.position() >= buf.limit()) {
526            ((Buffer)buf).position(0);
527            final int l = in.read(buf.array());
528            if (l == -1) {
529                ((Buffer)buf).limit(0);
530                throw new IOException("Truncated ZIP file");
531            }
532            ((Buffer)buf).limit(l);
533
534            count(l);
535            current.bytesReadFromStream += l;
536        }
537
538        int toRead = Math.min(buf.remaining(), length);
539        if ((csize - current.bytesRead) < toRead) {
540            // if it is smaller than toRead then it fits into an int
541            toRead = (int) (csize - current.bytesRead);
542        }
543        buf.get(buffer, offset, toRead);
544        current.bytesRead += toRead;
545        return toRead;
546    }
547
548    /**
549     * Implementation of read for DEFLATED entries.
550     */
551    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
552        final int read = readFromInflater(buffer, offset, length);
553        if (read <= 0) {
554            if (inf.finished()) {
555                return -1;
556            } else if (inf.needsDictionary()) {
557                throw new ZipException("This archive needs a preset dictionary"
558                                       + " which is not supported by Commons"
559                                       + " Compress.");
560            } else if (read == -1) {
561                throw new IOException("Truncated ZIP file");
562            }
563        }
564        return read;
565    }
566
567    /**
568     * Potentially reads more bytes to fill the inflater's buffer and
569     * reads from it.
570     */
571    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
572        int read = 0;
573        do {
574            if (inf.needsInput()) {
575                final int l = fill();
576                if (l > 0) {
577                    current.bytesReadFromStream += buf.limit();
578                } else if (l == -1) {
579                    return -1;
580                } else {
581                    break;
582                }
583            }
584            try {
585                read = inf.inflate(buffer, offset, length);
586            } catch (final DataFormatException e) {
587                throw (IOException) new ZipException(e.getMessage()).initCause(e);
588            }
589        } while (read == 0 && inf.needsInput());
590        return read;
591    }
592
593    @Override
594    public void close() throws IOException {
595        if (!closed) {
596            closed = true;
597            try {
598                in.close();
599            } finally {
600                inf.end();
601            }
602        }
603    }
604
605    /**
606     * Skips over and discards value bytes of data from this input
607     * stream.
608     *
609     * <p>This implementation may end up skipping over some smaller
610     * number of bytes, possibly 0, if and only if it reaches the end
611     * of the underlying stream.</p>
612     *
613     * <p>The actual number of bytes skipped is returned.</p>
614     *
615     * @param value the number of bytes to be skipped.
616     * @return the actual number of bytes skipped.
617     * @throws IOException - if an I/O error occurs.
618     * @throws IllegalArgumentException - if value is negative.
619     */
620    @Override
621    public long skip(final long value) throws IOException {
622        if (value >= 0) {
623            long skipped = 0;
624            while (skipped < value) {
625                final long rem = value - skipped;
626                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
627                if (x == -1) {
628                    return skipped;
629                }
630                skipped += x;
631            }
632            return skipped;
633        }
634        throw new IllegalArgumentException();
635    }
636
637    /**
638     * Checks if the signature matches what is expected for a zip file.
639     * Does not currently handle self-extracting zips which may have arbitrary
640     * leading content.
641     *
642     * @param signature the bytes to check
643     * @param length    the number of bytes to check
644     * @return true, if this stream is a zip archive stream, false otherwise
645     */
646    public static boolean matches(final byte[] signature, final int length) {
647        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
648            return false;
649        }
650
651        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
652            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
653            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
654            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
655    }
656
657    private static boolean checksig(final byte[] signature, final byte[] expected) {
658        for (int i = 0; i < expected.length; i++) {
659            if (signature[i] != expected[i]) {
660                return false;
661            }
662        }
663        return true;
664    }
665
666    /**
667     * Closes the current ZIP archive entry and positions the underlying
668     * stream to the beginning of the next entry. All per-entry variables
669     * and data structures are cleared.
670     * <p>
671     * If the compressed size of this entry is included in the entry header,
672     * then any outstanding bytes are simply skipped from the underlying
673     * stream without uncompressing them. This allows an entry to be safely
674     * closed even if the compression method is unsupported.
675     * <p>
676     * In case we don't know the compressed size of this entry or have
677     * already buffered too much data from the underlying stream to support
678     * uncompression, then the uncompression process is completed and the
679     * end position of the stream is adjusted based on the result of that
680     * process.
681     *
682     * @throws IOException if an error occurs
683     */
684    private void closeEntry() throws IOException {
685        if (closed) {
686            throw new IOException("The stream is closed");
687        }
688        if (current == null) {
689            return;
690        }
691
692        // Ensure all entry bytes are read
693        if (currentEntryHasOutstandingBytes()) {
694            drainCurrentEntryData();
695        } else {
696            // this is guaranteed to exhaust the stream
697            skip(Long.MAX_VALUE); //NOSONAR
698
699            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
700                       ? getBytesInflated() : current.bytesRead;
701
702            // this is at most a single read() operation and can't
703            // exceed the range of int
704            final int diff = (int) (current.bytesReadFromStream - inB);
705
706            // Pushback any required bytes
707            if (diff > 0) {
708                pushback(buf.array(), buf.limit() - diff, diff);
709                current.bytesReadFromStream -= diff;
710            }
711
712            // Drain remainder of entry if not all data bytes were required
713            if (currentEntryHasOutstandingBytes()) {
714                drainCurrentEntryData();
715            }
716        }
717
718        if (lastStoredEntry == null && current.hasDataDescriptor) {
719            readDataDescriptor();
720        }
721
722        inf.reset();
723        ((Buffer)buf).clear().flip();
724        current = null;
725        lastStoredEntry = null;
726    }
727
728    /**
729     * If the compressed size of the current entry is included in the entry header
730     * and there are any outstanding bytes in the underlying stream, then
731     * this returns true.
732     *
733     * @return true, if current entry is determined to have outstanding bytes, false otherwise
734     */
735    private boolean currentEntryHasOutstandingBytes() {
736        return current.bytesReadFromStream <= current.entry.getCompressedSize()
737                && !current.hasDataDescriptor;
738    }
739
740    /**
741     * Read all data of the current entry from the underlying stream
742     * that hasn't been read, yet.
743     */
744    private void drainCurrentEntryData() throws IOException {
745        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
746        while (remaining > 0) {
747            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
748            if (n < 0) {
749                throw new EOFException("Truncated ZIP entry: "
750                                       + ArchiveUtils.sanitize(current.entry.getName()));
751            }
752            count(n);
753            remaining -= n;
754        }
755    }
756
757    /**
758     * Get the number of bytes Inflater has actually processed.
759     *
760     * <p>for Java &lt; Java7 the getBytes* methods in
761     * Inflater/Deflater seem to return unsigned ints rather than
762     * longs that start over with 0 at 2^32.</p>
763     *
764     * <p>The stream knows how many bytes it has read, but not how
765     * many the Inflater actually consumed - it should be between the
766     * total number of bytes read for the entry and the total number
767     * minus the last read operation.  Here we just try to make the
768     * value close enough to the bytes we've read by assuming the
769     * number of bytes consumed must be smaller than (or equal to) the
770     * number of bytes read but not smaller by more than 2^32.</p>
771     */
772    private long getBytesInflated() {
773        long inB = inf.getBytesRead();
774        if (current.bytesReadFromStream >= TWO_EXP_32) {
775            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
776                inB += TWO_EXP_32;
777            }
778        }
779        return inB;
780    }
781
782    private int fill() throws IOException {
783        if (closed) {
784            throw new IOException("The stream is closed");
785        }
786        final int length = in.read(buf.array());
787        if (length > 0) {
788            ((Buffer)buf).limit(length);
789            count(buf.limit());
790            inf.setInput(buf.array(), 0, buf.limit());
791        }
792        return length;
793    }
794
795    private void readFully(final byte[] b) throws IOException {
796        readFully(b, 0);
797    }
798
799    private void readFully(final byte[] b, final int off) throws IOException {
800        final int len = b.length - off;
801        final int count = IOUtils.readFully(in, b, off, len);
802        count(count);
803        if (count < len) {
804            throw new EOFException();
805        }
806    }
807
808    private void readDataDescriptor() throws IOException {
809        readFully(wordBuf);
810        ZipLong val = new ZipLong(wordBuf);
811        if (ZipLong.DD_SIG.equals(val)) {
812            // data descriptor with signature, skip sig
813            readFully(wordBuf);
814            val = new ZipLong(wordBuf);
815        }
816        current.entry.setCrc(val.getValue());
817
818        // if there is a ZIP64 extra field, sizes are eight bytes
819        // each, otherwise four bytes each.  Unfortunately some
820        // implementations - namely Java7 - use eight bytes without
821        // using a ZIP64 extra field -
822        // https://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
823
824        // just read 16 bytes and check whether bytes nine to twelve
825        // look like one of the signatures of what could follow a data
826        // descriptor (ignoring archive decryption headers for now).
827        // If so, push back eight bytes and assume sizes are four
828        // bytes, otherwise sizes are eight bytes each.
829        readFully(twoDwordBuf);
830        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
831        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
832            pushback(twoDwordBuf, DWORD, DWORD);
833            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
834            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
835        } else {
836            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
837            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
838        }
839    }
840
841    /**
842     * Whether this entry requires a data descriptor this library can work with.
843     *
844     * @return true if allowStoredEntriesWithDataDescriptor is true,
845     * the entry doesn't require any data descriptor or the method is
846     * DEFLATED or ENHANCED_DEFLATED.
847     */
848    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
849        return !entry.getGeneralPurposeBit().usesDataDescriptor()
850
851                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
852                || entry.getMethod() == ZipEntry.DEFLATED
853                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
854    }
855
856    /**
857     * Whether the compressed size for the entry is either known or
858     * not required by the compression method being used.
859     */
860    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
861        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
862            || entry.getMethod() == ZipEntry.DEFLATED
863            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
864            || (entry.getGeneralPurposeBit().usesDataDescriptor()
865                && allowStoredEntriesWithDataDescriptor
866                && entry.getMethod() == ZipEntry.STORED);
867    }
868
    /**
     * Suffix for exception messages raised while reading a stored
     * entry that uses a data descriptor; it explains that the archive
     * may be broken or may require ZipFile instead of this stream.
     */
    private static final String USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER =
        " while reading a stored entry using data descriptor. Either the archive is broken"
        + " or it can not be read using ZipArchiveInputStream and you must use ZipFile."
        + " A common cause for this is a ZIP archive containing a ZIP archive."
        + " See http://commons.apache.org/proper/commons-compress/zip.html#ZipArchiveInputStream_vs_ZipFile";
874
875    /**
876     * Caches a stored entry that uses the data descriptor.
877     *
878     * <ul>
879     *   <li>Reads a stored entry until the signature of a local file
880     *     header, central directory header or data descriptor has been
881     *     found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
883     *   <li>Rewinds the stream to position at the data
884     *     descriptor.</li>
885     *   <li>reads the data descriptor</li>
886     * </ul>
887     *
888     * <p>After calling this method the entry should know its size,
889     * the entry's data is cached and the stream is positioned at the
890     * next local file or central directory header.</p>
891     */
892    private void readStoredEntry() throws IOException {
893        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
894        int off = 0;
895        boolean done = false;
896
897        // length of DD without signature
898        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
899
900        while (!done) {
901            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
902            if (r <= 0) {
903                // read the whole archive without ever finding a
904                // central directory
905                throw new IOException("Truncated ZIP file");
906            }
907            if (r + off < 4) {
908                // buffer too small to check for a signature, loop
909                off += r;
910                continue;
911            }
912
913            done = bufferContainsSignature(bos, off, r, ddLen);
914            if (!done) {
915                off = cacheBytesRead(bos, off, r, ddLen);
916            }
917        }
918        if (current.entry.getCompressedSize() != current.entry.getSize()) {
919            throw new ZipException("compressed and uncompressed size don't match"
920                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
921        }
922        final byte[] b = bos.toByteArray();
923        if (b.length != current.entry.getSize()) {
924            throw new ZipException("actual and claimed size don't match"
925                                   + USE_ZIPFILE_INSTEAD_OF_STREAM_DISCLAIMER);
926        }
927        lastStoredEntry = new ByteArrayInputStream(b);
928    }
929
    // Byte representations of the local file header, central directory
    // header and data descriptor signatures, compared byte by byte
    // against the read buffer by bufferContainsSignature.
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
933
934    /**
935     * Checks whether the current buffer contains the signature of a
936     * &quot;data descriptor&quot;, &quot;local file header&quot; or
937     * &quot;central directory entry&quot;.
938     *
939     * <p>If it contains such a signature, reads the data descriptor
940     * and positions the stream right after the data descriptor.</p>
941     */
942    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
943            throws IOException {
944
945        boolean done = false;
946        for (int i = 0; !done && i < offset + lastRead - 4; i++) {
947            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
948                int expectDDPos = i;
949                if (i >= expectedDDLen &&
950                    (buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
951                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
952                    // found a LFH or CFH:
953                    expectDDPos = i - expectedDDLen;
954                    done = true;
955                }
956                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
957                    // found DD:
958                    done = true;
959                }
960                if (done) {
961                    // * push back bytes read in excess as well as the data
962                    //   descriptor
963                    // * copy the remaining bytes to cache
964                    // * read data descriptor
965                    pushback(buf.array(), expectDDPos, offset + lastRead - expectDDPos);
966                    bos.write(buf.array(), 0, expectDDPos);
967                    readDataDescriptor();
968                }
969            }
970        }
971        return done;
972    }
973
974    /**
975     * If the last read bytes could hold a data descriptor and an
976     * incomplete signature then save the last bytes to the front of
977     * the buffer and cache everything in front of the potential data
978     * descriptor into the given ByteArrayOutputStream.
979     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) can be 23 bytes max: a 20 byte ZIP64 data
     * descriptor without signature plus 3 bytes.</p>
982     */
983    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
984        final int cacheable = offset + lastRead - expecteDDLen - 3;
985        if (cacheable > 0) {
986            bos.write(buf.array(), 0, cacheable);
987            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
988            offset = expecteDDLen + 3;
989        } else {
990            offset += lastRead;
991        }
992        return offset;
993    }
994
995    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
996        ((PushbackInputStream) in).unread(buf, offset, length);
997        pushedBackBytes(length);
998    }
999
1000    // End of Central Directory Record
1001    //   end of central dir signature    WORD
1002    //   number of this disk             SHORT
1003    //   number of the disk with the
1004    //   start of the central directory  SHORT
1005    //   total number of entries in the
1006    //   central directory on this disk  SHORT
1007    //   total number of entries in
1008    //   the central directory           SHORT
1009    //   size of the central directory   WORD
1010    //   offset of start of central
1011    //   directory with respect to
1012    //   the starting disk number        WORD
1013    //   .ZIP file comment length        SHORT
1014    //   .ZIP file comment               up to 64KB
1015    //
1016
1017    /**
     * Reads the stream until it finds the "End of central directory
1019     * record" and consumes it as well.
1020     */
1021    private void skipRemainderOfArchive() throws IOException {
1022        // skip over central directory. One LFH has been read too much
1023        // already.  The calculation discounts file names and extra
1024        // data so it will be too short.
1025        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
1026        findEocdRecord();
1027        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
1028        readFully(shortBuf);
1029        // file comment
1030        realSkip(ZipShort.getValue(shortBuf));
1031    }
1032
1033    /**
1034     * Reads forward until the signature of the &quot;End of central
1035     * directory&quot; record is found.
1036     */
1037    private void findEocdRecord() throws IOException {
1038        int currentByte = -1;
1039        boolean skipReadCall = false;
1040        while (skipReadCall || (currentByte = readOneByte()) > -1) {
1041            skipReadCall = false;
1042            if (!isFirstByteOfEocdSig(currentByte)) {
1043                continue;
1044            }
1045            currentByte = readOneByte();
1046            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
1047                if (currentByte == -1) {
1048                    break;
1049                }
1050                skipReadCall = isFirstByteOfEocdSig(currentByte);
1051                continue;
1052            }
1053            currentByte = readOneByte();
1054            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
1055                if (currentByte == -1) {
1056                    break;
1057                }
1058                skipReadCall = isFirstByteOfEocdSig(currentByte);
1059                continue;
1060            }
1061            currentByte = readOneByte();
1062            if (currentByte == -1
1063                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1064                break;
1065            }
1066            skipReadCall = isFirstByteOfEocdSig(currentByte);
1067        }
1068    }
1069
1070    /**
1071     * Skips bytes by reading from the underlying stream rather than
1072     * the (potentially inflating) archive stream - which {@link
1073     * #skip} would do.
1074     *
1075     * Also updates bytes-read counter.
1076     */
1077    private void realSkip(final long value) throws IOException {
1078        if (value >= 0) {
1079            long skipped = 0;
1080            while (skipped < value) {
1081                final long rem = value - skipped;
1082                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1083                if (x == -1) {
1084                    return;
1085                }
1086                count(x);
1087                skipped += x;
1088            }
1089            return;
1090        }
1091        throw new IllegalArgumentException();
1092    }
1093
1094    /**
1095     * Reads bytes by reading from the underlying stream rather than
1096     * the (potentially inflating) archive stream - which {@link #read} would do.
1097     *
1098     * Also updates bytes-read counter.
1099     */
1100    private int readOneByte() throws IOException {
1101        final int b = in.read();
1102        if (b != -1) {
1103            count(1);
1104        }
1105        return b;
1106    }
1107
1108    private boolean isFirstByteOfEocdSig(final int b) {
1109        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1110    }
1111
    /**
     * Magic string isApkSigningBlock looks for at the end of a
     * suspected APK Signing Block.
     */
    private static final byte[] APK_SIGNING_BLOCK_MAGIC = new byte[] {
        'A', 'P', 'K', ' ', 'S', 'i', 'g', ' ', 'B', 'l', 'o', 'c', 'k', ' ', '4', '2',
    };
    /**
     * Long.MAX_VALUE as BigInteger, used by isApkSigningBlock to split
     * skip distances that do not fit into a long.
     */
    private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
1116
1117    /**
1118     * Checks whether this might be an APK Signing Block.
1119     *
1120     * <p>Unfortunately the APK signing block does not start with some kind of signature, it rather ends with one. It
1121     * starts with a length, so what we do is parse the suspect length, skip ahead far enough, look for the signature
1122     * and if we've found it, return true.</p>
1123     *
1124     * @param suspectLocalFileHeader the bytes read from the underlying stream in the expectation that they would hold
1125     * the local file header of the next entry.
1126     *
     * @return true if this looks like an APK signing block
1128     *
1129     * @see <a href="https://source.android.com/security/apksigning/v2">https://source.android.com/security/apksigning/v2</a>
1130     */
1131    private boolean isApkSigningBlock(byte[] suspectLocalFileHeader) throws IOException {
1132        // length of block excluding the size field itself
1133        BigInteger len = ZipEightByteInteger.getValue(suspectLocalFileHeader);
1134        // LFH has already been read and all but the first eight bytes contain (part of) the APK signing block,
1135        // also subtract 16 bytes in order to position us at the magic string
1136        BigInteger toSkip = len.add(BigInteger.valueOf(DWORD - suspectLocalFileHeader.length
1137            - (long) APK_SIGNING_BLOCK_MAGIC.length));
1138        byte[] magic = new byte[APK_SIGNING_BLOCK_MAGIC.length];
1139
1140        try {
1141            if (toSkip.signum() < 0) {
1142                // suspectLocalFileHeader contains the start of suspect magic string
1143                int off = suspectLocalFileHeader.length + toSkip.intValue();
1144                // length was shorter than magic length
1145                if (off < DWORD) {
1146                    return false;
1147                }
1148                int bytesInBuffer = Math.abs(toSkip.intValue());
1149                System.arraycopy(suspectLocalFileHeader, off, magic, 0, Math.min(bytesInBuffer, magic.length));
1150                if (bytesInBuffer < magic.length) {
1151                    readFully(magic, bytesInBuffer);
1152                }
1153            } else {
1154                while (toSkip.compareTo(LONG_MAX) > 0) {
1155                    realSkip(Long.MAX_VALUE);
1156                    toSkip = toSkip.add(LONG_MAX.negate());
1157                }
1158                realSkip(toSkip.longValue());
1159                readFully(magic);
1160            }
1161        } catch (EOFException ex) { //NOSONAR
1162            // length was invalid
1163            return false;
1164        }
1165        return Arrays.equals(magic, APK_SIGNING_BLOCK_MAGIC);
1166    }
1167
1168    /**
1169     * Structure collecting information for the entry that is
1170     * currently being read.
1171     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         *
         * <p>Entries with this flag set are never reported as having
         * outstanding bytes by currentEntryHasOutstandingBytes.</p>
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field.
         *
         * <p>readStoredEntry uses this to pick the expected length of
         * the data descriptor.</p>
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         *
         * <p>The value is compared to the entry's compressed size in
         * order to determine how many bytes are still outstanding.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1214
1215    /**
1216     * Bounded input stream adapted from commons-io
1217     */
1218    private class BoundedInputStream extends InputStream {
1219
1220        /** the wrapped input stream */
1221        private final InputStream in;
1222
1223        /** the max length to provide */
1224        private final long max;
1225
1226        /** the number of bytes already returned */
1227        private long pos = 0;
1228
1229        /**
1230         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1231         * stream and limits it to a certain size.
1232         *
1233         * @param in The wrapped input stream
1234         * @param size The maximum number of bytes to return
1235         */
1236        public BoundedInputStream(final InputStream in, final long size) {
1237            this.max = size;
1238            this.in = in;
1239        }
1240
1241        @Override
1242        public int read() throws IOException {
1243            if (max >= 0 && pos >= max) {
1244                return -1;
1245            }
1246            final int result = in.read();
1247            pos++;
1248            count(1);
1249            current.bytesReadFromStream++;
1250            return result;
1251        }
1252
1253        @Override
1254        public int read(final byte[] b) throws IOException {
1255            return this.read(b, 0, b.length);
1256        }
1257
1258        @Override
1259        public int read(final byte[] b, final int off, final int len) throws IOException {
1260            if (max >= 0 && pos >= max) {
1261                return -1;
1262            }
1263            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1264            final int bytesRead = in.read(b, off, (int) maxRead);
1265
1266            if (bytesRead == -1) {
1267                return -1;
1268            }
1269
1270            pos += bytesRead;
1271            count(bytesRead);
1272            current.bytesReadFromStream += bytesRead;
1273            return bytesRead;
1274        }
1275
1276        @Override
1277        public long skip(final long n) throws IOException {
1278            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1279            final long skippedBytes = IOUtils.skip(in, toSkip);
1280            pos += skippedBytes;
1281            return skippedBytes;
1282        }
1283
1284        @Override
1285        public int available() throws IOException {
1286            if (max >= 0 && pos >= max) {
1287                return 0;
1288            }
1289            return in.available();
1290        }
1291    }
1292}