/*
 * Decompiled with CFR 0.152.
 */
package org.commoncrawl.util.shared;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PushbackInputStream;
import java.io.SequenceInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.LinkedList;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.commoncrawl.io.shared.NIOHttpHeaders;
import org.commoncrawl.util.shared.ByteArrayUtils;
import org.commoncrawl.util.shared.CCStringUtils;
import org.commoncrawl.util.shared.CountingInputStream;
import org.commoncrawl.util.shared.HexDump;

/**
 * Reads an ARC file that is stored as a series of GZIP members.
 *
 * <p>The constructor consumes the first member (the ARC file header). After that,
 * callers alternate {@link #hasMoreItems()} (which consumes the next GZIP member
 * header) and {@link #nextKeyValue(Text, BytesWritable)} (which inflates the member
 * body and validates the GZIP trailer), as the {@code main} method does.
 */
public final class ARCFileReader
extends InflaterInputStream {
    private static final Log LOG = LogFactory.getLog(ARCFileReader.class);
    // Running CRC-32: over header bytes while a GZIP header is parsed, and over the
    // inflated bytes of a member so it can be compared with the member's trailer.
    private CRC32 _crc = new CRC32();
    // Text of the ARC file header record, decoded as ISO-8859-1 by readARCHeader().
    private String _arcFileHeader = null;
    public static final int DEFAULT_BLOCK_SIZE = 32768;
    // Size used for the push-back buffer, the inflater input buffer and each scan buffer.
    private static int _blockSize = 32768;
    // The constants below are not referenced by name in this file; their values match
    // the numeric literals that readHeader() tests against (magic number and flag bits).
    private static final int GZIP_MAGIC = 35615;
    private static final int FHCRC = 2;
    private static final int FEXTRA = 4;
    private static final int FNAME = 8;
    private static final int FCOMMENT = 16;
    // Scratch space used by skipBytes() to discard input.
    private byte[] tmpbuf = new byte[128];
    // Command-line options for main(); populated by the static initializer.
    static Options options = new Options();

    /**
     * Creates a reader over {@code source} and immediately consumes the ARC file
     * header record.
     *
     * <p>The source is wrapped in a {@link CountingInputStream} and then in a push-back
     * stream sized to the block size; inflation uses a header-less ("nowrap") inflater
     * because the GZIP framing is parsed by this class.
     *
     * @param source raw (compressed) ARC file bytes
     * @throws IOException if the first GZIP member or the ARC file header is invalid
     */
    public ARCFileReader(InputStream source) throws IOException {
        super(
            new CustomPushbackInputStream(new CountingInputStream(source), _blockSize),
            new Inflater(true),
            _blockSize);
        readARCHeader();
    }

    /**
     * Tries to consume the GZIP header of the next record.
     *
     * <p>This call advances the stream: on {@code true} the header has already been
     * read, so it must be followed by {@link #nextKeyValue(Text, BytesWritable)}
     * before being called again.
     *
     * @return {@code true} if a member header was read, {@code false} if the stream
     *         ended while reading it
     * @throws IOException if the header is present but malformed
     */
    public boolean hasMoreItems() throws IOException {
        boolean headerConsumed;
        try {
            readHeader();
            headerConsumed = true;
        } catch (EOFException endOfStream) {
            headerConsumed = false;
        }
        return headerConsumed;
    }

    /**
     * Inflates the body of the current GZIP member and feeds it to an
     * {@link ArcFileBuilder}, which fills in {@code key} and {@code value}.
     *
     * <p>Expects the member header to have been consumed already (see
     * {@link #hasMoreItems()}). After the body is exhausted the GZIP trailer is
     * checked against the CRC of the inflated bytes.
     *
     * @param key   receives the record key parsed from the ARC metadata line
     * @param value receives the serialized headers followed by the payload
     * @throws IOException on read failure, a corrupt trailer, or an incomplete record
     */
    public void nextKeyValue(Text key, BytesWritable value) throws IOException {
        _crc.reset();
        resetInflater();
        ArcFileBuilder builder = new ArcFileBuilder(getPosition(), key, value);
        for (;;) {
            // A fresh array per chunk: the builder may retain the buffer it is handed.
            byte[] chunk = new byte[_blockSize];
            int count = read(chunk, 0, chunk.length);
            if (count == -1) {
                break;
            }
            _crc.update(chunk, 0, count);
            builder.inputData(ByteBuffer.wrap(chunk, 0, count));
        }
        readTrailer();
        builder.finish();
    }

    /** Resets the shared inflater so it can decode the next GZIP member. */
    private void resetInflater() {
        inf.reset();
    }

    /**
     * Reads the first GZIP member of the file and stores its inflated content as the
     * ARC file header text.
     *
     * <p>The header must be non-empty and strictly smaller than 4096 bytes; its CRC
     * and length are verified against the member trailer.
     *
     * @throws IOException if the member is malformed or the header size is out of range
     */
    private void readARCHeader() throws IOException {
        readHeader();

        final byte[] headerBytes = new byte[4096];
        int filled = 0;
        int lastRead;
        for (;;) {
            lastRead = read(headerBytes, filled, headerBytes.length - filled);
            if (lastRead <= 0) {
                break;
            }
            filled += lastRead;
            if (filled == headerBytes.length) {
                // Buffer exhausted before end of member: header is too large.
                throw new IOException("Invalid ARC File Header");
            }
        }
        if (lastRead == 0 || filled == 0) {
            throw new IOException("Invalid ARC File Header");
        }

        _crc.reset();
        _crc.update(headerBytes, 0, filled);
        readTrailer();
        _arcFileHeader = new String(headerBytes, 0, filled, "ISO-8859-1");
    }

    /**
     * Parses and discards one GZIP member header from the underlying stream.
     *
     * <p>Checks the magic number and compression method, skips the six bytes that
     * follow the flag byte, then skips whichever optional sections the flag byte
     * announces. When the header-CRC flag is set, the low 16 bits of the CRC of the
     * header bytes read so far are compared with the stored value.
     *
     * @throws java.io.EOFException if the stream ends inside the header
     * @throws IOException if the header is not a valid GZIP header
     */
    private void readHeader() throws IOException {
        CheckedInputStream headerIn = new CheckedInputStream(this.in, this._crc);
        this._crc.reset();

        final int magic = ARCFileReader.readUShort(headerIn);
        if (magic != 0x8b1f) {
            throw new IOException("Not in GZIP format");
        }
        final int method = ARCFileReader.readUByte(headerIn);
        if (method != 8) {
            throw new IOException("Unsupported compression method");
        }
        final int flags = ARCFileReader.readUByte(headerIn);
        skipBytes(headerIn, 6);

        // Flag 4: length-prefixed extra field.
        if ((flags & 4) != 0) {
            int extraLen = ARCFileReader.readUShort(headerIn);
            skipBytes(headerIn, extraLen);
        }
        // Flag 8: zero-terminated string.
        if ((flags & 8) != 0) {
            int c;
            do {
                c = ARCFileReader.readUByte(headerIn);
            } while (c != 0);
        }
        // Flag 16: zero-terminated string.
        if ((flags & 0x10) != 0) {
            int c;
            do {
                c = ARCFileReader.readUByte(headerIn);
            } while (c != 0);
        }
        // Flag 2: 16-bit header CRC. The expected value must be captured before the
        // stored value is read, because reading it also updates the running CRC.
        if ((flags & 2) != 0) {
            int expected = (int) this._crc.getValue() & 0xFFFF;
            int stored = ARCFileReader.readUShort(headerIn);
            if (stored != expected) {
                throw new IOException("Corrupt GZIP header");
            }
        }
    }

    /**
     * Reads the 8-byte GZIP member trailer and validates it.
     *
     * <p>Any input bytes the inflater fetched but did not consume are first pushed
     * back onto the stream, since they belong to the trailer and to whatever follows.
     *
     * @throws IOException if the stored CRC or the stored length (compared modulo
     *         2^32) does not match what was inflated
     */
    private void readTrailer() throws IOException {
        PushbackInputStream pushback = (PushbackInputStream) this.in;
        int unconsumed = this.inf.getRemaining();
        if (unconsumed > 0) {
            pushback.unread(this.buf, this.len - unconsumed, unconsumed);
        }

        long storedCrc = ARCFileReader.readUInt(pushback);
        long storedLength = ARCFileReader.readUInt(pushback);

        boolean crcMatches = storedCrc == this._crc.getValue();
        boolean lengthMatches = storedLength == (this.inf.getBytesWritten() & 0xFFFFFFFFL);
        if (!(crcMatches && lengthMatches)) {
            throw new IOException("Corrupt GZIP trailer");
        }
    }

    /**
     * Reads an unsigned 32-bit little-endian integer.
     *
     * @param in stream to read four bytes from
     * @return the value in the range 0..2^32-1
     * @throws IOException if the stream ends early or a read fails
     */
    private static long readUInt(InputStream in) throws IOException {
        long lowHalf = ARCFileReader.readUShort(in);
        long highHalf = ARCFileReader.readUShort(in);
        return (highHalf << 16) | lowHalf;
    }

    /**
     * Reads an unsigned 16-bit little-endian integer.
     *
     * @param in stream to read two bytes from
     * @return the value in the range 0..65535
     * @throws IOException if the stream ends early or a read fails
     */
    private static int readUShort(InputStream in) throws IOException {
        int lowByte = ARCFileReader.readUByte(in);
        int highByte = ARCFileReader.readUByte(in);
        return (highByte << 8) | lowByte;
    }

    /**
     * Reads a single unsigned byte.
     *
     * @param in stream to read from
     * @return the byte value in the range 0..255
     * @throws EOFException if the stream is at its end
     * @throws IOException if {@code in.read()} returns a value outside -1..255
     */
    private static int readUByte(InputStream in) throws IOException {
        final int value = in.read();
        if (value >= 0 && value <= 255) {
            return value;
        }
        if (value == -1) {
            throw new EOFException();
        }
        throw new IOException("read() returned value out of range -1..255: " + value);
    }

    /**
     * Discards exactly {@code n} bytes by reading them into the scratch buffer.
     * Reading (rather than {@code skip}) is used so that a wrapping checked stream
     * still sees the bytes.
     *
     * @param in stream to consume from
     * @param n  number of bytes to discard
     * @throws EOFException if the stream ends before {@code n} bytes were consumed
     * @throws IOException on read failure
     */
    private void skipBytes(InputStream in, int n) throws IOException {
        int remaining = n;
        while (remaining > 0) {
            int want = Math.min(remaining, this.tmpbuf.length);
            int got = in.read(this.tmpbuf, 0, want);
            if (got == -1) {
                throw new EOFException();
            }
            remaining -= got;
        }
    }

    /**
     * Reports the current position in the raw (compressed) source stream.
     *
     * <p>The position is the number of bytes pulled from the counting source minus
     * the bytes currently sitting unread in the push-back buffer, plus one. The
     * arithmetic is done in {@code long} so that offsets beyond 2 GiB are not
     * truncated.
     *
     * <p>NOTE(review): the trailing {@code + 1} is preserved from the original
     * implementation; confirm whether callers expect a one-based offset.
     *
     * @return the computed stream position
     * @throws IOException declared for interface compatibility
     */
    public final long getPosition() throws IOException {
        CustomPushbackInputStream pushback = (CustomPushbackInputStream) this.in;
        long rawStreamPos = ((CountingInputStream) pushback.getSource()).getPosition();
        long bufferedAmt = pushback.getAvailableInBuffer();
        long bytesRead = rawStreamPos - bufferedAmt;
        return bytesRead + 1;
    }

    /** Prints the command-line help for the options registered in {@code options}. */
    static void printUsage() {
        new HelpFormatter().printHelp("ARCFileReaer", options);
    }

    /**
     * Command-line entry point: opens the ARC file named by {@code -file}, iterates
     * over its records and prints each key with its header and content lengths.
     *
     * <p>Optional arguments add a Hadoop configuration resource ({@code -conf}) and
     * S3 credentials ({@code -awsAccessKey}, {@code -awsSecret}). Failures while
     * reading are printed and logged rather than rethrown; the reader is always closed.
     *
     * @param args command-line arguments
     * @throws IOException if the file system cannot be obtained or the reader fails to close
     * @throws URISyntaxException if the file argument is not a valid URI
     * @throws InterruptedException declared for interface compatibility
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        Configuration conf = new Configuration();
        String path = null;
        try {
            CommandLine cmdLine = new GnuParser().parse(options, args);
            path = cmdLine.getOptionValue("file");
            if (cmdLine.hasOption("conf")) {
                conf.addResource(new Path(cmdLine.getOptionValue("conf")));
            }
            if (cmdLine.hasOption("awsAccessKey")) {
                conf.set("fs.s3n.awsAccessKeyId", cmdLine.getOptionValue("awsAccessKey"));
            }
            if (cmdLine.hasOption("awsSecret")) {
                conf.set("fs.s3n.awsSecretAccessKey", cmdLine.getOptionValue("awsSecret"));
            }
        } catch (ParseException e) {
            System.out.println(e.toString());
            ARCFileReader.printUsage();
            System.exit(1);
        }

        URI uri = new URI(path);
        FileSystem fs = FileSystem.get(uri, conf);
        ARCFileReader reader = null;
        try {
            System.out.println("Initializing Reader for Path:" + uri);
            reader = new ARCFileReader(fs.open(new Path(path)));

            Text key = new Text();
            BytesWritable value = new BytesWritable();
            while (reader.hasMoreItems()) {
                reader.nextKeyValue(key, value);
                // The value holds the header block, a blank line, then the payload.
                int indexOfTrailingCRLF = ByteArrayUtils.indexOf(
                    value.getBytes(), 0, value.getLength(), "\r\n\r\n".getBytes());
                int headerLen = indexOfTrailingCRLF + 4;
                int contentLen = value.getLength() - headerLen;
                System.out.println(
                    "Key:" + key.toString() + " HeaderLen:" + headerLen + " ContentLen:" + contentLen);
            }
            System.out.println("Exiting Loop");
        } catch (Exception e) {
            String stackText = CCStringUtils.stringifyException((Throwable) e);
            System.out.println(stackText);
            LOG.error(stackText);
        } finally {
            if (reader != null) {
                System.out.println("***Closing Reader");
                reader.close();
            }
        }
    }

    // Registers the command-line options understood by main(): an optional config
    // file, the required ARC file path, and optional AWS credentials.
    static {
        options.addOption(
            OptionBuilder.withArgName("conf")
                .hasArg()
                .withDescription("Config File Name")
                .create("conf"));
        options.addOption(
            OptionBuilder.withArgName("file")
                .hasArg()
                .withDescription("ARC File Path")
                .isRequired()
                .create("file"));
        options.addOption(
            OptionBuilder.withArgName("awsAccessKey")
                .hasArg()
                .withDescription("AWS Access Key")
                .create("awsAccessKey"));
        options.addOption(
            OptionBuilder.withArgName("awsSecret")
                .hasArg()
                .withDescription("AWS Secret")
                .create("awsSecret"));
    }

    /**
     * Push-back stream that additionally exposes its wrapped source and the number
     * of bytes currently held in its push-back buffer.
     */
    static class CustomPushbackInputStream
    extends PushbackInputStream {
        /**
         * @param in   stream to wrap
         * @param size capacity of the push-back buffer in bytes
         */
        public CustomPushbackInputStream(InputStream in, int size) {
            super(in, size);
        }

        /** @return the wrapped stream */
        public InputStream getSource() {
            return in;
        }

        /** @return the distance from the current buffer position to the end of the push-back buffer */
        public int getAvailableInBuffer() {
            return buf.length - pos;
        }

        /** Prints a hex dump of up to the first 100 buffered bytes to standard output. */
        public void debug() {
            int buffered = buf.length - pos;
            int dumpLength = Math.min(buffered, 100);
            System.out.println(HexDump.dumpHexString(buf, pos, dumpLength));
        }
    }

    /**
     * Incrementally assembles one ARC record from chunks of inflated data.
     *
     * <p>Data passed to {@code inputData} is scanned in three phases: the ARC metadata
     * line (terminated by a line feed), the header block (terminated by a blank line),
     * and the payload. The key is set from the metadata line; the value receives the
     * re-serialized headers followed by the payload bytes.
     */
    public static class ArcFileBuilder {
        // Not referenced elsewhere in the visible code.
        SimpleDateFormat TIMESTAMP14 = new SimpleDateFormat("yyyyMMddHHmmss");
        // Output key; set from the first field of the metadata line.
        Text _keyOut;
        // Output value; sized and filled by processHeaders() and inputData().
        BytesWritable _valueOut;
        // Collected headers: synthetic x_commoncrawl_* entries plus the parsed header lines.
        NIOHttpHeaders _headers = new NIOHttpHeaders();
        // Record length parsed from the last field of the metadata line.
        int _recordLen;
        // Record length minus raw header size, clamped at zero.
        int _payloadLength;
        // Byte length of the serialized header block written at the start of the value.
        int _headerLength;
        // Number of payload bytes copied into the value so far.
        int _dataLength;
        State _state = State.LookingForMetadata;
        // Chunks accumulated for the section currently being scanned.
        LinkedList<ByteBuffer> _buffers = new LinkedList();
        // Stream position supplied by the creator; used only in error messages.
        long _streamPos;
        CRLFMatchState _crlfMatchState = CRLFMatchState.CRLFMatchState_NoMatch;
        // Not referenced elsewhere in the visible code.
        boolean eos = false;
        static Charset UTF8_Charset = Charset.forName("UTF8");
        static Charset ASCII_Charset = Charset.forName("ASCII");
        // Five whitespace-separated groups: free text, digits-and-dots, digits,
        // a non-whitespace token, digits.
        static final Pattern metadataLinePattern = Pattern.compile("^(.*)\\s([0-9\\.]+)\\s([0-9]+)\\s([^\\s]+)\\s([0-9]+)$");

        /**
         * Creates a builder that writes into the supplied key and value objects.
         *
         * @param streamPos position of the record in the source stream (used for diagnostics)
         * @param key       object that receives the record key
         * @param value     object that receives the headers and payload
         */
        public ArcFileBuilder(long streamPos, Text key, BytesWritable value) {
            _keyOut = key;
            _valueOut = value;
            _streamPos = streamPos;
        }

        /**
         * Advances the blank-line detector by one byte.
         *
         * <p>Any byte other than CR (13) or LF (10) resets the detector. A line feed
         * that arrives after a previous line feed, optionally with one CR in between,
         * completes the terminator.
         *
         * @param c next byte of the header section
         * @return {@code true} when {@code c} completes the header terminator
         */
        final boolean isHTTPHeaderTermintor(byte c) {
            final boolean isCR = c == 13;
            final boolean isLF = c == 10;
            if (!isCR && !isLF) {
                this._crlfMatchState = CRLFMatchState.CRLFMatchState_NoMatch;
                return false;
            }
            switch (this._crlfMatchState) {
                case CRLFMatchState_FirstLF: {
                    if (isLF) {
                        return true;
                    }
                    this._crlfMatchState = CRLFMatchState.CRLFMatchState_SecondCR;
                    return false;
                }
                case CRLFMatchState_SecondCR: {
                    if (isLF) {
                        return true;
                    }
                    // CR CR: start over, treating this CR as the first one.
                    this._crlfMatchState = CRLFMatchState.CRLFMatchState_FirstCR;
                    return false;
                }
                default: {
                    // No match yet, or only a first CR seen.
                    this._crlfMatchState = isCR
                        ? CRLFMatchState.CRLFMatchState_FirstCR
                        : CRLFMatchState.CRLFMatchState_FirstLF;
                    return false;
                }
            }
        }

        /**
         * Stores the already-scanned part of {@code buffer} in the pending list and
         * hands back the unscanned rest.
         *
         * <p>The stored buffer is limited to its current position and rewound, so it
         * exposes exactly the bytes consumed so far.
         *
         * @param buffer buffer whose position marks the end of the scanned data
         * @return a slice covering the unscanned bytes, or {@code null} if none remain
         * @throws IOException declared for interface compatibility
         */
        private final ByteBuffer appendAndSliceBuffer(ByteBuffer buffer) throws IOException {
            ByteBuffer remainder = null;
            if (buffer.hasRemaining()) {
                remainder = buffer.slice();
                buffer.limit(buffer.position());
            }
            buffer.rewind();
            this._buffers.addLast(buffer);
            return remainder;
        }

        /**
         * Tests whether {@code matchingChar} ends the section currently being scanned.
         *
         * <p>The metadata line ends at a line feed; the header section ends when the
         * blank-line detector fires. In any other state nothing terminates.
         *
         * @param matchingChar byte just read from the input
         * @return {@code true} if the current section is complete
         */
        private final boolean checkForTerminator(byte matchingChar) {
            switch (this._state) {
                case LookingForMetadata:
                    return matchingChar == 10;
                case LookingForHeaderTerminator:
                    return this.isHTTPHeaderTermintor(matchingChar);
                default:
                    return false;
            }
        }

        /**
         * Adapts a {@link ByteBuffer} to an {@link InputStream} that drains the
         * buffer's remaining bytes.
         *
         * <p>The stream follows the {@code InputStream} contract: single-byte reads
         * return an unsigned value (0..255) and both read methods return -1 once the
         * buffer is exhausted.
         *
         * @param buf buffer to read from; its position advances as the stream is read
         * @return a stream view over {@code buf}
         */
        private static InputStream newInputStream(final ByteBuffer buf) {
            return new InputStream(){

                @Override
                public synchronized int read() throws IOException {
                    if (!buf.hasRemaining()) {
                        return -1;
                    }
                    // Mask to unsigned so bytes >= 0x80 are not negative and 0xFF is
                    // not mistaken for end of stream.
                    return buf.get() & 0xFF;
                }

                @Override
                public synchronized int read(byte[] bytes, int off, int len) throws IOException {
                    if (len == 0) {
                        return 0;
                    }
                    if (!buf.hasRemaining()) {
                        return -1;
                    }
                    int count = Math.min(len, buf.remaining());
                    buf.get(bytes, off, count);
                    return count;
                }
            };
        }

        /**
         * Builds a character reader over the concatenation of the given buffers and
         * empties the list.
         *
         * @param buffers pending buffers, in order; cleared by this call
         * @param charset charset used to decode the bytes
         * @return a reader over all buffered bytes
         * @throws IOException declared for interface compatibility
         */
        private static InputStreamReader readerFromScanBufferList(LinkedList<ByteBuffer> buffers, Charset charset) throws IOException {
            Vector<InputStream> streams = new Vector<InputStream>(buffers.size());
            for (ByteBuffer chunk : buffers) {
                streams.addElement(ArcFileBuilder.newInputStream(chunk));
            }
            buffers.clear();
            InputStream concatenated = new SequenceInputStream(streams.elements());
            return new InputStreamReader(concatenated, charset);
        }

        /**
         * Decodes the pending buffers and returns their first line. The pending list
         * is emptied as a side effect.
         *
         * @param charset charset used to decode the bytes
         * @return the first line without its terminator, or {@code null} if there is no data
         * @throws IOException if decoding fails
         */
        private final String readLine(Charset charset) throws IOException {
            InputStreamReader source = ArcFileBuilder.readerFromScanBufferList(this._buffers, charset);
            return new BufferedReader(source).readLine();
        }

        /**
         * Splits a well-formed metadata line into its five fields using
         * {@code metadataLinePattern}.
         *
         * @param metadataLine line to parse
         * @return the five captured fields in order, or {@code null} if the line does not match
         */
        private static String[] parseMetadataLine(String metadataLine) {
            Matcher matcher = metadataLinePattern.matcher(metadataLine);
            if (!matcher.matches() || matcher.groupCount() != 5) {
                return null;
            }
            String[] fields = new String[5];
            for (int group = 1; group <= 5; group++) {
                fields[group - 1] = matcher.group(group);
            }
            return fields;
        }

        /**
         * Fallback parser for metadata lines the regular expression rejects.
         *
         * <p>The line is split from the right on single space characters: the last
         * four space-separated tokens become fields 1..4 and everything before them
         * (which may itself contain spaces) becomes field 0.
         *
         * @param malformedMetadataLine line to parse
         * @return the five fields in order, or {@code null} if the line contains fewer
         *         than four usable separators
         */
        private static String[] parseMalformedMetadataLine(String malformedMetadataLine) {
            String[] parts = new String[5];
            int lastMatchPos = malformedMetadataLine.length();
            int matchCount = 0;
            while (matchCount < 5) {
                int nextMatchPos = malformedMetadataLine.lastIndexOf(' ', lastMatchPos - 1);
                boolean leadingField = matchCount == 4;
                if (nextMatchPos == -1 && !leadingField) {
                    break;
                }
                // Increment before indexing: fields are filled from slot 4 down to
                // slot 0. Indexing with the pre-increment count would address slot 5.
                matchCount++;
                parts[5 - matchCount] = leadingField
                    ? malformedMetadataLine.substring(0, lastMatchPos)
                    : malformedMetadataLine.substring(nextMatchPos + 1, lastMatchPos);
                lastMatchPos = nextMatchPos;
            }
            return matchCount == 5 ? parts : null;
        }

        /**
         * Parses the ARC metadata line and records its fields.
         *
         * <p>The strict parser is tried first, then the lenient fallback. Field 0
         * becomes the output key, fields 1..3 are stored as {@code x_commoncrawl_*}
         * headers, and field 4 is the record length.
         *
         * @param metadata the metadata line without its line terminator
         * @throws IOException if the line is missing, cannot be split into five
         *         fields, or its record length is not a valid integer
         */
        private final void processMetadataLine(String metadata) throws IOException {
            if (metadata == null) {
                throw new IOException("Invalid ARC Metadata Line:" + metadata);
            }
            String[] parts = ArcFileBuilder.parseMetadataLine(metadata);
            if (parts == null) {
                parts = ArcFileBuilder.parseMalformedMetadataLine(metadata);
            }
            if (parts == null) {
                throw new IOException("Invalid ARC Metadata Line:" + metadata);
            }
            this._keyOut.set(parts[0]);
            this._headers.add("x_commoncrawl_HostIP", parts[1]);
            this._headers.add("x_commoncrawl_ArcTimestamp", parts[2]);
            this._headers.add("x_commoncrawl_ArcMimeType", parts[3]);
            try {
                this._recordLen = Integer.parseInt(parts[4]);
            } catch (NumberFormatException e) {
                // Report a bad length the same way as any other malformed line
                // instead of letting an unchecked exception escape.
                throw new IOException("Invalid ARC Metadata Line:" + metadata, e);
            }
        }

        /**
         * Parses the buffered header section and prepares the output value.
         *
         * <p>Steps: concatenate the pending buffers; add each non-empty line to the
         * header collection (split at the first colon, or stored under an empty name
         * when there is no colon or the colon is the last character); derive the
         * payload length from the record length; then serialize all headers plus a
         * trailing CRLF into the start of the value, which is sized to hold headers
         * and payload.
         *
         * @throws IOException if decoding or serialization fails
         */
        private final void processHeaders() throws IOException {
            // Gather the raw header bytes into one array.
            int rawHeaderSize = 0;
            for (ByteBuffer chunk : this._buffers) {
                rawHeaderSize += chunk.remaining();
            }
            byte[] headerBytes = new byte[rawHeaderSize];
            int writePos = 0;
            for (ByteBuffer chunk : this._buffers) {
                int chunkLen = chunk.remaining();
                chunk.get(headerBytes, writePos, chunkLen);
                writePos += chunkLen;
            }

            // Parse line by line.
            BufferedReader reader = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(headerBytes), UTF8_Charset));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.length() == 0) {
                    continue;
                }
                int colonPos = line.indexOf(':');
                boolean hasNameAndValue = colonPos != -1 && colonPos != line.length() - 1;
                if (hasNameAndValue) {
                    this._headers.add(line.substring(0, colonPos), line.substring(colonPos + 1));
                } else {
                    this._headers.add("", line);
                }
            }

            // The record length covers headers and payload; never go negative.
            this._payloadLength = Math.max(0, this._recordLen - rawHeaderSize);
            if (this._payloadLength == 0) {
                LOG.warn((Object)("Invalid Payload Size Detected Key:" + this._keyOut.toString() + " RecordLen:" + this._recordLen + " HeaderLen:" + rawHeaderSize + " Headers:\n" + this._headers.toString()));
                LOG.warn((Object)("Header Dump:" + HexDump.dumpHexString(headerBytes)));
            }
            this._headers.set("x_commoncrawl_ArcPayloadLen", Integer.toString(this._payloadLength));

            // Serialize the headers and place them at the front of the value.
            DataOutputBuffer serialized = new DataOutputBuffer();
            OutputStreamWriter writer = new OutputStreamWriter((OutputStream)serialized, Charset.forName("UTF-8"));
            writer.write(this._headers.toString());
            writer.write("\r\n");
            writer.flush();

            this._headerLength = serialized.getLength();
            this._valueOut.setSize(this._headerLength + this._payloadLength);
            System.arraycopy(serialized.getData(), 0, this._valueOut.getBytes(), 0, this._headerLength);
        }

        /**
         * Processes the section that has just been terminated and moves on to the
         * next phase: metadata line to header scan, header scan to content. Other
         * states are left untouched.
         *
         * @throws IOException if the completed section cannot be parsed
         */
        private final void transitionState() throws IOException {
            switch (this._state) {
                case LookingForHeaderTerminator: {
                    this.processHeaders();
                    this._state = State.ReadingContent;
                    return;
                }
                case LookingForMetadata: {
                    String metadataLine = this.readLine(Charset.forName("UTF-8"));
                    this.processMetadataLine(metadataLine);
                    this._state = State.LookingForHeaderTerminator;
                    return;
                }
                default: {
                    return;
                }
            }
        }

        /**
         * Marks the record complete. Must be called once all data has been supplied.
         *
         * <p>An error is logged when no payload bytes were received.
         *
         * @throws IOException if the builder never reached the content phase
         */
        public final void finish() throws IOException {
            if (this._state != State.ReadingContent) {
                throw new IOException("ArcBuilder finish calledin Invalid State. State:" + this._state + " ArcFile:" + this._keyOut + " Position:" + this._streamPos + " Item URI:" + this._keyOut);
            }
            this._state = State.Finished;
            if (this._dataLength == 0) {
                LOG.error((Object)("ArcFileBuilder Encountered Item with Zero Length Content. URI:" + this._keyOut));
            }
        }

        /**
         * Feeds the next chunk of inflated record data to the builder.
         *
         * <p>While the metadata line or the header section is still being scanned,
         * bytes are examined one at a time for the section terminator. Scanned bytes
         * are moved to the pending list, and a completed section triggers a state
         * transition. Once the content phase is reached, the rest of the chunk is
         * copied into the value, up to the expected payload length.
         *
         * @param buffer chunk to consume; may be retained by the builder
         * @throws IOException if a completed section cannot be parsed
         */
        public final void inputData(ByteBuffer buffer) throws IOException {
            while (buffer != null && this._state != State.ReadingContent) {
                boolean terminated = false;
                while (buffer.hasRemaining()) {
                    byte b = buffer.get();
                    // Fast path: an ordinary byte with no partial CR/LF match pending
                    // cannot terminate anything.
                    boolean ordinaryByte = b != 13 && b != 10
                        && this._crlfMatchState == CRLFMatchState.CRLFMatchState_NoMatch;
                    if (ordinaryByte) {
                        continue;
                    }
                    terminated = this.checkForTerminator(b);
                    if (terminated) {
                        buffer = this.appendAndSliceBuffer(buffer);
                        this.transitionState();
                        break;
                    }
                }
                // Chunk exhausted without finishing the section: keep it for later.
                if (!terminated && buffer != null && !buffer.hasRemaining()) {
                    buffer = this.appendAndSliceBuffer(buffer);
                }
            }

            if (this._state == State.ReadingContent && buffer != null && buffer.hasRemaining()) {
                int copyAmount = Math.min(this._payloadLength - this._dataLength, buffer.remaining());
                if (copyAmount > 0) {
                    buffer.get(this._valueOut.getBytes(), this._headerLength + this._dataLength, copyAmount);
                    this._dataLength += copyAmount;
                }
            }
        }

        /**
         * Progress of the blank-line detector used while scanning the header section.
         * Each constant names the last significant byte seen.
         */
        static enum CRLFMatchState {
            // No CR/LF sequence in progress.
            CRLFMatchState_NoMatch,
            // A CR has been seen at the start of a possible line ending.
            CRLFMatchState_FirstCR,
            // The first LF has been seen; another LF (optionally preceded by CR) completes the match.
            CRLFMatchState_FirstLF,
            // A CR has been seen after the first LF; an LF completes the match.
            CRLFMatchState_SecondCR;

        }

        /** Parsing phases of a record, in the order they are entered. */
        private static enum State {
            // Scanning for the line feed that ends the ARC metadata line.
            LookingForMetadata,
            // Scanning for the blank line that ends the header section.
            LookingForHeaderTerminator,
            // Copying payload bytes into the output value.
            ReadingContent,
            // finish() has been called successfully.
            Finished;

        }
    }
}

