package com.twitter.elephantbird.pig.mahout;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.twitter.elephantbird.pig.util.AbstractWritableConverter;
import com.twitter.elephantbird.pig.util.PigTokenHelper;
import com.twitter.elephantbird.pig.util.PigUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.pig.ResourceSchema;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;

/* loaded from: input_file:com/twitter/elephantbird/pig/mahout/VectorWritableConverter.class */
/**
 * Converts between Mahout {@link VectorWritable} data and Pig tuples.
 *
 * <p>Two Pig representations are handled:
 * <ul>
 *   <li><b>dense</b>: a tuple whose fields are all numeric, one field per vector element;</li>
 *   <li><b>sparse</b>: either {@code (entries: {(index: int, value)})} when a cardinality was
 *       configured, or {@code (cardinality: int, entries: {(index: int, value)})} otherwise.</li>
 * </ul>
 *
 * <p>Behavior is configured with GNU-style long options passed to the constructor:
 * {@code cardinality} (takes an argument), {@code dense}, {@code sparse}, {@code sequential}
 * and {@code floatPrecision}. {@code sequential} implies {@code sparse}; {@code dense} is
 * mutually exclusive with both.
 */
public class VectorWritableConverter extends AbstractWritableConverter<VectorWritable> {
    private static final String CARDINALITY_PARAM = "cardinality";
    private static final String DENSE_PARAM = "dense";
    private static final String SPARSE_PARAM = "sparse";
    private static final String SEQUENTIAL_PARAM = "sequential";
    private static final String FLOAT_PRECISION_PARAM = "floatPrecision";
    private final TupleFactory tupleFactory;
    private final BagFactory bagFactory;
    /** True when the {@code dense} option was given. */
    private final boolean dense;
    /** True when the {@code sparse} or the {@code sequential} option was given. */
    private final boolean sparse;
    /** Expected vector cardinality, or {@code null} when the option was not given. */
    private final Integer cardinality;
    /** True when sparse vectors are converted to {@link SequentialAccessSparseVector} on store. */
    private final boolean sequential;
    /** True when values are exposed to Pig as floats and written with lax precision. */
    private final boolean floatPrecision;

    /**
     * Creates a converter with no options set.
     *
     * @throws ParseException if option parsing fails
     */
    public VectorWritableConverter() throws ParseException {
        this(new String[0]);
    }

    /**
     * Creates a converter configured from command-line style options.
     *
     * @param strArr option tokens; must not be {@code null}
     * @throws ParseException if the tokens cannot be parsed against {@link #getOptions()}
     * @throws NumberFormatException if the cardinality value is not a valid integer
     * @throws IllegalStateException if mutually exclusive options are combined or the
     *     cardinality is negative
     */
    public VectorWritableConverter(String[] strArr) throws ParseException {
        super(new VectorWritable());
        this.tupleFactory = TupleFactory.getInstance();
        this.bagFactory = BagFactory.getInstance();
        Preconditions.checkNotNull(strArr);
        CommandLine commandLine = parseArguments(strArr);
        this.cardinality = commandLine.hasOption(CARDINALITY_PARAM)
                ? Integer.valueOf(commandLine.getOptionValue(CARDINALITY_PARAM))
                : null;
        // Fail fast: a negative cardinality can never describe a valid vector.
        Preconditions.checkState(this.cardinality == null || this.cardinality.intValue() >= 0,
                "Cardinality must be non-negative but found %s", this.cardinality);
        this.dense = commandLine.hasOption(DENSE_PARAM);
        this.sequential = commandLine.hasOption(SEQUENTIAL_PARAM);
        Preconditions.checkState(!(this.dense && this.sequential),
                "Options '-dense' and '-sequential' are mutually exclusive");
        // Sequential access only applies to sparse vectors, so it implies sparse.
        this.sparse = commandLine.hasOption(SPARSE_PARAM) || this.sequential;
        Preconditions.checkState(!(this.dense && this.sparse),
                "Options '-dense' and '-sparse' are mutually exclusive");
        this.floatPrecision = commandLine.hasOption(FLOAT_PRECISION_PARAM);
        this.writable.setWritesLaxPrecision(this.floatPrecision);
    }

    /** Parses the given tokens against the options returned by {@link #getOptions()}. */
    private CommandLine parseArguments(String[] strArr) throws ParseException {
        return new GnuParser().parse(getOptions(), strArr);
    }

    /**
     * Builds the set of options understood by this converter.
     *
     * <p>Note: this method is invoked from the constructor, before subclass fields are
     * initialized.
     *
     * @return the supported options
     */
    protected Options getOptions() {
        Options options = new Options();
        OptionBuilder.withLongOpt(CARDINALITY_PARAM);
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("n");
        OptionBuilder.withDescription("Expected cardinality of vector data.");
        options.addOption(OptionBuilder.create());
        addFlag(options, DENSE_PARAM,
                "If specified along with cardinality, reported LOAD schema will be dense.");
        addFlag(options, SPARSE_PARAM, "If specified, reported LOAD schema will be sparse.");
        addFlag(options, SEQUENTIAL_PARAM,
                "If specified, Pig vector data will be converted to SequentialAccessSparseVector data on STORE. Otherwise, RandomAccessSparseVector is used.");
        addFlag(options, FLOAT_PRECISION_PARAM,
                "If specified, float precision will be used when writing output data.");
        return options;
    }

    /** Registers an argument-less long option with the given description. */
    private static void addFlag(Options options, String longOpt, String description) {
        OptionBuilder.withLongOpt(longOpt);
        OptionBuilder.withDescription(description);
        options.addOption(OptionBuilder.create());
    }

    /**
     * Reports the schema of loaded data, as determined by the configured options.
     *
     * <ul>
     *   <li>sparse: a tuple holding the {@code entries} bag, preceded by an int
     *       {@code cardinality} field when no cardinality was configured;</li>
     *   <li>dense with a configured cardinality: a tuple of {@code cardinality} value fields;</li>
     *   <li>otherwise: a bytearray field.</li>
     * </ul>
     * Value fields are float when {@code floatPrecision} is set, double otherwise.
     *
     * @return the load schema
     * @throws IOException if schema construction fails
     */
    @Override // com.twitter.elephantbird.pig.util.AbstractWritableConverter, com.twitter.elephantbird.pig.util.WritableConverter
    public ResourceSchema.ResourceFieldSchema getLoadSchema() throws IOException {
        byte valueType = this.floatPrecision ? DataType.FLOAT : DataType.DOUBLE;
        if (!this.sparse) {
            if (this.dense && this.cardinality != null) {
                Schema.FieldSchema valueField = new Schema.FieldSchema((String) null, valueType);
                Schema denseSchema = new Schema(Collections.nCopies(this.cardinality.intValue(), valueField));
                return new ResourceSchema.ResourceFieldSchema(new Schema.FieldSchema((String) null, denseSchema));
            }
            return new ResourceSchema.ResourceFieldSchema(new Schema.FieldSchema((String) null, DataType.BYTEARRAY));
        }

        Schema.FieldSchema entriesField;
        if (PigUtil.Pig9orNewer) {
            // On Pig 0.9+ the bag schema wraps an explicit tuple field named "t".
            Schema entrySchema = new Schema();
            entrySchema.add(new Schema.FieldSchema("index", DataType.INTEGER));
            entrySchema.add(new Schema.FieldSchema("value", valueType));
            Schema bagSchema = new Schema();
            bagSchema.add(new Schema.FieldSchema("t", entrySchema, DataType.TUPLE));
            entriesField = new Schema.FieldSchema("entries", bagSchema, DataType.BAG);
        } else {
            entriesField = new Schema.FieldSchema("entries",
                    new Schema(ImmutableList.of(
                            new Schema.FieldSchema("index", DataType.INTEGER),
                            new Schema.FieldSchema("value", valueType))),
                    DataType.BAG);
        }

        if (this.cardinality != null) {
            return new ResourceSchema.ResourceFieldSchema(
                    new Schema.FieldSchema((String) null, new Schema(ImmutableList.of(entriesField))));
        }
        return new ResourceSchema.ResourceFieldSchema(
                new Schema.FieldSchema((String) null, new Schema(ImmutableList.of(
                        new Schema.FieldSchema(CARDINALITY_PARAM, DataType.INTEGER), entriesField))));
    }

    /** Converts raw bytes to a tuple by delegating to {@code bytesToTuple} with a null schema. */
    @Override // com.twitter.elephantbird.pig.util.AbstractWritableConverter, com.twitter.elephantbird.pig.util.WritableConverter
    public Object bytesToObject(DataByteArray dataByteArray) throws IOException {
        return bytesToTuple(dataByteArray.get(), null);
    }

    /**
     * Converts a vector to its Pig tuple representation.
     *
     * <p>Dense vectors become a tuple of values. Sparse vectors become a bag of
     * {@code (index, value)} tuples over the non-zero elements, wrapped in a tuple that is
     * prefixed with the vector size when no cardinality was configured. Values are
     * {@link Float} when {@code floatPrecision} is set and {@link Double} otherwise, in both
     * representations, matching the value type reported by {@link #getLoadSchema()}.
     *
     * @param vectorWritable the writable to convert; must hold a non-null vector
     * @param resourceFieldSchema unused by this implementation
     * @return the tuple representation of the vector
     * @throws NullPointerException if the writable or its vector is null
     * @throws IllegalStateException if the vector contradicts the configured cardinality or
     *     the configured dense/sparse expectation
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // com.twitter.elephantbird.pig.util.WritableLoadCaster
    public Tuple toTuple(VectorWritable vectorWritable, ResourceSchema.ResourceFieldSchema resourceFieldSchema) throws IOException {
        Preconditions.checkNotNull(vectorWritable, "VectorWritable is null");
        Vector vector = vectorWritable.get();
        Preconditions.checkNotNull(vector, "Vector is null");
        int size = vector.size();
        if (this.cardinality != null) {
            Preconditions.checkState(this.cardinality.intValue() == size,
                    "Expecting cardinality %s but found cardinality %s", this.cardinality, Integer.valueOf(size));
        }

        if (vector.isDense()) {
            Preconditions.checkState(!this.sparse, "Expecting sparse vector but found dense vector");
            ArrayList<Object> values = Lists.newArrayListWithCapacity(size);
            Iterator<Vector.Element> elements = vector.iterator();
            while (elements.hasNext()) {
                values.add(boxValue(elements.next().get()));
            }
            return this.tupleFactory.newTupleNoCopy(values);
        }

        Preconditions.checkState(!this.dense, "Expecting dense vector but found sparse vector");
        ArrayList<Tuple> entries = Lists.newArrayListWithCapacity(vector.getNumNondefaultElements());
        Iterator<Vector.Element> nonZero = vector.iterateNonZero();
        while (nonZero.hasNext()) {
            Vector.Element element = nonZero.next();
            int index = element.index();
            Preconditions.checkState(this.cardinality == null || index < this.cardinality.intValue(),
                    "Vector entry index %s is outside valid range [0, %s)", Integer.valueOf(index), this.cardinality);
            // The value must be boxed according to floatPrecision so that the emitted data
            // agrees with the float/double value type declared by getLoadSchema().
            entries.add(this.tupleFactory.newTupleNoCopy(
                    ImmutableList.<Object>of(Integer.valueOf(index), boxValue(element.get()))));
        }
        DataBag entriesBag = this.bagFactory.newDefaultBag(entries);
        if (this.cardinality != null) {
            return this.tupleFactory.newTupleNoCopy(ImmutableList.<Object>of(entriesBag));
        }
        return this.tupleFactory.newTupleNoCopy(ImmutableList.<Object>of(Integer.valueOf(size), entriesBag));
    }

    /**
     * Boxes a vector value as {@link Float} when {@code floatPrecision} is set, otherwise as
     * {@link Double}. Written with if/else because a conditional expression would promote
     * both branches to double.
     */
    private Object boxValue(double value) {
        if (this.floatPrecision) {
            return Float.valueOf((float) value);
        }
        return Double.valueOf(value);
    }

    /**
     * Validates that the schema of data to be stored is one of the supported vector forms.
     *
     * <p>Accepted: a tuple holding a single bag of entries (requires a configured
     * cardinality), a tuple of an int followed by a bag of entries (requires that no
     * cardinality was configured), or a tuple whose fields are all numeric.
     *
     * @param resourceFieldSchema schema of the field being stored
     * @throws IOException if the schema does not describe a supported vector form
     * @throws NullPointerException if a single-bag sparse schema is used without a cardinality
     * @throws IllegalStateException if a cardinality-prefixed sparse schema is used although a
     *     cardinality was configured
     */
    @Override // com.twitter.elephantbird.pig.util.AbstractWritableConverter, com.twitter.elephantbird.pig.util.WritableConverter
    public void checkStoreSchema(ResourceSchema.ResourceFieldSchema resourceFieldSchema) throws IOException {
        assertFieldTypeEquals(DataType.TUPLE, resourceFieldSchema.getType(), "tuple");
        ResourceSchema tupleSchema = resourceFieldSchema.getSchema();
        assertNotNull(tupleSchema, "ResourceSchema for tuple is null");
        ResourceSchema.ResourceFieldSchema[] fields = tupleSchema.getFields();
        assertNotNull(fields, "Tuple field schemas are null");

        if (fields.length == 1 && fields[0].getType() == DataType.BAG) {
            // Sparse form without a cardinality field: cardinality must come from the options.
            Preconditions.checkNotNull(this.cardinality, "Cardinality undefined");
            checkSparseVectorEntriesSchema(fields[0].getSchema());
            return;
        }
        if (fields.length == 2 && fields[1].getType() == DataType.BAG) {
            // Sparse form carrying its own cardinality field.
            Preconditions.checkState(this.cardinality == null, "Cardinality already defined");
            assertFieldTypeEquals(DataType.INTEGER, fields[0].getType(), "tuple[0]");
            checkSparseVectorEntriesSchema(fields[1].getSchema());
            return;
        }
        // Dense form: every field must be numeric.
        for (int i = 0; i < fields.length; i++) {
            assertFieldTypeIsNumeric(fields[i].getType(), "tuple[" + i + PigTokenHelper.MAP_END);
        }
    }

    /**
     * Validates that a bag schema describes sparse entries: a single tuple field made of an
     * int index followed by a numeric value.
     */
    private void checkSparseVectorEntriesSchema(ResourceSchema resourceSchema) throws IOException {
        assertNotNull(resourceSchema, "ResourceSchema of entries is null");
        ResourceSchema.ResourceFieldSchema[] bagFields = resourceSchema.getFields();
        assertNotNull(bagFields, "Tuple field schemas are null");
        assertTupleLength(1, bagFields.length, "entries");
        assertFieldTypeEquals(DataType.TUPLE, bagFields[0].getType(), "entries[0]");
        ResourceSchema entrySchema = bagFields[0].getSchema();
        assertNotNull(entrySchema, "ResourceSchema of entries[0] is null");
        ResourceSchema.ResourceFieldSchema[] entryFields = entrySchema.getFields();
        assertNotNull(entryFields, "Tuple field schemas are null");
        assertTupleLength(2, entryFields.length, "entries[0]");
        assertFieldTypeEquals(DataType.INTEGER, entryFields[0].getType(), "entries[0][0]");
        assertFieldTypeIsNumeric(entryFields[1].getType(), "entries[0][1]");
    }

    /**
     * Converts a Pig tuple to a vector and stores it in this converter's reusable writable.
     *
     * <p>Sparse tuples produce a {@link RandomAccessSparseVector}, converted to a
     * {@link SequentialAccessSparseVector} when {@code sequential} is set. Any other tuple is
     * treated as dense and produces a {@link DenseVector}.
     *
     * @param tuple the tuple to convert; must not be {@code null}
     * @return this converter's writable, holding the converted vector
     * @throws IOException if the tuple fields do not have the expected types
     * @throws NullPointerException if the tuple is null, or a sparse tuple lacks a cardinality
     *     field and none was configured
     * @throws IllegalStateException if the tuple contradicts the configured cardinality or
     *     dense/sparse expectation, or a sparse entry index lies outside the cardinality
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // com.twitter.elephantbird.pig.util.WritableStoreCaster
    /* renamed from: toWritable, reason: merged with bridge method [inline-methods] */
    public VectorWritable mo238toWritable(Tuple tuple) throws IOException {
        Vector result;
        Preconditions.checkNotNull(tuple, "Tuple is null");
        if (isSparseVector(tuple)) {
            Preconditions.checkState(!this.dense, "Expecting dense vector but found sparse vector");
            int vectorCardinality;
            DataBag entries;
            if (tuple.size() == 2) {
                vectorCardinality = ((Integer) tuple.get(0)).intValue();
                if (this.cardinality != null) {
                    Preconditions.checkState(this.cardinality.intValue() == vectorCardinality,
                            "Expecting cardinality %s but found cardinality %s",
                            this.cardinality, Integer.valueOf(vectorCardinality));
                }
                entries = (DataBag) tuple.get(1);
            } else {
                Preconditions.checkNotNull(this.cardinality, "Cardinality is undefined");
                vectorCardinality = this.cardinality.intValue();
                entries = (DataBag) tuple.get(0);
            }
            result = new RandomAccessSparseVector(vectorCardinality);
            for (Tuple entry : entries) {
                validateSparseVectorEntry(entry);
                int index = ((Integer) entry.get(0)).intValue();
                // setQuick performs no bounds checking, so reject out-of-range indexes here
                // rather than silently writing a vector that violates its own cardinality.
                Preconditions.checkState(index >= 0 && index < vectorCardinality,
                        "Vector entry index %s is outside valid range [0, %s)",
                        Integer.valueOf(index), Integer.valueOf(vectorCardinality));
                result.setQuick(index, ((Number) entry.get(1)).doubleValue());
            }
            if (this.sequential) {
                result = new SequentialAccessSparseVector(result);
            }
        } else {
            Preconditions.checkState(!this.sparse, "Expecting sparse vector but found dense vector");
            validateDenseVector(tuple);
            double[] values = new double[tuple.size()];
            for (int i = 0; i < values.length; i++) {
                values[i] = ((Number) tuple.get(i)).doubleValue();
            }
            result = new DenseVector(values, true);
        }
        this.writable.set(result);
        return this.writable;
    }

    /**
     * Tells whether a tuple has one of the two sparse shapes: {@code (bag)} or
     * {@code (int, bag)}.
     */
    private static boolean isSparseVector(Tuple tuple) throws IOException {
        assertNotNull(tuple, "Tuple is null");
        int length = tuple.size();
        if (length == 1) {
            return tuple.getType(0) == DataType.BAG;
        }
        return length == 2 && tuple.getType(0) == DataType.INTEGER && tuple.getType(1) == DataType.BAG;
    }

    /** Validates that a sparse entry is an {@code (int index, numeric value)} pair. */
    private static void validateSparseVectorEntry(Tuple tuple) throws IOException {
        assertNotNull(tuple, "Tuple is null");
        assertTupleLength(2, tuple.size(), "tuple");
        assertFieldTypeEquals(DataType.INTEGER, tuple.getType(0), "tuple[0]");
        assertFieldTypeIsNumeric(tuple.getType(1), "tuple[1]");
    }

    /** Validates that every field of a dense vector tuple is numeric. */
    private static void validateDenseVector(Tuple tuple) throws IOException {
        assertNotNull(tuple, "Tuple is null");
        for (int i = 0; i < tuple.size(); i++) {
            assertFieldTypeIsNumeric(tuple.getType(i), "tuple[" + i + PigTokenHelper.MAP_END);
        }
    }

    /** Throws an {@link IOException} with the formatted message when {@code obj} is null. */
    private static void assertNotNull(Object obj, String str, Object... objArr) throws IOException {
        if (obj == null) {
            throw new IOException(String.format(str, objArr));
        }
    }

    /**
     * Throws an {@link IOException} when the found type differs from the expected one.
     *
     * @param b expected Pig type
     * @param b2 found Pig type
     * @param str description of the checked field, used in the error message
     */
    private static void assertFieldTypeEquals(byte b, byte b2, String str) throws IOException {
        if (b != b2) {
            throw new IOException(String.format("Expected %s of type '%s' but found type '%s'", str, DataType.findTypeName(b), DataType.findTypeName(b2)));
        }
    }

    /**
     * Throws an {@link IOException} unless the type is int, long, float or double.
     *
     * @param b found Pig type
     * @param str description of the checked field, used in the error message
     */
    private static void assertFieldTypeIsNumeric(byte b, String str) throws IOException {
        boolean numeric = b == DataType.INTEGER
                || b == DataType.LONG
                || b == DataType.FLOAT
                || b == DataType.DOUBLE;
        if (!numeric) {
            throw new IOException(String.format("Expected %s of numeric type but found type '%s'", str, DataType.findTypeName(b)));
        }
    }

    /**
     * Throws an {@link IOException} when the found length differs from the expected one.
     *
     * @param i expected length
     * @param i2 found length
     * @param str description of the checked tuple, used in the error message
     */
    private static void assertTupleLength(int i, int i2, String str) throws IOException {
        if (i != i2) {
            throw new IOException(String.format("Expected %s of length %s but found length %s", str, Integer.valueOf(i), Integer.valueOf(i2)));
        }
    }
}
