/*
 * Decompiled with CFR 0.152.
 */
package elki.outlier;

import elki.Algorithm;
import elki.data.NumberVector;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.datastore.DataStore;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.datastore.WritableIntegerDataStore;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDMIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.knn.KNNSearcher;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.MaterializedRelation;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.Distance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.math.linearalgebra.VMath;
import elki.math.linearalgebra.pca.PCAResult;
import elki.math.linearalgebra.pca.PCARunner;
import elki.math.statistics.distribution.ChiSquaredDistribution;
import elki.math.statistics.distribution.estimator.GammaChoiWetteEstimator;
import elki.outlier.OutlierAlgorithm;
import elki.result.Metadata;
import elki.result.outlier.OutlierResult;
import elki.result.outlier.ProbabilisticOutlierScore;
import elki.utilities.datastructures.arraylike.DoubleArrayAdapter;
import elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.GreaterConstraint;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.EnumParameter;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.Arrays;

@Title(value="COP: Correlation Outlier Probability")
@Reference(authors="Hans-Peter Kriegel, Peer Kr\u00f6ger, Erich Schubert, Arthur Zimek", title="Outlier Detection in Arbitrarily Oriented Subspaces", booktitle="Proc. IEEE Int. Conf. on Data Mining (ICDM 2012)", url="https://doi.org/10.1109/ICDM.2012.21", bibkey="DBLP:conf/icdm/KriegelKSZ12")
public class COP<V extends NumberVector>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(COP.class);
    public static final String COP_SCORES = "Correlation Outlier Probabilities";
    public static final String COP_DIM = "Local Correlation Dimensionality";
    public static final String COP_ERRORVEC = "Error vectors";
    private static final DoubleArrayAdapter SHORTENED_ARRAY = new DoubleArrayAdapter(){

        public int size(double[] array) {
            return (int)(0.85 * (double)array.length);
        }
    };
    protected Distance<? super V> distance;
    protected int k;
    protected PCARunner pca;
    protected double expect = 1.0E-4;
    protected DistanceDist dist = DistanceDist.CHISQUARED;
    protected boolean models;

    public COP(Distance<? super V> distance, int k, PCARunner pca, double expect, DistanceDist dist, boolean models) {
        this.distance = distance;
        this.k = k;
        this.pca = pca;
        this.expect = expect;
        this.dist = dist;
        this.models = models;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public OutlierResult run(Relation<V> relation) {
        DBIDs ids = relation.getDBIDs();
        KNNSearcher knnQuery = new QueryBuilder(relation, this.distance).kNNByDBID(this.k + 1);
        int dim = RelationUtil.dimensionality(relation);
        if (this.k <= dim + 1) {
            LOG.warning((CharSequence)("PCA is underspecified with a too low k! k should be at much larger than " + dim));
        }
        WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage((DBIDs)ids, (int)6);
        WritableDataStore cop_err_v = this.models ? DataStoreUtil.makeStorage((DBIDs)ids, (int)6, double[].class) : null;
        WritableIntegerDataStore cop_dim = this.models ? DataStoreUtil.makeIntegerStorage((DBIDs)ids, (int)6, (int)-1) : null;
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress(COP_SCORES, relation.size(), LOG) : null;
        double[] centroid = new double[dim];
        double[] scores = new double[dim];
        HashSetModifiableDBIDs nids = DBIDUtil.newHashSet((int)(this.k + 10));
        DBIDIter id = ids.iter();
        while (id.valid()) {
            nids.clear().addDBIDs((DBIDs)knnQuery.getKNN((Object)id, this.k + 1));
            nids.remove((DBIDRef)id);
            COP.computeCentroid(centroid, relation, (DBIDs)nids);
            PCAResult pcares = this.pca.processIds((DBIDs)nids, relation);
            double[][] tevecs = pcares.getEigenvectors();
            double[] evs = pcares.getEigenvalues();
            double[] projected = VMath.times((double[][])tevecs, (double[])VMath.minusEquals((double[])((NumberVector)relation.get((DBIDRef)id)).toArray(), (double[])centroid));
            if (this.dist == DistanceDist.CHISQUARED) {
                double sqdevs = 0.0;
                for (int d = 0; d < dim; ++d) {
                    double dev = projected[d];
                    scores[d] = 1.0 - ChiSquaredDistribution.cdf((double)(sqdevs += dev * dev / evs[d]), (double)((double)d + 1.0));
                }
            } else {
                assert (this.dist == DistanceDist.GAMMA);
                double[][] dists = new double[dim][nids.size()];
                double[] srel = new double[dim];
                DBIDMIter s = nids.iter();
                for (int j = 0; s.valid() && j < nids.size(); ++j) {
                    NumberVector vec = (NumberVector)relation.get((DBIDRef)s);
                    for (int d = 0; d < dim; ++d) {
                        srel[d] = vec.doubleValue(d) - centroid[d];
                    }
                    double sqdist = 0.0;
                    for (int d = 0; d < dim; ++d) {
                        double serrd = VMath.transposeTimes((double[])tevecs[d], (double[])srel);
                        dists[d][j] = sqdist += serrd * serrd / evs[d];
                    }
                    s.advance();
                }
                double sqdevs = 0.0;
                for (int d = 0; d < dim; ++d) {
                    double dev = projected[d];
                    Arrays.sort(dists[d]);
                    scores[d] = 1.0 - GammaChoiWetteEstimator.STATIC.estimate((Object)dists[d], (NumberArrayAdapter)SHORTENED_ARRAY).cdf(sqdevs += dev * dev / evs[d]);
                }
            }
            double min = Double.POSITIVE_INFINITY;
            int vdim = dim - 1;
            for (int d = 0; d < dim; ++d) {
                double v = scores[d];
                if (!(v < min)) continue;
                min = v;
                vdim = d;
            }
            double prob = this.expect * (1.0 - min) / (this.expect + min);
            cop_score.putDouble((DBIDRef)id, prob);
            if (this.models) {
                Arrays.fill(projected, vdim + 1, dim, 0.0);
                cop_err_v.put((DBIDRef)id, (Object)VMath.timesEquals((double[])VMath.transposeTimes((double[][])tevecs, (double[])projected), (double)(-prob)));
                cop_dim.putInt((DBIDRef)id, dim - vdim);
            }
            LOG.incrementProcessed((AbstractProgress)prog);
            id.advance();
        }
        LOG.ensureCompleted(prog);
        MaterializedDoubleRelation scoreResult = new MaterializedDoubleRelation(COP_SCORES, ids, (DoubleDataStore)cop_score);
        ProbabilisticOutlierScore scoreMeta = new ProbabilisticOutlierScore();
        OutlierResult result = new OutlierResult(scoreMeta, (DoubleRelation)scoreResult);
        if (this.models) {
            Metadata.hierarchyOf((Object)result).addChild((Object)new MaterializedRelation(COP_DIM, TypeUtil.INTEGER, ids, (DataStore)cop_dim));
            Metadata.hierarchyOf((Object)result).addChild((Object)new MaterializedRelation(COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, ids, (DataStore)cop_err_v));
        }
        return result;
    }

    private static void computeCentroid(double[] centroid, Relation<? extends NumberVector> relation, DBIDs ids) {
        Arrays.fill(centroid, 0.0);
        int dim = centroid.length;
        DBIDIter it = ids.iter();
        while (it.valid()) {
            NumberVector v = (NumberVector)relation.get((DBIDRef)it);
            for (int i = 0; i < dim; ++i) {
                int n = i;
                centroid[n] = centroid[n] + v.doubleValue(i);
            }
            it.advance();
        }
        VMath.timesEquals((double[])centroid, (double)(1.0 / (double)ids.size()));
    }

    public static class Par<V extends NumberVector>
    implements Parameterizer {
        public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
        public static final OptionID DIST_ID = new OptionID("cop.dist", "The assumed distribution of squared distances. ChiSquared is faster, Gamma expected to be more accurate but could also overfit.");
        public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
        public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affect score normalization.");
        public static final OptionID MODELS_ID = new OptionID("cop.models", "Include COP models (error vectors) in output. This needs more memory.");
        int k;
        PCARunner pca;
        DistanceDist dist;
        double expect;
        boolean models = false;
        protected Distance<? super V> distance;

        public void configure(Parameterization config) {
            new ObjectParameter(Algorithm.Utils.DISTANCE_FUNCTION_ID, Distance.class, EuclideanDistance.class).grab(config, x -> {
                this.distance = x;
            });
            ((IntParameter)new IntParameter(K_ID).addConstraint((ParameterConstraint)new GreaterConstraint(5))).grab(config, x -> {
                this.k = x;
            });
            new EnumParameter(DIST_ID, DistanceDist.class, (Enum)DistanceDist.GAMMA).grab(config, x -> {
                this.dist = x;
            });
            ((DoubleParameter)((DoubleParameter)new DoubleParameter(EXPECT_ID, 0.001).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE)).grab(config, x -> {
                this.expect = x;
            });
            new ObjectParameter(PCARUNNER_ID, PCARunner.class, PCARunner.class).grab(config, x -> {
                this.pca = x;
            });
            new Flag(MODELS_ID).grab(config, x -> {
                this.models = x;
            });
        }

        public COP<V> make() {
            return new COP<V>(this.distance, this.k, this.pca, this.expect, this.dist, this.models);
        }
    }

    public static enum DistanceDist {
        CHISQUARED,
        GAMMA;

    }
}

