package be.ac.vub.bsb.parsers.util;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.AbstractFormatter;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.log4j.spi.Configurator;

/* loaded from: input_file:be/ac/vub/bsb/parsers/util/CategoricFeaturesToBinaryConverter.class */
/**
 * Parses a tab-delimited feature table and expands categorical features into
 * binary (0/1) indicator rows.
 *
 * <p>Each data line holds one feature: the first field is the feature name and
 * the remaining fields are its values. The line whose line counter is not
 * positive is taken as the header and supplies the column names. Lines whose
 * line counter is listed in {@link #setIndicesOfCategoricalFeats(List)} are
 * treated as categorical; all other lines are parsed as numbers.
 *
 * <p>Rows of the resulting matrix are ordered by feature name, because the
 * parsed vectors are kept in a {@link TreeMap}.
 */
public class CategoricFeaturesToBinaryConverter extends GenericDelimFlatFileParser {
    /** Field index printed by the ENV_BIOME debug output. */
    private static final int ENV_BIOME_TEST_COLUMN = 3135;
    /** Token that is never counted as a category of its own. */
    private static final String NOT_DETERMINED = "not determined";

    private String[] _columnNames;
    private List<Integer> _indicesOfCategoricalFeats;
    private Map<String, DoubleMatrix1D> _featVersusValues = new TreeMap<String, DoubleMatrix1D>();
    private Matrix _featureMatrix = new Matrix();
    private boolean _test = true;

    /** Creates a converter and runs the parent parser's initialisation. */
    public CategoricFeaturesToBinaryConverter() {
        super.init();
    }

    /**
     * Reads the input line by line (tab-delimited, empty output location) and
     * then assembles the collected feature vectors into the feature matrix.
     */
    @Override
    public void parse() {
        super.setInputDelimiter("\t");
        super.setOutputLocation("");
        super.goThroughLines();
        assembleFeatureMatrix();
    }

    /**
     * Copies the collected feature vectors into a new matrix, one row per
     * feature, and names the columns after the header fields (skipping the
     * first header field, which sits above the feature names).
     */
    private void assembleFeatureMatrix() {
        System.out.println("Assembling extended feature matrix...");
        if (this._featVersusValues.isEmpty()) {
            // Without this guard the lookup of the first vector below would fail
            // with NoSuchElementException on input that yields no feature rows.
            System.out.println("No features were parsed; the feature matrix is left empty.");
            setFeatureMatrix(new Matrix());
            return;
        }
        // The first stored vector determines the number of matrix columns.
        int columns = this._featVersusValues.values().iterator().next().size();
        setFeatureMatrix(new Matrix(this._featVersusValues.size(), columns));
        int row = 0;
        for (Map.Entry<String, DoubleMatrix1D> entry : this._featVersusValues.entrySet()) {
            getFeatureMatrix().setRowName(row, entry.getKey());
            getFeatureMatrix().setRow(row, entry.getValue().toArray());
            row++;
        }
        // The header is only known if a line with a non-positive line counter was seen.
        if (this._columnNames != null) {
            for (int col = 1; col < this._columnNames.length; col++) {
                getFeatureMatrix().setColName(col - 1, this._columnNames[col]);
            }
        }
    }

    /**
     * Processes one input line: stores the header, parses a numeric feature,
     * or expands a categorical feature into indicator vectors.
     *
     * <p>A categorical feature with a single category is skipped; with two
     * categories only the first (in sorted order) gets an indicator vector;
     * with more categories every category gets its own vector. Indicator
     * vectors are stored under the feature name joined to the category name.
     *
     * @param str the raw input line
     * @return always the empty string
     * @throws NumberFormatException if a value of a non-categorical feature is
     *         neither a missing-value token nor a parsable number
     */
    @Override
    protected String processLine(String str) {
        String[] split = str.split(super.getInputDelimiter());
        if (getLineCounter() <= 0) {
            this._columnNames = split;
            return "";
        }
        if (str.trim().length() == 0) {
            // A blank line carries no feature; storing it would add a zero-length
            // vector under an empty name (or fail on split[0] for a tab-only line).
            return "";
        }
        String featureName = split[0];
        System.out.println(String.valueOf(getLineCounter()) + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + featureName);

        // An unset index list means that no feature is categorical.
        List<Integer> categoricalIndices = getIndicesOfCategoricalFeats();
        boolean categorical = categoricalIndices != null
                && categoricalIndices.contains(Integer.valueOf(getLineCounter()));
        if (!categorical) {
            this._featVersusValues.put(featureName, parseNumericValues(split));
            return "";
        }

        System.out.println("Feature " + featureName + " is categorical.");
        TreeSet<String> categories = collectCategories(split);
        // Debug output; only printed when the row actually has that many fields.
        if (this._test && featureName.equals("ENV_BIOME") && split.length > ENV_BIOME_TEST_COLUMN) {
            System.out.println("env-biome test column: " + split[ENV_BIOME_TEST_COLUMN]);
        }
        System.out.println(categories);

        if (categories.size() == 1) {
            System.out.println("Categorical feature " + featureName + " has only one state (" + categories.first() + ") and is skipped.");
            return "";
        }
        if (categories.size() == 2) {
            // One indicator is enough: the second category is its complement.
            String first = categories.first();
            System.out.println("Only two categories occur in the categorical feature " + featureName + ". Category " + first + " is represented by 1, the other by zero.");
            this._featVersusValues.put(featureName + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + first, encodeCategory(split, first));
            return "";
        }
        for (String category : categories) {
            this._featVersusValues.put(featureName + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + category, encodeCategory(split, category));
        }
        return "";
    }

    /**
     * Tells whether a field is a missing-value token: "NA", "NaN", or the
     * log4j {@code Configurator.NULL} constant compared case-insensitively.
     */
    private static boolean isMissing(String value) {
        return value.equals("NA") || value.equals("NaN") || value.equalsIgnoreCase(Configurator.NULL);
    }

    /**
     * Parses the value fields (index 1 onwards) of a numeric feature line;
     * missing-value tokens become NaN.
     */
    private static DoubleMatrix1D parseNumericValues(String[] split) {
        DoubleMatrix1D values = new DenseDoubleMatrix1D(split.length - 1);
        for (int i = 1; i < split.length; i++) {
            if (isMissing(split[i])) {
                values.set(i - 1, Double.NaN);
                continue;
            }
            try {
                values.set(i - 1, Double.parseDouble(split[i]));
            } catch (NumberFormatException e) {
                // Same exception type as before, but say where the bad value is.
                NumberFormatException detailed = new NumberFormatException("Non-numeric value '" + split[i]
                        + "' in field " + i + " of feature " + split[0] + ".");
                detailed.initCause(e);
                throw detailed;
            }
        }
        return values;
    }

    /**
     * Collects the distinct categories among the value fields (index 1
     * onwards), leaving out missing-value tokens and "not determined".
     *
     * <p>The same missing-value test as in the encoding is used here, so a
     * case variant of the null token cannot turn into a category of its own.
     */
    private static TreeSet<String> collectCategories(String[] split) {
        TreeSet<String> categories = new TreeSet<String>();
        for (int i = 1; i < split.length; i++) {
            if (!isMissing(split[i]) && !NOT_DETERMINED.equals(split[i])) {
                categories.add(split[i]);
            }
        }
        return categories;
    }

    /**
     * Builds the indicator vector of one category: NaN for missing values,
     * 1 where the field equals the category, 0 otherwise.
     *
     * <p>NOTE(review): "not determined" is excluded from the categories but is
     * encoded as 0 here rather than NaN - confirm that this is intended.
     */
    private static DoubleMatrix1D encodeCategory(String[] split, String category) {
        DoubleMatrix1D indicator = new DenseDoubleMatrix1D(split.length - 1);
        for (int i = 1; i < split.length; i++) {
            double value;
            if (isMissing(split[i])) {
                value = Double.NaN;
            } else if (split[i].equals(category)) {
                value = 1.0d;
            } else {
                value = 0.0d;
            }
            indicator.set(i - 1, value);
        }
        return indicator;
    }

    private void setFeatureMatrix(Matrix matrix) {
        this._featureMatrix = matrix;
    }

    /**
     * @return the feature matrix; empty until {@link #parse()} has been run
     */
    public Matrix getFeatureMatrix() {
        return this._featureMatrix;
    }

    /**
     * @param list line-counter values of the lines that hold categorical
     *        features; {@code null} means that no feature is categorical
     */
    public void setIndicesOfCategoricalFeats(List<Integer> list) {
        this._indicesOfCategoricalFeats = list;
    }

    /**
     * @return the line-counter values of the categorical features, possibly
     *         {@code null} if none were set
     */
    public List<Integer> getIndicesOfCategoricalFeats() {
        return this._indicesOfCategoricalFeats;
    }

    /**
     * Example run: converts a fixed input file, with lines 6 and 7 declared
     * categorical, and writes the result to "ravel_features_split.txt".
     */
    public static void main(String[] strArr) {
        List<Integer> categoricalIndices = new ArrayList<Integer>();
        categoricalIndices.add(6);
        categoricalIndices.add(7);
        CategoricFeaturesToBinaryConverter converter = new CategoricFeaturesToBinaryConverter();
        converter.setInputLocation("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Collaborations/Florence_DAlcheBuc/Data/Gajer_Transl-Med_2012/Parsed/ravel_features.txt");
        converter.setIndicesOfCategoricalFeats(categoricalIndices);
        converter.parse();
        converter.getFeatureMatrix().writeMatrix("ravel_features_split.txt", "\t", true, true);
    }
}
