package be.ac.vub.bsb.parsers.knight;

import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser;
import be.ac.vub.bsb.cooccurrence.conversion.AbundanceMatrixNormalizer;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.core.CooccurrenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import be.ac.vub.bsb.cooccurrence.measures.StatsProvider;
import be.ac.vub.bsb.cooccurrence.util.FeatureMatrixLoader;
import be.ac.vub.bsb.cooccurrence.util.HigherLevelTaxaAssigner;
import be.ac.vub.bsb.cooccurrence.util.HigherLevelTaxaMetadataComplementer;
import be.ac.vub.bsb.cooccurrence.util.ToolBox;
import be.ac.vub.bsb.parsers.util.BiomTableParser;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;

/* loaded from: input_file:be/ac/vub/bsb/parsers/knight/QiimeDBMatrixPreprocessor.class */
/**
 * Loads an OTU abundance table (optionally converting it from a biom table first), attaches row
 * metadata, filters it by row occurrence and column sum, and optionally normalizes it and assigns
 * higher-level taxa.
 *
 * <p>Typical use: configure the instance through its setters, call {@link #preprocess()}, then read
 * the result with {@link #getFilteredOtuMatrix()} and the processing report with
 * {@link #toString()}.
 *
 * <p>Instances hold mutable state and perform no synchronization.
 */
public class QiimeDBMatrixPreprocessor {
    /** Sample-name prefix constant; it is not referenced inside this class. */
    public static String STANDARD_QIIME_SAMPLE_PREFIX = "SAMPLE-";

    /**
     * Names of the row metadata attributes. The string is split with
     * {@code CooccurrenceAnalyser.ITEM_SEPARATOR} (presumably "/" - confirm against that constant).
     */
    public static String METADATA_ATTRIBUTES = "kingdom/phylum/class/order/family/genus/species/lineage/taxon";

    private Matrix _filteredOtuMatrix;
    private String _otuTableLocation = "";
    private String _metadataLocation = "";
    private boolean _biomFormat = false;
    private int _colMinSum = 100;
    private boolean _computeColMinSumForEnvSpecMatrix = false;
    private boolean _noRowMinOccFilter = false;
    private boolean _noNorm = false;
    private boolean _noNormAndHigherTaxa = false;
    private int _minEnvSpecRowOcc = 0;
    private Set<String> _samplesToRemove = new HashSet<String>();
    private String _metadataAttribs = "";
    // The logger name is the string form of the package object; kept as-is so that existing
    // logging configuration keeps matching.
    private final Logger _logger = Logger.getLogger(getClass().getPackage().toString());
    private final Matrix _otuMatrix = new Matrix();
    private String _report = "";

    /**
     * Converts the biom table at the configured OTU table location into a matrix file in the temp
     * directory and redirects the OTU table location to that file.
     *
     * <p>Missing read/write/execute permissions on the temp directory are logged as errors, but
     * parsing is attempted regardless. If the parser reports that the table contains taxonomy, the
     * metadata attributes are set to {@link #METADATA_ATTRIBUTES}.
     */
    private void loadBiomTable() {
        String tempDir = ToolBox.getTempDir();
        String tempMatrixLocation = tempDir + File.separator + DiverseTools.getTempFileName() + "_matrix.txt";
        File tempFolder = new File(tempDir);
        if (!tempFolder.canWrite()) {
            this._logger.error("Not allowed to write to temp folder " + tempDir + "!");
        }
        if (!tempFolder.canRead()) {
            this._logger.error("Not allowed to read from temp folder " + tempDir + "!");
        }
        if (!tempFolder.canExecute()) {
            this._logger.error("Not allowed to execute in temp folder " + tempDir + "!");
        }
        BiomTableParser biomTableParser = new BiomTableParser();
        biomTableParser.setInputLocation(getOtuTableLocation());
        biomTableParser.setOutputLocation(tempMatrixLocation);
        biomTableParser.setLogger(this._logger);
        biomTableParser.setMetadataFileLocation(this._metadataLocation);
        biomTableParser.parse();
        this._logger.info("Parsed biom table " + getOtuTableLocation() + " to temp location " + tempMatrixLocation);
        if (biomTableParser.isContainsTaxonomy()) {
            this._logger.info("Parsed lineages from biom table " + getOtuTableLocation() + " to location " + this._metadataLocation);
            setMetadataAttribs(METADATA_ATTRIBUTES);
        }
        // From here on the converted matrix file is the table that gets read.
        setOtuTableLocation(tempMatrixLocation);
    }

    /**
     * Runs the preprocessing pipeline on the configured OTU table.
     *
     * <ol>
     *   <li>If the biom flag is set, the table is first converted to a matrix file.</li>
     *   <li>The matrix is read; if a metadata location is set, row metadata is read with every
     *       attribute typed as {@code java.lang.String}.</li>
     *   <li>The matrix is filtered with the methods {@code row_minocc/col_minsum}. The row
     *       threshold is the configured minimum row occurrence, or a quarter of the column count
     *       when that is not positive, or 1 when the row filter is disabled. The column threshold
     *       is the configured column minimum sum, or the 0.25 quantile of the sum vector when
     *       automatic computation is enabled.</li>
     *   <li>Unless normalization and higher-level taxa are both disabled, the filtered matrix is
     *       optionally normalized and, if a metadata location is set, extended with higher-level
     *       taxa.</li>
     *   <li>Samples listed for removal are dropped.</li>
     * </ol>
     *
     * <p>The result is available through {@link #getFilteredOtuMatrix()}; the textual reports of
     * the individual steps through {@link #toString()}. Progress messages are printed to standard
     * output.
     */
    public void preprocess() {
        if (isBiomFormat()) {
            loadBiomTable();
        }
        this._otuMatrix.setSpecialChars(ToolBox.getCoNetSpecialCharsReplacementTable());
        this._otuMatrix.readMatrix(getOtuTableLocation(), false);
        if (hasMetadataLocation()) {
            List<String> attributes = DiverseTools.stringToList(getMetadataAttribs(), CooccurrenceAnalyser.ITEM_SEPARATOR);
            this._otuMatrix.readRowMetaData(getMetadataLocation(), attributes, stringTypesFor(attributes));
            System.out.println("Metadata assigned.");
            // Diagnostic output only; skipped for a matrix without rows, where row 0 does not exist.
            if (this._otuMatrix.getMatrix().rows() > 0) {
                System.out.println("phylum of " + this._otuMatrix.getRowName(0) + "=" + this._otuMatrix.getRowMetaAnnotation(0, "phylum"));
            }
        }
        this._report = "";
        setFilteredOtuMatrix(this._otuMatrix.copy());

        int minRowOccurrence = getMinEnvSpecRowOcc();
        if (minRowOccurrence <= 0) {
            // No explicit threshold configured: fall back to a quarter of the number of columns.
            minRowOccurrence = getOtuMatrix().getMatrix().columns() / 4;
        }
        System.out.println("Minimal row occurrence: " + minRowOccurrence);

        double minColumnSum = getColMinSum().doubleValue();
        if (isComputeColMinSum()) {
            DoubleMatrix1D columnSums = MatrixToolsProvider.getSumVector(getFilteredOtuMatrix(), true, true);
            System.out.println("Column sums: " + columnSums);
            minColumnSum = StatsProvider.getQuantile(columnSums, 0.25d, true);
            System.out.println("Column minimum sum after discarding 25% quartile: " + minColumnSum);
        }

        MatrixFilterer matrixFilterer = new MatrixFilterer(getFilteredOtuMatrix());
        matrixFilterer.setOmitFeaturesFromColumnMinSum(true);
        matrixFilterer.setFilterMethods("row_minocc/col_minsum");
        String rowThreshold = isNoRowMinOccFilter() ? "1" : Integer.toString(minRowOccurrence);
        matrixFilterer.setFilterNumbers(rowThreshold + "/" + minColumnSum);
        matrixFilterer.filter();
        this._report = matrixFilterer.toString();
        // Always adopt the filter result. Previously it was dropped when normalization was
        // disabled while higher-level taxa assignment stayed enabled, so the later steps ran on
        // the unfiltered copy.
        setFilteredOtuMatrix(matrixFilterer.getFilteredMatrix());

        if (!isNoNormAndHigherTaxa()) {
            if (!isNoNorm()) {
                AbundanceMatrixNormalizer normalizer = new AbundanceMatrixNormalizer(getFilteredOtuMatrix());
                normalizer.setExcludeFeaturesFromNormalization(true);
                normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_SUMDIVISION);
                normalizer.normalize();
                this._report = this._report + normalizer.toString();
                setFilteredOtuMatrix(normalizer.getNormalizedAbundanceMatrix());
            }
            if (hasMetadataLocation()) {
                HigherLevelTaxaAssigner taxaAssigner = new HigherLevelTaxaAssigner();
                taxaAssigner.setInputMatrix(getFilteredOtuMatrix());
                taxaAssigner.setLineageAttrib(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
                taxaAssigner.assignHigherLevelTaxa();
                this._report = this._report + taxaAssigner.toString();
                setFilteredOtuMatrix(taxaAssigner.getExtendedMatrix());
            }
        }

        Set<String> samplesToRemove = getSamplesToRemove();
        if (samplesToRemove == null || samplesToRemove.isEmpty()) {
            return;
        }
        setFilteredOtuMatrix(MatrixToolsProvider.getSubMatrixWithoutColNames(getFilteredOtuMatrix(), samplesToRemove));
        System.out.println("After sample filtering, matrix has " + getFilteredOtuMatrix().getMatrix().columns() + " columns.");
    }

    /**
     * Loads the row metadata file, adds entries for higher-level taxa of the filtered matrix and
     * writes the result back to the same metadata file.
     *
     * <p>Requires the filtered matrix and the metadata location to be set beforehand. All
     * attributes of {@link #METADATA_ATTRIBUTES} are treated as {@code java.lang.String}.
     */
    public void appendHigherLevelTaxonMetadata() {
        HigherLevelTaxaMetadataComplementer complementer = new HigherLevelTaxaMetadataComplementer();
        complementer.setMatrix(getFilteredOtuMatrix());
        complementer.setAssignHigherLevelTaxa(false);
        complementer.setOnRows(true);
        complementer.setMetadataFileRows(getMetadataLocation());
        complementer.setFillPredefTaxonLevels(true);
        List<String> attributes = DiverseTools.stringToList(METADATA_ATTRIBUTES, CooccurrenceAnalyser.ITEM_SEPARATOR);
        complementer.setAttributes(attributes);
        complementer.setTreatSpecialChars(true);
        complementer.setAttribTypes(stringTypesFor(attributes));
        complementer.loadMetaDataFromFile();
        complementer.addHigherLevelTaxaEntriesInMetadata();
        // The input metadata file is overwritten with the complemented metadata.
        complementer.writeMetaDataToFile(getMetadataLocation());
    }

    /**
     * Loads features for the samples of the filtered matrix and writes them to a tab-delimited
     * file.
     *
     * @param featureLocation location of the feature input handed to the feature loader
     * @param outputLocation file the resulting matrix is written to
     * @param includeAbundances if {@code true}, the loader's matrix with features is written
     *     instead of the feature matrix alone
     */
    public void processFeatures(String featureLocation, String outputLocation, boolean includeAbundances) {
        FeatureMatrixLoader featureLoader = new FeatureMatrixLoader(getFilteredOtuMatrix(), featureLocation, false);
        featureLoader.setMatchFeatures(true);
        featureLoader.setTransposeFeatures(true);
        featureLoader.setStandardQIIMETable(isBiomFormat());
        featureLoader.loadFeatures();
        Matrix output = featureLoader.getFeatureMatrix();
        if (includeAbundances) {
            output = featureLoader.getMatrixWithFeatures();
        }
        output.writeMatrix(outputLocation, "\t", true, true);
    }

    /**
     * Builds a list holding the type name {@code java.lang.String} once per given attribute.
     *
     * @param attributes attribute names; only the number of entries matters
     * @return a new list with as many entries as {@code attributes}
     */
    private static ArrayList<String> stringTypesFor(List<String> attributes) {
        ArrayList<String> types = new ArrayList<String>(attributes.size());
        for (int i = 0; i < attributes.size(); i++) {
            types.add("java.lang.String");
        }
        return types;
    }

    /**
     * Tells whether a metadata location has been configured.
     *
     * @return {@code true} if the metadata location is neither {@code null} nor empty
     */
    private boolean hasMetadataLocation() {
        String location = getMetadataLocation();
        return location != null && !location.isEmpty();
    }

    /**
     * Checks a vector for values below zero.
     *
     * @param vector the vector to inspect
     * @return {@code true} if at least one entry is strictly negative
     */
    private static boolean hasNegativeValues(DoubleMatrix1D vector) {
        for (int i = 0; i < vector.size(); i++) {
            if (vector.get(i) < 0.0d) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a copy of the vector with the absolute value of its minimum added to every entry.
     *
     * @param vector the vector to shift; it is not modified
     * @return a new dense vector of the same size holding the shifted values
     */
    private static DoubleMatrix1D removeNegValuesByShift(DoubleMatrix1D vector) {
        DenseDoubleMatrix1D shifted = new DenseDoubleMatrix1D(vector.size());
        double shift = Math.abs(StatsProvider.getMin(vector, true));
        for (int i = 0; i < vector.size(); i++) {
            shifted.set(i, vector.get(i) + shift);
        }
        return shifted;
    }

    /**
     * Shifts every column that contains negative values by the absolute value of its minimum.
     * Columns without negative values are left untouched. The values before and after the shift
     * are printed to standard output.
     *
     * @param matrix the matrix to adjust; it is modified in place
     * @return the same matrix instance that was passed in
     */
    public static Matrix makeEnvFeaturesPositive(Matrix matrix) {
        for (int col = 0; col < matrix.getMatrix().columns(); col++) {
            DoubleMatrix1D column = matrix.getMatrix().viewColumn(col);
            if (hasNegativeValues(column)) {
                System.out.println("Shifting environmental vector with negative values " + matrix.getColName(col) + " into positive range.");
                System.out.println("before: " + column.toString());
                matrix.setColumn(col, removeNegValuesByShift(column).toArray());
                System.out.println("after: " + matrix.getMatrix().viewColumn(col));
            }
        }
        return matrix;
    }

    /** @return the location of the OTU table; after a biom conversion, the converted temp file */
    public String getOtuTableLocation() {
        return this._otuTableLocation;
    }

    /** @param otuTableLocation the location of the OTU table to read */
    public void setOtuTableLocation(String otuTableLocation) {
        this._otuTableLocation = otuTableLocation;
    }

    /** @return whether the OTU table is converted from a biom table before reading */
    public boolean isBiomFormat() {
        return this._biomFormat;
    }

    /** @param biomFormat {@code true} to convert the OTU table from a biom table before reading */
    public void setBiomFormat(boolean biomFormat) {
        this._biomFormat = biomFormat;
    }

    /**
     * @param computeColMinSum {@code true} to derive the column minimum sum from the 0.25 quantile
     *     of the sum vector instead of using the configured value
     */
    public void setComputeColMinSum(boolean computeColMinSum) {
        this._computeColMinSumForEnvSpecMatrix = computeColMinSum;
    }

    /** @return whether the column minimum sum is derived from the data */
    public boolean isComputeColMinSum() {
        return this._computeColMinSumForEnvSpecMatrix;
    }

    /** @return the matrix as read from the OTU table, before filtering */
    public Matrix getOtuMatrix() {
        return this._otuMatrix;
    }

    /** @return the preprocessed matrix, or {@code null} if none has been computed or set yet */
    public Matrix getFilteredOtuMatrix() {
        return this._filteredOtuMatrix;
    }

    /** @param filteredOtuMatrix the matrix to use as preprocessed matrix */
    public void setFilteredOtuMatrix(Matrix filteredOtuMatrix) {
        this._filteredOtuMatrix = filteredOtuMatrix;
    }

    /** @param metadataLocation the location of the row metadata file; empty for none */
    public void setMetadataLocation(String metadataLocation) {
        this._metadataLocation = metadataLocation;
    }

    /** @return the location of the row metadata file; empty if none is configured */
    public String getMetadataLocation() {
        return this._metadataLocation;
    }

    /** @param metadataAttribs the separator-delimited names of the row metadata attributes */
    public void setMetadataAttribs(String metadataAttribs) {
        this._metadataAttribs = metadataAttribs;
    }

    /** @return the separator-delimited names of the row metadata attributes */
    public String getMetadataAttribs() {
        return this._metadataAttribs;
    }

    /**
     * @param noNormAndHigherTaxa {@code true} to skip both normalization and higher-level taxa
     *     assignment
     */
    public void setNoNormAndHigherTaxa(boolean noNormAndHigherTaxa) {
        this._noNormAndHigherTaxa = noNormAndHigherTaxa;
    }

    /** @return whether both normalization and higher-level taxa assignment are skipped */
    public boolean isNoNormAndHigherTaxa() {
        return this._noNormAndHigherTaxa;
    }

    /** @return whether normalization is skipped */
    public boolean isNoNorm() {
        return this._noNorm;
    }

    /** @param noNorm {@code true} to skip normalization */
    public void setNoNorm(boolean noNorm) {
        this._noNorm = noNorm;
    }

    /** @return whether the row filter threshold is fixed to 1 */
    public boolean isNoRowMinOccFilter() {
        return this._noRowMinOccFilter;
    }

    /** @param noRowMinOccFilter {@code true} to use a row filter threshold of 1 */
    public void setNoRowMinOccFilter(boolean noRowMinOccFilter) {
        this._noRowMinOccFilter = noRowMinOccFilter;
    }

    /**
     * @param minEnvSpecRowOcc the minimum row occurrence; a value of zero or less selects a quarter
     *     of the column count
     */
    public void setMinEnvSpecRowOcc(int minEnvSpecRowOcc) {
        this._minEnvSpecRowOcc = minEnvSpecRowOcc;
    }

    /** @return the configured minimum row occurrence */
    public int getMinEnvSpecRowOcc() {
        return this._minEnvSpecRowOcc;
    }

    /** @param colMinSum the minimum column sum used by the column filter */
    public void setColMinSum(int colMinSum) {
        this._colMinSum = colMinSum;
    }

    /** @return the configured minimum column sum */
    public Integer getColMinSum() {
        return Integer.valueOf(this._colMinSum);
    }

    /** @return the names of the samples removed at the end of preprocessing */
    public Set<String> getSamplesToRemove() {
        return this._samplesToRemove;
    }

    /**
     * @param samplesToRemove the names of the samples to remove; {@code null} is treated like an
     *     empty set by {@link #preprocess()}
     */
    public void setSamplesToRemove(Set<String> samplesToRemove) {
        this._samplesToRemove = samplesToRemove;
    }

    /** @return the concatenated reports of the preprocessing steps run so far */
    @Override
    public String toString() {
        return this._report;
    }

    /**
     * Developer entry point with hard-coded local paths. With the switches as set below it
     * preprocesses one table, writes the resulting matrix and exports the report to
     * {@code laubersoils_preproc.log}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Mode switches; the values reproduce the only path that was reachable before.
        final boolean writeNoNormOutput = true;
        final boolean processFeatures = false;
        final boolean appendMetadata = false;

        String inputFolder = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input_EMP/";
        String metadataLocation = inputFolder + File.separator + "laubersoils_metadata.txt";
        String matrixLocation = inputFolder + File.separator + "laubersoils.txt";
        if (writeNoNormOutput) {
            matrixLocation = inputFolder + File.separator + "laubersoils-nonorm.txt";
        }
        String featureOutputLocation = inputFolder + File.separator + "laubersoils_features.txt";

        QiimeDBMatrixPreprocessor preprocessor = new QiimeDBMatrixPreprocessor();
        preprocessor.setOtuTableLocation("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/InputTests/laubersoils-customfilter/laubersoils-nonorm-firstround.txt");
        preprocessor.setBiomFormat(true);
        if (!preprocessor.isBiomFormat()) {
            preprocessor.setMetadataAttribs(METADATA_ATTRIBUTES);
        }
        preprocessor.setMetadataLocation(metadataLocation);
        preprocessor.setNoNorm(true);
        preprocessor.setNoNormAndHigherTaxa(true);
        preprocessor.setMinEnvSpecRowOcc(20);
        preprocessor.setColMinSum(300);

        if (!processFeatures && !appendMetadata) {
            preprocessor.preprocess();
            preprocessor.getFilteredOtuMatrix().writeMatrix(matrixLocation, "\t", true, true);
        } else {
            // Reuse a previously written matrix instead of preprocessing again.
            Matrix matrix = new Matrix();
            matrix.setSpecialChars(ToolBox.getCoNetSpecialCharsReplacementTable());
            matrix.readMatrix(matrixLocation, false);
            preprocessor.setFilteredOtuMatrix(matrix);
            if (processFeatures) {
                preprocessor.processFeatures("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input_EMP/shenzhen/knight_shenzhen_filtered_mapping.txt", featureOutputLocation, false);
            } else {
                preprocessor.appendHigherLevelTaxonMetadata();
            }
        }
        IOTools.exportStringToFile(preprocessor.toString(), "laubersoils" + "_preproc.log");
    }
}
