package be.ac.vub.bsb.parsers.knight;

import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.io.OneColumnSetParser;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.ulb.bigre.pathwayinference.core.validation.EvaluationMetaLauncher;
import be.ac.vub.bsb.cooccurrence.conversion.AbundanceMatrixNormalizer;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.core.CooccurrenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import be.ac.vub.bsb.cooccurrence.measures.StatsProvider;
import be.ac.vub.bsb.cooccurrence.util.ArrayTools;
import be.ac.vub.bsb.cooccurrence.util.FeatureMatrixLoader;
import be.ac.vub.bsb.cooccurrence.util.HigherLevelTaxaAssigner;
import be.ac.vub.bsb.parsers.util.ParserTools;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.AbstractFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:be/ac/vub/bsb/parsers/knight/QiimeDBEnvGroupMatrixBuilder.class */
/**
 * Extracts an environment-group-specific sub-matrix from an OTU matrix.
 *
 * <p>Columns (samples) are selected either through the rows of a feature matrix whose
 * names start with {@link #BIOME_PREFIX} / {@link #UBERON_PREFIX}, or, when an experiment
 * string matcher is set, through a regular expression applied to the OTU matrix column
 * names. The selected sub-matrix can optionally be filtered, normalized and extended with
 * higher-level taxa; a textual report of those steps is returned by {@link #toString()}.
 */
public class QiimeDBEnvGroupMatrixBuilder {
    public static String BIOME_PREFIX = "ENV_BIOME_ENVO";
    public static String UBERON_PREFIX = "BODY_SITE_UBERON";
    public static String FREE_LIVING = "free-living";
    public static String HOST_ASSOCIATED = BIOME_PREFIX + ":organism-associated habitat";
    public static String MOIST_FOREST = BIOME_PREFIX + ":Tropical and subtropical moist broadleaf forest biome";
    public static String CONIFEROUS_FOREST = BIOME_PREFIX + ":Tropical and subtropical coniferous forest biome";
    public static String TUNDRA = BIOME_PREFIX + ":Tundra communities and barren Arctic deserts";
    public static String GRASSLANDS = BIOME_PREFIX + ":Temperate grasslands, savannas, and shrubland biome";
    public static String SKIN = "skin";
    public static String ORAL_CAVITY = "oral-cavity";
    public static String INTESTINE = "intestine";
    public static String VAGINA = "vagina";
    public static String HAIR = "hair";
    public static String NASAL_CAVITY = "nasal-cavity";
    public static String EXTERNAL_AUDITORY_CHANNEL = "ear-canal";
    public static String URINE = "urine";
    public static String[] SUPPORTED_BIOMES = {GRASSLANDS, TUNDRA, MOIST_FOREST, CONIFEROUS_FOREST, HOST_ASSOCIATED, FREE_LIVING, SKIN, ORAL_CAVITY, INTESTINE, VAGINA, HAIR, NASAL_CAVITY, EXTERNAL_AUDITORY_CHANNEL, URINE};
    private static String TERRESTRIAL = BIOME_PREFIX + ":terrestrial habitat";

    /** Maximum number of column names printed by the debug preview. */
    private static final int COLUMN_PREVIEW_LIMIT = 50;

    private Matrix _featureMatrix;
    private Matrix _otuMatrix;
    private Matrix _envSpecSubMatrix;
    private String _metadataLocation = "";
    /** Metadata attribute names, separated by "/". */
    private String _metadataAttribs = "";
    /** Threshold handed to the "col_minsum" filter. */
    private int _colMinSum = 100;
    private boolean _computeColMinSumForEnvSpecMatrix = false;
    private boolean _noRowMinOccFilter = false;
    private boolean _noNorm = false;
    private boolean _noNormAndHigherTaxa = false;
    /** Threshold for the "row_minocc" filter; values <= 0 are replaced by a quarter of the sub-matrix column count. */
    private int _minEnvSpecRowOcc = 0;
    /** Regular expression on (shortened) OTU column names; empty means "select columns via the feature matrix". */
    private String _experimentStringMatcher = "";
    private boolean _otuMatrixPreprocessed = false;
    // Kept as a raw Set because the public accessor signatures below are raw.
    private Set _filteredColumnNames = new HashSet();
    private String _envSpecMatrixPreprocReport = "";

    /**
     * Returns a fresh, mutable map from feature-matrix biome row names to the biome group
     * they are merged into.
     *
     * @return a new map; callers may modify it freely
     */
    public static Map<String, String> getBiomeMerges() {
        Map<String, String> merges = new HashMap<String, String>();
        // Biomes that form their own group.
        merges.put(GRASSLANDS, GRASSLANDS);
        merges.put(MOIST_FOREST, MOIST_FOREST);
        merges.put(CONIFEROUS_FOREST, CONIFEROUS_FOREST);
        merges.put(TUNDRA, TUNDRA);
        merges.put(HOST_ASSOCIATED, HOST_ASSOCIATED);
        merges.put(TERRESTRIAL, TERRESTRIAL);
        // Biomes that are folded into the host-associated group.
        merges.put(BIOME_PREFIX + ":Animal-associated habitat", HOST_ASSOCIATED);
        merges.put(BIOME_PREFIX + ":animal-associated habitat", HOST_ASSOCIATED);
        merges.put(BIOME_PREFIX + ":human-associated habitat", HOST_ASSOCIATED);
        merges.put(BIOME_PREFIX + ":feces", HOST_ASSOCIATED);
        merges.put(BIOME_PREFIX + ":gut", HOST_ASSOCIATED);
        merges.put(BIOME_PREFIX + ":surface", HOST_ASSOCIATED);
        return merges;
    }

    /**
     * Returns a fresh, mutable map from body-site row names ({@link #UBERON_PREFIX} + ":" + site)
     * to the body-site group they are merged into.
     *
     * @return a new map; callers may modify it freely
     */
    public static Map<String, String> getHostAssocBiomeMerges() {
        Map<String, String> merges = new HashMap<String, String>();
        mapBodySites(merges, INTESTINE,
                "cecum", "colon", "stomach", "small intestine", "large intestine", "rectum", "feces");
        mapBodySites(merges, SKIN,
                "skin of arm", "skin of digit of hand", "skin of finger", "skin of forearm", "skin of head",
                "zone of skin of head", "zone of skin of hand", "zone of skin of knee", "zone of skin of outer ear",
                "zone of skin abdomen", "zone of skin of foot", "zone of skin of wrist", "nose", "glans penis",
                "skin", "fossa");
        mapBodySites(merges, ORAL_CAVITY,
                "mouth", "mucosa of mouth", "tongue", "buccal mucosa", "gingiva", "gingival epithelium",
                "hard palate", "mucosa of tongue", "oral cavity", "oropharynx", "palatine tonsil");
        mapBodySites(merges, HAIR, "hair");
        mapBodySites(merges, URINE, "urine");
        mapBodySites(merges, VAGINA, "labia minora", "mucosa of vagina", "vagina", "vaginal fornix");
        mapBodySites(merges, EXTERNAL_AUDITORY_CHANNEL, "ear canal");
        mapBodySites(merges, NASAL_CAVITY, "nasal cavity", "nostril", "nostrils", "nares");
        return merges;
    }

    /** Registers every given body site (prefixed with {@link #UBERON_PREFIX} and ":") under the given group. */
    private static void mapBodySites(Map<String, String> merges, String group, String... sites) {
        for (String site : sites) {
            merges.put(UBERON_PREFIX + ":" + site, group);
        }
    }

    /**
     * Scans every column of the feature matrix and reports suspicious biome combinations on
     * standard error: columns without a name, columns that are positive for the terrestrial
     * biome as well as a host-related biome, and columns that are positive for both the
     * host-associated and the moist broadleaf forest biome. Only rows whose name starts with
     * {@link #BIOME_PREFIX} and whose value is a positive number are taken into account.
     */
    public void checkCategoryOverlap() {
        final int columnCount = this._featureMatrix.getMatrix().columns();
        final int rowCount = this._featureMatrix.getMatrix().rows();
        for (int col = 0; col < columnCount; col++) {
            if (this._featureMatrix.getColName(col).isEmpty()) {
                System.err.println("Empty name for column index " + col + "!");
            }
            boolean moistForest = false;
            boolean hostAssociated = false;
            boolean feces = false;
            boolean gut = false;
            boolean human = false;
            boolean animalAssociated = false;
            boolean terrestrial = false;
            for (int row = 0; row < rowCount; row++) {
                String rowName = this._featureMatrix.getRowName(row);
                double value = this._featureMatrix.getMatrix().get(row, col);
                if (Double.isNaN(value) || !(value > 0.0d) || !rowName.startsWith(BIOME_PREFIX)) {
                    continue;
                }
                if (rowName.contains(MOIST_FOREST)) {
                    moistForest = true;
                }
                if (rowName.contains(HOST_ASSOCIATED)) {
                    hostAssociated = true;
                }
                if (rowName.contains("feces")) {
                    feces = true;
                }
                if (rowName.contains("gut")) {
                    gut = true;
                }
                if (rowName.contains(EvaluationMetaLauncher.HUMAN)) {
                    human = true;
                }
                // Substring without the first letter so both "Animal-..." and "animal-..." match.
                if (rowName.contains("nimal-assoc")) {
                    animalAssociated = true;
                }
                if (rowName.contains(TERRESTRIAL)) {
                    terrestrial = true;
                }
            }
            boolean hostRelated = animalAssociated || human || gut || feces || hostAssociated;
            if (terrestrial && hostRelated) {
                System.err.println("Terrestrial biome includes host-associate biome");
            }
            if (hostAssociated && moistForest) {
                System.err.println("Host-association is co-classified with moist broadleaf forest (index = " + col + ")!");
            }
        }
    }

    /**
     * Reads row metadata into the OTU matrix. Does nothing when no metadata location is set.
     * Every attribute listed in the "/"-separated attribute string is declared as
     * "java.lang.String".
     */
    private void loadMetadata() {
        if (getMetadataLocation().isEmpty()) {
            return;
        }
        ParserTools.checkFileLocation(getMetadataLocation());
        List<String> attributes = DiverseTools.stringToList(getMetadataAttribs(), "/");
        List<String> attributeTypes = new ArrayList<String>(attributes.size());
        for (int i = 0; i < attributes.size(); i++) {
            attributeTypes.add("java.lang.String");
        }
        getOtuMatrix().readRowMetaData(getMetadataLocation(), attributes, attributeTypes);
    }

    /**
     * Preprocesses the complete OTU matrix: loads metadata, applies the "col_minsum" filter,
     * records the names of the columns removed by that filter, normalizes by column-sum
     * division and, when metadata is available, assigns higher-level taxa. The OTU matrix is
     * replaced by the result and flagged as preprocessed.
     */
    public void preprocessOTUMatrix() {
        loadMetadata();
        Set<String> columnsBefore = ArrayTools.arrayToSet((Object[]) getOtuMatrix().getColNames());

        MatrixFilterer filterer = new MatrixFilterer(getOtuMatrix());
        filterer.setOmitFeaturesFromColumnMinSum(false);
        filterer.setFilterMethods("col_minsum");
        filterer.setFilterNumbers(getColMinSum().toString());
        filterer.filter();

        Set<String> columnsAfter = ArrayTools.arrayToSet((Object[]) filterer.getFilteredMatrix().getColNames());
        // The accessor is raw for interface compatibility; only column names (Strings) are stored here.
        @SuppressWarnings("unchecked")
        Set<String> discarded = getFilteredColumnNames();
        for (String columnName : columnsBefore) {
            if (!columnsAfter.contains(columnName)) {
                discarded.add(columnName);
            }
        }

        AbundanceMatrixNormalizer normalizer = new AbundanceMatrixNormalizer(filterer.getFilteredMatrix());
        normalizer.setExcludeFeaturesFromNormalization(false);
        normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_SUMDIVISION);
        normalizer.normalize();

        if (getMetadataLocation().isEmpty()) {
            setOtuMatrix(normalizer.getNormalizedAbundanceMatrix());
        } else {
            HigherLevelTaxaAssigner taxaAssigner = new HigherLevelTaxaAssigner();
            taxaAssigner.setInputMatrix(normalizer.getNormalizedAbundanceMatrix());
            taxaAssigner.setLineageAttrib(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
            taxaAssigner.assignHigherLevelTaxa();
            setOtuMatrix(taxaAssigner.getExtendedMatrix());
        }
        setOtuMatrixPreprocessed(true);
    }

    /**
     * Builds the sub-matrix of the OTU matrix for one environment group and stores it so that
     * {@link #getEnvSpecSubMatrix()} returns it.
     *
     * @param str name of the environment group, e.g. one of {@link #SUPPORTED_BIOMES}; it is
     *            also used in progress messages
     * @param z   if true, the feature matrix is loaded into the OTU matrix before the
     *            sub-matrix is cut out (skipped with a message when the column counts differ)
     * @param z2  if true, the sub-matrix is filtered and, depending on the builder's flags,
     *            normalized and extended with higher-level taxa
     */
    public void buildEnvBiomeSpecificSubMatrix(String str, boolean z, boolean z2) {
        final String envGroup = str;
        final boolean loadFeatures = z;
        final boolean preprocess = z2;

        discardFilteredColumnsFromFeatureMatrix();
        Set<String> featureNames = ArrayTools.arrayToSet((Object[]) getFeatureMatrix().getRowNames());

        Map<String, String> merges = buildMergesFor(envGroup);
        System.out.println("updated biome merging: " + merges.toString());
        System.out.println("feature matrix row names: " + ArrayTools.arrayToString(this._featureMatrix.getRowNames(), ","));

        Set<Integer> featureRows = collectFeatureRowIndices(merges, envGroup);
        Set<Integer> sampleColumns = selectSampleColumns(featureRows);

        if (loadFeatures) {
            if (getOtuMatrix().getMatrix().columns() != getFeatureMatrix().getMatrix().columns()) {
                System.err.println("The column number of OTU and feature matrix is different, cannot load features!");
            } else {
                FeatureMatrixLoader featureLoader = new FeatureMatrixLoader(getOtuMatrix(), getFeatureMatrix());
                featureLoader.loadFeatures();
                setOtuMatrix(featureLoader.getMatrixWithFeatures());
            }
        }

        this._envSpecSubMatrix = MatrixToolsProvider.getSubMatrixWithColIndices(getOtuMatrix(), sampleColumns);
        System.out.println("env-specific samples: ");
        for (Integer column : sampleColumns) {
            System.out.print(getOtuMatrix().getColName(column.intValue()) + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
        }
        System.out.println();
        System.out.println("Column number in sub-matrix for environment " + envGroup + ": " + getEnvSpecSubMatrix().getMatrix().columns());

        if (preprocess) {
            postprocessEnvSpecSubMatrix(featureNames);
        }
    }

    /**
     * If the OTU matrix was preprocessed and columns were discarded in the process, removes
     * the same columns from the feature matrix and prints a preview of both matrices' column
     * names.
     */
    private void discardFilteredColumnsFromFeatureMatrix() {
        if (!isOtuMatrixPreprocessed() || getFilteredColumnNames().isEmpty()) {
            return;
        }
        MatrixFilterer columnDiscarder = new MatrixFilterer();
        columnDiscarder.setMatrix(getFeatureMatrix());
        columnDiscarder.setNamesOfColumnsToDiscard(getFilteredColumnNames());
        columnDiscarder.filter();
        setFeatureMatrix(columnDiscarder.getFilteredMatrix());
        printColumnNamePreview(getFeatureMatrix());
        printColumnNamePreview(getOtuMatrix());
    }

    /** Prints at most {@link #COLUMN_PREVIEW_LIMIT} column names of the given matrix on one line. */
    private static void printColumnNamePreview(Matrix matrix) {
        // Bounded by the real column count: small matrices must not trigger an index error.
        int limit = Math.min(COLUMN_PREVIEW_LIMIT, matrix.getMatrix().columns());
        for (int col = 0; col < limit; col++) {
            System.out.print(matrix.getColName(col) + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
        }
        System.out.println();
    }

    /**
     * Combines the biome and body-site merges; for {@link #FREE_LIVING} the four
     * non-host biomes are redirected to the free-living group.
     */
    private static Map<String, String> buildMergesFor(String envGroup) {
        Map<String, String> merges = getBiomeMerges();
        merges.putAll(getHostAssocBiomeMerges());
        if (envGroup.equals(FREE_LIVING)) {
            merges.put(MOIST_FOREST, FREE_LIVING);
            merges.put(CONIFEROUS_FOREST, FREE_LIVING);
            merges.put(TUNDRA, FREE_LIVING);
            merges.put(GRASSLANDS, FREE_LIVING);
        }
        return merges;
    }

    /**
     * Collects the feature-matrix row indices of all rows that are merged into the given
     * environment group. Missing rows are reported on standard error and skipped.
     */
    private Set<Integer> collectFeatureRowIndices(Map<String, String> merges, String envGroup) {
        // Groups whose name is not itself a feature-matrix row name; only their member rows can be looked up.
        boolean groupHasNoOwnRow = envGroup.equals(FREE_LIVING) || envGroup.equals(SKIN) || envGroup.equals(INTESTINE)
                || envGroup.equals(ORAL_CAVITY) || envGroup.equals(NASAL_CAVITY) || envGroup.equals(URINE)
                || envGroup.equals(EXTERNAL_AUDITORY_CHANNEL) || envGroup.equals(VAGINA) || envGroup.equals(HAIR);
        Set<Integer> rows = new HashSet<Integer>();
        for (Map.Entry<String, String> merge : merges.entrySet()) {
            String memberRow = merge.getKey();
            String group = merge.getValue();
            if (!group.equals(envGroup)) {
                continue;
            }
            if (!groupHasNoOwnRow && !this._featureMatrix.hasRowName(group)) {
                System.err.println("No feature present for " + group + " in feature matrix.");
            } else if (this._featureMatrix.hasRowName(memberRow)) {
                rows.add(Integer.valueOf(this._featureMatrix.getIndexOfRowName(memberRow)));
            } else {
                // Previously the index of an absent member row was added unchecked in the
                // branch that only verified the group row.
                System.err.println("No feature present for " + memberRow + " in feature matrix.");
            }
        }
        return rows;
    }

    /**
     * Selects the OTU matrix columns that make up the sub-matrix. Without an experiment
     * string matcher a column is selected when any of the given feature rows holds a positive
     * number in that column; otherwise the column name (without "EXP_" and, unless the twin
     * study matcher is used, cut at the first ".") must match the matcher as a regular
     * expression.
     */
    private Set<Integer> selectSampleColumns(Set<Integer> featureRows) {
        Set<Integer> selected = new HashSet<Integer>();
        int columnCount = getOtuMatrix().getMatrix().columns();
        for (int col = 0; col < columnCount; col++) {
            String matcher = getExperimentStringMatcher();
            if (matcher.isEmpty()) {
                // NOTE(review): the feature matrix is indexed with OTU column indices here;
                // this assumes both matrices list the same samples in the same order - confirm.
                if (hasPositiveFeature(featureRows, col)) {
                    selected.add(Integer.valueOf(col));
                }
                continue;
            }
            String experiment = getOtuMatrix().getColName(col).replace("EXP_", "");
            if (!matcher.equals(HostAssociatedSubMatrixExtractor.TWIN_STUDY_MATCHER)) {
                // Keep the part before the first dot; unlike split("\\.")[0] this cannot
                // fail on a name that consists of dots only.
                int firstDot = experiment.indexOf('.');
                if (firstDot >= 0) {
                    experiment = experiment.substring(0, firstDot);
                }
            }
            if (experiment.matches(matcher)) {
                selected.add(Integer.valueOf(col));
            }
        }
        return selected;
    }

    /** Tells whether any of the given feature rows holds a positive, non-NaN value in the column. */
    private boolean hasPositiveFeature(Set<Integer> featureRows, int column) {
        for (Integer row : featureRows) {
            double value = this._featureMatrix.getMatrix().get(row.intValue(), column);
            if (!Double.isNaN(value) && value > 0.0d) {
                return true;
            }
        }
        return false;
    }

    /**
     * Filters the environment-specific sub-matrix and, unless disabled by the builder's
     * flags, normalizes it and assigns higher-level taxa. The report returned by
     * {@link #toString()} is rebuilt from the steps that were carried out.
     *
     * @param featureNames row names of the feature matrix, used to tell feature rows apart
     */
    private void postprocessEnvSpecSubMatrix(Set<String> featureNames) {
        if (!getMetadataLocation().isEmpty()) {
            annotateFeatureRows(featureNames);
        }

        int minRowOcc = getMinEnvSpecRowOcc();
        if (minRowOcc <= 0) {
            minRowOcc = getEnvSpecSubMatrix().getMatrix().columns() / 4;
        }
        System.out.println("Minimal row occurrence: " + minRowOcc);

        double colMinSum = getColMinSum().doubleValue();
        if (isComputeColMinSumForEnvSpecMatrix()) {
            DoubleMatrix1D columnSums = MatrixToolsProvider.getSumVector(
                    MatrixToolsProvider.getSubmatrixWithoutRows(getEnvSpecSubMatrix(), featureNames), true, true);
            System.out.println("Column sums: " + columnSums);
            colMinSum = StatsProvider.getQuantile(columnSums, 0.25d, true);
            System.out.println("Column minimum sum after discarding 25% quartile: " + colMinSum);
        }

        String rowOccThreshold = isNoRowMinOccFilter() ? "1" : Integer.toString(minRowOcc);
        MatrixFilterer filterer = new MatrixFilterer(getEnvSpecSubMatrix());
        filterer.setOmitFeaturesFromColumnMinSum(true);
        if (isOtuMatrixPreprocessed()) {
            // Columns were already filtered by preprocessOTUMatrix().
            filterer.setFilterMethods("row_minocc");
            filterer.setFilterNumbers(rowOccThreshold);
        } else {
            filterer.setFilterMethods("row_minocc/col_minsum");
            filterer.setFilterNumbers(rowOccThreshold + "/" + colMinSum);
        }
        filterer.filter();
        this._envSpecMatrixPreprocReport = filterer.toString();
        // Store the filtered matrix right away so it is not lost when normalization is skipped.
        Matrix filtered = filterer.getFilteredMatrix();
        this._envSpecSubMatrix = filtered;
        if (isOtuMatrixPreprocessed() || isNoNormAndHigherTaxa()) {
            return;
        }

        if (!isNoNorm()) {
            AbundanceMatrixNormalizer normalizer = new AbundanceMatrixNormalizer(filtered);
            normalizer.setExcludeFeaturesFromNormalization(true);
            normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_SUMDIVISION);
            normalizer.normalize();
            this._envSpecMatrixPreprocReport += normalizer.toString();
            this._envSpecSubMatrix = normalizer.getNormalizedAbundanceMatrix();
        }

        if (getMetadataLocation().isEmpty()) {
            return;
        }
        HigherLevelTaxaAssigner taxaAssigner = new HigherLevelTaxaAssigner();
        taxaAssigner.setInputMatrix(this._envSpecSubMatrix);
        taxaAssigner.setLineageAttrib(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
        taxaAssigner.assignHigherLevelTaxa();
        this._envSpecMatrixPreprocReport += taxaAssigner.toString();
        this._envSpecSubMatrix = taxaAssigner.getExtendedMatrix();
    }

    /**
     * Reloads the OTU metadata, copies it to the sub-matrix and marks every sub-matrix row
     * as feature or non-feature depending on whether its name is a feature-matrix row name.
     */
    private void annotateFeatureRows(Set<String> featureNames) {
        loadMetadata();
        getEnvSpecSubMatrix().setRowMetaData(getOtuMatrix().getRowMetaData());
        int rowCount = getEnvSpecSubMatrix().getMatrix().rows();
        for (int row = 0; row < rowCount; row++) {
            if (featureNames.contains(getEnvSpecSubMatrix().getRowName(row))) {
                getEnvSpecSubMatrix().setRowMetaAnnotation(row, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_PRESENT_VALUE);
            } else {
                getEnvSpecSubMatrix().setRowMetaAnnotation(row, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_ABSENT_VALUE);
            }
        }
    }

    /** Sets the "row_minocc" threshold; values <= 0 select a quarter of the sub-matrix column count. */
    public void setMinEnvSpecRowOcc(int i) {
        this._minEnvSpecRowOcc = i;
    }

    /** Returns the configured "row_minocc" threshold. */
    public int getMinEnvSpecRowOcc() {
        return this._minEnvSpecRowOcc;
    }

    /** Sets the "col_minsum" threshold. */
    public void setColMinSum(int i) {
        this._colMinSum = i;
    }

    /** Returns the "col_minsum" threshold. */
    public Integer getColMinSum() {
        return Integer.valueOf(this._colMinSum);
    }

    /** Chooses whether the column minimum sum is derived from the sub-matrix (0.25 quantile of its column sums). */
    public void setComputeColMinSumForEnvSpecMatrix(boolean z) {
        this._computeColMinSumForEnvSpecMatrix = z;
    }

    /** Tells whether the column minimum sum is derived from the sub-matrix. */
    public boolean isComputeColMinSumForEnvSpecMatrix() {
        return this._computeColMinSumForEnvSpecMatrix;
    }

    /** Declares the OTU matrix as already preprocessed (or not). */
    public void setOtuMatrixPreprocessed(boolean z) {
        this._otuMatrixPreprocessed = z;
    }

    /** Tells whether the OTU matrix counts as preprocessed. */
    public boolean isOtuMatrixPreprocessed() {
        return this._otuMatrixPreprocessed;
    }

    /** Replaces the set of column names that were discarded from the OTU matrix. */
    public void setFilteredColumnNames(Set set) {
        this._filteredColumnNames = set;
    }

    /** Returns the (live, mutable) set of column names discarded from the OTU matrix. */
    public Set getFilteredColumnNames() {
        return this._filteredColumnNames;
    }

    /** Sets the feature matrix. */
    public void setFeatureMatrix(Matrix matrix) {
        this._featureMatrix = matrix;
    }

    /** Returns the feature matrix. */
    public Matrix getFeatureMatrix() {
        return this._featureMatrix;
    }

    /** Sets the OTU matrix. */
    public void setOtuMatrix(Matrix matrix) {
        this._otuMatrix = matrix;
    }

    /** Returns the OTU matrix. */
    public Matrix getOtuMatrix() {
        return this._otuMatrix;
    }

    /** Sets the location of the row metadata file; an empty string disables metadata handling. */
    public void setMetadataLocation(String str) {
        this._metadataLocation = str;
    }

    /** Returns the location of the row metadata file. */
    public String getMetadataLocation() {
        return this._metadataLocation;
    }

    /** Sets the "/"-separated list of metadata attribute names. */
    public void setMetadataAttribs(String str) {
        this._metadataAttribs = str;
    }

    /** Returns the "/"-separated list of metadata attribute names. */
    public String getMetadataAttribs() {
        return this._metadataAttribs;
    }

    /** If true, the sub-matrix is only filtered: neither normalized nor extended with higher-level taxa. */
    public void setNoNormAndHigherTaxa(boolean z) {
        this._noNormAndHigherTaxa = z;
    }

    /** Tells whether normalization and higher-level taxa assignment are both skipped. */
    public boolean isNoNormAndHigherTaxa() {
        return this._noNormAndHigherTaxa;
    }

    /** Tells whether normalization of the sub-matrix is skipped. */
    public boolean isNoNorm() {
        return this._noNorm;
    }

    /** If true, the sub-matrix is not normalized. */
    public void setNoNorm(boolean z) {
        this._noNorm = z;
    }

    /** Tells whether the "row_minocc" threshold is forced to 1. */
    public boolean isNoRowMinOccFilter() {
        return this._noRowMinOccFilter;
    }

    /** If true, the "row_minocc" threshold is forced to 1. */
    public void setNoRowMinOccFilter(boolean z) {
        this._noRowMinOccFilter = z;
    }

    /** Returns the regular expression used to select columns by name; empty when unused. */
    public String getExperimentStringMatcher() {
        return this._experimentStringMatcher;
    }

    /** Sets the regular expression used to select columns by name; an empty string selects via the feature matrix. */
    public void setExperimentStringMatcher(String str) {
        this._experimentStringMatcher = str;
    }

    /** Returns the sub-matrix built by the last call to {@link #buildEnvBiomeSpecificSubMatrix}. */
    public Matrix getEnvSpecSubMatrix() {
        return this._envSpecSubMatrix;
    }

    /** Returns the preprocessing report of the last built sub-matrix (empty if none was produced). */
    @Override
    public String toString() {
        return this._envSpecMatrixPreprocReport;
    }

    /**
     * Developer driver with hard-coded input locations: builds the "laubersoils" sub-matrix
     * and writes the matrix, its column sums and the preprocessing report to the working
     * directory.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        // Run configuration. The flag names are reconstructed; all flags are disabled,
        // exactly as the constant conditions they replace.
        final boolean useAllOtus = false;
        final boolean useUnfilteredFeatures = false;
        final boolean useHmpData = false;
        final boolean useHmpHouston = false;
        final boolean useHmpStLouis = false;
        final boolean otuMatrixAlreadyPreprocessed = false;
        final String envName = "laubersoils";

        String otuLocation = useAllOtus ? "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_otu_processed_all.txt" : "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_otu_processed_minocc50.txt";
        String featureLocation = useUnfilteredFeatures ? "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_features_split.txt" : "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_features_split_filtered_50.txt";
        String metadataLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_otu_metadata_higherleveltaxa.txt";
        if (useHmpData) {
            if (useHmpHouston) {
                otuLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/QIIME_Knight_2012_public/HMP_v35_QIIME_Nov2012_study_969_split_library_seqs_and_mapping/parsed/qiime_db_nov2012_hmp_otu_houston.txt";
                featureLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/QIIME_Knight_2012_public/HMP_v35_QIIME_Nov2012_study_969_split_library_seqs_and_mapping/parsed/qiime_db_nov2012_hmp_houston_features_split.txt";
            } else if (useHmpStLouis) {
                otuLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/QIIME_Knight_2012_public/HMP_v35_QIIME_Nov2012_study_969_split_library_seqs_and_mapping/parsed/qiime_db_nov2012_hmp_otu_stlouis.txt";
                featureLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/QIIME_Knight_2012_public/HMP_v35_QIIME_Nov2012_study_969_split_library_seqs_and_mapping/parsed/qiime_db_nov2012_hmp_stlouis_features_split.txt";
            }
            metadataLocation = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/QIIME_Knight_2012_public/HMP_v35_QIIME_Nov2012_study_969_split_library_seqs_and_mapping/parsed/quiime_db_nov2012_hmp_otu_metadata.txt";
        }

        System.out.println("Read features...");
        Matrix featureMatrix = new Matrix();
        featureMatrix.readMatrix(featureLocation, false);
        System.out.println("Read OTUs...");
        Matrix otuMatrix = new Matrix();
        otuMatrix.readMatrix(otuLocation, false);

        System.out.println("Get environment-specific sub-matrix...");
        QiimeDBEnvGroupMatrixBuilder builder = new QiimeDBEnvGroupMatrixBuilder();
        builder.setFeatureMatrix(featureMatrix);
        builder.checkCategoryOverlap();
        builder.setNoNorm(true);
        builder.setNoNormAndHigherTaxa(true);
        builder.setNoRowMinOccFilter(false);
        builder.setOtuMatrix(otuMatrix);
        builder.setMetadataAttribs("kingdom/phylum/class/order/family/genus/species/lineage/taxon");
        builder.setMetadataLocation(metadataLocation);
        builder.setExperimentStringMatcher(HostAssociatedSubMatrixExtractor.LAUBER_SOIL_MATCHER);
        builder.setMinEnvSpecRowOcc(1);
        builder.setColMinSum(300);
        builder.setComputeColMinSumForEnvSpecMatrix(false);
        if (otuMatrixAlreadyPreprocessed) {
            // The discarded-column list is only needed here, so it is only read here.
            Set discardedColumns = new OneColumnSetParser("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/Input/qiime_db_discarded_columns_minocc330_minsum100.txt").parse();
            builder.setOtuMatrixPreprocessed(true);
            builder.setFilteredColumnNames(discardedColumns);
        }

        builder.buildEnvBiomeSpecificSubMatrix(envName, true, true);
        builder.getEnvSpecSubMatrix().writeMatrix(envName + ".txt", "\t", true, true);

        Matrix envSpecSubMatrix = builder.getEnvSpecSubMatrix();
        Matrix featureSubMatrix = MatrixToolsProvider.getFeatureSubMatrix(envSpecSubMatrix);
        Set<String> featureRowNames = new HashSet<String>(ArrayTools.arrayToSet((Object[]) featureSubMatrix.getRowNames()));
        DoubleMatrix1D columnSums = MatrixToolsProvider.getSumVector(MatrixToolsProvider.getSubmatrixWithoutRows(envSpecSubMatrix, featureRowNames), true, true);
        // Left raw on purpose: the element type expected by exportCollectionToOneColumnFile
        // is not visible from this file.
        // NOTE(review): a set collapses equal column sums and drops the column order -
        // confirm this is intended for the "_colsums" export.
        Set distinctColumnSums = new HashSet();
        for (int i = 0; i < columnSums.size(); i++) {
            distinctColumnSums.add(Double.valueOf(columnSums.get(i)));
        }
        ParserTools.exportCollectionToOneColumnFile(distinctColumnSums, envName + "_colsums.txt");
        IOTools.exportStringToFile(builder.toString(), envName + "_preproc.log");
    }
}
