package be.ac.vub.bsb.parsers.genomesize;

import be.ac.vub.bsb.cooccurrence.measures.StatsProvider;
import be.ac.vub.bsb.cooccurrence.util.TaxonToolBox;
import be.ac.vub.bsb.parsers.ncbi.NCBIDBOnlineQueries;
import be.ac.vub.bsb.parsers.ncbi.NCBITaxonDBQueryResult;
import be.ac.vub.bsb.parsers.ncbi.TaxonomyComparator;
import be.ac.vub.bsb.parsers.ncbi.TaxonomyProvider;
import be.ac.vub.bsb.parsers.util.GenericObjectMatrixProcessor;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.ujmp.core.Matrix;
import org.ujmp.core.MatrixFactory;
import org.ujmp.core.calculation.Calculation;
import org.ujmp.core.enums.FileFormat;
import org.ujmp.core.exceptions.MatrixException;
import org.ujmp.core.objectmatrix.impl.DefaultDenseObjectMatrix2D;

/* loaded from: input_file:be/ac/vub/bsb/parsers/genomesize/TaxonToGenomeSizeLinker.class */
/**
 * Appends genome size information to a taxon table.
 *
 * <p>The input matrix (the taxon table) is copied row by row into the output matrix and three
 * columns are appended to every row: the NCBI taxon name or NCBI taxon id (whichever the input
 * table does not already provide), the genome size, and a label describing where the genome size
 * came from.
 *
 * <p>Genome sizes are looked up in the merged genome size matrix, which this class reads as:
 * column 0 = taxon id (int), column 1 = taxon name, column 2 = genome size (int),
 * column 3 = data source label; row 0 is skipped when indexing. Taxa that are not found there are
 * labelled as higher-level taxa, as having incomplete NCBI genomes, or are looked up through
 * {@link NCBIDBOnlineQueries#getNCBITaxonGenomeSize}.
 *
 * <p>The public constants below are non-final and two of them carry misspelled names; both are
 * kept as they are so that existing callers continue to compile.
 */
public class TaxonToGenomeSizeLinker extends GenericObjectMatrixProcessor {
    public static String CURRENT_NCBI = "current_ncbi";
    public static String HIGHER_LEVEL_TAXON = "higher_level_taxon";
    public static String NCBI_GENOME_INCOMPLETE = "ncbi_genome_incomplete";
    public static String NOT_PRESENT_IN_IMG_OR_NCBI = "absent_from_img_finished_genomes_and_ncbi";
    public static String SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FROM_BOTH_NCBI_AND_IMG = "species_level_genome_size_computed_from_ncbi_and_img";
    public static String SPECIES_LEVEL_GENOMNE_SIZE_COMPUTED_FROM_NCBI = "species_level_genome_size_computed_from_ncbi";
    public static String SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FTOM_IMG = "species_level_genome_size_computed_from_img";
    public static String NCBI = "ncbi";
    public static String IMG = "img";

    /** Value of the taxon level column meaning "no such column was configured". */
    private static final int NO_TAXON_LEVEL_COLUMN = -100;

    /** Columns of the merged genome size matrix as read by this class. */
    private static final int GENOME_TABLE_TAXON_ID_COLUMN = 0;
    private static final int GENOME_TABLE_TAXON_NAME_COLUMN = 1;
    private static final int GENOME_TABLE_SIZE_COLUMN = 2;
    private static final int GENOME_TABLE_SOURCE_COLUMN = 3;

    private Matrix _mergedGenomeSizeMatrix;
    private int _taxonIdColumn = 0;
    /** Maps a taxon key (Integer NCBI id or String name) to the genome size matrix rows describing it. */
    private Map<Object, Set<Integer>> _taxonIdVsGenomeSizeMatRowIndices = new HashMap<Object, Set<Integer>>();
    private Set<Integer> _idsOfTaxaWithUnfinsihedNCBIGenomes = new HashSet<Integer>();
    private String _strainAggregationStrategy = GenomeSizeToHigherTaxaAssigner.DEFAULT_AGGREGATING_STRATEGY;
    private boolean _taxonIdIsNCBIId = false;
    private boolean _lowestTaxonLevelInTaxonTableIsSpecies = false;
    private int _genomeSizeColumn = 0;
    private int _taxonLevelColumn = NO_TAXON_LEVEL_COLUMN;
    private Logger _logger = Logger.getLogger(getClass().getPackage().toString());
    private TaxonomyProvider _taxoProvider = new TaxonomyProvider();
    private TaxonomyComparator _taxoComparer = new TaxonomyComparator();

    /**
     * Creates a linker that reads both the taxon table and the genome size table from files.
     *
     * <p>If the genome size table cannot be imported the error is logged and the linker is left
     * without a genome size matrix; {@link #addGenomeSizesToTaxonTable()} then fails with an
     * {@link IllegalStateException}.
     *
     * @param taxonTableFile location of the taxon table, handed to {@code setInputMatrixFile}
     * @param taxonTableDelimiter second argument of {@code setInputMatrixFile}
     *        (presumably the column delimiter; {@code main} passes a tab)
     * @param taxonIdColumn index of the taxon table column holding the taxon identifier
     * @param genomeSizeFile location of the merged genome size table (imported as CSV)
     * @param genomeSizeDelimiter import parameter handed to {@code MatrixFactory.importFromFile}
     *        (presumably the column delimiter; {@code main} passes a tab)
     */
    public TaxonToGenomeSizeLinker(String taxonTableFile, String taxonTableDelimiter, int taxonIdColumn, String genomeSizeFile, String genomeSizeDelimiter) {
        super.setInputMatrixFile(taxonTableFile, taxonTableDelimiter);
        this._taxonIdColumn = taxonIdColumn;
        try {
            this._mergedGenomeSizeMatrix = MatrixFactory.importFromFile(FileFormat.CSV, genomeSizeFile, new Object[]{genomeSizeDelimiter});
            this._logger.info("Genome size table row number: " + this._mergedGenomeSizeMatrix.getSize()[0]);
            this._logger.info("Genome size table column number: " + this._mergedGenomeSizeMatrix.getSize()[1]);
        } catch (IOException e) {
            this._logger.error("Could not read genome size table from " + genomeSizeFile, e);
        } catch (MatrixException e) {
            this._logger.error("Could not parse genome size table from " + genomeSizeFile, e);
        }
    }

    /**
     * Creates a linker that reads the taxon table from a file and uses an in-memory genome size matrix.
     *
     * @param taxonTableFile location of the taxon table, handed to {@code setInputMatrixFile}
     * @param taxonTableDelimiter second argument of {@code setInputMatrixFile}
     * @param taxonIdColumn index of the taxon table column holding the taxon identifier
     * @param mergedGenomeSizeMatrix the merged genome size matrix
     */
    public TaxonToGenomeSizeLinker(String taxonTableFile, String taxonTableDelimiter, int taxonIdColumn, Matrix mergedGenomeSizeMatrix) {
        this._mergedGenomeSizeMatrix = mergedGenomeSizeMatrix;
        super.setInputMatrixFile(taxonTableFile, taxonTableDelimiter);
        this._taxonIdColumn = taxonIdColumn;
    }

    /**
     * Creates a linker from two in-memory matrices.
     *
     * @param taxonTable the taxon table, used as input matrix
     * @param taxonIdColumn index of the taxon table column holding the taxon identifier
     * @param mergedGenomeSizeMatrix the merged genome size matrix
     */
    public TaxonToGenomeSizeLinker(Matrix taxonTable, int taxonIdColumn, Matrix mergedGenomeSizeMatrix) {
        super.setInputMatrix(taxonTable);
        this._mergedGenomeSizeMatrix = mergedGenomeSizeMatrix;
        this._taxonIdColumn = taxonIdColumn;
    }

    /**
     * Writes one output row: a copy of the input row followed by the three appended columns.
     *
     * <p>When {@code genomeSizeKnown} is true, the taxon id/name, genome size and data source
     * passed in are replaced by values derived from the genome size matrix rows indexed under
     * {@code taxonKey}. Also records the index of the genome size column.
     *
     * @param inputRow row of the input matrix to copy
     * @param outputRow row of the output matrix to fill
     * @param taxonKey key into the taxon-to-genome-row index (only read when {@code genomeSizeKnown})
     * @param genomeSizeKnown whether {@code taxonKey} is present in the index
     * @param taxonName taxon name; in NCBI id mode with a known genome size, the id as a string
     * @param taxonId NCBI taxon id
     * @param genomeSize genome size to write when it is not derived from the genome size matrix
     * @param dataSource data source label to write when it is not derived from the genome size matrix
     */
    private void setColumnsInOutputTable(int inputRow, int outputRow, Object taxonKey, boolean genomeSizeKnown, String taxonName, int taxonId, double genomeSize, String dataSource) {
        Matrix input = super.getInputMatrix();
        Matrix output = super.getOutputMatrix();

        // Copy the input row unchanged; afterwards 'column' is the first appended column.
        int column = 0;
        long inputColumnCount = input.getSize()[1];
        while (column < inputColumnCount) {
            output.setAsObject(input.getAsObject(new long[]{inputRow, column}), new long[]{outputRow, column});
            column++;
        }

        if (genomeSizeKnown) {
            Set<Integer> genomeRows = this._taxonIdVsGenomeSizeMatRowIndices.get(taxonKey);
            ArrayList<String> sources = new ArrayList<String>();
            DenseDoubleMatrix1D sizes = new DenseDoubleMatrix1D(genomeRows.size());
            // Name and id of the last genome row visited (set iteration order).
            String genomeTaxonName = "";
            int genomeTaxonId = 0;
            int sizeIndex = 0;
            for (Integer genomeRow : genomeRows) {
                int row = genomeRow.intValue();
                genomeTaxonName = this._mergedGenomeSizeMatrix.getAsString(new long[]{row, GENOME_TABLE_TAXON_NAME_COLUMN});
                genomeTaxonId = this._mergedGenomeSizeMatrix.getAsInt(new long[]{row, GENOME_TABLE_TAXON_ID_COLUMN});
                sources.add(this._mergedGenomeSizeMatrix.getAsString(new long[]{row, GENOME_TABLE_SOURCE_COLUMN}));
                sizes.set(sizeIndex, (double) this._mergedGenomeSizeMatrix.getAsInt(new long[]{row, GENOME_TABLE_SIZE_COLUMN}));
                sizeIndex++;
            }

            // A single size is used directly; several sizes are aggregated. Any strategy other
            // than MEAN/MEDIAN leaves the size passed in by the caller unchanged.
            if (sizes.size() <= 1) {
                genomeSize = sizes.get(0);
            } else if (getStrainAggregationStrategy().equals(GenomeSizeToHigherTaxaAssigner.MEAN)) {
                genomeSize = StatsProvider.getMean(sizes, true);
            } else if (getStrainAggregationStrategy().equals(GenomeSizeToHigherTaxaAssigner.MEDIAN)) {
                genomeSize = StatsProvider.getMedian(sizes, true);
            }

            if (this._taxonIdIsNCBIId) {
                // In NCBI id mode the caller passes the id as a string in taxonName.
                taxonId = Integer.parseInt(taxonName);
                taxonName = this._lowestTaxonLevelInTaxonTableIsSpecies
                        ? TaxonToolBox.getSpeciesFromStrain(genomeTaxonName, " ", true)
                        : genomeTaxonName;
            } else {
                taxonId = this._lowestTaxonLevelInTaxonTableIsSpecies
                        ? TaxonomyProvider.getTaxonIdGivenName(taxonName)
                        : genomeTaxonId;
            }

            if (!this._lowestTaxonLevelInTaxonTableIsSpecies || sources.size() <= 1) {
                dataSource = sources.get(0);
            } else {
                // The two checks are independent so that a label mentioning both databases
                // counts for both.
                boolean fromImg = false;
                boolean fromNcbi = false;
                for (String source : sources) {
                    if (source == null) {
                        continue;
                    }
                    if (source.contains(IMG)) {
                        fromImg = true;
                    }
                    if (source.contains(NCBI)) {
                        fromNcbi = true;
                    }
                }
                if (fromImg && fromNcbi) {
                    dataSource = SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FROM_BOTH_NCBI_AND_IMG;
                } else if (fromImg) {
                    dataSource = SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FTOM_IMG;
                } else if (fromNcbi) {
                    dataSource = SPECIES_LEVEL_GENOMNE_SIZE_COMPUTED_FROM_NCBI;
                }
            }
        }

        // The first appended column holds whichever of name/id the input table does not provide.
        if (this._taxonIdIsNCBIId) {
            output.setAsString(taxonName, new long[]{outputRow, column});
        } else {
            output.setAsInt(taxonId, new long[]{outputRow, column});
        }
        int genomeSizeColumn = column + 1;
        this._genomeSizeColumn = genomeSizeColumn;
        output.setAsDouble(genomeSize, new long[]{outputRow, genomeSizeColumn});
        output.setAsString(dataSource, new long[]{outputRow, genomeSizeColumn + 1});
    }

    /** Runs {@link #addGenomeSizesToTaxonTable()}. */
    @Override // be.ac.vub.bsb.parsers.util.IObjectMatrixProcessor
    public void processMatrix() {
        addGenomeSizesToTaxonTable();
    }

    /**
     * Builds the output matrix: a header row with the three new column names at row 0, followed
     * by one row per input row carrying the input columns plus taxon name/id, genome size and
     * genome size data source.
     *
     * <p>Whether the taxon identifier column holds NCBI ids or names is decided from input row 1:
     * a positive integer value there selects NCBI id mode.
     *
     * @throws IllegalStateException if no genome size matrix is available
     */
    public void addGenomeSizesToTaxonTable() {
        if (this._mergedGenomeSizeMatrix == null) {
            throw new IllegalStateException("No genome size matrix available (was the genome size table imported successfully?)");
        }
        this._taxonIdVsGenomeSizeMatRowIndices = new HashMap<Object, Set<Integer>>();
        Matrix input = super.getInputMatrix();
        // One extra row for the header, three extra columns for the appended values.
        setOutputMatrix(new DefaultDenseObjectMatrix2D(new long[]{input.getSize()[0] + 1, input.getSize()[1] + 3}));

        this._taxonIdIsNCBIId = input.getAsInt(new long[]{1, this._taxonIdColumn}) > 0;
        if (this._taxonIdIsNCBIId) {
            this._logger.info("The taxon identifier column in the taxon table contains integers, so it is assumed that NCBI taxon identifiers were provided.");
        } else {
            this._logger.info("The taxon identifier column in the taxon table does not contain integers, so it is assumed that taxon names were provided.");
        }

        indexGenomeSizeRows();
        writeAppendedColumnHeaders(input);

        int outputRow = 1;
        for (long[] coordinates : input.allCoordinates()) {
            if ((int) coordinates[1] != this._taxonIdColumn) {
                continue;
            }
            int inputRow = (int) coordinates[0];
            Object taxonKey = this._taxonIdIsNCBIId
                    ? (Object) Integer.valueOf(input.getAsInt(coordinates))
                    : input.getAsString(coordinates);
            if (this._taxonIdVsGenomeSizeMatRowIndices.containsKey(taxonKey)) {
                setColumnsInOutputTable(inputRow, outputRow, taxonKey, true, taxonKey.toString(), 0, 0.0d, "");
            } else {
                addRowForTaxonWithoutGenomeSize(input, inputRow, outputRow, taxonKey);
            }
            outputRow++;
        }

        // Drop output rows that were never filled. Valid row indices end at rowCount - 1.
        Set<Integer> unusedRows = new HashSet<Integer>();
        int outputRowCount = (int) super.getOutputMatrix().getSize()[0];
        for (int row = outputRow; row < outputRowCount; row++) {
            unusedRows.add(Integer.valueOf(row));
        }
        super.setOutputMatrix(super.getOutputMatrix().deleteRows(Calculation.Ret.NEW, unusedRows));
    }

    /**
     * Fills the taxon-to-genome-row index from the genome size matrix, skipping row 0.
     * In NCBI id mode the key is taken from the taxon id column, otherwise from the name column.
     */
    private void indexGenomeSizeRows() {
        for (long[] coordinates : this._mergedGenomeSizeMatrix.allCoordinates()) {
            int row = (int) coordinates[0];
            int column = (int) coordinates[1];
            if (row <= 0) {
                continue;
            }
            if (this._taxonIdIsNCBIId) {
                if (column == GENOME_TABLE_TAXON_ID_COLUMN) {
                    indexGenomeSizeRowByTaxonId(coordinates, row);
                }
            } else if (column == GENOME_TABLE_TAXON_NAME_COLUMN) {
                indexGenomeSizeRowByTaxonName(coordinates, row);
            }
        }
    }

    /** Indexes one genome size row under its NCBI id, or under its species' id in species mode. */
    private void indexGenomeSizeRowByTaxonId(long[] coordinates, int row) {
        if (isLowestTaxonLevelInTaxonTableIsSpecies()) {
            this._taxoProvider.setTaxon(TaxonomyProvider.getTaxonNameGiveId(this._mergedGenomeSizeMatrix.getAsInt(coordinates)));
            int speciesId = TaxonomyProvider.getTaxonIdGivenName(this._taxoProvider.getNamesOfSuperTaxon(TaxonomyProvider.SPECIES).get(0));
            addGenomeSizeRow(Integer.valueOf(speciesId), row);
        } else {
            // NOTE(review): a repeated taxon id replaces the earlier row here, whereas the
            // name-based index accumulates rows. Kept as is; confirm whether ids can repeat.
            Set<Integer> rows = new HashSet<Integer>();
            rows.add(Integer.valueOf(row));
            this._taxonIdVsGenomeSizeMatRowIndices.put(Integer.valueOf(this._mergedGenomeSizeMatrix.getAsInt(coordinates)), rows);
        }
    }

    /** Indexes one genome size row under its taxon name (reduced to the species name in species mode). */
    private void indexGenomeSizeRowByTaxonName(long[] coordinates, int row) {
        String name = this._mergedGenomeSizeMatrix.getAsString(coordinates);
        if (name == null || name.equals("NaN") || name.isEmpty()) {
            return;
        }
        String key = this._lowestTaxonLevelInTaxonTableIsSpecies
                ? TaxonToolBox.getSpeciesFromStrain(name, " ", true)
                : name;
        addGenomeSizeRow(key, row);
    }

    /** Adds a genome size matrix row to the set indexed under the given key, creating the set if needed. */
    private void addGenomeSizeRow(Object key, int row) {
        Set<Integer> rows = this._taxonIdVsGenomeSizeMatRowIndices.get(key);
        if (rows == null) {
            rows = new HashSet<Integer>();
            this._taxonIdVsGenomeSizeMatRowIndices.put(key, rows);
        }
        rows.add(Integer.valueOf(row));
    }

    /** Writes the names of the three appended columns into output row 0. */
    private void writeAppendedColumnHeaders(Matrix input) {
        Matrix output = super.getOutputMatrix();
        int firstAppendedColumn = (int) input.getSize()[1];
        String taxonHeader = this._taxonIdIsNCBIId ? "NCBI_taxon_name" : "NCBI_taxon_id";
        output.setAsString(taxonHeader, new long[]{0, firstAppendedColumn});
        output.setAsString("Genome_size", new long[]{0, firstAppendedColumn + 1});
        output.setAsString("Genome_size_data_source", new long[]{0, firstAppendedColumn + 2});
    }

    /**
     * Writes the output row for a taxon that is absent from the genome size matrix.
     * Non-species taxa and taxa with known unfinished NCBI genomes get a genome size of 0 and a
     * label; all others are looked up at NCBI.
     */
    private void addRowForTaxonWithoutGenomeSize(Matrix input, int inputRow, int outputRow, Object taxonKey) {
        this._logger.warn("Taxon " + taxonKey + " from taxon matrix is not contained in genome size matrix!");
        int taxonId;
        String taxonName;
        if (this._taxonIdIsNCBIId) {
            taxonId = ((Integer) taxonKey).intValue();
            taxonName = TaxonomyProvider.getTaxonNameGiveId(taxonId);
            this._taxoProvider.setTaxon(taxonName);
        } else {
            taxonName = (String) taxonKey;
            taxonId = TaxonomyProvider.getTaxonIdGivenName(taxonName);
            this._taxoProvider.setTaxon(taxonName);
        }

        // The taxon level comes from the taxon table when a level column is configured,
        // otherwise from the taxonomy provider.
        boolean isSpecies = this._taxoComparer.compare(
                getTaxonLevelColumn() != NO_TAXON_LEVEL_COLUMN
                        ? input.getAsString(new long[]{inputRow, getTaxonLevelColumn()})
                        : this._taxoProvider.getTaxonomicLevel(),
                TaxonomyProvider.SPECIES) == 0;
        if (!isSpecies) {
            setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, 0.0d, HIGHER_LEVEL_TAXON);
            return;
        }
        if (this._idsOfTaxaWithUnfinsihedNCBIGenomes.contains(Integer.valueOf(taxonId))) {
            setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, 0.0d, NCBI_GENOME_INCOMPLETE);
            return;
        }

        this._logger.info("Querying NCBI for species with unknown genome length " + taxonName + " with NCBI taxon identifier " + taxonId + "...");
        NCBITaxonDBQueryResult queryResult = NCBIDBOnlineQueries.getNCBITaxonGenomeSize(taxonId);
        if (!queryResult.wasSuccessful()) {
            setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, 0.0d, NOT_PRESENT_IN_IMG_OR_NCBI);
            return;
        }
        queryResult.removeSingleNonMainReplicatorsFromGenomeSizes();
        if (!queryResult.wasSuccessful()) {
            setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, 0.0d, NCBI_GENOME_INCOMPLETE);
            return;
        }
        queryResult.removeAllNonCompleteGenomesFromGenomeSizes();
        queryResult.addNonSingleNonMainReplicatorSizeToCorrespondingGenomeSize();
        if (!queryResult.wasSuccessful()) {
            setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, 0.0d, NCBI_GENOME_INCOMPLETE);
            return;
        }

        // Any strategy other than MEAN/MEDIAN yields a genome size of 0.
        double genomeSize = 0.0d;
        if (getStrainAggregationStrategy().equals(GenomeSizeToHigherTaxaAssigner.MEAN)) {
            genomeSize = queryResult.getMeanCompletedGenomeSize();
        } else if (getStrainAggregationStrategy().equals(GenomeSizeToHigherTaxaAssigner.MEDIAN)) {
            genomeSize = queryResult.getMedianCompletedGenomeSize();
        }
        setColumnsInOutputTable(inputRow, outputRow, taxonKey, false, taxonName, taxonId, genomeSize, CURRENT_NCBI);
    }

    /** Sets the index of the taxon table column holding the taxonomic level. */
    public void setTaxonLevelColumn(int i) {
        this._taxonLevelColumn = i;
    }

    /** Returns the index of the taxon level column, or -100 if none was set. */
    public int getTaxonLevelColumn() {
        return this._taxonLevelColumn;
    }

    /** Returns the output matrix column that received the genome size in the last row written (0 before any row is written). */
    public int getGenomeSizeColumn() {
        return this._genomeSizeColumn;
    }

    /** Sets the NCBI ids of taxa that are labelled as incomplete instead of being queried at NCBI. */
    public void setIdsOfTaxaWithUnfinsihedNCBIGenomes(Set<Integer> set) {
        this._idsOfTaxaWithUnfinsihedNCBIGenomes = set;
    }

    /** Returns the NCBI ids of taxa that are labelled as incomplete instead of being queried at NCBI. */
    public Set<Integer> getIdsOfTaxaWithUnfinsihedNCBIGenomes() {
        return this._idsOfTaxaWithUnfinsihedNCBIGenomes;
    }

    /** Sets the strategy used to combine several genome sizes of one taxon. */
    public void setStrainAggregationStrategy(String str) {
        this._strainAggregationStrategy = str;
    }

    /** Returns the strategy used to combine several genome sizes of one taxon. */
    public String getStrainAggregationStrategy() {
        return this._strainAggregationStrategy;
    }

    /** Sets whether genome size entries are grouped by species rather than by their own id/name. */
    public void setLowestTaxonLevelInTaxonTableIsSpecies(boolean z) {
        this._lowestTaxonLevelInTaxonTableIsSpecies = z;
    }

    /** Returns whether genome size entries are grouped by species rather than by their own id/name. */
    public boolean isLowestTaxonLevelInTaxonTableIsSpecies() {
        return this._lowestTaxonLevelInTaxonTableIsSpecies;
    }

    /**
     * Example run with hard-coded file locations: links a taxon table to "mergedMat.txt" and
     * exports the result to "taxonWithGenomeSize.txt".
     */
    public static void main(String[] strArr) {
        TaxonToGenomeSizeLinker linker = new TaxonToGenomeSizeLinker("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/MetaHIT_freeze2/Parsed_Tables/taxon.tab", "\t", 0, "mergedMat.txt", "\t");
        linker.setTaxonLevelColumn(1);
        linker.setLowestTaxonLevelInTaxonTableIsSpecies(true);
        linker.addGenomeSizesToTaxonTable();
        try {
            linker.getOutputMatrix().exportToFile("taxonWithGenomeSize.txt", new Object[0]);
        } catch (IOException e) {
            Logger.getLogger(TaxonToGenomeSizeLinker.class).error("Could not write taxonWithGenomeSize.txt", e);
        } catch (MatrixException e) {
            Logger.getLogger(TaxonToGenomeSizeLinker.class).error("Could not export output matrix to taxonWithGenomeSize.txt", e);
        }
    }
}
