package be.ac.vub.bsb.parsers.genomesize;

import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.vub.bsb.parsers.ncbi.NCBIDBOnlineQueries;
import be.ac.vub.bsb.parsers.util.GenericObjectMatrixProcessor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.ujmp.core.exceptions.MatrixException;
import org.ujmp.core.objectmatrix.impl.DefaultDenseObjectMatrix2D;

/* loaded from: input_file:be/ac/vub/bsb/parsers/genomesize/NCBIGenomeFilePreprocessor.class */
/**
 * Collapses an NCBI genome summary matrix to one output row per NCBI taxon
 * identifier.
 *
 * <p>Input rows that share the same value in the taxon-identifier column are
 * grouped, their lengths are summed, and the result is written to the output
 * matrix together with the taxon identifier, the taxon name and the update
 * date of the main replicator. Taxa that have no main replicator (see
 * {@link #NCBI_MAIN_REPLICATOR}) or whose sequence is not reported as
 * completed are collected in {@link #getUnfinishedTaxaIdentifiers()} and
 * their output row is left unfilled.
 *
 * <p>Row 0 of the input matrix is skipped when grouping (presumably a header
 * row - confirm against the input file format).
 */
public class NCBIGenomeFilePreprocessor extends GenericObjectMatrixProcessor {
    /** Prefix of the replicator-type column value that marks a main replicator. */
    public static String NCBI_MAIN_REPLICATOR = "chromosome";

    /** Column whose values are summed per taxon (written out as "Genome_Size"). */
    private static final int LENGTH_COLUMN = 2;
    /** Column holding the NCBI taxon identifier. */
    private static final int TAXON_ID_COLUMN = 3;
    /** Column holding the taxon name. */
    private static final int TAXON_NAME_COLUMN = 5;
    /** Column whose value is tested against {@link #NCBI_MAIN_REPLICATOR}. */
    private static final int REPLICATOR_TYPE_COLUMN = 6;
    /** Column copied to the output as "Update_date_of_main_replicator". */
    private static final int UPDATE_DATE_COLUMN = 8;

    /** Taxon identifier mapped to the input-matrix rows that carry it. */
    private Map<Integer, List<Integer>> _ncbiTaxonIdVsRowIndices = new HashMap<Integer, List<Integer>>();
    /**
     * Taxon identifier (as string) mapped to its sequence status. A value is
     * handled when it is either a {@code String} or a {@code Set}; the map
     * stays empty when no status file was supplied.
     */
    private Map<String, Object> _ncbiTaxonIdVsSequenceStatus = new HashMap<String, Object>();
    /** Identifiers of taxa that were classified as unfinished. */
    private Set<Integer> _unfinishedTaxaIdentifiers = new HashSet<Integer>();
    private boolean _treatTaxaWithoutSequenceStatusAsUnfinished = false;

    /**
     * Creates a preprocessor without sequence-status information.
     *
     * @param inputFile first argument handed to {@code setInputMatrixFile}
     *                  (a file path in {@link #main(String[])})
     * @param delimiter second argument handed to {@code setInputMatrixFile}
     *                  ({@code ";"} in {@link #main(String[])})
     */
    public NCBIGenomeFilePreprocessor(String inputFile, String delimiter) {
        super.setInputMatrixFile(inputFile, delimiter);
    }

    /**
     * Creates a preprocessor that additionally loads sequence-status
     * information. The status map is parsed with key column 2 and value
     * column 6 of the given file.
     *
     * @param inputFile  first argument handed to {@code setInputMatrixFile}
     * @param delimiter  second argument handed to {@code setInputMatrixFile}
     * @param statusFile location handed to the {@code TwoColumnHashMapParser}
     */
    public NCBIGenomeFilePreprocessor(String inputFile, String delimiter, String statusFile) {
        this(inputFile, delimiter);
        TwoColumnHashMapParser statusParser = new TwoColumnHashMapParser(statusFile);
        statusParser.setKeyColumn(2);
        statusParser.setValueColumn(6);
        this._ncbiTaxonIdVsSequenceStatus = statusParser.parse();
    }

    /**
     * Aggregates all input rows of one taxon and, if the taxon has a main
     * replicator and a finished sequence, writes the aggregate to the output
     * matrix. Otherwise the taxon identifier is added to the unfinished set
     * and the output row stays untouched.
     *
     * @param rowIndices input-matrix rows belonging to the taxon
     * @param outputRow  row of the output matrix reserved for the taxon
     */
    private void sumReplicaLengths(List<Integer> rowIndices, int outputRow) {
        int totalLength = 0;
        String taxonName = "";
        Integer taxonId = 0;
        boolean hasMainReplicator = false;
        String mainReplicatorUpdateDate = "";
        for (Integer rowIndex : rowIndices) {
            int row = rowIndex.intValue();
            totalLength += getInputMatrix().getAsInt(new long[]{row, LENGTH_COLUMN});
            // Name and identifier are taken from the last row of the group.
            taxonName = getInputMatrix().getAsString(new long[]{row, TAXON_NAME_COLUMN});
            taxonId = Integer.valueOf(getInputMatrix().getAsInt(new long[]{row, TAXON_ID_COLUMN}));
            if (getInputMatrix().getAsString(new long[]{row, REPLICATOR_TYPE_COLUMN}).startsWith(NCBI_MAIN_REPLICATOR)) {
                mainReplicatorUpdateDate = getInputMatrix().getAsString(new long[]{row, UPDATE_DATE_COLUMN});
                hasMainReplicator = true;
            }
        }
        boolean sequenceFinished = isSequenceFinished(taxonId, taxonName);
        if (!hasMainReplicator) {
            getLogger().warn("Taxon " + taxonName + " with NCBI identifier " + taxonId + " is only represented by non-main replicators.");
        }
        if (!hasMainReplicator || !sequenceFinished) {
            getUnfinishedTaxaIdentifiers().add(taxonId);
            return;
        }
        getOutputMatrix().setAsInt(totalLength, new long[]{outputRow, LENGTH_COLUMN});
        getOutputMatrix().setAsInt(taxonId.intValue(), new long[]{outputRow, TAXON_ID_COLUMN});
        getOutputMatrix().setAsString(taxonName, new long[]{outputRow, TAXON_NAME_COLUMN});
        getOutputMatrix().setAsString(mainReplicatorUpdateDate, new long[]{outputRow, UPDATE_DATE_COLUMN});
    }

    /**
     * Decides from the loaded sequence-status map whether a taxon counts as
     * finished.
     *
     * <p>Without status information every taxon counts as finished. A taxon
     * missing from the status map is logged and still reported as finished
     * here; it is only added to the unfinished set when
     * {@link #isTreatTaxaWithoutSequenceStatusAsUnfinished()} is enabled.
     * A {@code Set} status is finished only if it contains the completed
     * marker; a {@code String} status only if it equals it.
     *
     * @param taxonId   NCBI identifier of the taxon
     * @param taxonName taxon name, used in log messages only
     * @return {@code false} if the status map marks the taxon as not completed
     */
    private boolean isSequenceFinished(Integer taxonId, String taxonName) {
        if (this._ncbiTaxonIdVsSequenceStatus.isEmpty()) {
            return true;
        }
        String statusKey = taxonId.toString();
        if (!this._ncbiTaxonIdVsSequenceStatus.containsKey(statusKey)) {
            if (isTreatTaxaWithoutSequenceStatusAsUnfinished()) {
                this._unfinishedTaxaIdentifiers.add(taxonId);
            }
            getLogger().warn("Taxon " + taxonName + " with identifier " + taxonId + " is not contained in file with sequence status!");
            return true;
        }
        Object status = this._ncbiTaxonIdVsSequenceStatus.get(statusKey);
        boolean finished = true;
        if (status instanceof Set) {
            // Several status entries for one taxon: finished if any of them is
            // the completed marker. Previously a set without the marker was
            // silently accepted as finished.
            finished = ((Set<?>) status).contains(NCBIDBOnlineQueries.GENOME_COMPLETED);
        } else if (status instanceof String) {
            finished = status.equals(NCBIDBOnlineQueries.GENOME_COMPLETED);
        }
        if (!finished) {
            getLogger().warn("For taxon " + taxonName + " with identifier " + taxonId + " the sequence is not finished!");
        }
        return finished;
    }

    /**
     * Groups the input rows by taxon identifier, creates the output matrix
     * (one header row plus one row per distinct taxon, same column count as
     * the input) and fills it via {@link #sumReplicaLengths(List, int)}.
     * Rows of taxa classified as unfinished remain unfilled.
     */
    @Override // be.ac.vub.bsb.parsers.util.IObjectMatrixProcessor
    public void processMatrix() {
        // Start from an empty index so a repeated call does not register every
        // row twice and double the summed lengths.
        this._ncbiTaxonIdVsRowIndices.clear();
        for (long[] coordinates : getInputMatrix().allCoordinates()) {
            int row = (int) coordinates[0];
            int column = (int) coordinates[1];
            if (row > 0 && column == TAXON_ID_COLUMN) {
                Integer taxonId = Integer.valueOf(getInputMatrix().getAsInt(coordinates));
                List<Integer> rowIndices = this._ncbiTaxonIdVsRowIndices.get(taxonId);
                if (rowIndices == null) {
                    rowIndices = new ArrayList<Integer>();
                    this._ncbiTaxonIdVsRowIndices.put(taxonId, rowIndices);
                }
                rowIndices.add(Integer.valueOf(row));
            }
        }
        super.setOutputMatrix(new DefaultDenseObjectMatrix2D(new long[]{this._ncbiTaxonIdVsRowIndices.keySet().size() + 1, getInputMatrix().getSize()[1]}));
        getOutputMatrix().setAsString("Genome_Size", new long[]{0, LENGTH_COLUMN});
        getOutputMatrix().setAsString("Taxon_NCBI_ID", new long[]{0, TAXON_ID_COLUMN});
        getOutputMatrix().setAsString("Taxon_name", new long[]{0, TAXON_NAME_COLUMN});
        getOutputMatrix().setAsString("Update_date_of_main_replicator", new long[]{0, UPDATE_DATE_COLUMN});
        // Row 0 holds the header, so taxa start at row 1.
        int outputRow = 1;
        for (List<Integer> rowIndices : this._ncbiTaxonIdVsRowIndices.values()) {
            sumReplicaLengths(rowIndices, outputRow);
            outputRow++;
        }
    }

    /**
     * Returns the live, mutable set of taxon identifiers classified as
     * unfinished so far.
     *
     * @return identifiers of unfinished taxa
     */
    public Set<Integer> getUnfinishedTaxaIdentifiers() {
        return this._unfinishedTaxaIdentifiers;
    }

    /**
     * @param z {@code true} to add taxa that are missing from the sequence
     *          status map to the set of unfinished taxa
     */
    public void setTreatTaxaWithoutSequenceStatusAsUnfinished(boolean z) {
        this._treatTaxaWithoutSequenceStatusAsUnfinished = z;
    }

    /**
     * @return whether taxa missing from the sequence status map are added to
     *         the set of unfinished taxa
     */
    public boolean isTreatTaxaWithoutSequenceStatusAsUnfinished() {
        return this._treatTaxaWithoutSequenceStatusAsUnfinished;
    }

    /**
     * Ad-hoc driver: processes hard-coded local input files, prints the
     * identifiers of unfinished taxa and exports the output matrix to
     * {@code ncbiProcessed.txt}. Command-line arguments are ignored.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        NCBIGenomeFilePreprocessor preprocessor = new NCBIGenomeFilePreprocessor("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/NCBI/NCBI_Microbial_Genomes/summary.csv", ";", "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/NCBI/NCBI_Microbial_Genomes/lproks_0.txt");
        preprocessor.processMatrix();
        System.out.println("identifier of taxa to skip: " + preprocessor.getUnfinishedTaxaIdentifiers());
        try {
            preprocessor.getOutputMatrix().exportToFile("ncbiProcessed.txt", new Object[0]);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (MatrixException e2) {
            e2.printStackTrace();
        }
    }
}
