package org.forester.ws.seqdb;

import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.math3.geometry.VectorFormat;
import org.biojava.nbio.alignment.io.StockholmStructure;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
import org.forester.go.BasicGoTerm;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.go.GoXRef;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;

/* loaded from: input_file:forester-1.038.jar:org/forester/ws/seqdb/UniProtEntry.class */
/**
 * A {@link SequenceDatabaseEntry} populated from the lines of a UniProt plain-text record.
 *
 * <p>Instances are created only through {@link #createInstanceFromPlainText(List)}. Most scalar
 * fields keep the first value that was parsed (their setters ignore later assignments); the
 * sequence symbol and the molecular sequence are plain assignments.
 *
 * <p>The GO-term and cross-reference sets are created lazily, so their getters return
 * {@code null} when no such line was parsed.
 */
public final class UniProtEntry implements SequenceDatabaseEntry {
    public static final Pattern BindingDB_PATTERN = Pattern.compile("BindingDB;\\s+([0-9A-Z]+);");
    public static final Pattern CTD_PATTERN = Pattern.compile("CTD;\\s+(\\d+);");
    public static final Pattern DrugBank_PATTERN = Pattern.compile("DrugBank;\\s+([0-9A-Z]+);\\s+([^\\.]+)");
    public static final Pattern GO_PATTERN = Pattern.compile("GO;\\s+(GO:\\d+);\\s+([PFC]):([^;]+);");
    public static final Pattern KEGG_PATTERN = Pattern.compile("KEGG;\\s+([a-z]+:[0-9]+);");
    public static final Pattern MIM_PATTERN = Pattern.compile("MIM;\\s+(\\d+);");
    public static final Pattern NextBio_PATTERN = Pattern.compile("NextBio;\\s+(\\d+);");
    public static final Pattern Orphanet_PATTERN = Pattern.compile("Orphanet;\\s+(\\d+);\\s+([^\\.]+)");
    public static final Pattern PDB_PATTERN = Pattern.compile("PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)");
    public static final Pattern PharmGKB_PATTERN = Pattern.compile("PharmGKB;\\s+([0-9A-Z]+);");
    public static final Pattern Reactome_PATTERN = Pattern.compile("Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)");
    public static final Pattern HGNC_PATTERN = Pattern.compile("HGNC;\\s+HGNC:(\\d+);");
    public static final Pattern NCBI_TAXID_PATTERN = Pattern.compile("NCBI_TaxID=(\\d+)");

    // These literals replace constants borrowed from unrelated libraries
    // (SimpleMMcifParser.STRING_LIMIT, VectorFormat.DEFAULT_PREFIX) that hold the same values;
    // the borrowed names were misleading in a UniProt parser.
    /** Default end marker for a field value. */
    private static final String SEMICOLON = ";";
    /** End marker used instead of {@link #SEMICOLON} when the line contains a brace. */
    private static final String OPEN_BRACE = "{";
    /** Compiled once; used to strip all whitespace from sequence data lines. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private String _ac;
    private SortedSet<Accession> _cross_references;
    private String _gene_name;
    private SortedSet<GoTerm> _go_terms;
    private String _name;
    private String _os_scientific_name;
    private String _symbol;
    private String _tax_id;
    private MolecularSequence _mol_seq;

    /** Not instantiable from outside; use {@link #createInstanceFromPlainText(List)}. */
    private UniProtEntry() {
    }

    /**
     * Cloning is not supported.
     *
     * @throws CloneNotSupportedException always
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        throw new CloneNotSupportedException();
    }

    /** Returns the accession taken from the first "AC" line, or {@code null} if none was seen. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getAccession() {
        return this._ac;
    }

    /** Returns the cross references collected from "DR" lines, or {@code null} if none were added. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public SortedSet<Accession> getCrossReferences() {
        return this._cross_references;
    }

    /** Returns the gene name taken from a "GN" line, or {@code null} if none was set. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getGeneName() {
        return this._gene_name;
    }

    /** Returns the GO terms collected from "DR" lines, or {@code null} if none were added. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public SortedSet<GoTerm> getGoTerms() {
        return this._go_terms;
    }

    /** Returns {@link PhyloXmlUtil#UNIPROT_TAX_PROVIDER}. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getProvider() {
        return PhyloXmlUtil.UNIPROT_TAX_PROVIDER;
    }

    /** Returns the sequence name taken from a "DE" line, or {@code null} if none was set. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getSequenceName() {
        return this._name;
    }

    /** Returns the sequence symbol taken from a "DE" line, or {@code null} if none was set. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getSequenceSymbol() {
        return this._symbol;
    }

    /** Returns the NCBI taxonomy id taken from an "OX" line, or {@code null} if none was set. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getTaxonomyIdentifier() {
        return this._tax_id;
    }

    /** Returns the scientific name taken from an "OS" line, or {@code null} if none was set. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getTaxonomyScientificName() {
        return this._os_scientific_name;
    }

    /**
     * Reports whether nothing was parsed into this entry.
     *
     * <p>The molecular sequence is not taken into account.
     *
     * @return {@code true} if every string field is empty and both the GO-term and
     *         cross-reference sets are {@code null} or empty
     */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public boolean isEmpty() {
        return ForesterUtil.isEmpty(getAccession())
                && ForesterUtil.isEmpty(getSequenceName())
                && ForesterUtil.isEmpty(getTaxonomyScientificName())
                && ForesterUtil.isEmpty(getSequenceSymbol())
                && ForesterUtil.isEmpty(getGeneName())
                && ForesterUtil.isEmpty(getTaxonomyIdentifier())
                && (getGoTerms() == null || getGoTerms().isEmpty())
                && (getCrossReferences() == null || getCrossReferences().isEmpty());
    }

    /** Adds a cross reference, creating the backing set on first use. */
    private void addCrossReference(Accession accession) {
        if (this._cross_references == null) {
            this._cross_references = new TreeSet<Accession>();
        }
        this._cross_references.add(accession);
    }

    /** Adds a GO term, creating the backing set on first use. */
    private void addGoTerm(BasicGoTerm basicGoTerm) {
        if (this._go_terms == null) {
            this._go_terms = new TreeSet<GoTerm>();
        }
        this._go_terms.add(basicGoTerm);
    }

    /** Sets the accession unless one has already been set. */
    private void setAc(String str) {
        if (this._ac == null) {
            this._ac = str;
        }
    }

    /** Sets the molecular sequence, replacing any previous value. */
    private void setMolecularSequence(MolecularSequence molecularSequence) {
        this._mol_seq = molecularSequence;
    }

    /** Sets the gene name unless one has already been set. */
    private void setGeneName(String str) {
        if (this._gene_name == null) {
            this._gene_name = str;
        }
    }

    /** Sets the organism scientific name unless one has already been set. */
    private void setOsScientificName(String str) {
        if (this._os_scientific_name == null) {
            this._os_scientific_name = str;
        }
    }

    /** Sets the sequence name unless one has already been set. */
    private void setSequenceName(String str) {
        if (this._name == null) {
            this._name = str;
        }
    }

    /** Sets the sequence symbol, replacing any previous value. */
    private void setSequenceSymbol(String str) {
        this._symbol = str;
    }

    /** Sets the taxonomy id unless one has already been set. */
    private void setTaxId(String str) {
        if (this._tax_id == null) {
            this._tax_id = str;
        }
    }

    /**
     * Builds an entry from the lines of a UniProt plain-text record.
     *
     * <p>Lines are dispatched on their two-letter prefix ("AC", "DE", "GN", "DR", "OS", "OX",
     * "SQ"). After an "SQ" line, every line starting with a space is treated as sequence data
     * and appended with all whitespace removed. Lines with any other prefix are ignored.
     *
     * <p>If sequence data was collected, it becomes an amino-acid sequence when an "SQ" line
     * contained {@code "AA;"} and a DNA sequence otherwise, using the parsed accession as the
     * identifier.
     *
     * @param list the record's lines in order; neither the list nor its elements may be
     *             {@code null}
     * @return the populated entry (possibly empty, see {@link #isEmpty()})
     */
    public static SequenceDatabaseEntry createInstanceFromPlainText(List<String> list) {
        UniProtEntry entry = new UniProtEntry();
        StringBuilder residues = new StringBuilder();
        boolean inSequenceSection = false;
        boolean aminoAcids = false;
        for (String line : list) {
            if (line.startsWith("AC")) {
                entry.setAc(SequenceDbWsTools.extractFromTo(line, "AC", SEMICOLON));
            } else if (line.startsWith("DE")) {
                entry.parseDescriptionLine(line);
            } else if (line.startsWith("GN")) {
                entry.parseGeneNameLine(line);
            } else if (line.startsWith("DR")) {
                entry.parseCrossReferenceLine(line);
            } else if (line.startsWith("OS")) {
                entry.parseOrganismLine(line);
            } else if (line.startsWith("OX")) {
                entry.parseTaxonomyLine(line);
            } else if (line.startsWith("SQ")) {
                inSequenceSection = true;
                if (line.contains("AA;")) {
                    aminoAcids = true;
                }
            } else if (inSequenceSection && line.startsWith(" ")) {
                residues.append(WHITESPACE.matcher(line).replaceAll(""));
            }
        }
        if (residues.length() > 0) {
            String id = entry.getAccession();
            String seq = residues.toString();
            entry.setMolecularSequence(aminoAcids
                    ? BasicSequence.createAaSequence(id, seq)
                    : BasicSequence.createDnaSequence(id, seq));
        }
        return entry;
    }

    /**
     * Extracts the value following {@code key} on {@code line}.
     *
     * <p>The end marker is "{" when the line contains one past its first character, otherwise
     * ";". Presumably the brace opens a trailing annotation that should not be part of the
     * value; confirm against {@code SequenceDbWsTools.extractFromTo}.
     */
    private static String extractValue(String line, String key) {
        String end = line.indexOf(OPEN_BRACE) > 0 ? OPEN_BRACE : SEMICOLON;
        return SequenceDbWsTools.extractFromTo(line, key, end);
    }

    /**
     * Handles a "DE" line.
     *
     * <p>While the sequence name is still empty, only the name is looked for: a line carrying
     * "Full=" together with "RecName:" or "SubName:". Once a name is present, the line is
     * checked for "Short=" as long as the symbol is still empty.
     */
    private void parseDescriptionLine(String line) {
        if (ForesterUtil.isEmpty(getSequenceName())) {
            boolean hasFull = line.indexOf("Full=") > 0;
            boolean hasNameKind = line.indexOf("RecName:") > 0 || line.indexOf("SubName:") > 0;
            if (hasFull && hasNameKind) {
                setSequenceName(extractValue(line, "Full="));
            }
        } else if (ForesterUtil.isEmpty(getSequenceSymbol()) && line.indexOf("Short=") > 0) {
            setSequenceSymbol(extractValue(line, "Short="));
        }
    }

    /** Handles a "GN" line: takes the value after "Name=" while the gene name is still empty. */
    private void parseGeneNameLine(String line) {
        if (ForesterUtil.isEmpty(getGeneName()) && line.indexOf("Name=") > 0) {
            setGeneName(extractValue(line, "Name="));
        }
    }

    /**
     * Handles a "DR" line.
     *
     * <p>The first database marker found on the line (in the order below) decides which
     * pattern is applied; if that pattern does not match, the line is dropped without trying
     * the remaining databases.
     */
    private void parseCrossReferenceLine(String line) {
        if (line.indexOf("GO;") > 0) {
            parseGoTerm(line);
        } else if (line.indexOf("PDB;") > 0) {
            addCrossReferenceIfFound(PDB_PATTERN, line, "PDB", true);
        } else if (line.indexOf("KEGG;") > 0) {
            addCrossReferenceIfFound(KEGG_PATTERN, line, GoXRef.KEGG_STR, false);
        } else if (line.indexOf("CTD;") > 0) {
            addCrossReferenceIfFound(CTD_PATTERN, line, "CTD", false);
        } else if (line.indexOf("MIM;") > 0) {
            // Literal replaces StockholmStructure.DatabaseReference.MIM, which holds the same value.
            addCrossReferenceIfFound(MIM_PATTERN, line, "MIM", false);
        } else if (line.indexOf("Orphanet;") > 0) {
            addCrossReferenceIfFound(Orphanet_PATTERN, line, "Orphanet", true);
        } else if (line.indexOf("PharmGKB;") > 0) {
            addCrossReferenceIfFound(PharmGKB_PATTERN, line, "PharmGKB", false);
        } else if (line.indexOf("BindingDB;") > 0) {
            addCrossReferenceIfFound(BindingDB_PATTERN, line, "BindingDB", false);
        } else if (line.indexOf("DrugBank;") > 0) {
            addCrossReferenceIfFound(DrugBank_PATTERN, line, "DrugBank", true);
        } else if (line.indexOf("NextBio;") > 0) {
            addCrossReferenceIfFound(NextBio_PATTERN, line, "NextBio", false);
        } else if (line.indexOf("Reactome;") > 0) {
            addCrossReferenceIfFound(Reactome_PATTERN, line, GoXRef.REACTOME_STR, true);
        } else if (line.indexOf("HGNC;") > 0) {
            addCrossReferenceIfFound(HGNC_PATTERN, line, "HGNC", false);
        }
    }

    /**
     * Adds a GO term if {@link #GO_PATTERN} matches the line.
     *
     * <p>The single-letter aspect code selects the namespace: "F" is molecular function,
     * "C" is cellular component, and anything else (the pattern only admits "P") is
     * biological process.
     */
    private void parseGoTerm(String line) {
        Matcher m = GO_PATTERN.matcher(line);
        if (!m.find()) {
            return;
        }
        String aspect = m.group(2);
        String nameSpace;
        if (aspect.equals("F")) {
            nameSpace = GoNameSpace.MOLECULAR_FUNCTION_STR;
        } else if (aspect.equals("C")) {
            // Literal replaces StructureTools.C_ATOM_NAME, which holds the same value.
            nameSpace = GoNameSpace.CELLULAR_COMPONENT_STR;
        } else {
            nameSpace = GoNameSpace.BIOLOGICAL_PROCESS_STR;
        }
        addGoTerm(new BasicGoTerm(m.group(1), m.group(3), nameSpace, false));
    }

    /**
     * Adds a cross reference if {@code pattern} is found on {@code line}.
     *
     * @param pattern     pattern whose group 1 is the accession value
     * @param line        the "DR" line
     * @param source      source name stored with the accession
     * @param withComment whether group 2 of the pattern is passed along as the third
     *                    {@code Accession} constructor argument
     */
    private void addCrossReferenceIfFound(Pattern pattern, String line, String source, boolean withComment) {
        Matcher m = pattern.matcher(line);
        if (!m.find()) {
            return;
        }
        if (withComment) {
            addCrossReference(new Accession(m.group(1), source, m.group(2)));
        } else {
            addCrossReference(new Accession(m.group(1), source));
        }
    }

    /**
     * Handles an "OS" line: the name ends at "(" when the line contains one past its first
     * character, otherwise at ".".
     */
    private void parseOrganismLine(String line) {
        String end = line.indexOf("(") > 0 ? "(" : ".";
        setOsScientificName(SequenceDbWsTools.extractFromTo(line, "OS", end));
    }

    /** Handles an "OX" line: takes the digits following "NCBI_TaxID=" as the taxonomy id. */
    private void parseTaxonomyLine(String line) {
        if (line.indexOf("NCBI_TaxID=") > 0) {
            Matcher m = NCBI_TAXID_PATTERN.matcher(line);
            if (m.find()) {
                setTaxId(m.group(1));
            }
        }
    }

    /** Not provided by this entry type; always returns {@code null}. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public SortedSet<Annotation> getAnnotations() {
        return null;
    }

    /** Not provided by this entry type; always returns {@code null}. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getMap() {
        return null;
    }

    /** Not provided by this entry type; always returns {@code null}. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public String getChromosome() {
        return null;
    }

    /** Returns the sequence built from the "SQ" section, or {@code null} if none was parsed. */
    @Override // org.forester.ws.seqdb.SequenceDatabaseEntry
    public MolecularSequence getMolecularSequence() {
        return this._mol_seq;
    }
}
