package jsat.text.tokenizer;

import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Priority;

/* loaded from: input_file:JSAT-0.0.7.jar:jsat/text/tokenizer/NaiveTokenizer.class */
/**
 * Character-by-character tokenizer.
 *
 * <p>Letters (optionally lower-cased) and, unless {@code noDigits} is set,
 * digits are accumulated into the current token. Whitespace ends the current
 * token. Any other character either ends the token as well (when
 * {@code otherToWhiteSpace} is set, which is the default) or is dropped
 * without splitting the token. Finished tokens whose length falls outside the
 * inclusive range {@code [minTokenLength, maxTokenLength]} are discarded, and
 * empty tokens are never produced.
 */
public class NaiveTokenizer implements Tokenizer {
    private static final long serialVersionUID = -2112091783442076933L;

    /** When true, letters are converted with {@link Character#toLowerCase(char)}. */
    private boolean useLowerCase;
    /** When true, characters that are not accepted into a token act as delimiters. */
    private boolean otherToWhiteSpace;
    /** When true, digits are not accepted into tokens. */
    private boolean noDigits;
    /** Inclusive lower bound on the length of an emitted token. */
    private int minTokenLength;
    /** Inclusive upper bound on the length of an emitted token. */
    private int maxTokenLength;

    /**
     * Creates a tokenizer that lower-cases letters.
     */
    public NaiveTokenizer() {
        this(true);
    }

    /**
     * Creates a tokenizer with the default settings: non-token characters act
     * as delimiters, digits are kept, the minimum token length is 1 and the
     * maximum token length is unbounded.
     *
     * @param z {@code true} to lower-case every letter placed into a token
     */
    public NaiveTokenizer(boolean z) {
        this.otherToWhiteSpace = true;
        this.noDigits = false;
        this.minTokenLength = 1;
        // Same value as the previously used log4j Priority.OFF_INT constant,
        // without tying this class to a logging library.
        this.maxTokenLength = Integer.MAX_VALUE;
        this.useLowerCase = z;
    }

    /**
     * @param z {@code true} to lower-case every letter placed into a token
     */
    public void setUseLowerCase(boolean z) {
        this.useLowerCase = z;
    }

    /**
     * @return {@code true} if letters are lower-cased
     */
    public boolean isUseLowerCase() {
        return this.useLowerCase;
    }

    /**
     * @param z {@code true} to make every character that is not accepted into
     *          a token end the current token; {@code false} to make only
     *          whitespace end a token and silently drop the other characters
     */
    public void setOtherToWhiteSpace(boolean z) {
        this.otherToWhiteSpace = z;
    }

    /**
     * @return {@code true} if non-token characters act as delimiters
     */
    public boolean isOtherToWhiteSpace() {
        return this.otherToWhiteSpace;
    }

    /**
     * Tokenizes the input into a freshly allocated list.
     *
     * @param str the text to tokenize
     * @return the accepted tokens in order of appearance
     */
    @Override // jsat.text.tokenizer.Tokenizer
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        tokenize(str, new StringBuilder(str.length() / 10), tokens);
        return tokens;
    }

    /**
     * Tokenizes the input, appending each accepted token to {@code list}.
     *
     * @param str  the text to tokenize
     * @param sb   scratch buffer used to assemble tokens; any content it holds
     *             on entry becomes the start of the first token, and it is
     *             left empty on return so it can be reused for the next call
     * @param list destination to which accepted tokens are appended
     */
    @Override // jsat.text.tokenizer.Tokenizer
    public void tokenize(String str, StringBuilder sb, List<String> list) {
        final int inputLength = str.length();
        for (int i = 0; i < inputLength; i++) {
            final char c = str.charAt(i);
            if (Character.isLetter(c)) {
                sb.append(this.useLowerCase ? Character.toLowerCase(c) : c);
            } else if (!this.noDigits && Character.isDigit(c)) {
                sb.append(c);
            } else if (this.otherToWhiteSpace || Character.isWhitespace(c)) {
                flushToken(sb, list);
            }
            // Otherwise the character is dropped and the current token continues.
        }
        // Emit the trailing token and clear the buffer so that a reused
        // workspace does not leak this token into the next input.
        flushToken(sb, list);
    }

    /**
     * Emits the buffered token if it is non-empty and within the configured
     * length bounds, then clears the buffer.
     */
    private void flushToken(StringBuilder sb, List<String> list) {
        final int tokenLength = sb.length();
        // The explicit emptiness check matters when minTokenLength is 0:
        // consecutive delimiters must not yield "" tokens.
        if (tokenLength > 0 && tokenLength >= this.minTokenLength && tokenLength <= this.maxTokenLength) {
            list.add(sb.toString());
        }
        sb.setLength(0);
    }

    /**
     * Sets the inclusive upper bound on token length. Longer tokens are
     * discarded.
     *
     * @param i the new maximum token length
     * @throws IllegalArgumentException if {@code i} is less than 1 or less
     *         than the current minimum token length
     */
    public void setMaxTokenLength(int i) {
        if (i < 1) {
            throw new IllegalArgumentException("Max token length must be positive, not " + i);
        }
        // Both bounds are inclusive, so max == min is a valid configuration;
        // setMinTokenLength already accepts it.
        if (i < this.minTokenLength) {
            throw new IllegalArgumentException("Max token length can not be smaller than the min token length");
        }
        this.maxTokenLength = i;
    }

    /**
     * @return the inclusive upper bound on token length
     */
    public int getMaxTokenLength() {
        return this.maxTokenLength;
    }

    /**
     * Sets the inclusive lower bound on token length. Shorter tokens are
     * discarded.
     *
     * @param i the new minimum token length
     * @throws IllegalArgumentException if {@code i} is negative or greater
     *         than the current maximum token length
     */
    public void setMinTokenLength(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("Minimum token length must be non negative, not " + i);
        }
        if (i > this.maxTokenLength) {
            throw new IllegalArgumentException("Minimum token length can not exced the maximum token length");
        }
        this.minTokenLength = i;
    }

    /**
     * @return the inclusive lower bound on token length
     */
    public int getMinTokenLength() {
        return this.minTokenLength;
    }

    /**
     * @param z {@code true} to keep digits out of tokens; digits are then
     *          handled like any other non-token character
     */
    public void setNoDigits(boolean z) {
        this.noDigits = z;
    }

    /**
     * @return {@code true} if digits are kept out of tokens
     */
    public boolean isNoDigits() {
        return this.noDigits;
    }
}
