/*
 * Decompiled with CFR 0.152.
 */
package com.kohlschutter.boilerpipe.filters.simple;

import com.kohlschutter.boilerpipe.BoilerpipeFilter;
import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
import com.kohlschutter.boilerpipe.document.TextBlock;
import com.kohlschutter.boilerpipe.document.TextDocument;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Filter that keeps a content block only if it contains at least one "clause"
 * with a minimum number of words.
 *
 * <p>A block's text is split at clause delimiters: a letter or digit followed by
 * one or more of {@code , . : ; ! ?} and then whitespace or the end of the text.
 * The word count of a segment is estimated as one plus the number of whitespace
 * runs (space, {@code \n}, {@code \r}) it contains. Content blocks without a
 * sufficiently long clause are marked as non-content.
 */
public final class MinClauseWordsFilter
implements BoilerpipeFilter {
    // Compiled once and shared by all instances; java.util.regex.Pattern objects are
    // immutable. Declared before INSTANCE so they are initialized first.
    private static final Pattern PAT_CLAUSE_DELIMITER = Pattern.compile("[\\p{L}\\d][\\,\\.\\:\\;\\!\\?]+([ \\n\\r]+|$)");
    private static final Pattern PAT_WHITESPACE = Pattern.compile("[ \\n\\r]+");

    /** Shared instance requiring 5 words per clause; text without a delimiter is not accepted. */
    public static final MinClauseWordsFilter INSTANCE = new MinClauseWordsFilter(5, false);

    private final int minWords;
    private final boolean acceptClausesWithoutDelimiter;

    /**
     * Creates a filter that does not accept clauses lacking a trailing delimiter.
     *
     * @param minWords minimum estimated word count a clause must reach
     */
    public MinClauseWordsFilter(int minWords) {
        this(minWords, false);
    }

    /**
     * Creates a filter.
     *
     * @param minWords minimum estimated word count a clause must reach
     * @param acceptClausesWithoutDelimiter if {@code true}, the text following the last
     *        delimiter (or the whole text when there is none) is also tested as a clause
     */
    public MinClauseWordsFilter(int minWords, boolean acceptClausesWithoutDelimiter) {
        this.minWords = minWords;
        this.acceptClausesWithoutDelimiter = acceptClausesWithoutDelimiter;
    }

    /**
     * Marks every content block that has no sufficiently long clause as non-content.
     * Blocks that are already non-content are skipped.
     *
     * @param doc the document whose text blocks are examined
     * @return {@code true} if at least one block was changed to non-content
     * @throws BoilerpipeProcessingException declared by the interface; not thrown by this code directly
     */
    @Override
    public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
        boolean changes = false;
        for (TextBlock tb : doc.getTextBlocks()) {
            if (!tb.isContent()) {
                continue;
            }
            if (this.containsClause(tb.getText())) {
                continue;
            }
            tb.setIsContent(false);
            changes = true;
        }
        return changes;
    }

    /**
     * Tells whether {@code text} contains at least one segment that qualifies as a clause.
     * Returns as soon as a qualifying segment is found.
     */
    private boolean containsClause(String text) {
        Matcher m = PAT_CLAUSE_DELIMITER.matcher(text);
        int start = 0;
        while (m.find()) {
            // m.start() points at the letter/digit preceding the punctuation; the +1
            // keeps that character in the segment while excluding the punctuation.
            if (this.isClause(text.subSequence(start, m.start() + 1))) {
                return true;
            }
            // The next segment begins after the delimiter and its trailing whitespace.
            start = m.end();
        }
        // No delimited clause qualified; optionally test the undelimited remainder.
        return this.acceptClausesWithoutDelimiter
            && this.isClause(text.subSequence(start, text.length()));
    }

    /**
     * Estimates the word count of {@code text} as one plus the number of whitespace
     * runs and compares it against {@code minWords}. Stops scanning once the
     * threshold is reached.
     */
    private boolean isClause(CharSequence text) {
        Matcher m = PAT_WHITESPACE.matcher(text);
        int words = 1;
        while (words < this.minWords && m.find()) {
            ++words;
        }
        return words >= this.minWords;
    }
}

