package com.mobstac.thehindu.tts;

import java.util.regex.Pattern;
import org.apache.commons.lang3.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;

/* loaded from: classes2.dex */
/**
 * Static helpers that turn an HTML fragment into plain text: tags are stripped,
 * {@code <br>} and {@code <p>} become line breaks, HTML entities are unescaped,
 * URLs are removed and non-ASCII characters are replaced by spaces.
 */
public class PlainTextParser {
    /**
     * Placeholder inserted into the DOM where a line break is wanted: a literal
     * backslash followed by {@code n} (two characters, not a newline). It is
     * swapped for a real newline once the document has been serialised.
     */
    private static final String LINE_BREAK_MARKER = "\\n";

    /** Matches any single character outside the 7-bit ASCII range. */
    private static final Pattern NON_ASCII = Pattern.compile("[^\\x00-\\x7F]");

    /** Matches a URL introduced by one of the listed schemes followed by {@code ://}. */
    private static final Pattern URL = Pattern.compile("\\b(https?|ftp|file|telnet|http|Unsure)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]");

    /**
     * Removes every HTML tag from {@code str} while keeping line structure:
     * a line-break marker is appended to each {@code <br>} and two markers are
     * prepended to each {@code <p>}, and the markers become newlines before the
     * markup is cleaned with an empty whitelist.
     *
     * <p>The result is whatever {@code Jsoup.clean} emits, so it may still
     * contain HTML entities; {@link #html2PlainText(String)} unescapes them.
     *
     * @param str HTML source, may be {@code null}
     * @return the tag-free text, or {@code null} when {@code str} is {@code null}
     */
    public static String cleanTagPerservingLineBreaks(String str) {
        if (str == null) {
            return null;
        }
        Document document = Jsoup.parse(str);
        // Pretty-printing is disabled so the serialiser does not add whitespace of its own.
        document.outputSettings(new Document.OutputSettings().prettyPrint(false));
        document.select("br").append(LINE_BREAK_MARKER);
        document.select("p").prepend(LINE_BREAK_MARKER + LINE_BREAK_MARKER);
        // Literal replacement: the marker is plain text, so no regex is needed.
        String marked = document.html().replace(LINE_BREAK_MARKER, "\n");
        return Jsoup.clean(marked, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
    }

    /**
     * Converts an HTML fragment to plain text: strips tags (keeping line
     * breaks), unescapes HTML 4 entities, removes URLs and finally replaces
     * non-ASCII characters with spaces.
     *
     * @param str HTML source, may be {@code null}
     * @return the plain-text rendering of {@code str}; the empty string when
     *         {@code str} is {@code null}
     */
    public static String html2PlainText(String str) {
        if (str == null) {
            return "";
        }
        // Convert the caller's input; previously an empty literal was passed here,
        // so the method returned "" for every argument.
        String withoutTags = cleanTagPerservingLineBreaks(str);
        String unescaped = StringEscapeUtils.unescapeHtml4(withoutTags);
        return removeExtendedChars(removeUrl(unescaped));
    }

    /**
     * Manual smoke test: prints the plain-text form of a small HTML sample.
     *
     * @param strArr command-line arguments, unused
     */
    public static void main(String[] strArr) {
        System.out.println(html2PlainText("a<br>b"));
    }

    /**
     * Replaces every character outside {@code \x00-\x7F} with a single space.
     *
     * @param str text to filter, may be {@code null}
     * @return the filtered text, or {@code null} when {@code str} is {@code null}
     */
    public static String removeExtendedChars(String str) {
        if (str == null) {
            return null;
        }
        return NON_ASCII.matcher(str).replaceAll(" ");
    }

    /**
     * Deletes every URL matched by the scheme-based pattern from {@code str}.
     *
     * @param str text to filter, may be {@code null}
     * @return the text without the matched URLs, or {@code null} when
     *         {@code str} is {@code null}
     */
    public static String removeUrl(String str) {
        if (str == null) {
            return null;
        }
        return URL.matcher(str).replaceAll("");
    }
}
