package org.apache.nutch.analysis.lang.custom;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Vector;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.nutch.analysis.lang.custom.NGramProfile;
import org.hsqldb.Tokens;

/* loaded from: input_file:WEB-INF/lib/contentconnector-language-2.0.8.jar:org/apache/nutch/analysis/lang/custom/LanguageIdentifier.class */
/**
 * Identifies the language of a text by comparing the n-grams of the text with
 * the n-gram profiles ({@code <lang>.ngp} resources) found next to this class.
 *
 * <p>The set of candidate languages is the set of keys of the
 * {@code langmappings.properties} resource; a key without a matching
 * {@code .ngp} resource is skipped.
 *
 * <p>NOTE(review): every {@code identify} call re-analyses the single shared
 * {@code suspect} profile, so an instance is presumably not safe for
 * concurrent use -- confirm against {@code NGramProfile}.
 */
public class LanguageIdentifier {
    /** Number of characters/bytes to analyse; {@code 0} means "no limit". */
    private static final int DEFAULT_ANALYSIS_LENGTH = 0;

    /** Minimum and maximum n-gram length used for every profile. */
    private static final int DEFAULT_NGRAM_LENGTH = 3;

    private static final String USAGE = "Usage: LanguageIdentifier [-identifyrows filename maxlines] [-identifyfile charset filename] [-identifyfileset charset files] [-identifytext text] [-identifyurl url]";

    // Command codes selected by the command line options of main().
    private static final int IDFILE = 1;
    private static final int IDTEXT = 2;
    private static final int IDURL = 3;
    private static final int IDFILESET = 4;
    private static final int IDROWS = 5;

    private final int minLength;
    private final int maxLength;
    private final int analyzeLength;

    /** Scratch profile that is re-analysed for every text to identify. */
    private final NGramProfile suspect;

    private final List<NGramProfile> languages = new ArrayList<>();
    private final List<String> supportedLanguages = new ArrayList<>();

    /** Maps an n-gram sequence to its entries in every loaded language profile. */
    private final Map<CharSequence, NGramProfile.NGramEntry[]> ngramsIdx = new HashMap<>();

    /**
     * Creates an identifier and loads every language profile available on the
     * classpath.
     *
     * <p>Loading is best effort: a failure is reported with a stack trace and
     * leaves the identifier with whatever was indexed so far. The suspect
     * profile is created before loading starts, so {@code identify} stays
     * callable even when no profile could be loaded.
     */
    public LanguageIdentifier() {
        this.minLength = DEFAULT_NGRAM_LENGTH;
        this.maxLength = DEFAULT_NGRAM_LENGTH;
        this.analyzeLength = DEFAULT_ANALYSIS_LENGTH;
        this.suspect = new NGramProfile("suspect", this.minLength, this.maxLength);
        try {
            loadProfiles();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the profile of every language listed in {@code langmappings.properties}
     * and builds the n-gram index used by {@link #identify(StringBuilder)}.
     *
     * @throws IOException if the language mapping resource is missing or unreadable
     */
    private void loadProfiles() throws IOException {
        Properties mappings = new Properties();
        try (InputStream in = LanguageIdentifier.class.getResourceAsStream("langmappings.properties")) {
            if (in == null) {
                throw new IOException("Resource langmappings.properties not found for " + LanguageIdentifier.class.getName());
            }
            mappings.load(in);
        }

        // Groups the entries of all profiles by n-gram (relies on NGramEntry equals/hashCode).
        Map<NGramProfile.NGramEntry, List<NGramProfile.NGramEntry>> entriesByNGram = new HashMap<>();
        Enumeration<Object> languageCodes = mappings.keys();
        while (languageCodes.hasMoreElements()) {
            String lang = (String) languageCodes.nextElement();
            try (InputStream profileStream = LanguageIdentifier.class.getResourceAsStream(lang + ".ngp")) {
                if (profileStream == null) {
                    // Language is mapped but ships no profile: nothing to index.
                    continue;
                }
                NGramProfile profile = new NGramProfile(lang, this.minLength, this.maxLength);
                profile.load(profileStream);
                this.languages.add(profile);
                this.supportedLanguages.add(lang);
                for (NGramProfile.NGramEntry entry : profile.getSorted()) {
                    List<NGramProfile.NGramEntry> sameNGram = entriesByNGram.get(entry);
                    if (sameNGram == null) {
                        sameNGram = new ArrayList<>();
                        entriesByNGram.put(entry, sameNGram);
                    }
                    sameNGram.add(entry);
                    entry.setProfile(profile);
                }
            } catch (IOException e) {
                // One unreadable profile must not prevent the other languages from loading.
                e.printStackTrace();
            }
        }

        for (Map.Entry<NGramProfile.NGramEntry, List<NGramProfile.NGramEntry>> indexed : entriesByNGram.entrySet()) {
            List<NGramProfile.NGramEntry> sameNGram = indexed.getValue();
            this.ngramsIdx.put(indexed.getKey().getSeq(),
                    sameNGram.toArray(new NGramProfile.NGramEntry[sameNGram.size()]));
        }
    }

    /**
     * Command line entry point; see {@code USAGE} for the accepted options.
     *
     * <p>Unknown arguments are ignored and, when several options are given,
     * the last one decides what is executed. {@code -identifytext} and
     * {@code -identifyfileset} consume all remaining arguments.
     *
     * @param strArr the command line arguments
     */
    public static void main(String[] strArr) {
        if (strArr.length == 0) {
            System.err.println(USAGE);
            System.exit(-1);
        }

        int command = 0;
        String charset = "";
        String target = "";
        int maxLines = 0;
        StringBuilder text = new StringBuilder();
        List<String> fileset = new ArrayList<>();

        int i = 0;
        while (i < strArr.length) {
            String option = strArr[i];
            if (option.equals("-identifyfile")) {
                command = IDFILE;
                charset = requireArg(strArr, ++i, option);
                target = requireArg(strArr, ++i, option);
            } else if (option.equals("-identifyurl")) {
                command = IDURL;
                target = requireArg(strArr, ++i, option);
            } else if (option.equals("-identifyrows")) {
                command = IDROWS;
                target = requireArg(strArr, ++i, option);
                String lines = requireArg(strArr, ++i, option);
                try {
                    maxLines = Integer.parseInt(lines);
                } catch (NumberFormatException e) {
                    exitWithUsage("Invalid maxlines value: " + lines);
                }
            } else if (option.equals("-identifytext")) {
                command = IDTEXT;
                // Every remaining argument, including the last one, is part of the text.
                for (i++; i < strArr.length; i++) {
                    text.append(strArr[i]).append(" ");
                }
            } else if (option.equals("-identifyfileset")) {
                command = IDFILESET;
                charset = requireArg(strArr, ++i, option);
                for (i++; i < strArr.length; i++) {
                    addFiles(fileset, new File(strArr[i]));
                }
            }
            i++;
        }

        String lang = null;
        LanguageIdentifier identifier = new LanguageIdentifier();
        try {
            switch (command) {
                case IDFILE:
                    lang = identifyFile(identifier, target, charset);
                    break;
                case IDTEXT:
                    lang = identifier.identify(text);
                    break;
                case IDFILESET:
                    System.out.println("FILESET");
                    for (String path : fileset) {
                        // Reset per file so a failure never reports the previous file's language.
                        String fileLang = null;
                        try {
                            fileLang = identifyFile(identifier, path, charset);
                        } catch (Exception e) {
                            System.out.println(e);
                        }
                        System.out.println(path + " was identified as " + fileLang);
                    }
                    System.exit(0);
                    break;
                case IDROWS:
                    try (BufferedReader rows = new BufferedReader(
                            new InputStreamReader(new FileInputStream(new File(target))))) {
                        String line;
                        while (maxLines > 0 && (line = rows.readLine()) != null) {
                            String trimmed = line.trim();
                            // Only rows longer than two characters are identified and counted.
                            if (trimmed.length() > 2) {
                                maxLines--;
                                lang = identifier.identify(trimmed);
                                System.out.println("R=" + lang + ":" + trimmed);
                            }
                        }
                    }
                    System.exit(0);
                    break;
                default:
                    // IDURL is parsed but has no handler in this class; nothing is identified.
                    break;
            }
        } catch (Exception e) {
            System.out.println(e);
        }
        System.out.println("text was identified as " + lang);
    }

    /** Returns {@code args[index]}, or prints the usage and exits when the argument is missing. */
    private static String requireArg(String[] args, int index, String option) {
        if (index >= args.length) {
            exitWithUsage("Missing argument for " + option);
        }
        return args[index];
    }

    /** Prints the problem followed by the usage line on stderr and exits with status -1. */
    private static void exitWithUsage(String problem) {
        System.err.println(problem);
        System.err.println(USAGE);
        System.exit(-1);
    }

    /** Adds the absolute path of {@code entry}, or of its direct children when it is a directory. */
    private static void addFiles(List<String> paths, File entry) {
        File[] files = entry.isDirectory() ? entry.listFiles() : new File[] {entry};
        if (files == null) {
            // listFiles() returns null when the directory cannot be read.
            return;
        }
        for (File file : files) {
            paths.add(file.getAbsolutePath());
        }
    }

    /** Identifies the language of the file at {@code path}, decoded with {@code charset}. */
    private static String identifyFile(LanguageIdentifier identifier, String path, String charset)
            throws IOException {
        try (FileInputStream in = new FileInputStream(new File(path))) {
            return identifier.identify(in, charset);
        }
    }

    /**
     * Identifies the language of the given text.
     *
     * @param str the text to analyse
     * @return the name of the best matching profile, or {@code ""} when none matched
     */
    public String identify(String str) {
        return identify(new StringBuilder(str));
    }

    /**
     * Identifies the language of the given text.
     *
     * <p>Each n-gram of the text that is known to a language profile adds the
     * profile's frequency and the text's frequency for that n-gram to the
     * profile's score; the first profile to reach the highest score wins.
     *
     * @param sb the text to analyse; it is not modified by this method itself
     * @return the name of the best matching profile, or {@code ""} when none matched
     */
    public String identify(StringBuilder sb) {
        StringBuilder text = sb;
        if (this.analyzeLength > 0 && sb.length() > this.analyzeLength) {
            // Analyse a truncated copy so the caller's builder keeps its content.
            text = new StringBuilder(this.analyzeLength).append(sb, 0, this.analyzeLength);
        }
        this.suspect.analyze(text);

        // Float.MIN_VALUE is the smallest positive float, so a profile needs a
        // strictly positive score to be selected.
        float topScore = Float.MIN_VALUE;
        String topLanguage = "";
        Map<NGramProfile, Float> scores = new HashMap<>();
        for (NGramProfile.NGramEntry searched : this.suspect.getSorted()) {
            NGramProfile.NGramEntry[] hits = this.ngramsIdx.get(searched.getSeq());
            if (hits == null) {
                continue;
            }
            for (NGramProfile.NGramEntry hit : hits) {
                NGramProfile profile = hit.getProfile();
                Float previous = scores.get(profile);
                float score = (previous == null ? 0.0f : previous.floatValue())
                        + hit.getFrequency() + searched.getFrequency();
                scores.put(profile, Float.valueOf(score));
                if (score > topScore) {
                    topScore = score;
                    topLanguage = profile.getName();
                }
            }
        }
        return topLanguage;
    }

    /**
     * Identifies the language of the stream content, decoded with the platform
     * default charset.
     *
     * @param inputStream the content to analyse; it is not closed by this method
     * @return the name of the best matching profile, or {@code ""} when none matched
     * @throws IOException if the stream cannot be read
     */
    public String identify(InputStream inputStream) throws IOException {
        return identify(inputStream, null);
    }

    /**
     * Identifies the language of the stream content.
     *
     * <p>The stream is read until its end, or until {@code analyzeLength}
     * bytes were collected when that limit is non-zero.
     *
     * @param inputStream the content to analyse; it is not closed by this method
     * @param str the charset name used to decode the bytes, or {@code null}
     *            for the platform default charset
     * @return the name of the best matching profile, or {@code ""} when none matched
     * @throws IOException if the stream cannot be read or the charset is not supported
     */
    public String identify(InputStream inputStream, String str) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[2048];
        int read;
        while ((read = inputStream.read(buffer)) != -1) {
            if (this.analyzeLength != 0) {
                int remaining = this.analyzeLength - collected.size();
                if (remaining <= 0) {
                    break;
                }
                read = Math.min(read, remaining);
            }
            collected.write(buffer, 0, read);
        }
        return identify(str == null ? collected.toString() : collected.toString(str));
    }
}
