package com.ontotext.russie.gazetteer;

import com.ontotext.russie.RussIEConstants;
import com.ontotext.russie.morph.Lemma;
import com.ontotext.russie.morph.LemmaImpl;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:com/ontotext/russie/gazetteer/InflectionalGazetteerXMLReader.class */
public class InflectionalGazetteerXMLReader implements RussIEConstants, ContentHandler {
    private String wordform;
    protected List<String> listImportTypes;
    static final String DEFAULT_PARSER = "org.apache.xerces.parsers.SAXParser";
    public static final String TAG_RUSNAMES = "rusnames";
    public static final String TAG_NAME = "name";
    public static final String TAG_CAT = "cat";
    public static final String TAG_CAT_END = "/cat";
    public static final String TAG_FORM = "form";
    public static final String TAG_TAG = "tag";
    public static final String TAG_TAG_END = "/tag";
    public static final String TAG_PH = "ph";
    public static final String TAG_PH_END = "/ph";
    public static final String ATTR_N = "n";
    public static final String CAT_LOCATION = "Loc";
    public static final String CAT_PERSON_FAMILY = "PerFamily";
    public static final String CAT_PERSON_FIRST = "PerFirst";
    public static final String CAT_PERSON = "Per";
    private static Map<String, String> catVsMajorType = new HashMap();
    Lemma lemma;
    String category;
    int occurance;
    String name;
    String type;
    private List<Lemma> lemmas = new ArrayList();
    private StringBuffer tagContent = new StringBuffer();
    private String parserValue = DEFAULT_PARSER;
    Locator locator = null;

    public InflectionalGazetteerXMLReader(List<String> list) {
        this.listImportTypes = list;
    }

    public static String getMajorType4Category(String str) {
        return catVsMajorType.get(str);
    }

    public void load(String str) {
        load(new File(str));
    }

    public void load(File file) {
        try {
            FileReader fileReader = new FileReader(file);
            parse(fileReader);
            fileReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e2) {
            e2.printStackTrace();
        }
    }

    public void setParser(String str) {
        this.parserValue = str;
    }

    public void parse(Reader reader) throws IOException, SAXException {
        InputSource inputSource = new InputSource(reader);
        XMLReader createXMLReader = XMLReaderFactory.createXMLReader(this.parserValue);
        createXMLReader.setContentHandler(this);
        createXMLReader.parse(inputSource);
    }

    public void parse(URL url) throws IOException, SAXException {
        InputSource inputSource = new InputSource(url.toExternalForm());
        InputStream openStream = url.openStream();
        try {
            inputSource.setByteStream(openStream);
            XMLReader createXMLReader = XMLReaderFactory.createXMLReader(this.parserValue);
            createXMLReader.setContentHandler(this);
            createXMLReader.parse(inputSource);
            openStream.close();
        } catch (Throwable th) {
            openStream.close();
            throw th;
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    @Override // org.xml.sax.ContentHandler
    public void startDocument() throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void endDocument() throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        this.tagContent = new StringBuffer();
        if (str2.equals(TAG_RUSNAMES)) {
            return;
        }
        if (str2.equals(TAG_NAME)) {
            this.lemma = new LemmaImpl();
            this.lemmas.add(this.lemma);
        } else if (!str2.equals(TAG_CAT) && str2.equals(TAG_FORM)) {
            this.lemma.getFeatureMap().put(RussIEConstants.FEATURE_OCCURANCE, new Integer(attributes.getValue(0)));
        }
    }

    @Override // org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) throws SAXException {
        if (str2.equals(TAG_CAT)) {
            try {
                String str4 = catVsMajorType.get(this.tagContent.toString().trim());
                if (this.listImportTypes.contains(str4)) {
                    this.lemma.getFeatureMap().put(RussIEConstants.MAJOR_TYPE, str4);
                }
            } catch (Exception e) {
                System.out.println("Unknown Category :" + ((Object) this.tagContent));
            }
        }
        if (str2.equals(TAG_PH)) {
            try {
                this.wordform = new String(this.tagContent.toString().getBytes(), "UTF-8");
                this.wordform = this.wordform.trim();
            } catch (UnsupportedEncodingException e2) {
            }
        }
        if (str2.equals(TAG_TAG)) {
            this.type = this.tagContent.toString().trim();
            this.lemma.addWordForm(this.wordform, this.type);
            if ((this.type.charAt(0) == 'N' && this.type.charAt(this.type.length() - 1) == 'n') || (this.type.charAt(0) == 'V' && this.type.charAt(this.type.length() - 1) == 'i')) {
                this.lemma.setMainForm(this.wordform, this.type);
            }
        }
        this.tagContent = new StringBuffer();
    }

    @Override // org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) throws SAXException {
        this.tagContent.append(cArr, i, i2);
    }

    @Override // org.xml.sax.ContentHandler
    public void startPrefixMapping(String str, String str2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void endPrefixMapping(String str) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i, int i2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void processingInstruction(String str, String str2) throws SAXException {
    }

    @Override // org.xml.sax.ContentHandler
    public void skippedEntity(String str) throws SAXException {
    }

    public List<Lemma> getLemmas() {
        return new ArrayList(this.lemmas);
    }

    static {
        catVsMajorType.put(CAT_LOCATION, RussIEConstants.MAJOR_TYPE_LOC);
        catVsMajorType.put(CAT_PERSON, RussIEConstants.MAJOR_TYPE_PERSON_FULL);
        catVsMajorType.put(CAT_PERSON_FAMILY, RussIEConstants.MAJOR_TYPE_PERSON_SURNAME);
        catVsMajorType.put(CAT_PERSON_FIRST, RussIEConstants.MAJOR_TYPE_PERSON_FIRST);
    }
}
