package edu.umd.cloud9.collection.aquaint2;

import edu.umd.cloud9.collection.Indexable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import org.apache.hadoop.io.WritableUtils;

/* loaded from: input_file:edu/umd/cloud9/collection/aquaint2/Aquaint2Document.class */
/**
 * A single document stored as its raw XML text (the span from
 * {@link #XML_START_TAG} to {@link #XML_END_TAG}). The document id and the
 * tag-stripped content are derived lazily from the raw text and cached until
 * the next call to {@link #readDocument(Aquaint2Document, String)}.
 *
 * <p>Serialization writes the raw text as a VInt byte count followed by the
 * UTF-8 encoded bytes.
 */
public class Aquaint2Document extends Indexable {
    /** Opening tag prefix of a document record. */
    public static final String XML_START_TAG = "<DOC ";
    /** Closing tag of a document record. */
    public static final String XML_END_TAG = "</DOC>";

    /**
     * Index in the raw text at which the docid value starts.
     * NOTE(review): presumably the length of {@code <DOC id="} -- confirm
     * against the collection format.
     */
    private static final int DOCID_START = 9;

    /** Matches any XML/SGML tag; used to strip markup from the content. */
    private static final Pattern sTags = Pattern.compile("<[^>]+>");

    /** Raw XML of the document, as passed to {@code readDocument}. */
    private String mRawDoc;
    /** Cached docid; {@code null} until first requested. */
    private String mDocid;
    /** Cached tag-stripped content; {@code null} until first requested. */
    private String mText;

    /**
     * Serializes the raw document as a VInt length followed by its bytes.
     *
     * @param dataOutput sink to write to
     * @throws IOException if writing to {@code dataOutput} fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        // Explicit charset: the platform default may differ between the JVM
        // that writes a record and the JVM that reads it back.
        byte[] bytes = this.mRawDoc.getBytes(StandardCharsets.UTF_8);
        WritableUtils.writeVInt(dataOutput, bytes.length);
        dataOutput.write(bytes, 0, bytes.length);
    }

    /**
     * Reads a document previously written by {@link #write(DataOutput)} and
     * resets the cached docid and content.
     *
     * @param dataInput source to read from
     * @throws IOException if reading from {@code dataInput} fails
     */
    public void readFields(DataInput dataInput) throws IOException {
        int length = WritableUtils.readVInt(dataInput);
        byte[] bytes = new byte[length];
        dataInput.readFully(bytes, 0, length);
        // Must mirror the charset used in write().
        readDocument(this, new String(bytes, StandardCharsets.UTF_8));
    }

    /**
     * Returns the document id: the trimmed text between offset
     * {@code DOCID_START} of the raw document and the next double quote.
     * The value is computed on first call and cached.
     *
     * @return the docid
     * @throws StringIndexOutOfBoundsException if the raw document is shorter
     *         than expected or has no double quote at or after that offset
     */
    @Override // edu.umd.cloud9.collection.Indexable
    public String getDocid() {
        if (this.mDocid == null) {
            int closingQuote = this.mRawDoc.indexOf("\"", DOCID_START);
            this.mDocid = this.mRawDoc.substring(DOCID_START, closingQuote).trim();
        }
        return this.mDocid;
    }

    /**
     * Returns the document content: everything after the first {@code >} of
     * the raw document, with the trailing {@link #XML_END_TAG} dropped when
     * present, trimmed, and with all remaining tags removed. The value is
     * computed on first call and cached.
     *
     * @return the tag-stripped content, or the empty string when the raw
     *         document has no {@code >} or no content span
     */
    @Override // edu.umd.cloud9.collection.Indexable
    public String getContent() {
        if (this.mText == null) {
            int start = this.mRawDoc.indexOf(">") + 1;
            // Only cut the closing tag off when it is really there; otherwise
            // the last characters of the content itself would be lost.
            int end = this.mRawDoc.endsWith(XML_END_TAG)
                    ? this.mRawDoc.length() - XML_END_TAG.length()
                    : this.mRawDoc.length();
            if (start == 0 || end < start) {
                // No '>' at all, or the only '>' belongs to the closing tag.
                this.mText = "";
            } else {
                String body = this.mRawDoc.substring(start, end).trim();
                this.mText = sTags.matcher(body).replaceAll("");
            }
        }
        return this.mText;
    }

    /**
     * Loads {@code str} as the raw text of {@code aquaint2Document} and clears
     * its cached docid and content.
     *
     * @param aquaint2Document document object to populate
     * @param str raw XML of the document; must not be {@code null}
     * @throws RuntimeException if {@code str} is {@code null}
     */
    public static void readDocument(Aquaint2Document aquaint2Document, String str) {
        if (str == null) {
            throw new RuntimeException("Error, can't read null string!");
        }
        aquaint2Document.mRawDoc = str;
        aquaint2Document.mDocid = null;
        aquaint2Document.mText = null;
    }
}
