package edu.umd.cloud9.collection.trec;

import com.google.common.base.Preconditions;
import edu.umd.cloud9.collection.WebDocument;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.WritableUtils;

/* loaded from: input_file:edu/umd/cloud9/collection/trec/TrecDocument.class */
public class TrecDocument extends WebDocument {
    public static final String XML_START_TAG = "<DOC>";
    public static final String XML_END_TAG = "</DOC>";
    private String doc;
    private String docid;

    public void write(DataOutput dataOutput) throws IOException {
        byte[] bytes = this.doc.getBytes();
        WritableUtils.writeVInt(dataOutput, bytes.length);
        dataOutput.write(bytes, 0, bytes.length);
    }

    public void readFields(DataInput dataInput) throws IOException {
        int readVInt = WritableUtils.readVInt(dataInput);
        byte[] bArr = new byte[readVInt];
        dataInput.readFully(bArr, 0, readVInt);
        readDocument(this, new String(bArr));
    }

    @Override // edu.umd.cloud9.collection.Indexable
    public String getDocid() {
        if (this.docid == null) {
            int indexOf = this.doc.indexOf("<DOCNO>");
            if (indexOf == -1) {
                this.docid = "";
            } else {
                this.docid = this.doc.substring(indexOf + 7, this.doc.indexOf("</DOCNO>", indexOf)).trim();
            }
        }
        return this.docid;
    }

    @Override // edu.umd.cloud9.collection.Indexable
    public String getContent() {
        return this.doc;
    }

    public static void readDocument(TrecDocument trecDocument, String str) {
        Preconditions.checkNotNull(str);
        Preconditions.checkNotNull(trecDocument);
        trecDocument.doc = str;
        trecDocument.docid = null;
    }

    @Override // edu.umd.cloud9.collection.WebDocument
    public String getURL() {
        return null;
    }
}
