public abstract class BasicDocMaker extends java.lang.Object implements DocMaker
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
BODY_FIELD |
static java.lang.String |
BYTES_FIELD |
protected Config |
config |
static java.lang.String |
DATE_FIELD |
protected boolean |
forever |
static java.lang.String |
ID_FIELD |
protected Field.Index |
indexVal |
static java.lang.String |
NAME_FIELD |
protected Field.Store |
storeVal |
protected Field.TermVector |
termVecVal |
static java.lang.String |
TITLE_FIELD |
Constructor and Description |
---|
BasicDocMaker() |
Modifier and Type | Method and Description |
---|---|
protected void |
addBytes(long n) |
protected void |
addUniqueBytes(long n) |
protected void |
collectFiles(java.io.File f,
java.util.ArrayList inputFiles) |
long |
getByteCount()
Return total byte size of docs made since last reset.
|
int |
getCount()
Return number of docs made since last reset.
|
HTMLParser |
getHtmlParser()
Returns the htmlParser.
|
protected abstract DocData |
getNextDocData()
Return the data of the next document.
|
Document |
makeDocument()
Create the next document.
|
Document |
makeDocument(int size)
Create the next document, of the given size by input bytes.
|
long |
numUniqueBytes()
Return total bytes of all available unique texts, or 0 if not applicable.
|
void |
printDocStatistics()
Print some statistics on docs available/added/etc.
|
void |
resetInputs()
Reset inputs so that the test run behaves, input-wise, as if it had just started.
|
protected void |
resetUniqueBytes() |
void |
setConfig(Config config)
Set the properties
|
void |
setHTMLParser(HTMLParser htmlParser)
Set the html parser to use, when appropriate
|
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface DocMaker: numUniqueTexts
protected boolean forever
public static final java.lang.String BODY_FIELD
public static final java.lang.String TITLE_FIELD
public static final java.lang.String DATE_FIELD
public static final java.lang.String ID_FIELD
public static final java.lang.String BYTES_FIELD
public static final java.lang.String NAME_FIELD
protected Config config
protected Field.Store storeVal
protected Field.Index indexVal
protected Field.TermVector termVecVal
protected abstract DocData getNextDocData() throws NoMoreDataException, java.lang.Exception
Throws:
java.lang.Exception - if the next doc data cannot be created.
NoMoreDataException - if data is exhausted (and 'forever' set to false).
public Document makeDocument() throws java.lang.Exception
DocMaker
makeDocument
in interface DocMaker
java.lang.Exception
public Document makeDocument(int size) throws java.lang.Exception
DocMaker
makeDocument
in interface DocMaker
Parameters:
size - size of document, or 0 if there is no size requirement.
Throws:
java.lang.Exception
public void setConfig(Config config)
DocMaker
public void resetInputs()
DocMaker
resetInputs
in interface DocMaker
public long numUniqueBytes()
DocMaker
numUniqueBytes
in interface DocMaker
public int getCount()
DocMaker
public long getByteCount()
DocMaker
getByteCount
in interface DocMaker
protected void addUniqueBytes(long n)
protected void resetUniqueBytes()
protected void addBytes(long n)
public void printDocStatistics()
DocMaker
printDocStatistics
in interface DocMaker
protected void collectFiles(java.io.File f, java.util.ArrayList inputFiles)
public void setHTMLParser(HTMLParser htmlParser)
DocMaker
setHTMLParser
in interface DocMaker
public HTMLParser getHtmlParser()
DocMaker
getHtmlParser
in interface DocMaker
Copyright © 2000-2014 Apache Software Foundation. All Rights Reserved.