public class TrecDocMaker extends BasicDocMaker
Config properties:
Modifier and Type | Field and Description |
---|---|
protected java.io.File |
dataDir |
protected java.lang.ThreadLocal |
dateFormat |
protected java.util.ArrayList |
inputFiles |
protected int |
iteration |
protected int |
nextFile |
protected java.io.BufferedReader |
reader |
BODY_FIELD, BYTES_FIELD, config, DATE_FIELD, forever, ID_FIELD, indexVal, NAME_FIELD, storeVal, termVecVal, TITLE_FIELD
Constructor and Description |
---|
TrecDocMaker() |
Modifier and Type | Method and Description |
---|---|
protected void |
closeInputs() |
protected java.text.DateFormat |
getDateFormat(int n) |
protected DocData |
getNextDocData()
Return the data of the next document.
|
int |
numUniqueTexts()
Return how many real unique texts are available, or 0 if not applicable.
|
protected void |
openNextFile() |
protected java.util.Date |
parseDate(java.lang.String dateStr) |
protected java.lang.StringBuffer |
read(java.lang.String prefix,
java.lang.StringBuffer sb,
boolean collectMatchLine,
boolean collectAll) |
void |
resetInputs()
Reset inputs so that the test run behaves, input-wise, as if it had just started.
|
void |
setConfig(Config config)
Set the properties
|
addBytes, addUniqueBytes, collectFiles, getByteCount, getCount, getHtmlParser, makeDocument, makeDocument, numUniqueBytes, printDocStatistics, resetUniqueBytes, setHTMLParser
protected java.lang.ThreadLocal dateFormat
protected java.io.File dataDir
protected java.util.ArrayList inputFiles
protected int nextFile
protected int iteration
protected java.io.BufferedReader reader
public void setConfig(Config config)
DocMaker
setConfig
in interface DocMaker
setConfig
in class BasicDocMaker
protected void openNextFile() throws NoMoreDataException, java.lang.Exception
NoMoreDataException
java.lang.Exception
protected void closeInputs()
protected java.lang.StringBuffer read(java.lang.String prefix, java.lang.StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws java.lang.Exception
java.lang.Exception
protected DocData getNextDocData() throws NoMoreDataException, java.lang.Exception
BasicDocMaker
getNextDocData
in class BasicDocMaker
NoMoreDataException
- if data is exhausted (and 'forever' is set to false).
java.lang.Exception
protected java.text.DateFormat getDateFormat(int n)
protected java.util.Date parseDate(java.lang.String dateStr)
public void resetInputs()
DocMaker
resetInputs
in interface DocMaker
resetInputs
in class BasicDocMaker
public int numUniqueTexts()
DocMaker
Copyright © 2000-2014 Apache Software Foundation. All Rights Reserved.