org.cdlib.xtf.textEngine.freeform
Class FreeformQueryParser

Object
  extended by FreeformQueryParser
All Implemented Interfaces:
FreeformQueryParserConstants

public class FreeformQueryParser
extends Object
implements FreeformQueryParserConstants

A grammar-based parser for "freeform queries", constructed with JavaCC. Designed to parse a query language much like that supported by "gaggle", a little query language used at CDL, which is in turn designed to act much like Google.

Uses a tokenizer that should be good for most European-language queries.


Nested Class Summary
 class FreeformQueryParser.FNode
          The result of a parse.
(package private) static class FreeformQueryParser.JJCalls
           
private static class FreeformQueryParser.LookaheadSuccess
           
 
Field Summary
private  FreeformQueryParser.JJCalls[] jj_2_rtns
           
private  int jj_endpos
           
private  Vector jj_expentries
           
private  int[] jj_expentry
           
private  int jj_gc
           
private  int jj_gen
           
(package private)  SimpleCharStream jj_input_stream
           
private  int jj_kind
           
private  int jj_la
           
private  int[] jj_la1
           
private static int[] jj_la1_0
           
private  Token jj_lastpos
           
private  int[] jj_lasttokens
           
private  FreeformQueryParser.LookaheadSuccess jj_ls
           
 Token jj_nt
           
private  int jj_ntk
           
private  boolean jj_rescan
           
private  Token jj_scanpos
           
private  boolean jj_semLA
           
 boolean lookingAhead
           
 Token token
           
 FreeformQueryParserTokenManager token_source
           
 
Fields inherited from interface FreeformQueryParserConstants
ACRONYM, ALPHA, ALPHANUM, AND, APOSTROPHE, BASIC, CJK, CLOSE_PAREN, COLON, COMPANY, DEFAULT, DIGIT, EMAIL, EOF, HAS_DIGIT, HOST, LETTER, NOISE, NOT, NUM, OPEN_PAREN, OR, P, PLUS, QUOTE, SYMBOL, tokenImage
 
Constructor Summary
FreeformQueryParser(FreeformQueryParserTokenManager tm)
           
FreeformQueryParser(InputStream stream)
           
FreeformQueryParser(InputStream stream, String encoding)
           
FreeformQueryParser(Reader stream)
           
 
Method Summary
 FreeformQueryParser.FNode ANDs()
          A sequence of terms (optionally separated by "AND" or "&") is AND-ed together.
 FreeformQueryParser.FNode Component()
          A component of a query is a phrase, term, parenthesized sequence, or a "not" clause.
 void disable_tracing()
           
 void enable_tracing()
           
 ParseException generateParseException()
           
 Token getNextToken()
           
 Token getToken(int index)
           
private  boolean jj_2_1(int xla)
           
private  boolean jj_3_1()
           
private  boolean jj_3R_7()
           
private  void jj_add_error_token(int kind, int pos)
           
private  Token jj_consume_token(int kind)
           
private static void jj_la1_0()
           
private  int jj_ntk()
           
private  void jj_rescan_token()
           
private  void jj_save(int index, int xla)
           
private  boolean jj_scan_token(int kind)
           
static void main(String[] args)
          Simple command-line test driver.
 FreeformQueryParser.FNode Not()
          You can stick "not" in front of something to negate it.
 FreeformQueryParser.FNode ORs()
          A sequence of components, separated by "OR" or "|".
 FreeformQueryParser.FNode ParenSeq()
          We allow parenthesized sub-expressions for grouping.
 FreeformQueryParser.FNode Phrase()
          A phrase is a quoted string of terms (but we also take care not to barf on reserved words).
 FreeformQueryParser.FNode Query()
          The entire query, which consists of a single sub-query.
 void ReInit(FreeformQueryParserTokenManager tm)
           
 void ReInit(InputStream stream)
           
 void ReInit(InputStream stream, String encoding)
           
 void ReInit(Reader stream)
           
 FreeformQueryParser.FNode SubQuery()
          A single sub-query (can be contained in a parenthesized expression).
 FreeformQueryParser.FNode Term()
          In general a term is just a single word.
 
Methods inherited from class Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

token_source

public FreeformQueryParserTokenManager token_source

jj_input_stream

SimpleCharStream jj_input_stream

token

public Token token

jj_nt

public Token jj_nt

jj_ntk

private int jj_ntk

jj_scanpos

private Token jj_scanpos

jj_lastpos

private Token jj_lastpos

jj_la

private int jj_la

lookingAhead

public boolean lookingAhead

jj_semLA

private boolean jj_semLA

jj_gen

private int jj_gen

jj_la1

private final int[] jj_la1

jj_la1_0

private static int[] jj_la1_0

jj_2_rtns

private final FreeformQueryParser.JJCalls[] jj_2_rtns

jj_rescan

private boolean jj_rescan

jj_gc

private int jj_gc

jj_ls

private final FreeformQueryParser.LookaheadSuccess jj_ls

jj_expentries

private Vector jj_expentries

jj_expentry

private int[] jj_expentry

jj_kind

private int jj_kind

jj_lasttokens

private int[] jj_lasttokens

jj_endpos

private int jj_endpos
Constructor Detail

FreeformQueryParser

public FreeformQueryParser(InputStream stream)

FreeformQueryParser

public FreeformQueryParser(InputStream stream,
                           String encoding)

FreeformQueryParser

public FreeformQueryParser(Reader stream)

FreeformQueryParser

public FreeformQueryParser(FreeformQueryParserTokenManager tm)
Method Detail

main

public static void main(String[] args)
                 throws IOException
Simple command-line test driver.

Throws:
IOException

Term

public final FreeformQueryParser.FNode Term()
                                     throws ParseException
In general a term is just a single word. But it can also be an email address, symbol, number, etc.

Throws:
ParseException

Phrase

public final FreeformQueryParser.FNode Phrase()
                                       throws ParseException
A phrase is a quoted string of terms (but we also take care not to barf on reserved words).

Throws:
ParseException

Not

public final FreeformQueryParser.FNode Not()
                                    throws ParseException
You can stick "not" in front of something to negate it. There is post-processing in the Query() production (at the end) to guarantee that each NOT is actually part of an AND-NOT.

Throws:
ParseException

ParenSeq

public final FreeformQueryParser.FNode ParenSeq()
                                         throws ParseException
We allow parenthesized sub-expressions for grouping.

Throws:
ParseException

Component

public final FreeformQueryParser.FNode Component()
                                          throws ParseException
A component of a query is a phrase, term, parenthesized sequence, or a "not" clause. It can be preceded by an optional field specification.

Throws:
ParseException

ORs

public final FreeformQueryParser.FNode ORs()
                                    throws ParseException
A sequence of components, separated by "OR" or "|".

Throws:
ParseException

ANDs

public final FreeformQueryParser.FNode ANDs()
                                     throws ParseException
A sequence of terms (optionally separated by "AND" or "&") is AND-ed together. As in Google, "AND" binds more loosely than "OR", so that A AND B OR C should be grouped like this: A AND (B OR C).

Throws:
ParseException

SubQuery

public final FreeformQueryParser.FNode SubQuery()
                                         throws ParseException
A single sub-query (can be contained in a parenthesized expression).

Throws:
ParseException

Query

public final FreeformQueryParser.FNode Query()
                                      throws ParseException
The entire query, which consists of a single sub-query. We apply additional processing to ensure proper structure.

Throws:
ParseException

jj_2_1

private final boolean jj_2_1(int xla)

jj_3R_7

private final boolean jj_3R_7()

jj_3_1

private final boolean jj_3_1()

jj_la1_0

private static void jj_la1_0()

ReInit

public void ReInit(InputStream stream)

ReInit

public void ReInit(InputStream stream,
                   String encoding)

ReInit

public void ReInit(Reader stream)

ReInit

public void ReInit(FreeformQueryParserTokenManager tm)

jj_consume_token

private final Token jj_consume_token(int kind)
                              throws ParseException
Throws:
ParseException

jj_scan_token

private final boolean jj_scan_token(int kind)

getNextToken

public final Token getNextToken()

getToken

public final Token getToken(int index)

jj_ntk

private final int jj_ntk()

jj_add_error_token

private void jj_add_error_token(int kind,
                                int pos)

generateParseException

public ParseException generateParseException()

enable_tracing

public final void enable_tracing()

disable_tracing

public final void disable_tracing()

jj_rescan_token

private final void jj_rescan_token()

jj_save

private final void jj_save(int index,
                           int xla)