# Metadata names that are indexed and queried as prefixed terms, mapped
# to the term prefix used for each of them. TermGenerator indexes these
# properties under their prefix and QueryParser registers every name as
# a query-string field.
_QUERY_TERM_MAP = {
    'uid': _PREFIX_UID,
    'activity': _PREFIX_ACTIVITY,
    'activity_id': _PREFIX_ACTIVITY_ID,
    'mime_type': _PREFIX_MIME_TYPE,
    'keep': _PREFIX_KEEP,
}

# Metadata names that are queried through a document value slot (exact
# value or range), mapped to the slot that holds the value.
_QUERY_VALUE_MAP = {
    'timestamp': _VALUE_TIMESTAMP,
}
| 61 | |
| 62 | |
class TermGenerator (xapian.TermGenerator):
    """Term generator that turns a metadata dictionary into index terms."""

    def index_document(self, document, properties):
        """Add the values and terms describing properties to document.

        The timestamp and the stripped title are stored in the
        _VALUE_TIMESTAMP and _VALUE_TITLE value slots; afterwards every
        property is indexed as text. properties must contain a
        'timestamp' entry (KeyError otherwise) and is left unmodified.
        """
        timestamp = str(properties['timestamp'])
        document.add_value(_VALUE_TIMESTAMP, timestamp)
        title = properties.get('title', '').strip()
        document.add_value(_VALUE_TITLE, title)

        xapian.TermGenerator.set_document(self, document)

        # _index_known() removes what it handles, so hand it a private
        # copy and let _index_unknown() process whatever is left over.
        remaining = dict(properties)
        self._index_known(document, remaining)
        self._index_unknown(document, remaining)

    def _index_known(self, document, properties):
        """Index the properties listed in _QUERY_TERM_MAP under their prefix.

        Every property handled here is removed from properties.
        """
        for name, prefix in _QUERY_TERM_MAP.items():
            if name in properties:
                value = properties.pop(name)
                self._index_property(document, name, value, prefix)

    def _index_unknown(self, document, properties):
        """Index all entries of properties without a dedicated prefix."""
        for name in properties:
            self._index_property(document, name, properties[name])

    def _index_property(self, doc, name, value, prefix=''):
        """Index a single property value, optionally under prefix.

        Properties named in _PROPERTIES_NOT_TO_INDEX and properties with
        a false value are skipped.
        """
        indexable = name not in _PROPERTIES_NOT_TO_INDEX and value
        if not indexable:
            return

        # Reduce the value to a byte string: unicode is encoded as UTF-8,
        # other strings are used unchanged, anything else goes via str().
        if isinstance(value, unicode):
            text = value.encode('utf-8')
        elif isinstance(value, basestring):
            text = value
        else:
            text = str(value)

        # The complete value (i.e. not split into words) is added as a
        # term of its own so that unique values can be enumerated. It
        # also keeps dictionary-based queries simple.
        if prefix:
            doc.add_term(_PREFIX_FULL_VALUE + prefix + text)

        # Index the individual words, then advance the term position
        # before the next property gets indexed.
        self.index_text(text, 1, prefix or _PREFIX_NONE)
        self.increase_termpos()
| 103 | |
| 104 | |
class QueryParser (xapian.QueryParser):
    """QueryParser that understands dictionaries and Xapian query strings.

    The dictionary contains metadata names as keys and either basic types
    (exact match), 2-tuples (range, only valid for value-stored metadata)
    or a list (multiple exact matches joined with OR) as values.
    An empty dictionary matches everything. Queries from different keys
    (i.e. different metadata names) are joined with AND.
    """

    def __init__(self):
        """Register every name in _QUERY_TERM_MAP as a query-string field.

        Each prefix is additionally attached to the empty field name, so
        words given without a field specifier are looked up under all
        known prefixes as well as under _PREFIX_NONE.
        """
        xapian.QueryParser.__init__(self)

        for name, prefix in _QUERY_TERM_MAP.items():
            self.add_prefix(name, prefix)
            self.add_prefix('', prefix)

        self.add_prefix('', _PREFIX_NONE)

    @staticmethod
    def _encode_value(value):
        """Return value as a byte string usable inside a term or query.

        unicode objects are encoded as UTF-8 (a plain str() would raise
        UnicodeEncodeError for non-ASCII text), other strings are passed
        through and everything else is converted with str(). This is the
        same conversion TermGenerator._index_property applies when the
        terms are generated.
        """
        if isinstance(value, unicode):
            return value.encode('utf-8')
        if isinstance(value, basestring):
            return value
        return str(value)

    def _parse_query_term(self, name, prefix, value):
        """Return the Query matching value for a term-indexed property.

        A list is turned into the OR of the queries for its elements.
        Any other value is matched exactly: against the full-value term
        if prefix is set, against an unprefixed term otherwise.
        """
        if isinstance(value, list):
            subqueries = [self._parse_query_term(name, prefix, word)
                          for word in value]
            return Query(Query.OP_OR, subqueries)

        if prefix:
            return Query(_PREFIX_FULL_VALUE + prefix +
                         self._encode_value(value))

        return Query(_PREFIX_NONE + self._encode_value(value))

    def _parse_query_value_range(self, name, value, value_no):
        """Return a value range Query on slot value_no.

        value must be a (start, end) pair; any other length raises
        TypeError.
        """
        if len(value) != 2:
            raise TypeError(
                'Only tuples of size 2 have a defined meaning. '
                'Did you mean to pass a list instead?')

        start, end = value
        return Query(Query.OP_VALUE_RANGE, value_no, str(start), str(end))

    def _parse_query_value(self, name, value_no, value):
        """Return the Query matching value for a value-stored property.

        Accepted forms of value:
          list  -- OR of the queries for the individual elements
          tuple -- (start, end) range
          dict  -- {'start': ..., 'end': ...} range, both keys optional
          other -- exact match
        """
        if isinstance(value, list):
            subqueries = [self._parse_query_value(name, value_no, word)
                          for word in value]
            return Query(Query.OP_OR, subqueries)

        if isinstance(value, tuple):
            return self._parse_query_value_range(name, value, value_no)

        if isinstance(value, dict):
            # compatibility option for timestamp: {'start': 0, 'end': 1}
            start = value.get('start', 0)
            end = value.get('end', sys.maxint)
            return self._parse_query_value_range(name, (start, end),
                                                 value_no)

        # An exact match is a range whose bounds coincide. Use the slot
        # we were handed instead of looking it up by name a second time.
        return self._parse_query_value_range(name, (value, value), value_no)

    def _parse_query_xapian(self, query_str):
        """Parse a free-form Xapian query string.

        Phrases, boolean operators, +/- (love/hate) and wildcards are
        enabled. An invalid query string is logged and results in an
        empty Query() instead of an exception.
        """
        flags = (QueryParser.FLAG_PHRASE |
                 QueryParser.FLAG_BOOLEAN |
                 QueryParser.FLAG_LOVEHATE |
                 QueryParser.FLAG_WILDCARD)
        try:
            return xapian.QueryParser.parse_query(self, query_str, flags, '')

        except xapian.QueryParserError as exception:
            logging.warning('Invalid query string: %s', exception.get_msg())
            return Query()

    def parse_query(self, query_dict, query_string):
        """Combine a metadata dictionary and a query string into a Query.

        query_dict   -- metadata names mapped to the values to match
                        (see the class docstring); not modified
        query_string -- Xapian query string; ignored if empty or None

        Names found in neither _QUERY_TERM_MAP nor _QUERY_VALUE_MAP are
        logged and otherwise ignored. All partial queries are joined
        with AND.
        """
        logging.debug('parse_query %r %r', query_dict, query_string)
        queries = []

        if query_string:
            queries.append(
                self._parse_query_xapian(self._encode_value(query_string)))

        for name, value in dict(query_dict).items():
            if name in _QUERY_TERM_MAP:
                queries.append(self._parse_query_term(
                    name, _QUERY_TERM_MAP[name], value))
            elif name in _QUERY_VALUE_MAP:
                queries.append(self._parse_query_value(
                    name, _QUERY_VALUE_MAP[name], value))
            else:
                # This is the only place unknown names are reported; a
                # blanket check of query_dict afterwards would also warn
                # about every name that was handled above.
                logging.warning('Unknown term: %r=%r', name, value)

        if not queries:
            # Nothing to restrict the search by: match everything.
            queries.append(Query(''))

        logging.debug('queries: %r', [str(q) for q in queries])
        return Query(Query.OP_AND, queries)
| 203 | |
def _parse_query(self, query_dict):
    """Build a Xapian Query from a dictionary of search criteria.

    Recognised keys of query_dict:
      query       -- free-form Xapian query string
      timestamp   -- dict with optional 'start' and 'end' bounds
      uid, activity, activity_id -- string matched as an exact term
      keep        -- value matched as an exact term after str()
      mime_type   -- list of MIME types joined with OR (a single string
                     is treated as a list with one element)

    Any other key is reported with a warning and ignored. The partial
    queries are joined with AND; without any criteria the returned query
    is Query(''). Neither query_dict nor a nested timestamp dictionary is
    modified.
    """
    import sys  # local import: only needed for the open-ended bound below

    logging.debug('_parse_query %r', query_dict)
    queries = []

    # Recognised keys are popped from a private copy so that whatever is
    # left at the end can be reported without touching the caller's dict.
    remaining = dict(query_dict)

    query_str = remaining.pop('query', None)
    if query_str is not None:
        # NOTE(review): the three-argument parse_query() call below uses
        # the (query_string, flags, default_prefix) signature of the stock
        # xapian.QueryParser -- confirm which class QueryParser names here.
        query_parser = QueryParser()
        query_parser.set_database(self._database)
        #query_parser.set_default_op(Query.OP_AND)

        # TODO: we should do stemming, but in which language?
        #query_parser.set_stemmer(_xapian.Stem(lang))
        #query_parser.set_stemming_strategy(qp.STEM_SOME)

        flags = (QueryParser.FLAG_PHRASE |
                 QueryParser.FLAG_BOOLEAN |
                 QueryParser.FLAG_LOVEHATE |
                 QueryParser.FLAG_WILDCARD)
        queries.append(query_parser.parse_query(query_str, flags, ''))

    timestamp = remaining.pop('timestamp', None)
    if timestamp is not None:
        # A missing 'end' means "no upper bound"; use the largest plain
        # integer for that rather than the unrelated result-count limit.
        start = str(timestamp.get('start', 0))
        end = str(timestamp.get('end', sys.maxint))
        queries.append(
            Query(Query.OP_VALUE_RANGE, _VALUE_TIMESTAMP, start, end))

    # Properties whose (string) value is matched as one prefixed term.
    for key, prefix in (('uid', _PREFIX_UID),
                        ('activity', _PREFIX_ACTIVITY),
                        ('activity_id', _PREFIX_ACTIVITY_ID)):
        value = remaining.pop(key, None)
        if value is not None:
            queries.append(Query(prefix + value))

    keep = remaining.pop('keep', None)
    if keep is not None:
        queries.append(Query(_PREFIX_KEEP + str(keep)))

    mime_types = remaining.pop('mime_type', None)
    if mime_types is not None:
        if isinstance(mime_types, basestring):
            # A bare string would otherwise be iterated character by
            # character.
            mime_types = [mime_types]
        mime_queries = [Query(_PREFIX_MIME_TYPE + mime_type)
                        for mime_type in mime_types]
        queries.append(Query(Query.OP_OR, mime_queries))

    if not queries:
        queries.append(Query(''))

    if remaining:
        logging.warning('Unknown term(s): %r', remaining)

    return Query(Query.OP_AND, queries)
228 | | |