From: Sascha Silbe <sascha-pgp@silbe.org>
Subject: [PATCH] rebuild index on migration (#1787)
Currently, migration only forces an index rescan, not a rebuild, so
any entry that's already in the index will _not_ be updated.
This patch fixes the migration logic to do a full index rebuild,
thus actually migrating the index.
Signed-off-by: Sascha Silbe <sascha-pgp@silbe.org>
---
README | 8 ++++
src/carquinyol/datastore.py | 69 ++++++++++++++++++++++----------------
src/carquinyol/indexstore.py | 4 ++
src/carquinyol/layoutmanager.py | 2 +-
4 files changed, 53 insertions(+), 30 deletions(-)
diff --git a/README b/README
index ae5ec36..84da051 100644
a
|
b
|
Storage format history |
32 | 32 | |
33 | 33 | 3 not-mainstream |
34 | 34 | test versioning support |
| 35 | |
| 36 | 4 0.88, 0.86.2 |
| 37 | version bump to force index rebuild that may have been missed during the |
| 38 | migration to version 2 (SL#1787) |
| 39 | |
| 40 | 5 not-mainstream |
| 41 | test versioning support (version bump for SL#1787) |
| 42 | |
diff --git a/src/carquinyol/datastore.py b/src/carquinyol/datastore.py
index dc2e167..6824d90 100644
a
|
b
|
class DataStore(dbus.service.Object): |
56 | 56 | allow_replacement=False) |
57 | 57 | dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH) |
58 | 58 | |
59 | | layout_manager = layoutmanager.get_instance() |
60 | | if layout_manager.get_version() == 0: |
61 | | migration.migrate_from_0() |
62 | | layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION) |
63 | | layout_manager.index_updated = False |
64 | | elif layout_manager.get_version() == 1: |
65 | | layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION) |
66 | | layout_manager.index_updated = False |
| 59 | migrated = self._migrate() |
67 | 60 | |
68 | 61 | self._metadata_store = MetadataStore() |
69 | | |
| 62 | self._file_store = FileStore() |
| 63 | self._optimizer = Optimizer(self._file_store, self._metadata_store) |
70 | 64 | self._index_store = IndexStore() |
| 65 | |
| 66 | if migrated: |
| 67 | self._rebuild_index() |
| 68 | return |
| 69 | |
71 | 70 | try: |
72 | 71 | self._index_store.open_index() |
73 | 72 | except Exception: |
74 | 73 | logging.exception('Failed to open index, will rebuild') |
75 | | layout_manager.index_updated = False |
76 | | self._index_store.remove_index() |
77 | | self._index_store.open_index() |
78 | | |
79 | | self._file_store = FileStore() |
| 74 | self._rebuild_index() |
| 75 | return |
80 | 76 | |
81 | 77 | if not layout_manager.index_updated: |
82 | 78 | logging.debug('Index is not up-to-date, will update') |
83 | | self._rebuild_index() |
| 79 | self._update_index() |
84 | 80 | |
85 | | self._optimizer = Optimizer(self._file_store, self._metadata_store) |
| 81 | def _migrate(self): |
| 82 | """Check version of data store on disk and migrate if necessary. |
| 83 | |
| 84 | Returns True if migration was done and an index rebuild is required, |
| 85 | False otherwise. |
| 86 | """ |
| 87 | layout_manager = layoutmanager.get_instance() |
| 88 | old_version = layout_manager.get_version() |
| 89 | if old_version == layoutmanager.CURRENT_LAYOUT_VERSION: |
| 90 | return False |
| 91 | |
| 92 | if old_version == 0: |
| 93 | migration.migrate_from_0() |
| 94 | |
| 95 | layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION) |
| 96 | return True |
86 | 97 | |
87 | 98 | def _rebuild_index(self): |
| 99 | """Remove and recreate index.""" |
| 100 | layoutmanager.get_instance().index_updated = False |
| 101 | self._index_store.close_index() |
| 102 | self._index_store.remove_index() |
| 103 | self._index_store.open_index() |
| 104 | self._update_index() |
| 105 | |
| 106 | def _update_index(self): |
| 107 | """Find entries that are not yet in the index and add them.""" |
88 | 108 | uids = layoutmanager.get_instance().find_all() |
89 | | logging.debug('Going to update the index with uids %r', uids) |
90 | | gobject.idle_add(lambda: self.__rebuild_index_cb(uids), |
| 109 | logging.debug('Going to update the index with object_ids %r', |
| 110 | uids) |
| 111 | gobject.idle_add(lambda: self.__update_index_cb(uids), |
91 | 112 | priority=gobject.PRIORITY_LOW) |
92 | 113 | |
93 | | def __rebuild_index_cb(self, uids): |
| 114 | def __update_index_cb(self, uids): |
94 | 115 | if uids: |
95 | 116 | uid = uids.pop() |
96 | 117 | |
… |
… |
class DataStore(dbus.service.Object): |
200 | 221 | uids, count = self._index_store.find(query) |
201 | 222 | except Exception: |
202 | 223 | logging.exception('Failed to query index, will rebuild') |
203 | | layoutmanager.get_instance().index_updated = False |
204 | | self._index_store.close_index() |
205 | | self._index_store.remove_index() |
206 | | self._index_store.open_index() |
207 | 224 | self._rebuild_index() |
208 | 225 | |
209 | 226 | if not layoutmanager.get_instance().index_updated: |
… |
… |
class DataStore(dbus.service.Object): |
216 | 233 | if not os.path.exists(entry_path): |
217 | 234 | logging.warning( |
218 | 235 | 'Inconsistency detected, returning all entries') |
219 | | |
220 | | layoutmanager.get_instance().index_updated = False |
221 | | self._index_store.close_index() |
222 | | self._index_store.remove_index() |
223 | | self._index_store.open_index() |
224 | 236 | self._rebuild_index() |
225 | | |
226 | 237 | return self._find_all(query, properties) |
227 | 238 | |
228 | 239 | metadata = self._metadata_store.retrieve(uid, properties) |
diff --git a/src/carquinyol/indexstore.py b/src/carquinyol/indexstore.py
index 4dfd620..8a69334 100644
a
|
b
|
class IndexStore(object): |
220 | 220 | self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN) |
221 | 221 | |
222 | 222 | def close_index(self): |
| 223 | """Close index database if it is open.""" |
| 224 | if not self._database: |
| 225 | return |
| 226 | |
223 | 227 | self._database.flush() |
224 | 228 | self._database = None |
225 | 229 | |
diff --git a/src/carquinyol/layoutmanager.py b/src/carquinyol/layoutmanager.py
index 1f2dd4c..0b0b91a 100644
a
|
b
|
import os |
18 | 18 | import logging |
19 | 19 | |
20 | 20 | MAX_QUERY_LIMIT = 40960 |
21 | | CURRENT_LAYOUT_VERSION = 2 |
| 21 | CURRENT_LAYOUT_VERSION = 4 |
22 | 22 | |
23 | 23 | class LayoutManager(object): |
24 | 24 | """Provide the logic about how entries are stored inside the datastore |