Querychapter
querychapter
¶
QUERYCHAPTER
¶
Source code in modules/querychapter.py
class QUERYCHAPTER:
def __init__(self):
pass
def makeQCindexes(self):
VERSIONS = current.VERSIONS
for vr in (
("2017", "2021") if current.SITUATION in {"local", "test"} else VERSIONS
):
self.makeQCindex(vr)
def makeQCindex(self, vr):
"""We build an index of queries by chapter in which they have results.
The index building takes may take multiple seconds per data version.
But the result is stored in the cache.
We need to prevent that index-making is triggered multiple times by
several requests that are fired while index-making is in progress.
So we put an indicator value in the cache during index buidling,
and we let everybody else wait until the index has been completed.
We do this per version.
To that end we put an empty dictionary in the cache.
We only start computing the index key "busy" has a falsy value.
Before computing the index, we set the value to 1.
After computing the index, we set the value to 2.
When the index is needed and the busy status is 1, we wait for it
to become 2 before we continue.
When the index is needed and busy is 2, the index has been
built and we have to do nothing.
Note that we do not need anything of the value of the index here,
we only need it to be built, so that other parts of the app can get it
quickly.
We cannot allow that other parts of the app see an unfinished index,
because it will lead to served content that is not yet correct.
Because this content will be cached, it will take a long time (or forever)
until it gets recomputed.
"""
Caching = current.Caching
busyStatus = Caching.get(f"busyIndex_{vr}_", lambda: {}, ALWAYS)
busy = busyStatus.get("busy", 0)
if busy == 0:
busyStatus["busy"] = 1
Caching.get(f"qcindex_{vr}_", lambda: self.makeQCindex_c(vr), ALWAYS)
busyStatus["busy"] = 2
elif busy == 1:
n = 0
while (
Caching.get(f"busyIndex_{vr}_", lambda: {}, ALWAYS).get("busy", 0) == 1
):
time.sleep(1)
n += 1
log(f"o-o-o waiting for chapter-query index {vr} ({n} sec) ...")
def makeQCindex_c(self, vr):
Caching = current.Caching
db = current.db
PASSAGE_DBS = current.PASSAGE_DBS
log(f"o-o-o making chapter-query index for version {vr} ...")
startTime = time.time()
pubStatus = Caching.get(
f"pubStatus_{vr}_",
lambda: {},
ALWAYS,
)
slotsFromChapter = Caching.get(
f"slotsFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
chapterFromSlot = Caching.get(
f"chapterFromSlot_{vr}_",
lambda: {},
ALWAYS,
)
queriesFromChapter = Caching.get(
f"queriesFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
chaptersFromQuery = Caching.get(
f"chaptersFromQuery_{vr}_",
lambda: {},
ALWAYS,
)
chapterSQL = dedent(
"""
select id, first_m, last_m from chapter
;
"""
)
chapterList = PASSAGE_DBS[vr].executesql(chapterSQL)
for (chapter_id, first_m, last_m) in chapterList:
for m in range(first_m, last_m + 1):
chapterFromSlot[m] = chapter_id
slotsFromChapter[chapter_id] = last_m
resultSQL1 = dedent(
f"""
select
query_exe.id, query_exe.is_published, query.id
from query
inner join query_exe on
query.id = query_exe.query_id
where
query_exe.version = '{vr}'
and
query_exe.executed_on >= query_exe.modified_on
and
query.is_shared = 'T'
;
"""
)
queryTime = time.time()
results1 = db.executesql(resultSQL1)
queryTime = time.time() - queryTime
log(f" found {len(results1)} shared queries in {delta(queryTime)}")
queryFromExe = {}
for (query_exe_id, is_published, query_id) in results1:
queryFromExe[query_exe_id] = query_id
pubStatus.setdefault(query_id, {})[vr] = is_published == "T"
if queryFromExe:
doQueryIndex(db, vr, queryFromExe)
exe = time.time() - startTime
log(f"o-o-o made chapter-query index for data {vr} in {delta(exe)}")
return (queriesFromChapter, chaptersFromQuery)
def updatePubStatus(self, vr, query_id, is_published):
Caching = current.Caching
log(f"o-o-o updating pubStatus for query {query_id} in version {vr} ...")
pubStatus = Caching.get(
f"pubStatus_{vr}_",
lambda: {},
ALWAYS,
)
pubStatus.setdefault(query_id, {})[vr] = is_published
log(f"o-o-o updating pubStatus for query {query_id} in version {vr} done")
def updateQCindex(self, vr, query_id, uptodate=True):
"""Update the chapter-query index if a query has been run or rerun.
We want an up to date mapping from chapters to the shared, up-to-date
queries with results in those chapters.
We call this function when:
* a query has run and a niew set of slots has been stored.
* the sharing status of a query changes
We do not call this function when:
* the published state of a query has changed (see updatePubStatus)
* when a query body is edited but not run (see below).
In those cases the following will be taken care of:
First delete the query from the index.
If the query is shared and up to date, we add it back to the index
based on its results.
However, this function might be called at a time that the results
of the query have been stored in the database, before the metadata
has arrived there.
In that case we can not test on the uptodateness, so we assume that the
caller has passed uptodate=True.
Indeed, when the sharing status has changed, we are able to perform
this test, and then there is no need to pass uptodate=True.
What if a query body is edited but not run? It will then become outdated,
and should be removed from the index.
But that is a rather costly operation, and it is likely that a query is edited
many times before it is run again.
What we do instead is, that when we fetch queries for the sidebar of a chapter,
we skip the ones that are outdated.
"""
Caching = current.Caching
db = current.db
log((f"o-o-o updating chapter-query index for data {vr}"))
startTime = time.time()
chaptersFromQuery = Caching.get(
f"chaptersFromQuery_{vr}_",
lambda: {},
ALWAYS,
)
queriesFromChapter = Caching.get(
f"queriesFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
# remove query_id from both indexes: chaptersFromQuery and queriesFromChapter
if query_id in chaptersFromQuery:
theseChapters = chaptersFromQuery[query_id]
for chapter_id in theseChapters:
if chapter_id in queriesFromChapter:
if query_id in queriesFromChapter[chapter_id]:
del queriesFromChapter[chapter_id][query_id]
if not queriesFromChapter[chapter_id]:
del queriesFromChapter[chapter_id]
del chaptersFromQuery[query_id]
# add query_id again to both indexes (but now with updated results)
uptodateSQL = (
""
if uptodate
else dedent(
"""
and
query_exe.executed_on >= query_exe.modified_on
"""
)
)
resultSQL1 = dedent(
f"""
select
query_exe.id, query.id
from query
inner join query_exe on
query.id = query_exe.query_id
where
query.id = {query_id}
and
query_exe.version = '{vr}'
and
query.is_shared = 'T'
{uptodateSQL}
;
"""
)
queryTime = time.time()
results1 = db.executesql(resultSQL1)
queryTime = time.time() - queryTime
log(f" found {len(results1)} shared queries in {delta(queryTime)}")
queryFromExe = {}
for (query_exe_id, query_id) in results1:
queryFromExe[query_exe_id] = query_id
if queryFromExe:
doQueryIndex(db, vr, queryFromExe)
exe = time.time() - startTime
log(f"o-o-o updated chapter-query index for data {vr} in {delta(exe)}")
__init__(self)
special
¶
Source code in modules/querychapter.py
def __init__(self):
pass
makeQCindexes(self)
¶
Source code in modules/querychapter.py
def makeQCindexes(self):
VERSIONS = current.VERSIONS
for vr in (
("2017", "2021") if current.SITUATION in {"local", "test"} else VERSIONS
):
self.makeQCindex(vr)
makeQCindex(self, vr)
¶
We build an index of queries by chapter in which they have results.
The index building takes may take multiple seconds per data version. But the result is stored in the cache.
We need to prevent that index-making is triggered multiple times by several requests that are fired while index-making is in progress.
So we put an indicator value in the cache during index buidling, and we let everybody else wait until the index has been completed.
We do this per version.
To that end we put an empty dictionary in the cache. We only start computing the index key "busy" has a falsy value. Before computing the index, we set the value to 1. After computing the index, we set the value to 2.
When the index is needed and the busy status is 1, we wait for it to become 2 before we continue.
When the index is needed and busy is 2, the index has been built and we have to do nothing.
Note that we do not need anything of the value of the index here, we only need it to be built, so that other parts of the app can get it quickly.
We cannot allow that other parts of the app see an unfinished index, because it will lead to served content that is not yet correct. Because this content will be cached, it will take a long time (or forever) until it gets recomputed.
Source code in modules/querychapter.py
def makeQCindex(self, vr):
"""We build an index of queries by chapter in which they have results.
The index building takes may take multiple seconds per data version.
But the result is stored in the cache.
We need to prevent that index-making is triggered multiple times by
several requests that are fired while index-making is in progress.
So we put an indicator value in the cache during index buidling,
and we let everybody else wait until the index has been completed.
We do this per version.
To that end we put an empty dictionary in the cache.
We only start computing the index key "busy" has a falsy value.
Before computing the index, we set the value to 1.
After computing the index, we set the value to 2.
When the index is needed and the busy status is 1, we wait for it
to become 2 before we continue.
When the index is needed and busy is 2, the index has been
built and we have to do nothing.
Note that we do not need anything of the value of the index here,
we only need it to be built, so that other parts of the app can get it
quickly.
We cannot allow that other parts of the app see an unfinished index,
because it will lead to served content that is not yet correct.
Because this content will be cached, it will take a long time (or forever)
until it gets recomputed.
"""
Caching = current.Caching
busyStatus = Caching.get(f"busyIndex_{vr}_", lambda: {}, ALWAYS)
busy = busyStatus.get("busy", 0)
if busy == 0:
busyStatus["busy"] = 1
Caching.get(f"qcindex_{vr}_", lambda: self.makeQCindex_c(vr), ALWAYS)
busyStatus["busy"] = 2
elif busy == 1:
n = 0
while (
Caching.get(f"busyIndex_{vr}_", lambda: {}, ALWAYS).get("busy", 0) == 1
):
time.sleep(1)
n += 1
log(f"o-o-o waiting for chapter-query index {vr} ({n} sec) ...")
makeQCindex_c(self, vr)
¶
Source code in modules/querychapter.py
def makeQCindex_c(self, vr):
Caching = current.Caching
db = current.db
PASSAGE_DBS = current.PASSAGE_DBS
log(f"o-o-o making chapter-query index for version {vr} ...")
startTime = time.time()
pubStatus = Caching.get(
f"pubStatus_{vr}_",
lambda: {},
ALWAYS,
)
slotsFromChapter = Caching.get(
f"slotsFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
chapterFromSlot = Caching.get(
f"chapterFromSlot_{vr}_",
lambda: {},
ALWAYS,
)
queriesFromChapter = Caching.get(
f"queriesFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
chaptersFromQuery = Caching.get(
f"chaptersFromQuery_{vr}_",
lambda: {},
ALWAYS,
)
chapterSQL = dedent(
"""
select id, first_m, last_m from chapter
;
"""
)
chapterList = PASSAGE_DBS[vr].executesql(chapterSQL)
for (chapter_id, first_m, last_m) in chapterList:
for m in range(first_m, last_m + 1):
chapterFromSlot[m] = chapter_id
slotsFromChapter[chapter_id] = last_m
resultSQL1 = dedent(
f"""
select
query_exe.id, query_exe.is_published, query.id
from query
inner join query_exe on
query.id = query_exe.query_id
where
query_exe.version = '{vr}'
and
query_exe.executed_on >= query_exe.modified_on
and
query.is_shared = 'T'
;
"""
)
queryTime = time.time()
results1 = db.executesql(resultSQL1)
queryTime = time.time() - queryTime
log(f" found {len(results1)} shared queries in {delta(queryTime)}")
queryFromExe = {}
for (query_exe_id, is_published, query_id) in results1:
queryFromExe[query_exe_id] = query_id
pubStatus.setdefault(query_id, {})[vr] = is_published == "T"
if queryFromExe:
doQueryIndex(db, vr, queryFromExe)
exe = time.time() - startTime
log(f"o-o-o made chapter-query index for data {vr} in {delta(exe)}")
return (queriesFromChapter, chaptersFromQuery)
updatePubStatus(self, vr, query_id, is_published)
¶
Source code in modules/querychapter.py
def updatePubStatus(self, vr, query_id, is_published):
Caching = current.Caching
log(f"o-o-o updating pubStatus for query {query_id} in version {vr} ...")
pubStatus = Caching.get(
f"pubStatus_{vr}_",
lambda: {},
ALWAYS,
)
pubStatus.setdefault(query_id, {})[vr] = is_published
log(f"o-o-o updating pubStatus for query {query_id} in version {vr} done")
updateQCindex(self, vr, query_id, uptodate=True)
¶
Update the chapter-query index if a query has been run or rerun.
We want an up to date mapping from chapters to the shared, up-to-date queries with results in those chapters.
We call this function when: * a query has run and a niew set of slots has been stored. * the sharing status of a query changes
We do not call this function when: * the published state of a query has changed (see updatePubStatus) * when a query body is edited but not run (see below).
In those cases the following will be taken care of:
First delete the query from the index. If the query is shared and up to date, we add it back to the index based on its results.
However, this function might be called at a time that the results of the query have been stored in the database, before the metadata has arrived there. In that case we can not test on the uptodateness, so we assume that the caller has passed uptodate=True. Indeed, when the sharing status has changed, we are able to perform this test, and then there is no need to pass uptodate=True.
What if a query body is edited but not run? It will then become outdated, and should be removed from the index. But that is a rather costly operation, and it is likely that a query is edited many times before it is run again. What we do instead is, that when we fetch queries for the sidebar of a chapter, we skip the ones that are outdated.
Source code in modules/querychapter.py
def updateQCindex(self, vr, query_id, uptodate=True):
"""Update the chapter-query index if a query has been run or rerun.
We want an up to date mapping from chapters to the shared, up-to-date
queries with results in those chapters.
We call this function when:
* a query has run and a niew set of slots has been stored.
* the sharing status of a query changes
We do not call this function when:
* the published state of a query has changed (see updatePubStatus)
* when a query body is edited but not run (see below).
In those cases the following will be taken care of:
First delete the query from the index.
If the query is shared and up to date, we add it back to the index
based on its results.
However, this function might be called at a time that the results
of the query have been stored in the database, before the metadata
has arrived there.
In that case we can not test on the uptodateness, so we assume that the
caller has passed uptodate=True.
Indeed, when the sharing status has changed, we are able to perform
this test, and then there is no need to pass uptodate=True.
What if a query body is edited but not run? It will then become outdated,
and should be removed from the index.
But that is a rather costly operation, and it is likely that a query is edited
many times before it is run again.
What we do instead is, that when we fetch queries for the sidebar of a chapter,
we skip the ones that are outdated.
"""
Caching = current.Caching
db = current.db
log((f"o-o-o updating chapter-query index for data {vr}"))
startTime = time.time()
chaptersFromQuery = Caching.get(
f"chaptersFromQuery_{vr}_",
lambda: {},
ALWAYS,
)
queriesFromChapter = Caching.get(
f"queriesFromChapter_{vr}_",
lambda: {},
ALWAYS,
)
# remove query_id from both indexes: chaptersFromQuery and queriesFromChapter
if query_id in chaptersFromQuery:
theseChapters = chaptersFromQuery[query_id]
for chapter_id in theseChapters:
if chapter_id in queriesFromChapter:
if query_id in queriesFromChapter[chapter_id]:
del queriesFromChapter[chapter_id][query_id]
if not queriesFromChapter[chapter_id]:
del queriesFromChapter[chapter_id]
del chaptersFromQuery[query_id]
# add query_id again to both indexes (but now with updated results)
uptodateSQL = (
""
if uptodate
else dedent(
"""
and
query_exe.executed_on >= query_exe.modified_on
"""
)
)
resultSQL1 = dedent(
f"""
select
query_exe.id, query.id
from query
inner join query_exe on
query.id = query_exe.query_id
where
query.id = {query_id}
and
query_exe.version = '{vr}'
and
query.is_shared = 'T'
{uptodateSQL}
;
"""
)
queryTime = time.time()
results1 = db.executesql(resultSQL1)
queryTime = time.time() - queryTime
log(f" found {len(results1)} shared queries in {delta(queryTime)}")
queryFromExe = {}
for (query_exe_id, query_id) in results1:
queryFromExe[query_exe_id] = query_id
if queryFromExe:
doQueryIndex(db, vr, queryFromExe)
exe = time.time() - startTime
log(f"o-o-o updated chapter-query index for data {vr} in {delta(exe)}")
doQueryIndex(db, vr, queryFromExe)
¶
Source code in modules/querychapter.py
def doQueryIndex(db, vr, queryFromExe):
Caching = current.Caching
slotsFromChapter = Caching.get(f"slotsFromChapter_{vr}_", lambda: {}, ALWAYS)
chapterFromSlot = Caching.get(f"chapterFromSlot_{vr}_", lambda: {}, ALWAYS)
chaptersFromQuery = Caching.get(f"chaptersFromQuery_{vr}_", lambda: {}, ALWAYS)
queriesFromChapter = Caching.get(f"queriesFromChapter_{vr}_", lambda: {}, ALWAYS)
resultSQL2 = dedent(
f"""
select query_exe_id, first_m, last_m from monads
where
query_exe_id in ({",".join(str(idx) for idx in queryFromExe)})
;
"""
)
queryTime = time.time()
results2 = db.executesql(resultSQL2)
queryTime = time.time() - queryTime
log(f" found {len(results2)} result intervals in {delta(queryTime)}")
log(" processing information about queries ...")
procTime = time.time()
resultsByQ = {}
for (query_exe_id, first_m, last_m) in results2:
query_id = queryFromExe[query_exe_id]
resultsByQ.setdefault(query_id, []).append((first_m, last_m))
log(f" found {len(resultsByQ)} shared queries")
for (query_id, ranges) in resultsByQ.items():
chapters = {}
for (first_m, last_m) in ranges:
if first_m == last_m:
chapter_id = chapterFromSlot[first_m]
chapters.setdefault(chapter_id, []).append((first_m, first_m))
continue
m = first_m
while m <= last_m:
chapter_id = chapterFromSlot[m]
chapterLastSlot = slotsFromChapter[chapter_id]
endM = min((last_m, chapterLastSlot))
chapters.setdefault(chapter_id, []).append((m, endM))
m = chapterLastSlot + 1
if chapters:
chaptersFromQuery[query_id] = list(chapters)
for (chapter_id, ranges) in chapters.items():
queriesFromChapter.setdefault(chapter_id, {})[query_id] = ranges
procTime = time.time() - procTime
log(f" processed shared queries into index in {delta(procTime)}")