I am trying to integrate a whoosh searcher into a Django project. I saw that you can do that using Haystack, but I don't yet know how to add my custom whoosh index to the searcher. My schema has `ID`, `KEYWORD` and `TEXT` fields, but they are all text in reality. I used these field types because they suit my search needs for each of the documents. How do I use this schema in Haystack?
PS: A solution without Haystack is ok too.
Here is my whoosh schema/writer/searcher
import pandas as pd
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
from whoosh.query import *
def nan2none(x):
    """Return ``None`` if ``x`` is a pandas missing value, otherwise ``x`` unchanged.

    ``pd.isna`` decides what counts as missing, so ``NaN`` and ``None`` both
    map to ``None``; every other value is passed through untouched.
    """
    return None if pd.isna(x) else x
# Load the rows to index; the first CSV column is used as the DataFrame index.
df = pd.read_csv("df.csv", index_col=[0])

# One schema field per column: "a" is an ID whose value is stored with the
# document, "b" and "d" are lowercased KEYWORD fields, "c" is a TEXT field.
schema = Schema(
    a=ID(stored=True),
    b=KEYWORD(lowercase=True),
    c=TEXT,
    d=KEYWORD(lowercase=True),
)
ix = create_in("indexdir", schema)

# Add one document per DataFrame row; missing cells are passed as None
# instead of NaN.
writer = ix.writer()
for index, row in df.iterrows():
    writer.add_document(
        a=index,
        b=nan2none(row["b"]),
        c=nan2none(row["c"]),
        d=nan2none(row["d"]),
    )
writer.commit()

search_term = "hobbit"
with ix.searcher() as searcher:
    a_query = QueryParser("a", ix.schema).parse(search_term)
    b_query = QueryParser("b", ix.schema).parse(search_term)
    # This parser must target field "c"; parsing against "b" a second time
    # would leave the TEXT field out of the combined query entirely.
    c_query = QueryParser("c", ix.schema).parse(search_term)
    # Field "d" is queried twice: once with morphological variations of the
    # term and once with fuzzy matching.
    d_var_query = QueryParser("d", ix.schema, termclass=Variations).parse(search_term)
    d_fuzz_query = QueryParser("d", ix.schema, termclass=FuzzyTerm).parse(search_term)

    # A document matches if any of the per-field queries matches.
    query = Or([a_query, b_query, c_query, d_var_query, d_fuzz_query])
    results = searcher.search(query, limit=None)

    # Results are read while the searcher is still open.
    print(results)
    for res in results:
        print(res)
But in my Django model, all the fields I am adding above are `CharField`, as follows:
class ModelLetters(models.Model):
    """Django model holding the four text columns that are indexed for search."""

    # All four columns are plain character fields; only the maximum lengths differ.
    a = models.CharField(max_length=50)
    b = models.CharField(max_length=100)
    c = models.CharField(max_length=100)
    d = models.CharField(max_length=250)
Whereas my Haystack index is as follows (all `CharField` too):
from haystack import indexes
from appmanager.model.model_letters import ModelLetters
class LettersIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for ``ModelLetters``.

    ``text`` is the document field and is rendered from a template; ``a``-``d``
    are each filled from the model attribute of the same name.
    """

    text = indexes.CharField(document=True, use_template=True)
    a = indexes.CharField(model_attr="a")
    b = indexes.CharField(model_attr="b")
    c = indexes.CharField(model_attr="c")
    d = indexes.CharField(model_attr="d")

    class Meta:
        # NOTE(review): these options look like ModelSearchIndex settings; a
        # plain SearchIndex presumably ignores them - confirm before relying on them.
        model = ModelLetters
        fields = ["a", "b", "c", "d"]

    def get_model(self):
        """Return the model class this index covers."""
        return ModelLetters

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.all()