Caching SearXNG Version 2

This revision is from 2024/07/30 23:07. You can Restore it.

In version 2, the cache filenames are hashed, which means that international characters and symbols are supported and are less likely to break SearXNG.

Two files are modified:

  1. webapp.py (/usr/local/searxng/searxng-src/searx/webapp.py): the function def search()
  2. __init__.py (/usr/local/searxng/searxng-src/searx/search/__init__.py): the class Search

# Root of the on-disk cache.
cache_root=/usr/local/searxng/searxng-src/searx/cache

# Create the root, then one bucket directory per two-hex-digit prefix (00 … ff).
sudo mkdir -p "$cache_root"
for n in $(seq 0 255); do
    sudo mkdir -p "$cache_root/$(printf '%02x' "$n")"
done

# Give the whole tree to the searxng user and group.
sudo chown -R searxng:searxng "$cache_root"

# rwx for the owner, r-x for group and others, applied recursively.
sudo chmod -R 755 "$cache_root"

import hashlib

# Cache key: lower-cased query text + page number + first category.
# Lower-casing matches the key built in Search.search_standard(), which uses
# self.search_query.query.lower(); without it a mixed-case query would be
# written under one hash and looked up under another.
fname = request.form['q'].lower() + str(search_query.pageno) + str(search_query.categories[0])

# Generate a hash of the cache key, so the on-disk name is always 32 hex
# characters whatever the query contains (slashes, symbols, non-ASCII text).
hash_object = hashlib.md5(fname.encode('utf-8'))
hex_dig = hash_object.hexdigest()

# Use the first 2 characters of the hash as the subdirectory name (00 … ff).
subdirectory = hex_dig[:2]

# "cache" is relative, so it is resolved against the process working directory.
cache_dir = os.path.abspath(os.path.join("cache", subdirectory))

# exist_ok=True avoids the check-then-create race between concurrent requests.
os.makedirs(cache_dir, exist_ok=True)

# Name the file after the digest, not the raw key: the raw key may contain
# path separators or other characters that are not valid in a file name.
file_path = os.path.join(cache_dir, hex_dig)

import hashlib

def search_standard(self):
    """
    Update self.result_container, self.actual_timeout

    If a cached response for this query/page/category exists on disk, the
    requests are handed to ``search_multiple_requests2``; otherwise they are
    handed to ``search_multiple_requests``.

    Cache layout: ``cache/<first two hex chars of digest>/<digest>``, where
    ``digest`` is the MD5 hex digest of the lower-cased query text followed
    by the page number and the first category.  ``cache`` is a relative
    path, so it is resolved against the process working directory.

    :return: always ``True``
    """
    requests, self.actual_timeout = self._get_requests()

    # Nothing to send: no need to build a cache key or touch the disk.
    if not requests:
        return True

    cache_key = (
        self.search_query.query.lower()
        + str(self.search_query.pageno)
        + str(self.search_query.categories[0])
    )
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()

    # The file is named after the digest, never the raw key: the raw key may
    # contain path separators or symbols that are not valid in a file name.
    cached_file = os.path.join('cache', digest[:2], digest)

    # Check if the file exists in the cache directory
    if os.path.isfile(cached_file):
        self.search_multiple_requests2(requests)
    else:
        self.search_multiple_requests(requests)

    # return results, suggestions, answers and infoboxes
    return True

def search_multiple_requests2(self, requests):
    """Answer ``requests`` from the on-disk cache instead of the engines.

    The cache file is ``cache/<first two hex chars of digest>/<digest>``,
    where ``digest`` is the MD5 hex digest of the lower-cased query text
    followed by the page number and the first category (the same key that
    ``search_standard`` builds).  The file is parsed as JSON and its
    ``'results'`` entry is passed to ``self.mock_search_function`` once per
    requested engine, each call running in its own thread.

    If the cache file is missing, unreadable, not valid JSON, or has no
    ``'results'`` entry, the requests are passed to
    ``self.search_multiple_requests`` instead.

    :param requests: iterable of 3-tuples; only the first element (the
        engine name) is used here.
    """
    # pylint: disable=protected-access

    # Derive the path from the search query itself rather than from a
    # pre-computed attribute, so the lookup does not depend on state set
    # elsewhere.
    cache_key = (
        self.search_query.query.lower()
        + str(self.search_query.pageno)
        + str(self.search_query.categories[0])
    )
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()
    mock_data_filename = os.path.join('cache', digest[:2], digest)

    try:
        with open(mock_data_filename, encoding='utf-8') as mock_data_file:
            # Extract 'results' from the JSON data
            mock_results = json.load(mock_data_file)['results']
    except (OSError, ValueError, KeyError, TypeError):
        # The file can vanish or be half-written between the existence check
        # and this read; a bad cache entry must not abort the whole search.
        self.search_multiple_requests(requests)
        return

    search_id = str(uuid4())
    mock_result_container = ResultContainer()

    threads = []
    for engine_name, _, _ in requests:
        th = threading.Thread(
            target=self.mock_search_function,
            args=(engine_name, mock_results, mock_result_container),
            name=search_id,
        )
        th._timeout = False
        th._engine_name = engine_name
        th.start()
        threads.append(th)

    for th in threads:
        # Recompute the budget for every thread: time spent waiting on an
        # earlier thread must count against the later ones.  Clamp at zero
        # once the overall timeout has already elapsed.
        remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time))
        th.join(remaining_time)
        if th.is_alive():
            th._timeout = True
            # NOTE(review): this is recorded on the current result_container,
            # which is replaced below — confirm whether the timeout should be
            # recorded on mock_result_container instead.
            self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
            PROCESSORS[th._engine_name].logger.error('engine timeout')

    # Wait for all threads to finish, even if some have timed out
    for th in threads:
        th.join()

    # Copy the mock results to the actual result_container
    self.result_container = mock_result_container

  

📝 📜 ⏱️ ⬆️