Caching SearXNG Version 2
In version 2 the cache filenames are hashed, which means international characters and symbols in search queries are supported and far less likely to break SearXNG.
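As a quick illustration (not part of the patch), here is how a query containing non-ASCII characters maps to a filesystem-safe path under this scheme: the query, page number and category are concatenated, and only the md5 hex digest is ever used on disk.

import hashlib
import os

# Example only: a query with international characters never appears in the filename
fname = "наручные часы" + str(1) + "general"        # query + page number + category
hex_dig = hashlib.md5(fname.encode()).hexdigest()   # 32 hex characters, always filesystem-safe
print(os.path.join("cache", hex_dig[:2], hex_dig))  # cache/<first 2 hex chars>/<full hash>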
Two files are modified:
- webapp.py in /usr/local/searxng/searxng-src/searx/webapp.py : def search()
- __init__.py in /usr/local/searxng/searxng-src/searx/search/__init__.py : class Search
Create the cache directory, pre-create the 256 two-character subdirectories (00 through ff), and hand ownership to the searxng user:

sudo mkdir -p /usr/local/searxng/searxng-src/searx/cache
for i in {0..255}; do sudo mkdir -p /usr/local/searxng/searxng-src/searx/cache/$(printf "%02x" $i); done
sudo chown -R searxng:searxng /usr/local/searxng/searxng-src/searx/cache
sudo chmod -R 755 /usr/local/searxng/searxng-src/searx/cache
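Sharding the cache by the first two hex characters of the hash keeps any single directory from accumulating every cached query. If you prefer, the same pre-creation can be done from Python; this sketch is equivalent to the shell loop above:

import os

# Equivalent of the shell loop: create cache/00 ... cache/ff
base = "/usr/local/searxng/searxng-src/searx/cache"
for i in range(256):
    os.makedirs(os.path.join(base, f"{i:02x}"), exist_ok=True)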
In webapp.py, add import hashlib at the top of the file and, inside def search(), compute the cache path from the query:

import hashlib

# Build the cache key from query, page number and category; lowercase it so it
# matches the key built in searx/search/__init__.py
fname = request.form['q'].lower() + str(search_query.pageno) + str(search_query.categories[0])
# Generate a hash of the search term
hash_object = hashlib.md5(fname.encode())
hex_dig = hash_object.hexdigest()
subdirectory = hex_dig[:2]  # Use the first 2 characters of the hash as the subdirectory name
cache_dir = os.path.abspath(os.path.join("cache", subdirectory))
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
file_path = os.path.join(cache_dir, hex_dig)  # the file is named after the hash, not the raw query
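This revision only shows how file_path is computed; the code in def search() that actually writes the cache file is not included here. A minimal sketch of what that write could look like, assuming a variable holding the ordered results is in scope (results is a hypothetical name here) and that json is imported at the top of webapp.py; the 'results' key is what search_multiple_requests2() reads back:

# Hypothetical write side: store the finished results under the hashed filename
if not os.path.isfile(file_path):
    with open(file_path, 'w', encoding='utf-8') as cache_file:
        json.dump({'results': results}, cache_file, default=str)  # default=str for dates etc.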
In searx/search/__init__.py, add the imports at the top of the file (os and json are also required if they are not already imported) and replace search_standard() in class Search:

import hashlib
import json
import os

def search_standard(self):
    """
    Update self.result_container, self.actual_timeout
    """
    requests, self.actual_timeout = self._get_requests()

    cache_dir = 'cache'
    # Build the same cache key as webapp.py: lowercased query + page number + category
    fname = self.search_query.query.lower() + str(self.search_query.pageno) + str(self.search_query.categories[0])
    hash_object = hashlib.md5(fname.encode())
    hex_dig = hash_object.hexdigest()
    subdirectory = hex_dig[:2]  # Use the first 2 characters of the hash as the subdirectory name
    query_dir = os.path.join(cache_dir, subdirectory)
    self.hashed_filename = hex_dig  # keep the hashed filename for search_multiple_requests2()
    mock_data_filename = os.path.join(query_dir, self.hashed_filename)

    # send all search-request
    if requests:
        if os.path.isfile(mock_data_filename):
            # a cached copy exists: serve it instead of querying the engines
            self.search_multiple_requests2(requests)
        else:
            self.search_multiple_requests(requests)

    # return results, suggestions, answers and infoboxes
    return True
Then add the new method search_multiple_requests2() to class Search:

def search_multiple_requests2(self, requests):
    # pylint: disable=protected-access
    search_id = str(uuid4())
    mock_result_container = ResultContainer()

    # Rebuild the cache path from the hashed filename computed in search_standard()
    cache_dir = 'cache'
    subdirectory = self.hashed_filename[:2]  # same 2-character subdirectory as when the file was written
    query_dir = os.path.join(cache_dir, subdirectory)
    mock_data_filename = os.path.join(query_dir, self.hashed_filename)  # Use the hashed filename

    # Load the cached JSON data
    with open(mock_data_filename, encoding='utf-8') as mock_data_file:
        mock_data = json.load(mock_data_file)
    mock_results = mock_data['results']  # Extract 'results' from the JSON data

    threads = []
    for engine_name, _, _ in requests:
        th = threading.Thread(
            target=self.mock_search_function,
            args=(engine_name, mock_results, mock_result_container),
            name=search_id,
        )
        th._timeout = False
        th._engine_name = engine_name
        th.start()
        threads.append(th)

    remaining_time = None
    for th in threads:
        if th.name == search_id:
            if remaining_time is None:
                remaining_time = self.actual_timeout - (default_timer() - self.start_time)
            th.join(remaining_time)
            if th.is_alive():
                th._timeout = True
                self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
                PROCESSORS[th._engine_name].logger.error('engine timeout')

    # Wait for all threads to finish, even if some have timed out
    for th in threads:
        th.join()

    # Copy the mock results to the actual result_container
    self.result_container = mock_result_container
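mock_search_function() is referenced above but not shown in this revision. A minimal sketch of what it might look like, assuming each cached entry is a plain result dict that records the 'engine' that produced it and that ResultContainer.extend() accepts such dicts; adapt it to whatever format your cache files actually contain:

def mock_search_function(self, engine_name, mock_results, result_container):
    # Hypothetical: hand the cached results that belong to this engine back to the
    # container, as if the engine had just returned them
    engine_results = [r for r in mock_results if r.get('engine') == engine_name]
    if engine_results:
        result_container.extend(engine_name, engine_results)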