SearX Robot
Command to run a single query against the local SearXNG instance (wget or curl):

{pre}
wget -qO- "http://localhost/searxng/search?q=test&category_general=&language=auto&time_range=&safesearch=0&theme=simple"
{/pre}

{pre}
curl "http://localhost/searxng/search?q=test&category_general=&language=auto&time_range=&safesearch=0&theme=simple"
{/pre}

Build the cache from the keyword dataset:

{pre}
import json
import subprocess
import time
from urllib.parse import quote_plus

# Path to the keyword dataset JSON file
json_file_path = 'path_to_your_json_file.json'

# Read the JSON file
with open(json_file_path, 'r') as file:
    data = json.load(file)

# Iterate through the JSON data to extract keywords
for item in data:
    keyword = item['keyphrase']

    # Construct the wget command (URL-encode the keyword)
    wget_command = (
        f'wget -qO- "http://localhost/searxng/search?q={quote_plus(keyword)}'
        f'&category_general=&language=auto&time_range=&safesearch=0&theme=simple"'
    )

    # Execute the wget command
    subprocess.run(wget_command, shell=True)

    # Add a 1 second delay between requests
    time.sleep(1)
{/pre}

'''Keyword Datasets'''

# https://www.kaggle.com/datasets/hofesiy/2019-search-engine-keywords

Running this list produces new keyword suggestions right from searx. Extract the suggestions from the cache:

{pre}
# Scan the cache and grab all the keywords
import json
from pathlib import Path

def process_searxng_cache(cache_dir, output_file):
    # Create a set to store unique processed entries
    entries = set()

    cache_path = Path(cache_dir)

    # Debug: check if the cache directory exists
    if not cache_path.exists():
        print(f"Cache directory {cache_path} does not exist.")
        return

    # Debug: print the cache directory path
    print(f"Cache directory: {cache_path}")

    # Walk through all subdirectories and files in the cache directory
    for subdir in cache_path.iterdir():
        if subdir.is_dir():
            print(f"Processing subdirectory: {subdir}")
            for file in subdir.iterdir():
                if file.is_file():
                    print(f"Found file: {file}")
                    # Attempt to open and read each file as JSON
                    try:
                        with file.open('r', encoding='utf-8') as f:
                            data = json.load(f)

                        print(f"Processing file: {file}")
                        print(f"JSON data: {data}")

                        # Check if the required keys are in the JSON data
                        if 'query' in data and 'suggestions' in data:
                            query = data['query']
                            suggestions = data['suggestions']
                            print(f"Query: {query}, Suggestions: {suggestions}")
                            for suggestion in suggestions:
                                entries.add(f"{query}: {suggestion}")
                        else:
                            print(f"Missing 'query' or 'suggestions' in file {file}")
                    except (json.JSONDecodeError, KeyError, IOError) as e:
                        print(f"Error processing file {file}: {e}")

    # Write the entries to the output file
    with Path(output_file).open('w', encoding='utf-8') as out_f:
        for entry in sorted(entries):
            out_f.write(f"{entry}\n")

if __name__ == "__main__":
    cache_dir = "/usr/local/searxng/searxng-src/searx/cache/"
    output_file = "keywords.txt"
    process_searxng_cache(cache_dir, output_file)
    print(f"Processed entries have been saved to {output_file}")
{/pre}

Use the suggestions to crawl more:

{pre}
import subprocess
import time
from urllib.parse import quote_plus

# Path to the keyword list produced by the cache scan
text_file_path = 'path_to_your_text_file.txt'

# Read the text file
with open(text_file_path, 'r') as file:
    lines = file.readlines()

# Iterate through the lines to extract keywords
for line in lines:
    # Each line has the form "query: suggestion"; keep the suggestion part
    keyword = line.strip().split(': ', 1)[-1]

    # Construct the wget command (URL-encode the keyword)
    wget_command = (
        f'wget -qO- "http://localhost/searxng/search?q={quote_plus(keyword)}'
        f'&category_general=&language=auto&time_range=&safesearch=0&theme=simple"'
    )

    # Execute the wget command
    subprocess.run(wget_command, shell=True)

    # Add a 1 second delay between requests
    time.sleep(1)
{/pre}
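If the instance has the JSON output format enabled (the formats list under search: in settings.yml must include json), the suggestions can also be pulled straight from the HTTP API instead of scraping the cache files. A minimal sketch using the third-party requests library; the base URL and parameters mirror the commands above, and the seed keywords are only placeholders:

{pre}
# Sketch: pull suggestions straight from the SearXNG JSON API.
# Assumes format=json is allowed (settings.yml: search -> formats includes "json").
import time
import requests

SEARX_URL = "http://localhost/searxng/search"  # same local instance as above

def fetch_suggestions(keyword):
    """Query SearXNG once and return the suggestion list for one keyword."""
    params = {"q": keyword, "format": "json", "safesearch": 0, "language": "auto"}
    response = requests.get(SEARX_URL, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()
    return data.get("suggestions", [])

if __name__ == "__main__":
    for kw in ["test", "open source search"]:  # placeholder seed keywords
        print(kw, "->", fetch_suggestions(kw))
        time.sleep(1)  # keep the same 1 second delay between requests
{/pre}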
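To close the loop between the last two scripts, the sketch below feeds freshly extracted suggestions back in as new queries until a round turns up nothing new. It reuses the fetch_suggestions() helper from the sketch above; the round limit and delay are illustrative defaults, not values from the original scripts:

{pre}
# Sketch: breadth-first keyword expansion, reusing fetch_suggestions() defined above.
import time

def expand_keywords(seeds, max_rounds=2, delay=1.0):
    """Repeatedly query SearXNG, feeding new suggestions back in as queries."""
    seen = set(seeds)
    frontier = list(seeds)
    for _ in range(max_rounds):
        next_frontier = []
        for kw in frontier:
            for suggestion in fetch_suggestions(kw):
                if suggestion not in seen:
                    seen.add(suggestion)
                    next_frontier.append(suggestion)
            time.sleep(delay)  # stay polite to the local instance
        if not next_frontier:
            break  # nothing new was found, stop early
        frontier = next_frontier
    return sorted(seen)
{/pre}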