Search Integration
Integrate web search capabilities into your AI applications using DuckDuckGo, Serper, or ValueSERP APIs.
Why Search Integration?
Search integration enables your AI applications to access current information from the web:
- Current Information: Get up-to-date information beyond LLM training data
- Research Automation: Gather information from multiple sources automatically
- Enhanced RAG: Augment responses with fresh web content
- Content Discovery: Find relevant articles, papers, and resources
- Data Collection: Build datasets from search results
Supported Search Providers
DuckDuckGo
Free, no API key required
- No cost
- No registration needed
- Rate limited
- Good for prototyping
Serper API
Google search results via API
- Google search quality
- Requires API key
- Paid service
- High rate limits
ValueSERP
Google search results via the ValueSERP API
- Google results
- Requires API key
- Paid service
- Reliable
SearchResult Model
All search functions return a list of SearchResult objects:
from SimplerLLM.tools.serp import search_with_duck_duck_go

results = search_with_duck_duck_go("machine learning", max_results=5)

for result in results:
    print(f"Title: {result.Title}")
    print(f"URL: {result.URL}")
    print(f"Domain: {result.Domain}")
    print(f"Description: {result.Description}")
    print("-" * 60)
DuckDuckGo Search
Free search with no API key required - perfect for development and prototyping:
from SimplerLLM.tools.serp import search_with_duck_duck_go

# Basic search
query = "artificial intelligence trends 2024"
results = search_with_duck_duck_go(query, max_results=10)

print(f"Found {len(results)} results for: {query}\n")

for i, result in enumerate(results, 1):
    print(f"{i}. {result.Title}")
    print(f"   {result.URL}")
    print(f"   {result.Description[:100]}...\n")
Serper API Search
Google search results via Serper API for production applications:
Setup
Add your Serper API key to .env:
# .env file
SERPER_API_KEY=your_serper_api_key_here
Basic Usage
from SimplerLLM.tools.serp import search_with_serper_api

# Search with Serper
query = "machine learning frameworks"
results = search_with_serper_api(query, num_results=10)

print(f"Google search results for: {query}\n")

for result in results:
    print(f"Title: {result.Title}")
    print(f"Domain: {result.Domain}")
    print(f"URL: {result.URL}")
    print(f"Snippet: {result.Description}\n")
Async Search
import asyncio
from SimplerLLM.tools.serp import search_with_serper_api_async

async def search_multiple_queries():
    queries = [
        "machine learning",
        "deep learning",
        "natural language processing"
    ]

    # Search all queries in parallel
    tasks = [
        search_with_serper_api_async(query, num_results=5)
        for query in queries
    ]
    results_list = await asyncio.gather(*tasks)

    # Process results
    for query, results in zip(queries, results_list):
        print(f"\n{query}: {len(results)} results")
        for r in results[:3]:
            print(f"  - {r.Title}")

# Run async search
asyncio.run(search_multiple_queries())
ValueSERP Search
Google search results via the ValueSERP API:
Setup
# .env file
VALUE_SERP_API_KEY=your_valueserp_api_key_here
Usage
from SimplerLLM.tools.serp import search_with_value_serp

# Search with ValueSERP
query = "AI research papers"
results = search_with_value_serp(query, num_results=10)

for result in results:
    print(f"{result.Title} - {result.Domain}")
    print(f"{result.Description}\n")
Async Search
import asyncio
from SimplerLLM.tools.serp import search_with_value_serp_async

async def async_search():
    results = await search_with_value_serp_async(
        "Python programming",
        num_results=20
    )
    return results

results = asyncio.run(async_search())
print(f"Found {len(results)} results")
Real-World Examples
Research Assistant
from SimplerLLM.tools.serp import search_with_serper_api
from SimplerLLM.tools.generic_loader import load_content
from SimplerLLM.language.llm import LLM, LLMProvider

class ResearchAssistant:
    def __init__(self):
        self.llm = LLM.create(
            provider=LLMProvider.OPENAI,
            model_name="gpt-4o"
        )

    def research_topic(self, topic, num_sources=5):
        """Research a topic by searching and analyzing top results"""
        print(f"Researching: {topic}\n")

        # Search for relevant content
        search_results = search_with_serper_api(topic, num_results=num_sources)

        # Load and analyze each result
        analyses = []
        for i, result in enumerate(search_results[:num_sources], 1):
            print(f"{i}. Loading {result.Title}...")
            try:
                # Load the page content
                doc = load_content(str(result.URL))

                # Analyze the content
                prompt = f"""Summarize the key points from this article about {topic}:

{doc.content[:2000]}

Key Points (3-5 bullet points):"""
                summary = self.llm.generate_response(prompt=prompt)

                analyses.append({
                    'title': result.Title,
                    'url': result.URL,
                    'domain': result.Domain,
                    'summary': summary
                })
                print(f"   ✓ Analyzed")
            except Exception as e:
                print(f"   ✗ Error: {e}")
                continue

        # Create a comprehensive report
        report_prompt = f"""Based on these analyzed sources about {topic}, create a comprehensive research summary:
"""
        for analysis in analyses:
            report_prompt += f"\nSource: {analysis['title']}\n{analysis['summary']}\n"

        report_prompt += "\nCreate a comprehensive summary covering:\n1. Main themes\n2. Key findings\n3. Expert opinions\n4. Future outlook\n\nResearch Summary:"

        report = self.llm.generate_response(prompt=report_prompt)

        return {
            'topic': topic,
            'num_sources': len(analyses),
            'report': report,
            'sources': analyses
        }

# Usage
assistant = ResearchAssistant()
research = assistant.research_topic("future of quantum computing", num_sources=3)

print(f"\n{'='*60}")
print(f"RESEARCH REPORT: {research['topic']}")
print(f"{'='*60}\n")
print(research['report'])
print(f"\n\nBased on {research['num_sources']} sources")
Content Aggregation System
from SimplerLLM.tools.serp import search_with_serper_api
from SimplerLLM.language.llm import LLM, LLMProvider
from datetime import datetime

class ContentAggregator:
    def __init__(self):
        self.llm = LLM.create(
            provider=LLMProvider.OPENAI,
            model_name="gpt-4o-mini"
        )

    def create_daily_digest(self, topics):
        """Create a daily digest from multiple topics"""
        digest = {
            'date': datetime.now().strftime("%Y-%m-%d"),
            'topics': []
        }

        for topic in topics:
            # Search for the latest content
            results = search_with_serper_api(f"{topic} latest news", num_results=5)

            # Summarize findings
            articles_text = "\n\n".join([
                f"Title: {r.Title}\nSource: {r.Domain}\nSnippet: {r.Description}"
                for r in results[:5]
            ])

            prompt = f"""Summarize the latest developments in {topic} based on these articles:

{articles_text}

Summary (2-3 sentences):"""
            summary = self.llm.generate_response(prompt=prompt)

            digest['topics'].append({
                'topic': topic,
                'summary': summary,
                'top_sources': [
                    {'title': r.Title, 'url': str(r.URL), 'domain': r.Domain}
                    for r in results[:3]
                ]
            })

        return digest

    def format_digest(self, digest):
        """Format the digest for display"""
        output = f"\n{'='*60}\n"
        output += f"Daily Tech Digest - {digest['date']}\n"
        output += f"{'='*60}\n\n"

        for item in digest['topics']:
            output += f"## {item['topic'].upper()}\n\n"
            output += f"{item['summary']}\n\n"
            output += "Top Sources:\n"
            for i, source in enumerate(item['top_sources'], 1):
                output += f"{i}. {source['title']} ({source['domain']})\n"
                output += f"   {source['url']}\n"
            output += "\n" + "-"*60 + "\n\n"

        return output

# Usage
aggregator = ContentAggregator()
topics = [
    "artificial intelligence",
    "machine learning",
    "large language models"
]

digest = aggregator.create_daily_digest(topics)
formatted = aggregator.format_digest(digest)
print(formatted)
RAG with Web Search
from SimplerLLM.tools.serp import search_with_serper_api
from SimplerLLM.tools.generic_loader import load_content
from SimplerLLM.language.llm import LLM, LLMProvider

class WebSearchRAG:
    def __init__(self):
        self.llm = LLM.create(
            provider=LLMProvider.OPENAI,
            model_name="gpt-4o"
        )

    def answer_with_search(self, question, num_sources=3):
        """Answer a question using web search"""
        print(f"Question: {question}\n")
        print("Searching the web...")

        # Search for relevant information
        search_results = search_with_serper_api(question, num_results=num_sources)

        # Extract key information from the search results
        context_parts = []
        sources_used = []

        for result in search_results[:num_sources]:
            # Use the search snippet as context
            context_parts.append(
                f"Source: {result.Title} ({result.Domain})\n{result.Description}"
            )
            sources_used.append({
                'title': result.Title,
                'url': str(result.URL),
                'domain': result.Domain
            })

        # Build the context
        context = "\n\n".join(context_parts)

        # Generate an answer grounded in the context
        prompt = f"""Answer this question using the following web search results.
Cite sources where appropriate.

Search Results:
{context}

Question: {question}

Answer:"""
        answer = self.llm.generate_response(prompt=prompt)

        return {
            'question': question,
            'answer': answer,
            'sources': sources_used
        }

# Usage
rag = WebSearchRAG()
result = rag.answer_with_search(
    "What are the latest developments in GPT models?"
)

print(f"\nAnswer:\n{result['answer']}\n")
print("Sources:")
for i, source in enumerate(result['sources'], 1):
    print(f"{i}. {source['title']}")
    print(f"   {source['url']}\n")
Choosing a Search Provider
Provider Comparison
| Feature | DuckDuckGo | Serper | ValueSERP |
|---|---|---|---|
| Cost | Free | Paid | Paid |
| API Key | Not required | Required | Required |
| Search Quality | Good | Excellent (Google) | Excellent (Google) |
| Rate Limits | Moderate | High | High |
| Best For | Prototyping | Production | Production |
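Because all three providers return the same SearchResult objects, you can defer the choice to configuration. The helper below is an illustrative sketch (not part of SimplerLLM) that dispatches to the functions shown above; note the DuckDuckGo function takes max_results while the others take num_results.

from SimplerLLM.tools.serp import (
    search_with_duck_duck_go,
    search_with_serper_api,
    search_with_value_serp,
)

def search(query, provider="duckduckgo", num_results=10):
    """Dispatch to the configured search provider (hypothetical helper)."""
    if provider == "duckduckgo":
        # DuckDuckGo uses max_results instead of num_results
        return search_with_duck_duck_go(query, max_results=num_results)
    elif provider == "serper":
        return search_with_serper_api(query, num_results=num_results)
    elif provider == "valueserp":
        return search_with_value_serp(query, num_results=num_results)
    raise ValueError(f"Unknown provider: {provider}")

# Prototype for free with DuckDuckGo, then switch one argument in production
results = search("vector databases", provider="duckduckgo", num_results=5)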
Best Practices
1. Cache Search Results
Search API calls cost money and take time. Cache results when possible to avoid repeated searches for the same query.
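As a minimal sketch, a small in-memory cache with a time-to-live keyed on the query avoids paying twice for the same search within a session. The cache structure and TTL value here are illustrative choices, not part of SimplerLLM.

import time
from SimplerLLM.tools.serp import search_with_serper_api

_cache = {}  # (query, num_results) -> (timestamp, results)
CACHE_TTL = 3600  # seconds; tune to how fresh your results must be

def cached_search(query, num_results=10):
    """Return cached results if they are younger than CACHE_TTL."""
    key = (query, num_results)
    now = time.time()
    if key in _cache:
        ts, results = _cache[key]
        if now - ts < CACHE_TTL:
            return results
    results = search_with_serper_api(query, num_results=num_results)
    _cache[key] = (now, results)
    return results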
2. Handle Errors Gracefully
Network requests can fail. Always wrap search calls in try-except blocks and have fallback behavior (see the Error Handling section below for a full example).
3. Respect Rate Limits
Implement rate limiting and backoff strategies to avoid hitting API limits or getting blocked.
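One simple approach, sketched below with illustrative values, is to enforce a minimum interval between consecutive calls; production systems may prefer a token bucket tuned to the provider's documented limits.

import time
from SimplerLLM.tools.serp import search_with_serper_api

MIN_INTERVAL = 1.0  # seconds between requests; adjust to your plan's limits
_last_call = 0.0

def rate_limited_search(query, num_results=10):
    """Sleep if needed so calls are at least MIN_INTERVAL apart."""
    global _last_call
    wait = MIN_INTERVAL - (time.time() - _last_call)
    if wait > 0:
        time.sleep(wait)
    _last_call = time.time()
    return search_with_serper_api(query, num_results=num_results)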
4. Filter and Validate Results
Not all search results are high quality. Implement filtering based on domain reputation, content length, etc.
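For example, you might drop results from known low-quality domains or with very short snippets. The blocklist and threshold below are hypothetical; the sketch relies only on the SearchResult fields shown earlier.

BLOCKED_DOMAINS = {"example-content-farm.com"}  # hypothetical blocklist
MIN_DESCRIPTION_LENGTH = 40  # characters; arbitrary quality threshold

def filter_results(results):
    """Keep only results that pass basic quality checks."""
    filtered = []
    for r in results:
        if r.Domain in BLOCKED_DOMAINS:
            continue
        if not r.Description or len(r.Description) < MIN_DESCRIPTION_LENGTH:
            continue
        filtered.append(r)
    return filtered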
5. Use Async for Multiple Queries
When searching multiple queries, use the async functions to parallelize requests and improve performance, as in the Serper async example above.
Error Handling
from SimplerLLM.tools.serp import search_with_serper_api
import time

def safe_search(query, max_retries=3):
    """Search with retry logic and error handling"""
    for attempt in range(max_retries):
        try:
            results = search_with_serper_api(query, num_results=10)

            if not results:
                print(f"Warning: No results for query: {query}")
                return []

            return results

        except ConnectionError as e:
            print(f"Connection error (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
                continue
        except KeyError as e:
            print(f"API key error: {e}")
            print("Check that SERPER_API_KEY is set in .env")
            return []
        except Exception as e:
            print(f"Unexpected error: {e}")
            return []

    print(f"Failed after {max_retries} attempts")
    return []

# Usage
results = safe_search("machine learning trends")
# Usage
results = safe_search("machine learning trends")
Next Steps
- Text Chunking: Chunk loaded content for processing
- Content Loading: Load content from search results
🌐 Powerful Combination
Combine search integration with content loading and LLMs to build intelligent research assistants that can search the web, extract information, and provide comprehensive answers with citations.