Integrations
Supabase Vector
Production-ready Postgres vector store with pgvector for semantic search. Build semantic search with Supabase and Supacrawler in minutes.
Supabase Vector (pgvector)
Use the pgvector Postgres extension (bundled with Supabase) for production-ready vector storage and semantic search.
Setup
Enable pgvector Extension
-- Enable the pgvector extension (ships with Supabase; provides the VECTOR
-- type and the distance operators used by the table and queries below)
CREATE EXTENSION IF NOT EXISTS vector;
Create Table
-- Create documents table
--   content:   raw page text (markdown)
--   metadata:  arbitrary JSON (url, title, ...)
--   embedding: 1536 dimensions to match OpenAI's text-embedding-3-small
CREATE TABLE documents (
id BIGSERIAL PRIMARY KEY,
content TEXT,
metadata JSONB,
embedding VECTOR(1536)
);
-- Create HNSW index for fast similarity search
-- vector_cosine_ops pairs with the <=> (cosine distance) operator used in queries
CREATE INDEX ON documents
USING hnsw (embedding vector_cosine_ops);
Complete Example
"""End-to-end example: scrape a page, embed it, store it, then search it."""
import os

from supabase import create_client, Client
from supacrawler import SupacrawlerClient
import openai

# Credentials come from the environment; os.environ[...] raises KeyError
# when any is missing, so a misconfigured run fails fast at startup.
SUPABASE_URL = os.environ['SUPABASE_URL']
SUPABASE_KEY = os.environ['SUPABASE_SERVICE_KEY']
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
SUPACRAWLER_API_KEY = os.environ['SUPACRAWLER_API_KEY']

# Wire up the three clients.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
crawler = SupacrawlerClient(api_key=SUPACRAWLER_API_KEY)
openai.api_key = OPENAI_API_KEY


def _embed(text):
    """Return the embedding vector for *text* (1536 dims, matching the table)."""
    resp = openai.embeddings.create(
        model='text-embedding-3-small',
        input=text,
    )
    return resp.data[0].embedding


# Step 1: scrape the page as markdown.
scraped = crawler.scrape('https://example.com/docs', format='markdown')

# Step 2: embed the scraped content.
doc_vector = _embed(scraped.content)

# Step 3: insert content + metadata + vector as one row.
inserted = supabase.table('documents').insert({
    'content': scraped.content,
    'metadata': {
        'url': scraped.url,
        'title': scraped.title
    },
    'embedding': doc_vector
}).execute()
print(f"Stored document: {inserted.data[0]['id']}")

# Step 4: embed the question and run a similarity search through the
# match_documents RPC (a Postgres function ranking rows by cosine distance).
question = "How do I configure authentication?"
question_vector = _embed(question)

matches = supabase.rpc('match_documents', {
    'query_embedding': question_vector,
    'match_count': 5
}).execute()

for row in matches.data:
    print(f"Similarity: {row['similarity']:.3f}")
    print(f"Title: {row['metadata']['title']}")
    print(f"Content: {row['content'][:200]}...")
    print("---")
Create Match Function (this function must exist in your database before the semantic-search step above will work)
-- Function for similarity search.
-- Returns the match_count nearest rows by cosine distance, each with a
-- similarity score in [-1, 1] (1 = same direction as the query vector).
-- match_threshold filters out weak matches; its default of -1 keeps the
-- original behavior of returning all rows, so existing callers are unaffected.
CREATE OR REPLACE FUNCTION match_documents(
  query_embedding VECTOR(1536),
  match_count INT DEFAULT 5,
  match_threshold FLOAT DEFAULT -1
)
RETURNS TABLE (
  id BIGINT,
  content TEXT,
  metadata JSONB,
  similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
  RETURN QUERY
  SELECT
    documents.id,
    documents.content,
    documents.metadata,
    -- <=> is cosine distance; 1 - distance converts it to a similarity score
    1 - (documents.embedding <=> query_embedding) AS similarity
  FROM documents
  WHERE 1 - (documents.embedding <=> query_embedding) >= match_threshold
  ORDER BY documents.embedding <=> query_embedding
  LIMIT match_count;
END;
$$;
Batch Processing
# Crawl multiple pages starting from the docs root (depth 2, at most 50 links).
job = crawler.create_crawl_job(
    url='https://example.com/docs',
    depth=2,
    link_limit=50
)
final = crawler.wait_for_crawl(job.job_id)

# Batch embed and store every crawled page that produced markdown.
documents = []
for url, page in final.data.crawl_data.items():
    markdown = getattr(page, 'markdown', None)
    if not markdown:
        continue
    # Generate the embedding for this page.
    response = openai.embeddings.create(
        model='text-embedding-3-small',
        input=markdown
    )
    # The getattr chain tolerates a missing/None metadata object and a
    # metadata object without a title (a bare hasattr(page, 'metadata')
    # check would still raise AttributeError in those cases).
    documents.append({
        'content': markdown,
        'metadata': {
            'url': url,
            'title': getattr(getattr(page, 'metadata', None), 'title', None)
        },
        'embedding': response.data[0].embedding
    })

# Insert all rows in a single request; skip the round trip entirely when the
# crawl yielded no markdown pages (inserting an empty list is wasted work).
if documents:
    result = supabase.table('documents').insert(documents).execute()
    print(f"Stored {len(result.data)} documents")
else:
    print("Stored 0 documents")
Performance Tips
- Use HNSW indexes for approximate nearest-neighbor search — typically orders of magnitude faster than a sequential scan on large tables
- Batch operations to reduce network overhead
- Choose appropriate dimensions (1536 for text-embedding-3-small)
- Cache embeddings to avoid recomputing
- Use RLS policies for multi-tenant applications
Resources
Was this page helpful?
LlamaIndex + Supabase
Ingest with Supacrawler, index with LlamaIndex, and store/query vectors in Supabase. Complete RAG pipeline in minutes.
Monitor Competitor Pricing
Automated monitoring of competitor pricing pages to track price changes and stay competitive. Monitor websites for price updates, product launches, and promotional changes automatically.