While tools like plaid2text are great, some of us prefer building custom solutions. After setting up Plaid integrations for several of my small business clients, I wanted to share a guide for building your own Plaid-to-Beancount pipeline from scratch.
Why Build Your Own?
- Full control over the data flow and transformation
- No MongoDB dependency (plaid2text requires it)
- Custom business logic for your specific needs
- Learning opportunity to understand how Plaid works
Prerequisites
- Python 3.9+
- A Plaid developer account (plaid.com)
- Basic familiarity with REST APIs
Step 1: Set Up Your Environment
# Create a virtual environment named "plaid-beancount" and activate it
# (the activate path below is the POSIX-shell layout)
python -m venv plaid-beancount
source plaid-beancount/bin/activate
# Install dependencies: the Plaid API client and Beancount
pip install plaid-python beancount
Step 2: Authentication and Linking
First, you need to link a bank account. Plaid uses a flow called “Link”, which requires a web interface where the user authenticates with their bank.
import os

import plaid
from plaid.api import plaid_api
from plaid.model.country_code import CountryCode
from plaid.model.link_token_create_request import LinkTokenCreateRequest
from plaid.model.link_token_create_request_user import LinkTokenCreateRequestUser
from plaid.model.products import Products

# Configure the Plaid client.
# Sandbox serves test data; switch to plaid.Environment.Production for real
# accounts. Plaid has retired the separate Development environment, so it is
# no longer a usable host.
# Credentials are read from the environment when set, so real keys never have
# to be written into this script; the placeholders remain as a fallback.
configuration = plaid.Configuration(
    host=plaid.Environment.Sandbox,
    api_key={
        'clientId': os.environ.get('PLAID_CLIENT_ID', 'YOUR_CLIENT_ID'),
        'secret': os.environ.get('PLAID_SECRET', 'YOUR_SECRET'),
    },
)
api_client = plaid.ApiClient(configuration)
client = plaid_api.PlaidApi(api_client)

# Create a link token: the short-lived token the Link web UI is opened with.
request = LinkTokenCreateRequest(
    products=[Products("transactions")],
    client_name="My Beancount App",
    country_codes=[CountryCode('US')],
    language='en',
    user=LinkTokenCreateRequestUser(client_user_id='unique-user-id'),
)
response = client.link_token_create(request)
link_token = response.link_token
print(f"Open Plaid Link with token: {link_token}")
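You’ll need a simple web page to complete the Link flow and receive the public_token. The public_token is short-lived, so exchange it right away for a persistent access_token by calling client.item_public_token_exchange() (the /item/public_token/exchange endpoint).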
Step 3: Fetch Transactions
Once you have an access token, fetching transactions is straightforward:
from datetime import datetime, timedelta
from plaid.model.transactions_get_request import TransactionsGetRequest
from plaid.model.transactions_get_request_options import TransactionsGetRequestOptions
def _as_date(value):
    """Return ``value`` as a plain date, dropping the time part of a datetime."""
    # datetime is a subclass of date, so it has to be checked first.
    return value.date() if isinstance(value, datetime) else value


def get_transactions(access_token, start_date=None, end_date=None):
    """Fetch every transaction in a date range from Plaid.

    Args:
        access_token: Plaid access token of the linked Item.
        start_date: First day to fetch, as a ``date`` or ``datetime``.
            Defaults to 30 days before today.
        end_date: Last day to fetch, as a ``date`` or ``datetime``.
            Defaults to today.

    Returns:
        A new list with the transactions of all result pages, in the order
        Plaid returned them.
    """
    today = datetime.now().date()
    start = _as_date(start_date) if start_date else today - timedelta(days=30)
    end = _as_date(end_date) if end_date else today

    transactions = []
    while True:
        request = TransactionsGetRequest(
            access_token=access_token,
            start_date=start,
            end_date=end,
            options=TransactionsGetRequestOptions(
                # Requested on every page, not just the first, so that all
                # transactions carry the category used for account mapping.
                include_personal_finance_category=True,
                offset=len(transactions),
            ),
        )
        response = client.transactions_get(request)
        page = response.transactions
        transactions.extend(page)
        # Stop on an empty page as well: otherwise a total that is never
        # reached would keep this loop requesting forever.
        if not page or len(transactions) >= response.total_transactions:
            break
    return transactions
Step 4: Convert to Beancount Format
Now the fun part - converting Plaid transactions to Beancount entries:
from beancount.core import data
from beancount.core.amount import Amount
from beancount.core.number import D
from decimal import Decimal
def transaction_to_beancount(plaid_txn, account_mapping):
    """Convert a Plaid transaction to a balanced Beancount transaction.

    Args:
        plaid_txn: Plaid transaction object. Reads ``amount``, ``account_id``,
            ``transaction_id``, ``date``, ``name``, ``merchant_name``,
            ``personal_finance_category`` and, when present, the currency
            code fields.
        account_mapping: Dict from Plaid ``account_id`` to the Beancount
            account of that bank account. Unmapped ids fall back to
            ``Assets:Bank:Unknown``.

    Returns:
        A ``data.Transaction`` with two postings: the bank account and the
        counter-account (an expense category or ``Income:Unknown``).
    """
    # Plaid reports money leaving the account as a positive amount.
    amount = D(str(abs(plaid_txn.amount)))
    if plaid_txn.amount > 0:
        # Debit (expense)
        posting_account = categorize_transaction(plaid_txn)
        bank_amount = -amount
    else:
        # Credit (income/refund)
        posting_account = "Income:Unknown"
        bank_amount = amount

    # Use the currency Plaid reports for this transaction; 'USD' is only the
    # fallback when neither currency code is available.
    currency = (
        getattr(plaid_txn, 'iso_currency_code', None)
        or getattr(plaid_txn, 'unofficial_currency_code', None)
        or 'USD'
    )

    bank_account = account_mapping.get(
        plaid_txn.account_id,
        "Assets:Bank:Unknown",
    )

    # Build metadata
    meta = data.new_metadata('plaid', 0)
    meta['plaid_id'] = plaid_txn.transaction_id
    category = plaid_txn.personal_finance_category
    if category:
        primary = category.primary
        detailed = category.detailed
        prefix = f"{primary}_"
        # The detailed category repeats the primary one as a prefix. Split
        # only at that boundary: splitting on every underscore would break
        # multi-word names such as FOOD_AND_DRINK into separate levels.
        if primary and detailed.startswith(prefix):
            meta['plaid_category'] = f"{primary} > {detailed[len(prefix):]}"
        else:
            meta['plaid_category'] = detailed
    else:
        meta['plaid_category'] = 'Unknown'

    # The two postings mirror each other so the transaction balances.
    postings = [
        data.Posting(
            bank_account,
            Amount(bank_amount, currency),
            None, None, None, None,
        ),
        data.Posting(
            posting_account,
            Amount(-bank_amount, currency),
            None, None, None, None,
        ),
    ]

    return data.Transaction(
        meta=meta,
        date=plaid_txn.date,
        flag='*',
        payee=plaid_txn.merchant_name or plaid_txn.name,
        narration=plaid_txn.name,
        tags=set(),
        links=set(),
        postings=postings,
    )
def categorize_transaction(plaid_txn):
    """Map a Plaid personal finance category to a Beancount expense account.

    Args:
        plaid_txn: Object with a ``personal_finance_category`` attribute that
            is either falsy or exposes a ``primary`` category string.

    Returns:
        The mapped account for known primary categories,
        ``Expenses:Uncategorized`` when no category is available, and
        otherwise ``Expenses:<Primary>`` with the primary category rewritten
        in CamelCase (``LOAN_PAYMENTS`` -> ``Expenses:LoanPayments``).
    """
    category = plaid_txn.personal_finance_category
    if not category or not category.primary:
        return "Expenses:Uncategorized"

    primary = category.primary
    mapping = {
        'FOOD_AND_DRINK': 'Expenses:Food',
        'TRANSPORTATION': 'Expenses:Transportation',
        'TRAVEL': 'Expenses:Travel',
        'ENTERTAINMENT': 'Expenses:Entertainment',
        'GENERAL_MERCHANDISE': 'Expenses:Shopping',
        'PERSONAL_CARE': 'Expenses:Personal',
        'MEDICAL': 'Expenses:Health',
        'RENT_AND_UTILITIES': 'Expenses:Housing',
    }
    if primary in mapping:
        return mapping[primary]

    # Beancount account names may not contain underscores, so the words are
    # joined in CamelCase instead of being title-cased in place.
    component = ''.join(word.capitalize() for word in primary.split('_'))
    if not component:
        return "Expenses:Uncategorized"
    return f"Expenses:{component}"
Step 5: Write to File
from beancount.parser import printer
def write_transactions(transactions, output_file):
    """Write Beancount transactions to a file, replacing its contents.

    Args:
        transactions: Iterable of Beancount entries to print.
        output_file: Path of the file to write.
    """
    # Beancount files are UTF-8. Without an explicit encoding the platform
    # default is used, which can fail on non-ASCII payee names.
    with open(output_file, 'w', encoding='utf-8') as f:
        for txn in transactions:
            f.write(printer.format_entry(txn))
            # Extra newline after each formatted entry.
            f.write('\n')
# Usage: fetch from Plaid, convert every transaction, write the ledger file
plaid_txns = get_transactions(access_token)
beancount_txns = [transaction_to_beancount(entry, account_mapping) for entry in plaid_txns]
write_transactions(beancount_txns, 'imports/bank.beancount')
Step 6: Deduplication
A critical piece - don’t re-import transactions you’ve already processed:
import json
import os
# JSON file holding the IDs of transactions that were already imported
PROCESSED_FILE = 'processed_transactions.json'


def load_processed():
    """Return the IDs stored in PROCESSED_FILE as a set (empty if the file is missing)."""
    if not os.path.exists(PROCESSED_FILE):
        return set()
    with open(PROCESSED_FILE) as handle:
        stored_ids = json.load(handle)
    return set(stored_ids)
def save_processed(processed):
    """Persist the processed transaction IDs to PROCESSED_FILE as a JSON list.

    Args:
        processed: Iterable of transaction IDs to store.
    """
    # Write to a sibling temp file and swap it in, so an interrupted run
    # cannot leave a truncated state file behind.
    tmp_path = f"{PROCESSED_FILE}.tmp"
    with open(tmp_path, 'w') as f:
        # Sorted so the file content is stable from run to run; key=str keeps
        # sorting safe even if the IDs are not all of one type.
        json.dump(sorted(processed, key=str), f)
    os.replace(tmp_path, PROCESSED_FILE)
def filter_new_transactions(transactions, processed):
    """Return, in input order, the transactions whose transaction_id is not in ``processed``."""
    unseen = []
    for candidate in transactions:
        if candidate.transaction_id in processed:
            continue
        unseen.append(candidate)
    return unseen
Security Considerations
Important: Your Plaid access tokens are sensitive. They provide read access to financial data. Store them securely:
import os
# Good - read the token from an environment variable.
# os.environ.get returns None when PLAID_ACCESS_TOKEN is not set.
access_token = os.environ.get('PLAID_ACCESS_TOKEN')
# Better - fetch it from a secrets manager. This is an alternative to the
# assignment above, not an extra step: it overwrites access_token.
# NOTE(review): `secretmanager` looks like a placeholder module name -
# substitute the client of whichever secrets manager you use.
from secretmanager import get_secret
access_token = get_secret('plaid-access-token')
# NEVER hard-code tokens or commit them to version control
Full Script Example
I’ve put together a complete working example in a GitHub gist (hypothetical link) that ties all this together.
Questions and Discussion
- What error handling would you add for production use?
- Anyone interested in collaborating on an open-source alternative to plaid2text?
- How do you handle the initial Link flow in a headless/CLI environment?
Happy to dive deeper into any part of this!