Data Import

Core Import Tools

Command-line Interface

# Identify importable files: run the importers configured in config.py
# against the files found under ~/Downloads
bean-identify config.py ~/Downloads

# Extract transactions from those files and save the output to extracted.beancount.
# -e names the existing ledger (presumably used to flag entries that are
# already recorded — confirm with `bean-extract --help`)
bean-extract -e ledger.beancount config.py ~/Downloads > extracted.beancount

# File documents: -o points at the ~/documents directory that receives the
# files from ~/Downloads
bean-file -o ~/documents config.py ~/Downloads

Importer Implementation

Basic Importer Structure

import datetime
from os import path

from beancount.core import data
from beancount.ingest import importer

class CustomImporter(importer.ImporterProtocol):
    """Skeleton importer that claims CSV files and files them under one account."""

    def __init__(self, account_root):
        """Remember the account used when filing documents.

        Args:
            account_root: Account name returned by ``file_account``.
        """
        self.account_root = account_root

    def identify(self, file):
        """Return true if this importer can handle the file."""
        return file.name.endswith('.csv')

    def extract(self, file):
        """Extract Beancount directives from file.

        Placeholder: no parsing is implemented yet, so this returns an
        empty list.
        """
        entries = []
        # Process file and create entries
        return entries

    def file_account(self, file=None):
        """Return account for filing.

        Args:
            file: The file being filed. Unused here, but accepted because the
                importer protocol passes it; the default keeps existing
                zero-argument calls working.
        """
        return self.account_root

    def file_name(self, file):
        """Return desired filed filename.

        Only the basename is used: ``file.name`` may carry directory
        components, which must not leak into the new filename.
        """
        return 'renamed-{}'.format(path.basename(file.name))

    def file_date(self, file):
        """Return the date under which the file should be filed.

        Placeholder: returns today's date instead of parsing a date out of
        the file contents.
        """
        return datetime.date.today()

Configuration Setup

#!/usr/bin/env python3
from importers import bank, investment, credit

# Importer instances made available to the import tools, one per account.
CONFIG = [
    bank.Importer(account='Assets:US:Bank:Checking', currency='USD'),
    investment.Importer(
        account='Assets:US:Investment',
        commission_account='Expenses:Fees:Commission',
    ),
    credit.Importer(account='Liabilities:US:Credit', currency='USD'),
]

Testing Framework

Regression Test Setup

from beancount.ingest import regression

class TestImporter(unittest.TestCase):
    """Regression tests driven by sample files."""

    # NOTE(review): confirm that `regression.check_file` exists in the
    # installed beancount version before relying on this decorator.
    @regression.check_file(account="Assets:Test", regexp_mime="text/csv")
    def test_basic(self, importer, file):
        """Extraction from the sample file must yield at least one entry."""
        self.assertTrue(importer.extract(file))

Test File Structure

importers/
├── __init__.py
├── bank/
│   ├── __init__.py
│   ├── importer.py
│   ├── test_sample.csv
│   ├── test_sample.csv.extract
│   └── test_sample.csv.file_date

File Processing

File Conversion Cache

def process_file(file):
    """Return the converted contents of ``file``.

    The conversion is routed through ``file.convert()`` with
    ``conversion_function`` as the converter, rather than calling the
    converter directly.
    """
    # Go through file.convert() so the conversion result is cached
    return file.convert(conversion_function)

def conversion_function(filename):
    """Convert the content of the file at ``filename``.

    ``process_file`` hands this function to ``file.convert()``, which is
    where the caching happens. This is a placeholder: the body is not
    implemented, so the function currently returns ``None``.
    """
    # Implementation

PDF Processing

def extract_pdf_text(filename):
    """Extract text from a PDF, falling back through three extractors.

    Tries ``extract_with_pdfminer``, then ``extract_with_pdftotext``, then
    ``extract_with_poppler``; the first one that does not raise wins.

    Args:
        filename: Passed unchanged to each extractor.

    Returns:
        Whatever the first successful extractor returns.

    Raises:
        Exception: Whatever ``extract_with_poppler`` raises when the first
            two extractors have already failed.
    """
    # `except Exception` instead of a bare `except`, so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not mistaken for an extraction
    # failure and silently swallowed.
    try:
        return extract_with_pdfminer(filename)
    except Exception:
        pass  # deliberate: fall through to the next extractor

    try:
        return extract_with_pdftotext(filename)
    except Exception:
        pass  # deliberate: fall through to the last resort

    return extract_with_poppler(filename)

Directory Organization

Recommended Structure

project/
├── documents/
│   ├── Assets/
│   ├── Liabilities/
│   └── Income/
├── importers/
│   └── custom/
├── ledger.beancount
└── config.py

File Moving Logic

def determine_file_path(importer, file):
    """Determine the final, relative path for an imported file.

    The result has the form ``<account as directories>/<YYYY-MM-DD>-<name>``,
    for example ``Assets/US/Bank/2024-01-05-statement.csv``.

    Args:
        importer: Object providing ``file_date(file)``, ``file_account()``
            and ``file_name(file)``.
        file: File object handed to the importer. Its ``name`` attribute is
            only read when the importer returns ``None`` for the date or
            the filename.

    Returns:
        The destination path as a string, relative to the documents root.
    """
    import datetime
    from os import path

    date = importer.file_date(file)
    if date is None:
        # No date available from the importer: use the file's modification time.
        date = datetime.date.fromtimestamp(path.getmtime(file.name))

    clean_name = importer.file_name(file)
    if clean_name is None:
        # No rename requested: keep the original basename.
        clean_name = path.basename(file.name)

    # The account decides the directory; each ':'-separated component
    # becomes one path segment.
    account = importer.file_account()
    dated_name = f"{date.strftime('%Y-%m-%d')}-{clean_name}"
    return path.join(*account.split(':'), dated_name)

Best Practices

Error Handling

def safe_extract(self, file):
    """Extract entries, logging any failure instead of raising.

    Args:
        file: The file object handed to ``self.extract``.

    Returns:
        Whatever ``self.extract(file)`` returns, or an empty list when it
        raised an ``Exception``.
    """
    import logging

    try:
        return self.extract(file)
    except Exception as exc:
        # Deliberate best-effort: report the failure and return no entries.
        # `exception()` logs at ERROR level like before but also records the
        # traceback, which a bare message would lose.
        logging.getLogger(__name__).exception("Extraction failed: %s", exc)
        return []

File Validation

def validate_file(self, file):
    """Tell whether ``file`` passes both pre-processing checks.

    The header check runs first; the structure check only runs when the
    header check succeeded. The result is always a plain ``bool``.
    """
    return bool(self._check_header(file) and self._verify_structure(file))

Data Normalization

def clean_payee(self, payee):
    """Return the canonical form of a payee name.

    Runs of whitespace collapse to a single space, the ends are trimmed and
    the text is upper-cased. The result is then looked up in
    ``self.payee_map``; unmapped names come back in their normalized form.
    """
    normalized = re.sub(r'\s+', ' ', payee).strip().upper()
    return self.payee_map.get(normalized, normalized)

This guide provides a technical foundation for implementing data import capabilities in Beancount while maintaining precise control over the import process.

Share on Twitter

Core Import Tools

Command-line Interface

Importer Implementation

Basic Importer Structure

Configuration Setup

Testing Framework

Regression Test Setup

Test File Structure

File Processing

File Conversion Cache

PDF Processing

Directory Organization

Recommended Structure

File Moving Logic

Best Practices

About Beancount.io

Core Import Tools

Command-line Interface

Importer Implementation

Basic Importer Structure

Configuration Setup

Testing Framework

Regression Test Setup

Test File Structure

File Processing

File Conversion Cache

PDF Processing

Directory Organization

Recommended Structure

File Moving Logic

Best Practices