Skip to content

FECo3

Python bindings for a .fec file parser written in Rust.

Install with `pip install feco3`.

Example

example.py
import feco3
import pyarrow as pa

# ruff: noqa: E501

# You can supply a URL or a path to a file.
src = "https://docquery.fec.gov/dcdev/posted/1002596.fec"
# src = "path/to/file.fec"
# src = pathlib.Path("path/to/file.fec")

# The straightforward way is to just parse to a directory of files,
# one file for each itemization type, e.g. "csvs/SA11AI.csv", etc.
# Note that each call below builds its own FecFile from src.
feco3.FecFile(src).to_csvs("csvs/")
feco3.FecFile(src).to_parquets("parquets/")

# Or, you can look at the file at a lower level.
# This doesn't actually read or parse any data yet
fec = feco3.FecFile(src)
print(fec)
# FecFile(src='https://docquery.fec.gov/dcdev/posted/1002596.fec')

# Only when we access something do we actually start parsing.
# Still, we only parse as far as we need to, so this is quite fast.
# This is useful, for example, if you only need the header or cover,
# or if you only want to look at the itemizations in certain forms.
print(fec.header)
print(fec.cover)
# Header(fec_version='8.1', software_name='NetFile', software_version='199199', report_id=None, report_number='0')
# Cover(form_type='F3N', filer_committee_id='C00479188')

# Iterate through the itemizations in the file in batches, each of which
# carries a pyarrow RecordBatch.
# By iterating, this keeps us from having to load the entire file into memory.
# By using pyarrow, we can avoid copying the underlying data from Rust to Python.
# It integrates well with the rest of the Python data ecosystem, for example
# it's easy to convert to a pandas DataFrame.
# NOTE(review): max_batch_size presumably caps the size of each batch -- confirm
# the unit (rows vs. bytes) against the PyarrowBatcher documentation.
batcher = feco3.PyarrowBatcher(fec, max_batch_size=1024 * 1024)
for batch in batcher:
    # The record code for this kind of itemization, e.g. 'SA11AI'
    assert isinstance(batch.code, str)
    # A pyarrow RecordBatch of the itemizations
    assert isinstance(batch.records, pa.RecordBatch)
    df = batch.records.to_pandas()
    print(batch.code)
    print(df.head(3))
# SA15
#   filer_committee_id_number transaction_id back_reference_tran_id_number back_reference_sched_name  ... conduit_zip_code memo_code memo_text_description reference_code
# 0                 C00479188        INCA994                                                          ...
# 1                 C00479188        INCA992                                                          ...
# 2                 C00479188        INCA993                                                          ...

# [3 rows x 44 columns]
# TEXT
#   filer_committee_id_number transaction_id_number back_reference_tran_id_number back_reference_sched_form_name            text
# 0                 C00479188              TPAYC760                       PAYC760                          SC/10  PERSONAL FUNDS
# SC/10
#   filer_committee_id_number transaction_id_number receipt_line_number entity_type  ... lender_candidate_state lender_candidate_district memo_code memo_text_description
# 0                 C00479188               PAYC760                 13B         CAN  ...

# [1 rows x 37 columns]