bit-struct, a bit-struct macro for racket

published
permalink
https://accidental.cc/notes/2024/bit-struct/

I’ve been playing with mdns, as a way to do local-only automatic syncing as part of a notebook application I’m working on in Racket, and I needed a way to represent DNS reconds, which are packed c-style structs.

I could have used the FFI package to define these, but I like playing around, and ended up with these two modules, which define bit-spec, a struct which describes a field in a bit-struct, and define-bit-struct, a macro which builds up a packed bit-struct.

Right now they handle endianness, signed numbers and padding; I may end up with more features eventually.

As an example,

(require "./bit-struct.rkt")

(define-bit-struct dns-header
  [(some-header 16 #:signed? #t)
   (some-field  32)  ; 32 bit number
   (some-bool    1)  ; 1 bit boolean
   (_            7)  ; 7 bits of padding
   (some-bytes   0)]) ; rest as a bytestring
  
(define obj (build-dns-header #:some-bytes #"hello world" #:some-field 12345 #:some-bool #t))
;=> (dns-header 0 12345 #t #"hello world")

(define out (dns-header->bytes obj))
;=> #"\0\0\0\00009\200hello world"

(bytes->dns-header #"\0\0\0\00009\200hello world")
;=> (dns-header 0 12345 #t #"hello world")

The macro defines:

  • dns-header struct, with contract (dns-header/c) and getters
  • dns-header-allspecs, a list of all bitspecs in order, (including padding)
  • dns-header-bitspecs, a hash table of field names to bitspecs
  • build-dns-header, which takes an arbitrary set of keyword args
  • dns-header-data, which returns a all the struct’s data in a list
  • bytes->dns-header and dns-header->bytes which de/serialize respectively
#lang racket/base

; a bitspec determines the layout of a given field
; ie; (build-bitspec bitlen #:big-endian? #:signed?)
;
; you can get a default value, and a generated contract for the field
; you can convert a value+bitspec into a bit string
;                   bit string+bitspec into a value

(require bitsyntax

         racket/contract
         racket/match
         racket/math)

(provide bitspec
         bitspec/c

         build-bitspec

         bitspec-bitlen
         bitspec-big-endian?
         bitspec-signed?

         bitspec-rest?
         bitspec-default-value
         bitspec-contract

         bit-string+bitspec->value
         value+bitspec->bit-string)

; prefab so we can splice in as syntax
(struct bitspec [bitlen big-endian? signed?] #:prefab)

; contract because we can't use a guard with prefab
(define bitspec/c  (struct/c bitspec natural? boolean? boolean?))
(define bitvalue/c (or/c bytes? boolean? exact-integer?))

(define (bit-length/c n)
  (and/c exact-integer?
         (make-flat-contract
          #:name        "bit-length/c"
          #:first-order (lambda (v) (and (exact-integer? v) (<= (integer-length v) n))))))

;;;;

(define/contract (build-bitspec bitlen #:big-endian? (big-endian? #t) #:signed? (signed? #f))
  (->* (natural?) (#:big-endian? boolean? #:signed? boolean?) bitspec/c)
  (bitspec bitlen big-endian? signed?))

(define/contract (bitspec-rest? spec)
  (-> bitspec/c boolean?)
  (zero? (bitspec-bitlen spec)))

(define/contract (bitspec-contract spec)
  (-> bitspec/c contract?)

  (match (bitspec-bitlen spec)
    [0 bytes?]
    [1 boolean?]
    [x (if (bitspec-signed? spec)
           (and/c exact-integer? (bit-length/c (- x 1)))
           (and/c natural?       (bit-length/c x)))]))

(define/contract (bitspec-default-value spec)
  (->i ([spec bitspec/c]) [_ (spec) (and/c bitvalue/c (bitspec-contract spec))])
  (match (bitspec-bitlen spec)
    [0 #""]
    [1  #f]
    [_   0]))

(define/contract (bit-string+bitspec->value chunk spec)
  (->i ([str bit-string?] [spec bitspec/c]) [_ (spec) (and/c bitvalue/c (bitspec-contract spec))])
  (match (bitspec-bitlen spec)
    [0 (bit-string->bytes chunk)]
    [1 (= 1 (bit-string-ref chunk 0))]
    [_ (bit-string->integer chunk (bitspec-big-endian? spec) (bitspec-signed? spec))]))

(define/contract (value+bitspec->bit-string value config)
  (->i ([str (spec) (and/c bitvalue/c (bitspec-contract spec))] [spec bitspec/c]) [_ bit-string?])
  (match (bitspec-bitlen config)
    [0 value] ; zero means rest
    [1 (integer->bit-string (if value 1 0) 1 #t)]
    [_ (integer->bit-string value (bitspec-bitlen config) (bitspec-big-endian? config))]))

(module+ test
  (require rackunit)
  
  (check-exn exn:fail:contract? (lambda () (build-bitspec 'hi)))
  (check-exn exn:fail:contract? (lambda () (build-bitspec 0 #:big-endian? 6)))
  (check-exn exn:fail:contract? (lambda () (build-bitspec 0 #:signed? 15)))
  
  ; check signed output
  (let [(s-bitspec (build-bitspec 8 #:signed? #t))
        (u-bitspec (build-bitspec 8))]
    (check-equal? (bit-string+bitspec->value #"\201" u-bitspec) 129)
    (check-equal? (bit-string+bitspec->value #"\201" s-bitspec) -127)
    (check-equal? (value+bitspec->bit-string 129 u-bitspec) #"\201")
    (check-equal? (value+bitspec->bit-string -127 s-bitspec) #"\201"))
  
  ; check endian output
  (let [(b-bitspec (build-bitspec 32))
        (l-bitspec (build-bitspec 32 #:big-endian? #f))]
    (check-equal? (bit-string+bitspec->value #"\0\0\0\1" b-bitspec) 1)
    (check-equal? (bit-string+bitspec->value #"\0\0\0\1" l-bitspec) #x1000000)
    (check-equal? (value+bitspec->bit-string 1         b-bitspec) #"\0\0\0\1")
    (check-equal? (value+bitspec->bit-string #x1000000 l-bitspec) #"\0\0\0\1"))
  
  ; contract violation with out of range values
  (let [(spec (build-bitspec 8 #:signed? #t))]
    (check-exn exn:fail:contract? (lambda () (value+bitspec->bit-string #f     spec)))
    (check-exn exn:fail:contract? (lambda () (value+bitspec->bit-string 129    spec)))
    (check-exn exn:fail:contract? (lambda () (value+bitspec->bit-string -129   spec)))
    (check-exn exn:fail:contract? (lambda () (value+bitspec->bit-string #"wat" spec)))))
#lang racket/base

(require bitsyntax
         "./bit-spec.rkt"
         racket/struct
         racket/contract)

(require
  (for-syntax arguments
              "./bit-spec.rkt"
              racket/base
              racket/contract
              racket/list
              racket/syntax))

(provide define-bit-struct)

(define-syntax (define-bit-struct stx)
  (syntax-case stx ()
    [(_ struct-name ([name* bitspecs* ...] ...))
     (begin
       (define symbol->keyword (compose string->keyword symbol->string))
       
       ; get symbols and cons from the syntax, so we can precompute some stuff
       (define parsed-names    (syntax->datum #'(name* ...)))
       (define parsed-bitspecs (syntax->datum #'((bitspecs* ...) ...)))
       
       ; get structs from the parsed cons lists
       (define built-bitspecs
         (for/list ([bitspec (in-list parsed-bitspecs)])
           (define bitspec-args   (make-arguments (list (car bitspec)) (apply hash (cdr bitspec))))
           (define bitspec-struct (apply/arguments build-bitspec bitspec-args))
           bitspec-struct))
       
       ; get a hash of field name to bitspecs, excluding anonymous padding
       (define named-bitspecs
         (for/hash ([name (in-list parsed-names)]
                    [spec (in-list built-bitspecs)]
                    #:unless (eq? name '_))
           (values name spec)))
       
       ; (list name...) for named named
       (define struct-named-fields
         (for/list ([name (in-list parsed-names)]
                    #:unless (eq? name '_))
           name))
       
       ; (list contract ...) for named fields
       (define struct-contract-fields
         (for/list ([name (in-list parsed-names)]
                    #:unless (eq? name '_))
           (bitspec-contract (hash-ref named-bitspecs name))))
       
       ; (list (#:keyword (keyword default)) ...)
       (define builder-kw-default-arguments
         (for/list ([(name spec) (in-hash named-bitspecs)])
           (list (symbol->keyword name) (list name (bitspec-default-value spec)))))
       
       ; (list (#:keyword contract) ...)
       (define builder-kw-contract-arguments
         (for/list ([(name spec) (in-hash named-bitspecs)])
           (list (symbol->keyword name) (bitspec-contract spec))))
       
       (with-syntax ([contract-name (format-id stx "~a/c"        #'struct-name)]
                     [allspecs-name (format-id stx "~a-allspecs" #'struct-name)]
                     [bitspecs-name (format-id stx "~a-bitspecs" #'struct-name)]
                     [builder-name  (format-id stx "build-~a"    #'struct-name)]
                     [data-name     (format-id stx "~a-data"     #'struct-name)]
                     [bytes->-name  (format-id stx "bytes->~a"   #'struct-name)]
                     [->bytes-name  (format-id stx "~a->bytes"   #'struct-name)])
         
         #`(begin
             (struct struct-name #,struct-named-fields #:transparent)
             
             (define contract-name
               (struct/c struct-name #,@struct-contract-fields))
             
             (define allspecs-name
               '#,(datum->syntax stx built-bitspecs))
             
             (define bitspecs-name
               (for/hash [(name (in-list '#,parsed-names))
                          (spec (in-list allspecs-name))
                          #:unless (eq? name '_)]
                 (values name spec)))
             
             ; drops the first value, which is the
             (define/contract (data-name obj)
               (-> contract-name list?)
               (struct->list obj))
             
             ; builder with keywords + default values
             (define/contract (builder-name #,@(append* builder-kw-default-arguments))
               (->* () #,(append* builder-kw-contract-arguments) contract-name)
               (struct-name #,@struct-named-fields))
             
             (define/contract (bytes->-name input)
               (-> bytes? contract-name)
               (define (append-maybe-padding name arg arguments)
                 (if (eq? name '_) arguments (cons arg arguments)))
               
               (for/fold ([offset    0]
                          [arguments null]
                          #:result (apply struct-name (reverse arguments)))
                         ([name (in-list '#,parsed-names)]
                          [spec (in-list allspecs-name)])
                 (if (bitspec-rest? spec)
                     ; read the rest of the string
                     (let* ([offset^ (bytes-length input)]
                            [chunk   (bit-string-drop input offset)]
                            [arg     (bit-string+bitspec->value chunk spec)])
                       (values offset^ (append-maybe-padding name arg arguments)))
                     ; or read bitlen bits
                     (let* ([offset^ (+ offset (bitspec-bitlen spec))]
                            [chunk   (sub-bit-string input offset offset^)]
                            [arg     (bit-string+bitspec->value chunk spec)])
                       (values offset^ (append-maybe-padding name arg arguments))))))
             
             (define/contract (->bytes-name self #:align-right? (align-right? #t))
               (->* (contract-name) (#:align-right? boolean?) bytes?)
               (for/fold ([acc #""]
                          [data (data-name self)]
                          #:result (bit-string->bytes/align acc align-right?))
                         ([name (in-list '#,parsed-names)]
                          [spec (in-list allspecs-name)])
                 (if (eq? name '_)
                     ; padding gets default values
                     (values (bit-string-append acc (value+bitspec->bit-string (bitspec-default-value spec) spec)) data)
                     ; not padding puts values from the struct
                     (values (bit-string-append acc (value+bitspec->bit-string (car data) spec)) (cdr data))))))))]))

(module+ test
  (require rackunit)
  
  (define-bit-struct example-le-struct
    ([fielda  32 #:big-endian? #f]
     [fieldb  16 #:signed? #t]))
  
  (let* ([empty-val   (build-example-le-struct)]
         [empty-bytes (example-le-struct->bytes empty-val)])
    (check-equal? empty-bytes #"\0\0\0\0\0\0"))
  
  (let* ([something-val   (build-example-le-struct #:fielda 512 #:fieldb 512)]
         [something-bytes (example-le-struct->bytes something-val)])
    (check-equal? something-bytes #"\0\2\0\0\2\0"))
  
  (define-bit-struct example-be-struct
    ([fielda  32 #:big-endian? #t]
     [fieldb  16 #:signed? #t]))
  
  (let* ([something-val   (build-example-be-struct #:fielda 512 #:fieldb 512)]
         [something-bytes (example-be-struct->bytes something-val)])
    (check-equal? something-bytes #"\0\0\2\0\2\0"))
  
  (define-bit-struct example-data-struct
    ([fielda  1]
     [_      39]
     [fieldb  0]))
  
  (let* ([data-val    (build-example-data-struct #:fielda #t #:fieldb #"hello!")]
         [data-bytes  (example-data-struct->bytes data-val)]
         [rebuilt-val (bytes->example-data-struct data-bytes)])
    (check-equal? data-bytes #"\200\0\0\0\0hello!")
    (check-equal? rebuilt-val data-val)))