gemini - kennedy.gemi.dev

💾 Archived View for heavysquare.com › unix › 9996-multiconnection-download-scsh.txt captured on 2023-04-19 at 23:18:27.
-=-=-=-=-=-=-
There's a server (of a radio) I download audio files from. The thing is, that
the bandwidth for a connection is limited on their side to ~24Kb/sec nowdays
(several years ago there wasn't any limit). By getting the file with multiple
parallel connections solves the problem somewhat. Unfortunately the
number of connections from a given IP address is also limited to ~15. Anyway,
let's say ~240Kb/sec (when using 10 connections) is much more than 24Kb/sec.

Parts of a file can be obtained by [Curl](http://en.wikipedia.org/wiki/CURL). I
decided to use [The Scheme Shell](http://www.scsh.net/) to implement my idea
due to its thread support and strong relationship with command line tools (as
being a shell).

The solution is a fast hack. Let's see..

    $ ls
    getItFast.scm
    $ ./getItFast.scm http://someserver/2200.mp3
    $ ls
    2200.mp3.00  2200.mp3.08  2200.mp3.16  2200.mp3.24  2200.mp3.32  2200.mp3.40
    2200.mp3.01  2200.mp3.09  2200.mp3.17  2200.mp3.25  2200.mp3.33  2200.mp3.41
    2200.mp3.02  2200.mp3.10  2200.mp3.18  2200.mp3.26  2200.mp3.34  2200.mp3.42
    2200.mp3.03  2200.mp3.11  2200.mp3.19  2200.mp3.27  2200.mp3.35  getItFast.scm
    2200.mp3.04  2200.mp3.12  2200.mp3.20  2200.mp3.28  2200.mp3.36
    2200.mp3.05  2200.mp3.13  2200.mp3.21  2200.mp3.29  2200.mp3.37
    2200.mp3.06  2200.mp3.14  2200.mp3.22  2200.mp3.30  2200.mp3.38
    2200.mp3.07  2200.mp3.15  2200.mp3.23  2200.mp3.31  2200.mp3.39
    $ cat 2200.mp3.* > 2200.mp3
    $ rm 2200.mp3.*
    $ ls
    2200.mp3  getItFast.scm
    $ cat getItFast.scm
    #!/usr/bin/scsh 
    -o placeholders -o threads -o locks -s
    !#
    
    ; this many thread will be started,
    ; each of'em represents a connection
    (define POOL-SIZE 10)
    
    ; the length of a chunk in bytes
    ; (downloaded with one connection)
    (define STEP 1000000)
    
    (define URL (argv 1))
    (define FNAME (file-name-nondirectory URL))
    
    (define url-content-length
      (lambda (url)
        (string->number
         (cadr ((infix-splitter (rx (+ white)))
                (run/string
                 (| (curl -s -S -I ,url)
                    (grep "Content-Length"))))))))
    
    (define LENGTH (url-content-length URL))
    
    (define make-queue
      (lambda (data-list)
        (let ((lock (make-lock)))
          (lambda ()
            (let ((re '()))
              (obtain-lock lock)
              (if (null? data-list)
                  (set! re '())
                  (begin
                    (set! re (car data-list))
                    (set! data-list (cdr data-list))))
              (release-lock lock)
              re)))))
    
    (define range-string
      (lambda (beg end)
        (let ((begs (number->string beg))
              (ends (number->string end)))
          (string-append begs "-" ends))))
    
    (define get-part
      (lambda (beg end fn)
        (run (curl -o ,fn -s -S -r
                   (range-string beg end) ,URL))))
    
    ; this long is the number field
    ; in the filenames of parts
    (define PADLEN
      (string-length
       (number->string
        (ceiling
         (/ LENGTH STEP)))))
    
    (define file-counter-string
      (lambda (i)
        (let loop ((s (number->string i)))
          (if (<= PADLEN (string-length s))
              s
              (loop (string-append "0" s))))))
    
    (define counted-file-name
      (lambda (i)
        (string-append FNAME
                       "."
                       (file-counter-string i))))
    
    ; this contains the works to do
    ; (work ~ download a specific chunk)
    ; e.g. ((0 999999 "foo.mp3.00") (1000000 1999999 "foo.mp3.01") ... )
    (define QUEUE
      (make-queue
       (let loop ((work-list '()) (low 0) (upp (- STEP 1)) (counter 0))
         (if (> low LENGTH)
             work-list
             (loop (cons (list low upp (counted-file-name counter)) work-list)
                   (+ upp 1)
                   (min LENGTH (+ upp STEP))
                   (+ counter 1))))))
    
    (define signal-thread-finish
      (lambda (waiter)
        (placeholder-set! waiter #f)))
    
    (define start-worker
      (lambda ()
        (let ((waiter (make-placeholder)))
          (spawn
           (lambda ()
             (let loop ()
               (let ((work (QUEUE)))
                 (if (null? work)
                     (signal-thread-finish waiter)
                     (begin
                       (apply get-part work)
                       (loop)))))))
          waiter)))
    
    (let loop ((i POOL-SIZE) (waiters '()))
      (if (= i 0)
          (map placeholder-value waiters)
          (loop (- i 1) (cons (start-worker) waiters))))
    $

Useful links:

- [scsh concurrent programming](http://www.scsh.net/docu/html/man-Z-H-10.html) 
- [scsh process notation](http://www.scsh.net/docu/html/man-Z-H-3.html) 
- [r6rs](http://www.r6rs.org/final/html/r6rs/r6rs.html)