diff -urN sbcl/contrib/sb-aclrepl/inspect.lisp sbcl.char/contrib/sb-aclrepl/inspect.lisp --- sbcl/contrib/sb-aclrepl/inspect.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/contrib/sb-aclrepl/inspect.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -681,7 +681,7 @@ (description-maybe-internals "character ~W char-code #x~2,'0X" (list object (char-code object)) "[#x~8,'0X]" - (logior sb-vm:base-char-widetag + (logior sb-vm:character-widetag (ash (char-code object) sb-vm:n-widetag-bits)))) diff -urN sbcl/contrib/sb-simple-streams/impl.lisp sbcl.char/contrib/sb-simple-streams/impl.lisp --- sbcl/contrib/sb-simple-streams/impl.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/contrib/sb-simple-streams/impl.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -173,7 +173,7 @@ (index 0) ; current index in current buffer (total 0)) ; total characters (declare (type simple-stream encap) - (type simple-base-string cbuf) + (type simple-string cbuf) (type cons bufs tail) (type sb-int:index index total)) (loop @@ -200,7 +200,7 @@ (do ((list bufs (cdr list))) ((eq list tail)) (let ((buf (car list))) - (declare (type simple-base-string buf)) + (declare (type simple-string buf)) (replace cbuf buf :start1 idx) (incf idx (length buf))))) (return (values (sb-kernel:shrink-vector cbuf total) @@ -210,7 +210,7 @@ (index 0)) (declare (type sb-int:index index)) (dolist (buf bufs) - (declare (type simple-base-string buf)) + (declare (type simple-string buf)) (replace string buf :start1 index) (incf index (length buf))) (return (values string (eq done :eof))))) diff -urN sbcl/make-host-1.sh sbcl.char/make-host-1.sh --- sbcl/make-host-1.sh 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/make-host-1.sh 2004-10-27 13:34:22.000000000 +0200 @@ -43,6 +43,9 @@ (when (find :sb-test *shebang-features*) (load "tests/type.before-xc.lisp") (load "tests/info.before-xc.lisp")) + (load "tools-for-build/ucd.lisp") + (sb-cold::slurp-ucd) + (sb-cold::output) (host-cload-stem "src/compiler/generic/genesis") (sb!vm:genesis :c-header-dir-name "src/runtime/genesis") #+cmu (ext:quit) diff -urN sbcl/package-data-list.lisp-expr sbcl.char/package-data-list.lisp-expr --- sbcl/package-data-list.lisp-expr 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/package-data-list.lisp-expr 2004-10-27 13:34:22.000000000 +0200 @@ -63,6 +63,7 @@ "SAP-ALIEN" "SHORT" "SIGNED" "SLOT" "STRUCT" "UNSIGNED" "UNSIGNED-CHAR" "UNSIGNED-INT" "UNSIGNED-LONG" "UNSIGNED-SHORT" + "UTF8-STRING" "VOID" "WITH-ALIEN")) @@ -906,7 +907,7 @@ ;; various internal defaults "DEFAULT-INIT-CHAR" "*DEFAULT-INIT-CHAR-FORM*" - "*LOAD-SOURCE-DEFAULT-TYPE*" + "*LOAD-SOURCE-DEFAULT-TYPE*" "BASE-CHAR-CODE-LIMIT" ;; hash caches "DEFINE-HASH-CACHE" @@ -1122,7 +1123,11 @@ "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX" "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE" "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT" - "CALLABLE" "CASE-BODY-ERROR" "CHARPOS" + "CALLABLE" "CASE-BODY-ERROR" + "CHARACTER-SET" "CHARACTER-SET-TYPE" + "CHARACTER-SET-TYPE-PAIRS" + "CHARACTER-STRING-P" + "CHARPOS" "CHECK-FOR-CIRCULARITY" "CHECK-TYPE-ERROR" "CLOSED-FLAME" "CODE-COMPONENT" "CODE-COMPONENT-P" "CODE-DEBUG-INFO" "CODE-HEADER-REF" "CODE-HEADER-SET" "CODE-INSTRUCTIONS" @@ -1154,6 +1159,7 @@ "FLOAT-WAIT" "DYNAMIC-SPACE-FREE-POINTER" "DYNAMIC-USAGE" "EFFECTIVE-FIND-POSITION-TEST" "EFFECTIVE-FIND-POSITION-KEY" "ERROR-NUMBER-OR-LOSE" + "EXTENDED-CHAR-P" "FAILED-%WITH-ARRAY-DATA" "FDEFINITION-OBJECT" "FDOCUMENTATION" "FILENAME" "FIND-AND-INIT-OR-CHECK-LAYOUT" "FLOAT-EXPONENT" @@ -1222,9 +1228,10 @@ "NUMERIC-TYPE-CLASS" "NUMERIC-TYPE-COMPLEXP" "NUMERIC-TYPE-EQUAL" "NUMERIC-TYPE-FORMAT" "NUMERIC-TYPE-HIGH" "NUMERIC-TYPE-LOW" "NUMERIC-TYPE-P" - "OBJECT-NOT-ARRAY-ERROR" "OBJECT-NOT-BASE-CHAR-ERROR" + "OBJECT-NOT-ARRAY-ERROR" "OBJECT-NOT-CHARACTER-ERROR" "OBJECT-NOT-BASE-STRING-ERROR" "OBJECT-NOT-BIGNUM-ERROR" - "OBJECT-NOT-BIT-VECTOR-ERROR" "OBJECT-NOT-COMPLEX-ERROR" + "OBJECT-NOT-BIT-VECTOR-ERROR" + "OBJECT-NOT-CHARACTER-STRING-ERROR" "OBJECT-NOT-COMPLEX-ERROR" "OBJECT-NOT-COMPLEX-FLOAT-ERROR" "OBJECT-NOT-COMPLEX-SINGLE-FLOAT-ERROR" #!+long-float "OBJECT-NOT-COMPLEX-LONG-FLOAT-ERROR" @@ -1290,6 +1297,7 @@ "OBJECT-NOT-SIMPLE-ARRAY-SIGNED-BYTE-8-ERROR" "OBJECT-NOT-SIMPLE-BIT-VECTOR-ERROR" "OBJECT-NOT-SIMPLE-BASE-STRING-ERROR" + "OBJECT-NOT-SIMPLE-CHARACTER-STRING-ERROR" "OBJECT-NOT-SIMPLE-STRING-ERROR" "OBJECT-NOT-SIMPLE-VECTOR-ERROR" "OBJECT-NOT-SINGLE-FLOAT-ERROR" "OBJECT-NOT-STRING-ERROR" @@ -1298,7 +1306,9 @@ "OBJECT-NOT-UNSIGNED-BYTE-32-ERROR" ;; FIXME: 32/64-bit issues "OBJECT-NOT-UNSIGNED-BYTE-64-ERROR" - "OBJECT-NOT-VECTOR-ERROR" "OBJECT-NOT-WEAK-POINTER-ERROR" + "OBJECT-NOT-VECTOR-ERROR" + "OBJECT-NOT-VECTOR-NIL-ERROR" + "OBJECT-NOT-WEAK-POINTER-ERROR" "ODD-KEY-ARGS-ERROR" "OUTPUT-OBJECT" "OUTPUT-UGLY-OBJECT" "PACKAGE-DESIGNATOR" "PACKAGE-DOC-STRING" "PACKAGE-HASHTABLE-SIZE" "PACKAGE-HASHTABLE-FREE" @@ -1345,6 +1355,7 @@ "SIMPLE-ARRAY-SIGNED-BYTE-61-P" "SIMPLE-ARRAY-SIGNED-BYTE-64-P" "SIMPLE-ARRAY-SIGNED-BYTE-8-P" "SIMPLE-BASE-STRING-P" + "SIMPLE-CHARACTER-STRING-P" "SIMPLE-PACKAGE-ERROR" "SIMPLE-UNBOXED-ARRAY" "SINGLE-FLOAT-BITS" "SINGLE-FLOAT-EXPONENT" "SINGLE-FLOAT-INT-EXPONENT" "SINGLE-FLOAT-SIGNIFICAND" @@ -1892,7 +1903,7 @@ "EUSERS" "EVICEERR" "EVICEOP" "EWOULDBLOCK" "EXDEV" "FD-ISSET" "FD-SET" "LTCHARS" "UNIX-FAST-SELECT" - "UNIX-FILE-KIND" "UNIX-KILL" + "UNIX-FILE-KIND" "UNIX-KILL" "CODESET" "TCSETPGRP" "FD-ZERO" "FD-CLR" "CHECK" "UNIX-RESOLVE-LINKS" "FD-SETSIZE" "TCGETPGRP" "UNIX-FAST-GETRUSAGE" "UNIX-SIMPLIFY-PATHNAME" "UNIX-KILLPG" @@ -1958,8 +1969,9 @@ "ANY-REG-SC-NUMBER" "ARRAY-DATA-SLOT" "ARRAY-DIMENSIONS-OFFSET" "ARRAY-DISPLACED-P-SLOT" "ARRAY-DISPLACEMENT-SLOT" "ARRAY-ELEMENTS-SLOT" "ARRAY-FILL-POINTER-P-SLOT" - "ARRAY-FILL-POINTER-SLOT" "ATOMIC-FLAG" "BASE-CHAR-REG-SC-NUMBER" - "BASE-CHAR-STACK-SC-NUMBER" "BASE-CHAR-WIDETAG" + "ARRAY-FILL-POINTER-SLOT" "ATOMIC-FLAG" + "CHARACTER-REG-SC-NUMBER" + "CHARACTER-STACK-SC-NUMBER" "CHARACTER-WIDETAG" "BIGNUM-DIGITS-OFFSET" "BIGNUM-WIDETAG" "BINDING-SIZE" "BINDING-SYMBOL-SLOT" "BINDING-VALUE-SLOT" "BREAKPOINT-TRAP" "N-BYTE-BITS" "BYTE-REG-SC-NUMBER" @@ -1988,7 +2000,8 @@ "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT" "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG" "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER" - "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" "COMPLEX-WIDETAG" + "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" + "COMPLEX-CHARACTER-STRING-WIDETAG" "COMPLEX-WIDETAG" "COMPLEX-VECTOR-NIL-WIDETAG" "COMPLEX-VECTOR-WIDETAG" "CONS-CAR-SLOT" "CONS-CDR-SLOT" "CONS-SIZE" "CONSTANT-SC-NUMBER" @@ -2038,7 +2051,7 @@ "FUNCALLABLE-INSTANCE-LAYOUT-SLOT" "FUNCALLABLE-INSTANCE-LEXENV-SLOT" "GENESIS" "HALT-TRAP" "IGNORE-ME-SC-NUMBER" - "IMMEDIATE-BASE-CHAR-SC-NUMBER" "IMMEDIATE-SAP-SC-NUMBER" + "IMMEDIATE-CHARACTER-SC-NUMBER" "IMMEDIATE-SAP-SC-NUMBER" "IMMEDIATE-SC-NUMBER" "*INITIAL-DYNAMIC-SPACE-FREE-POINTER*" "INSTANCE-HEADER-WIDETAG" "INSTANCE-POINTER-LOWTAG" "INSTANCE-SLOTS-OFFSET" "INSTANCE-USAGE" @@ -2128,6 +2141,7 @@ "SIMPLE-ARRAY-SIGNED-BYTE-8-WIDETAG" "SIMPLE-BIT-VECTOR-WIDETAG" "SIMPLE-BASE-STRING-WIDETAG" + "SIMPLE-CHARACTER-STRING-WIDETAG" "SIMPLE-VECTOR-WIDETAG" "SINGLE-FLOAT-BIAS" "SINGLE-FLOAT-DIGITS" "SINGLE-FLOAT-EXPONENT-BYTE" "SINGLE-FLOAT-HIDDEN-BIT" "SINGLE-FLOAT-NORMAL-EXPONENT-MAX" diff -urN sbcl/src/code/ansi-stream.lisp sbcl.char/src/code/ansi-stream.lisp --- sbcl/src/code/ansi-stream.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/ansi-stream.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -89,6 +89,9 @@ (deftype ansi-stream-in-buffer () `(simple-array (unsigned-byte 8) (,+ansi-stream-in-buffer-length+))) +(deftype ansi-stream-cin-buffer () + `(simple-array character (,+ansi-stream-in-buffer-length+))) + ;;; base class for ANSI standard streams (as opposed to the Gray ;;; streams extension) (defstruct (ansi-stream (:constructor nil) @@ -100,6 +103,7 @@ ;; slot must must be NIL, and the IN-INDEX must be ;; +ANSI-STREAM-IN-BUFFER-LENGTH+.) (in-buffer nil :type (or ansi-stream-in-buffer null)) + (cin-buffer nil :type (or ansi-stream-cin-buffer null)) (in-index +ansi-stream-in-buffer-length+ :type index) ;; buffered input functions diff -urN sbcl/src/code/array.lisp sbcl.char/src/code/array.lisp --- sbcl/src/code/array.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/array.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -93,6 +93,8 @@ (values #.sb!vm:simple-vector-widetag #.sb!vm:n-word-bits)) ((base-char standard-char) (values #.sb!vm:simple-base-string-widetag #.sb!vm:n-byte-bits)) + ((character) + (values #.sb!vm:simple-character-string-widetag #.sb!vm:n-word-bits)) ((bit) (values #.sb!vm:simple-bit-vector-widetag 1)) ;; OK, we have to wade into SUBTYPEPing after all. @@ -112,6 +114,8 @@ #.sb!vm:complex-vector-widetag) ((base-char) #.sb!vm:complex-base-string-widetag) + ((character) + #.sb!vm:complex-character-string-widetag) ((nil) #.sb!vm:complex-vector-nil-widetag) ((bit) @@ -121,6 +125,7 @@ (pick-vector-type type (nil #.sb!vm:complex-vector-nil-widetag) (base-char #.sb!vm:complex-base-string-widetag) + (character #.sb!vm:complex-character-string-widetag) (bit #.sb!vm:complex-bit-vector-widetag) (t #.sb!vm:complex-vector-widetag))))) @@ -148,7 +153,8 @@ (array (allocate-vector type length - (ceiling (* (if (= type sb!vm:simple-base-string-widetag) + (ceiling (* (if (or (= type sb!vm:simple-base-string-widetag) + (= type sb!vm:simple-character-string-widetag)) (1+ length) length) n-bits) @@ -854,7 +860,8 @@ ,@(map 'list (lambda (saetp) `((simple-array ,(sb!vm:saetp-specifier saetp) (*)) - ,(if (eq (sb!vm:saetp-specifier saetp) 'base-char) + ,(if (or (eq (sb!vm:saetp-specifier saetp) 'character) + (eq (sb!vm:saetp-specifier saetp) 'base-char)) *default-init-char-form* (sb!vm:saetp-initial-element-default saetp)))) (remove-if-not diff -urN sbcl/src/code/char.lisp sbcl.char/src/code/char.lisp --- sbcl/src/code/char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/char.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -12,6 +12,8 @@ (in-package "SB!IMPL") -(def!constant sb!xc:char-code-limit 256 +(def!constant sb!xc:char-code-limit #x110000 #!+sb-doc "the upper exclusive bound on values produced by CHAR-CODE") + +(def!constant base-char-code-limit 128) diff -urN sbcl/src/code/class.lisp sbcl.char/src/code/class.lisp --- sbcl/src/code/class.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/class.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -935,8 +935,11 @@ (setq *built-in-classes* '((t :state :read-only :translation t) - (character :enumerable t :translation base-char + (character :enumerable t + :codes (#.sb!vm:character-widetag) + :translation (character-set) :prototype-form (code-char 42)) + #+nil (base-char :enumerable t :inherits (character) :codes (#.sb!vm:base-char-widetag) @@ -1245,6 +1248,19 @@ :inherits (base-string simple-string string vector simple-array array sequence) :prototype-form (make-array 0 :element-type 'base-char)) + (character-string + :translation (vector character) + :codes (#.sb!vm:complex-character-string-widetag) + :direct-superclasses (string) + :inherits (string vector array sequence) + :prototype-form (make-array 0 :element-type 'character :fill-pointer t)) + (simple-character-string + :translation (simple-array character (*)) + :codes (#.sb!vm:simple-character-string-widetag) + :direct-superclasses (character-string simple-string) + :inherits (character-string simple-string string vector simple-array + array sequence) + :prototype-form (make-array 0 :element-type 'character)) (list :translation (or cons (member nil)) :inherits (sequence)) diff -urN sbcl/src/code/cold-init.lisp sbcl.char/src/code/cold-init.lisp --- sbcl/src/code/cold-init.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/cold-init.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -111,6 +111,8 @@ ;; this to be initialized, so we initialize it right away. (show-and-call !random-cold-init) + (show-and-call !character-database-cold-init) + (show-and-call !early-package-cold-init) (show-and-call !package-cold-init) @@ -313,7 +315,7 @@ (defun hexstr (thing) (/noshow0 "entering HEXSTR") (let ((addr (get-lisp-obj-address thing)) - (str (make-string 10))) + (str (make-string 10 :element-type 'base-char))) (/noshow0 "ADDR and STR calculated") (setf (char str 0) #\0 (char str 1) #\x) diff -urN sbcl/src/code/cross-condition.lisp sbcl.char/src/code/cross-condition.lisp --- sbcl/src/code/cross-condition.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/cross-condition.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -31,6 +31,9 @@ (define-condition reference-condition () ((references :initarg :references :reader reference-condition-references))) +;;; KLUDGE +(define-condition type-warning (simple-warning) ()) + (define-condition bug (simple-error) () (:report diff -urN sbcl/src/code/cross-type.lisp sbcl.char/src/code/cross-type.lisp --- sbcl/src/code/cross-type.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/cross-type.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -82,6 +82,9 @@ 'fixnum) (t 'integer))) + ((subtypep raw-result 'simple-string) + `(simple-base-string ,(length object))) + ((subtypep raw-result 'string) 'base-string) ((some (lambda (type) (subtypep raw-result type)) '(array character list symbol)) raw-result) @@ -360,6 +363,11 @@ (make-member-type :members (list x))) (number (ctype-of-number x)) + (string + (make-array-type :dimensions (array-dimensions x) + :complexp (not (typep x 'simple-array)) + :element-type (specifier-type 'base-char) + :specialized-element-type (specifier-type 'base-char))) (array (let ((etype (specifier-type (array-element-type x)))) (make-array-type :dimensions (array-dimensions x) @@ -369,9 +377,7 @@ (cons (specifier-type 'cons)) (character (cond ((typep x 'standard-char) - ;; (Note that SBCL doesn't distinguish between BASE-CHAR and - ;; CHARACTER.) - (find-classoid 'base-char)) + (specifier-type 'character)) ((not (characterp x)) nil) (t diff -urN sbcl/src/code/debug-int.lisp sbcl.char/src/code/debug-int.lisp --- sbcl/src/code/debug-int.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/debug-int.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -2000,7 +2000,7 @@ (zerop (logand val 3)) ;; character (and (zerop (logand val #xffff0000)) ; Top bits zero - (= (logand val #xff) sb!vm:base-char-widetag)) ; char tag + (= (logand val #xff) sb!vm:character-widetag)) ; char tag ;; unbound marker (= val sb!vm:unbound-marker-widetag) ;; pointer @@ -2055,7 +2055,7 @@ (sb!sys:without-gcing (with-escaped-value (val) (sb!kernel:make-lisp-obj val)))) - (#.sb!vm:base-char-reg-sc-number + (#.sb!vm:character-reg-sc-number (with-escaped-value (val) (code-char val))) (#.sb!vm:sap-reg-sc-number @@ -2145,7 +2145,7 @@ sb!vm:n-word-bytes))))) (#.sb!vm:control-stack-sc-number (sb!kernel:stack-ref fp (sb!c:sc-offset-offset sc-offset))) - (#.sb!vm:base-char-stack-sc-number + (#.sb!vm:character-stack-sc-number (with-nfp (nfp) (code-char (sb!sys:sap-ref-32 nfp (* (sb!c:sc-offset-offset sc-offset) sb!vm:n-word-bytes))))) @@ -2190,7 +2190,7 @@ (without-gcing (with-escaped-value (val) (make-valid-lisp-obj val)))) - (#.sb!vm:base-char-reg-sc-number + (#.sb!vm:character-reg-sc-number (with-escaped-value (val) (code-char val))) (#.sb!vm:sap-reg-sc-number @@ -2249,7 +2249,7 @@ sb!vm:n-word-bytes))))) (#.sb!vm:control-stack-sc-number (stack-ref fp (sb!c:sc-offset-offset sc-offset))) - (#.sb!vm:base-char-stack-sc-number + (#.sb!vm:character-stack-sc-number (code-char (sap-ref-32 fp (- (* (1+ (sb!c:sc-offset-offset sc-offset)) sb!vm:n-word-bytes))))) @@ -2330,7 +2330,7 @@ (without-gcing (set-escaped-value (get-lisp-obj-address value)))) - (#.sb!vm:base-char-reg-sc-number + (#.sb!vm:character-reg-sc-number (set-escaped-value (char-code value))) (#.sb!vm:sap-reg-sc-number (set-escaped-value (sap-int value))) @@ -2429,7 +2429,7 @@ (the long-float (realpart value))))) (#.sb!vm:control-stack-sc-number (setf (stack-ref fp (sb!c:sc-offset-offset sc-offset)) value)) - (#.sb!vm:base-char-stack-sc-number + (#.sb!vm:character-stack-sc-number (with-nfp (nfp) (setf (sap-ref-32 nfp (* (sb!c:sc-offset-offset sc-offset) sb!vm:n-word-bytes)) @@ -2464,7 +2464,7 @@ (without-gcing (set-escaped-value (get-lisp-obj-address value)))) - (#.sb!vm:base-char-reg-sc-number + (#.sb!vm:character-reg-sc-number (set-escaped-value (char-code value))) (#.sb!vm:sap-reg-sc-number (set-escaped-value (sap-int value))) @@ -2528,7 +2528,7 @@ (imagpart (the (complex long-float) value)))) (#.sb!vm:control-stack-sc-number (setf (stack-ref fp (sb!c:sc-offset-offset sc-offset)) value)) - (#.sb!vm:base-char-stack-sc-number + (#.sb!vm:character-stack-sc-number (setf (sap-ref-32 fp (- (* (1+ (sb!c:sc-offset-offset sc-offset)) sb!vm:n-word-bytes))) (char-code (the character value)))) diff -urN sbcl/src/code/deftypes-for-target.lisp sbcl.char/src/code/deftypes-for-target.lisp --- sbcl/src/code/deftypes-for-target.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/deftypes-for-target.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -60,6 +60,9 @@ (sb!xc:deftype atom () '(not cons)) +(sb!xc:deftype base-char () + '(character-set ((0 . #.(1- base-char-code-limit))))) + (sb!xc:deftype extended-char () #!+sb-doc "Type of CHARACTERs that aren't BASE-CHARs." diff -urN sbcl/src/code/early-format.lisp sbcl.char/src/code/early-format.lisp --- sbcl/src/code/early-format.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/early-format.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -18,9 +18,9 @@ #-sb-xc-host (code-char tab-char-code))) (defvar *format-directive-expanders* - (make-array char-code-limit :initial-element nil)) + (make-array base-char-code-limit :initial-element nil)) (defvar *format-directive-interpreters* - (make-array char-code-limit :initial-element nil)) + (make-array base-char-code-limit :initial-element nil)) (defvar *default-format-error-control-string* nil) (defvar *default-format-error-offset* nil) diff -urN sbcl/src/code/early-type.lisp sbcl.char/src/code/early-type.lisp --- sbcl/src/code/early-type.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/early-type.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -330,6 +330,34 @@ :high high :enumerable enumerable)) +(defstruct (character-set-type + (:include ctype + (class-info (type-class-or-lose 'character-set))) + (:constructor %make-character-set-type) + (:copier nil)) + (pairs (missing-arg) :type list :read-only t)) +(defun make-character-set-type (&key pairs) + (aver (equal (mapcar #'car pairs) + (sort (mapcar #'car pairs) #'<))) + (let ((pairs (let (result) + (do ((pairs pairs (cdr pairs))) + ((null pairs) (nreverse result)) + (destructuring-bind (low . high) (car pairs) + (loop for (low1 . high1) in (cdr pairs) + if (<= low1 (1+ high)) + do (progn (setf high (max high high1)) + (setf pairs (cdr pairs))) + else do (return nil)) + (cond + ((>= low sb!xc:char-code-limit)) + ((< high 0)) + (t (push (cons (max 0 low) + (min high (1- sb!xc:char-code-limit))) + result)))))))) + (if (null pairs) + *empty-type* + (%make-character-set-type :pairs pairs)))) + ;;; An ARRAY-TYPE is used to represent any array type, including ;;; things such as SIMPLE-BASE-STRING. (defstruct (array-type (:include ctype diff -urN sbcl/src/code/fd-stream.lisp sbcl.char/src/code/fd-stream.lisp --- sbcl/src/code/fd-stream.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/fd-stream.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -86,7 +86,9 @@ ;; timeout specified for this stream, or NIL if none (timeout nil :type (or index null)) ;; pathname of the file this stream is opened to (returned by PATHNAME) - (pathname nil :type (or pathname null))) + (pathname nil :type (or pathname null)) + (external-format :default) + (output-bytes #'ill-out :type function)) (def!method print-object ((fd-stream file-stream) stream) (declare (type stream stream)) (print-unreadable-object (fd-stream stream :type t :identity t) @@ -197,6 +199,32 @@ (frob-output stream (fd-stream-obuf-sap stream) 0 length t) (setf (fd-stream-obuf-tail stream) 0)))) +(defmacro output-wrapper/variable-width ((stream size buffering) + &body body) + (let ((stream-var (gensym))) + `(let ((,stream-var ,stream) + (size ,size)) + ,(unless (eq (car buffering) :none) + `(when (< (fd-stream-obuf-length ,stream-var) + (+ (fd-stream-obuf-tail ,stream-var) + size)) + (flush-output-buffer ,stream-var))) + ,(unless (eq (car buffering) :none) + `(when (> (fd-stream-ibuf-tail ,stream-var) + (fd-stream-ibuf-head ,stream-var)) + (file-position ,stream-var (file-position ,stream-var)))) + + ,@body + (incf (fd-stream-obuf-tail ,stream-var) size) + ,(ecase (car buffering) + (:none + `(flush-output-buffer ,stream-var)) + (:line + `(when (eq (char-code byte) (char-code #\Newline)) + (flush-output-buffer ,stream-var))) + (:full)) + (values)))) + (defmacro output-wrapper ((stream size buffering) &body body) (let ((stream-var (gensym))) `(let ((,stream-var ,stream)) @@ -221,6 +249,32 @@ (:full)) (values)))) +(defmacro def-output-routines/variable-width ((name-fmt size external-format + &rest bufferings) + &body body) + (declare (optimize (speed 1))) + (cons 'progn + (mapcar + (lambda (buffering) + (let ((function + (intern (let ((*print-case* :upcase)) + (format nil name-fmt (car buffering)))))) + `(progn + (defun ,function (stream byte) + (output-wrapper/variable-width (stream ,size ,buffering) + ,@body)) + (setf *output-routines* + (nconc *output-routines* + ',(mapcar + (lambda (type) + (list type + (car buffering) + function + 1 + external-format)) + (cdr buffering))))))) + bufferings))) + ;;; Define output routines that output numbers SIZE bytes long for the ;;; given bufferings. Use BODY to do the actual output. (defmacro def-output-routines ((name-fmt size &rest bufferings) &body body) @@ -242,7 +296,8 @@ (list type (car buffering) function - size)) + size + nil)) (cdr buffering))))))) bufferings))) @@ -251,7 +306,7 @@ (:none character) (:line character) (:full character)) - (if (and (base-char-p byte) (char= byte #\Newline)) + (if (char= byte #\Newline) (setf (fd-stream-char-pos stream) 0) (incf (fd-stream-char-pos stream))) (setf (sap-ref-8 (fd-stream-obuf-sap stream) (fd-stream-obuf-tail stream)) @@ -302,6 +357,8 @@ (fd-stream-obuf-tail stream)) byte)) + + ;;; Do the actual output. If there is space to buffer the string, ;;; buffer it. If the string would normally fit in the buffer, but ;;; doesn't because of other stuff in the buffer, flush the old noise @@ -389,15 +446,24 @@ :from-end t :start start :end end)))) - (ecase (fd-stream-buffering stream) - (:full - (output-raw-bytes stream thing start end)) - (:line - (output-raw-bytes stream thing start end) - (when last-newline - (flush-output-buffer stream))) - (:none - (frob-output stream thing start end nil))) + (if (and (typep thing 'base-string) + (eq (fd-stream-external-format stream) :latin-1)) + (ecase (fd-stream-buffering stream) + (:full + (output-raw-bytes stream thing start end)) + (:line + (output-raw-bytes stream thing start end) + (when last-newline + (flush-output-buffer stream))) + (:none + (frob-output stream thing start end nil))) + (ecase (fd-stream-buffering stream) + (:full (funcall (fd-stream-output-bytes stream) + stream thing nil start end)) + (:line (funcall (fd-stream-output-bytes stream) + stream thing last-newline start end)) + (:none (funcall (fd-stream-output-bytes stream) + stream thing t start end)))) (if last-newline (setf (fd-stream-char-pos stream) (- end last-newline 1)) @@ -409,22 +475,43 @@ (:none (frob-output stream thing start end nil)))))) +(defvar *external-formats* () + #!+sb-doc + "List of all available external formats. Each element is a list of the + element-type, string input function name, character input function name, + and string output function name.") + ;;; Find an output routine to use given the type and buffering. Return ;;; as multiple values the routine, the real type transfered, and the ;;; number of bytes per element. -(defun pick-output-routine (type buffering) +(defun pick-output-routine (type buffering &optional external-format) + (when (subtypep type 'character) + (dolist (entry *external-formats*) + (when (member external-format (first entry)) + (return-from pick-output-routine + (values (symbol-function (nth (ecase buffering + (:none 4) + (:line 5) + (:full 6)) + entry)) + 'character + 1 + (symbol-function (fourth entry)) + (first (first entry))))))) (dolist (entry *output-routines*) - (when (and (subtypep type (car entry)) - (eq buffering (cadr entry))) + (when (and (subtypep type (first entry)) + (eq buffering (second entry)) + (or (not (fifth entry)) + (eq external-format (fifth entry)))) (return-from pick-output-routine - (values (symbol-function (caddr entry)) - (car entry) - (cadddr entry))))) + (values (symbol-function (third entry)) + (first entry) + (fourth entry))))) ;; KLUDGE: dealing with the buffering here leads to excessive code ;; explosion. ;; ;; KLUDGE: also see comments in PICK-INPUT-ROUTINE - (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE) + (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE) if (subtypep type `(unsigned-byte ,i)) do (return-from pick-output-routine (values @@ -432,22 +519,22 @@ (:none (lambda (stream byte) (output-wrapper (stream (/ i 8) (:none)) - (loop for j from 0 below (/ i 8) - do (setf (sap-ref-8 - (fd-stream-obuf-sap stream) - (+ j (fd-stream-obuf-tail stream))) - (ldb (byte 8 (- i 8 (* j 8))) byte)))))) + (loop for j from 0 below (/ i 8) + do (setf (sap-ref-8 + (fd-stream-obuf-sap stream) + (+ j (fd-stream-obuf-tail stream))) + (ldb (byte 8 (- i 8 (* j 8))) byte)))))) (:full (lambda (stream byte) (output-wrapper (stream (/ i 8) (:full)) - (loop for j from 0 below (/ i 8) - do (setf (sap-ref-8 - (fd-stream-obuf-sap stream) - (+ j (fd-stream-obuf-tail stream))) - (ldb (byte 8 (- i 8 (* j 8))) byte))))))) + (loop for j from 0 below (/ i 8) + do (setf (sap-ref-8 + (fd-stream-obuf-sap stream) + (+ j (fd-stream-obuf-tail stream))) + (ldb (byte 8 (- i 8 (* j 8))) byte))))))) `(unsigned-byte ,i) (/ i 8)))) - (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE) + (loop for i from 40 by 8 to 1024 ; ARB (KLUDGE) if (subtypep type `(signed-byte ,i)) do (return-from pick-output-routine (values @@ -455,19 +542,19 @@ (:none (lambda (stream byte) (output-wrapper (stream (/ i 8) (:none)) - (loop for j from 0 below (/ i 8) - do (setf (sap-ref-8 - (fd-stream-obuf-sap stream) - (+ j (fd-stream-obuf-tail stream))) - (ldb (byte 8 (- i 8 (* j 8))) byte)))))) + (loop for j from 0 below (/ i 8) + do (setf (sap-ref-8 + (fd-stream-obuf-sap stream) + (+ j (fd-stream-obuf-tail stream))) + (ldb (byte 8 (- i 8 (* j 8))) byte)))))) (:full (lambda (stream byte) (output-wrapper (stream (/ i 8) (:full)) - (loop for j from 0 below (/ i 8) - do (setf (sap-ref-8 - (fd-stream-obuf-sap stream) - (+ j (fd-stream-obuf-tail stream))) - (ldb (byte 8 (- i 8 (* j 8))) byte))))))) + (loop for j from 0 below (/ i 8) + do (setf (sap-ref-8 + (fd-stream-obuf-sap stream) + (+ j (fd-stream-obuf-tail stream))) + (ldb (byte 8 (- i 8 (* j 8))) byte))))))) `(signed-byte ,i) (/ i 8))))) @@ -561,6 +648,31 @@ (return)) (frob-input ,stream-var))))) +(defmacro input-wrapper/variable-width ((stream bytes eof-error eof-value) + &body read-forms) + (let ((stream-var (gensym)) + (element-var (gensym))) + `(let ((,stream-var ,stream) + (size nil)) + (if (fd-stream-unread ,stream-var) + (prog1 + (fd-stream-unread ,stream-var) + (setf (fd-stream-unread ,stream-var) nil) + (setf (fd-stream-listen ,stream-var) nil)) + (let ((,element-var + (catch 'eof-input-catcher + (input-at-least ,stream-var 1) + (let* ((byte (sap-ref-8 (fd-stream-ibuf-sap ,stream-var) + (fd-stream-ibuf-head ,stream-var)))) + (setq size ,bytes) + (input-at-least ,stream-var size) + (locally ,@read-forms))))) + (cond (,element-var + (incf (fd-stream-ibuf-head ,stream-var) size) + ,element-var) + (t + (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) + ;;; a macro to wrap around all input routines to handle EOF-ERROR noise (defmacro input-wrapper ((stream bytes eof-error eof-value) &body read-forms) (let ((stream-var (gensym)) @@ -581,6 +693,19 @@ (t (eof-or-lose ,stream-var ,eof-error ,eof-value)))))))) +(defmacro def-input-routine/variable-width (name + (type external-format size sap head) + &rest body) + `(progn + (defun ,name (stream eof-error eof-value) + (input-wrapper/variable-width (stream ,size eof-error eof-value) + (let ((,sap (fd-stream-ibuf-sap stream)) + (,head (fd-stream-ibuf-head stream))) + ,@body))) + (setf *input-routines* + (nconc *input-routines* + (list (list ',type ',name 1 ',external-format)))))) + (defmacro def-input-routine (name (type size sap head) &rest body) @@ -592,7 +717,7 @@ ,@body))) (setf *input-routines* (nconc *input-routines* - (list (list ',type ',name ',size)))))) + (list (list ',type ',name ',size nil)))))) ;;; STREAM-IN routine for reading a string char (def-input-routine input-character @@ -629,16 +754,29 @@ ((signed-byte 32) 4 sap head) (signed-sap-ref-32 sap head)) + + ;;; Find an input routine to use given the type. Return as multiple ;;; values the routine, the real type transfered, and the number of -;;; bytes per element. -(defun pick-input-routine (type) +;;; bytes per element (and for character types string input routine). +(defun pick-input-routine (type &optional external-format) + (when (subtypep type 'character) + (dolist (entry *external-formats*) + (when (member external-format (first entry)) + (return-from pick-input-routine + (values (symbol-function (third entry)) + 'character + 1 + (symbol-function (second entry)) + (first (first entry))))))) (dolist (entry *input-routines*) - (when (subtypep type (car entry)) + (when (and (subtypep type (first entry)) + (or (not (fourth entry)) + (eq external-format (fourth entry)))) (return-from pick-input-routine - (values (symbol-function (cadr entry)) - (car entry) - (caddr entry))))) + (values (symbol-function (second entry)) + (first entry) + (third entry))))) ;; FIXME: let's do it the hard way, then (but ignore things like ;; endianness, efficiency, and the necessary coupling between these ;; and the output routines). -- CSR, 2004-02-09 @@ -747,18 +885,297 @@ (defun refill-fd-stream-buffer (stream) ;; We don't have any logic to preserve leftover bytes in the buffer, ;; so we should only be called when the buffer is empty. - (aver (= (fd-stream-ibuf-head stream) (fd-stream-ibuf-tail stream))) - (multiple-value-bind (count err) - (sb!unix:unix-read (fd-stream-fd stream) - (fd-stream-ibuf-sap stream) - (fd-stream-ibuf-length stream)) - (declare (type (or index null) count)) - (when (null count) - (simple-stream-perror "couldn't read from ~S" stream err)) - (setf (fd-stream-listen stream) nil - (fd-stream-ibuf-head stream) 0 - (fd-stream-ibuf-tail stream) count) - count)) + ;; FIXME: can have three bytes in buffer because of UTF-8 + (let ((new-head 0) + (sap (fd-stream-ibuf-sap stream))) + (do ((head (fd-stream-ibuf-head stream) (1+ head)) + (tail (fd-stream-ibuf-tail stream))) + ((= head tail)) + (setf (sap-ref-8 sap new-head) (sap-ref-8 sap head)) + (incf new-head)) + (multiple-value-bind (count err) + (sb!unix:unix-read (fd-stream-fd stream) + (sap+ sap new-head) + (- (fd-stream-ibuf-length stream) new-head)) + (declare (type (or index null) count)) + (when (null count) + (simple-stream-perror "couldn't read from ~S" stream err)) + (setf (fd-stream-listen stream) nil + (fd-stream-ibuf-head stream) new-head + (fd-stream-ibuf-tail stream) (+ count new-head)) + count))) + +(defmacro define-external-format (external-format size out-expr in-expr) + (let* ((name (first external-format)) + (out-function (intern (let ((*print-case* :upcase)) + (format nil "OUTPUT-BYTES/~A" name)))) + (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" name)) + (in-function (intern (let ((*print-case* :upcase)) + (format nil "FD-STREAM-READ-N-CHARACTERS/~A" + name)))) + (in-char-function (intern (let ((*print-case* :upcase)) + (format nil "INPUT-CHAR/~A" name))))) + `(progn + (defun ,out-function (fd-stream string flush-p start end) + (let ((start (or start 0)) + (end (or end (length string)))) + (declare (type index start end)) + (when (> (fd-stream-ibuf-tail fd-stream) + (fd-stream-ibuf-head fd-stream)) + (file-position fd-stream (file-position fd-stream))) + (when (< end start) + (error ":END before :START!")) + (do () + ((= end start)) + (setf (fd-stream-obuf-tail fd-stream) + (do* ((len (fd-stream-obuf-length fd-stream)) + (sap (fd-stream-obuf-sap fd-stream)) + (tail (fd-stream-obuf-tail fd-stream))) + ((or (= start end) (< (- len tail) 4)) tail) + (let* ((byte (aref string start)) + (bits (char-code byte))) + ,out-expr + (incf tail ,size) + (incf start)))) + (when (< start end) + (flush-output-buffer fd-stream))) + (when flush-p + (flush-output-buffer fd-stream)))) + (def-output-routines (,format + ,size + (:none character) + (:line character) + (:full character)) + (if (char= byte #\Newline) + (setf (fd-stream-char-pos stream) 0) + (incf (fd-stream-char-pos stream))) + (let ((bits (char-code byte)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + ,out-expr)) + (defun ,in-function (stream buffer start requested eof-error-p + &aux (total-copied 0)) + (declare (type file-stream stream)) + (declare (type index start requested total-copied)) + (let ((unread (fd-stream-unread stream))) + (when unread + (setf (aref buffer start) unread) + (setf (fd-stream-unread stream) nil) + (setf (fd-stream-listen stream) nil) + (incf total-copied))) + (do () + (nil) + (let* ((head (fd-stream-ibuf-head stream)) + (tail (fd-stream-ibuf-tail stream)) + (sap (fd-stream-ibuf-sap stream))) + (declare (type index head tail)) + ;; Copy data from stream buffer into user's buffer. + (do () + ((or (= tail head) (= requested total-copied))) + (let* ((byte (sap-ref-8 sap head))) + (when (> ,size (- tail head)) + (return)) + (setf (aref buffer (+ start total-copied)) ,in-expr) + (incf total-copied) + (incf head ,size))) + (setf (fd-stream-ibuf-head stream) head) + ;; Maybe we need to refill the stream buffer. + (cond ( ;; If there were enough data in the stream buffer, we're done. + (= total-copied requested) + (return total-copied)) + ( ;; If EOF, we're done in another way. + (zerop (refill-fd-stream-buffer stream)) + (if eof-error-p + (error 'end-of-file :stream stream) + (return total-copied))) + ;; Otherwise we refilled the stream buffer, so fall + ;; through into another pass of the loop. + )))) + (def-input-routine ,in-char-function (character ,size sap head) + (let ((byte (sap-ref-8 sap head))) + ,in-expr)) + (setf *external-formats* + (cons '(,external-format ,in-function ,in-char-function ,out-function + ,@(mapcar #'(lambda (buffering) + (intern (let ((*print-case* :upcase)) + (format nil format buffering)))) + '(:none :line :full))) + *external-formats*))))) + +(defmacro define-external-format/variable-width (external-format out-size-expr + out-expr in-size-expr in-expr) + (let* ((name (first external-format)) + (out-function (intern (let ((*print-case* :upcase)) + (format nil "OUTPUT-BYTES/~A" name)))) + (format (format nil "OUTPUT-CHAR-~A-~~A-BUFFERED" name)) + (in-function (intern (let ((*print-case* :upcase)) + (format nil "FD-STREAM-READ-N-CHARACTERS/~A" + name)))) + (in-char-function (intern (let ((*print-case* :upcase)) + (format nil "INPUT-CHAR/~A" name))))) + `(progn + (defun ,out-function (fd-stream string flush-p start end) + (let ((start (or start 0)) + (end (or end (length string)))) + (declare (type index start end)) + (when (> (fd-stream-ibuf-tail fd-stream) + (fd-stream-ibuf-head fd-stream)) + (file-position fd-stream (file-position fd-stream))) + (when (< end start) + (error ":END before :START!")) + (do () + ((= end start)) + (setf (fd-stream-obuf-tail fd-stream) + (do* ((len (fd-stream-obuf-length fd-stream)) + (sap (fd-stream-obuf-sap fd-stream)) + (tail (fd-stream-obuf-tail fd-stream))) + ((or (= start end) (< (- len tail) 4)) tail) + (let* ((byte (aref string start)) + (bits (char-code byte)) + (size ,out-size-expr)) + ,out-expr + (incf tail size) + (incf start)))) + (when (< start end) + (flush-output-buffer fd-stream))) + (when flush-p + (flush-output-buffer fd-stream)))) + (def-output-routines/variable-width (,format + ,out-size-expr + ,external-format + (:none character) + (:line character) + (:full character)) + (if (char= byte #\Newline) + (setf (fd-stream-char-pos stream) 0) + (incf (fd-stream-char-pos stream))) + (let ((bits (char-code byte)) + (sap (fd-stream-obuf-sap stream)) + (tail (fd-stream-obuf-tail stream))) + ,out-expr)) + (defun ,in-function (stream buffer start requested eof-error-p + &aux (total-copied 0)) + (declare (type file-stream stream)) + (declare (type index start requested total-copied)) + (let ((unread (fd-stream-unread stream))) + (when unread + (setf (aref buffer start) unread) + (setf (fd-stream-unread stream) nil) + (setf (fd-stream-listen stream) nil) + (incf total-copied))) + (do () + (nil) + (let* ((head (fd-stream-ibuf-head stream)) + (tail (fd-stream-ibuf-tail stream)) + (sap (fd-stream-ibuf-sap stream))) + (declare (type index head tail)) + ;; Copy data from stream buffer into user's buffer. + (do () + ((or (= tail head) (= requested total-copied))) + (let* ((byte (sap-ref-8 sap head)) + (size ,in-size-expr)) + (when (> size (- tail head)) + (return)) + (setf (aref buffer (+ start total-copied)) ,in-expr) + (incf total-copied) + (incf head size))) + (setf (fd-stream-ibuf-head stream) head) + ;; Maybe we need to refill the stream buffer. + (cond ( ;; If there were enough data in the stream buffer, we're done. + (= total-copied requested) + (return total-copied)) + ( ;; If EOF, we're done in another way. + (zerop (refill-fd-stream-buffer stream)) + (if eof-error-p + (error 'end-of-file :stream stream) + (return total-copied))) + ;; Otherwise we refilled the stream buffer, so fall + ;; through into another pass of the loop. + )))) + (def-input-routine/variable-width ,in-char-function (character + ,external-format + ,in-size-expr + sap head) + (let ((byte (sap-ref-8 sap head))) + ,in-expr)) + (setf *external-formats* + (cons '(,external-format ,in-function ,in-char-function ,out-function + ,@(mapcar #'(lambda (buffering) + (intern (let ((*print-case* :upcase)) + (format nil format buffering)))) + '(:none :line :full))) + *external-formats*))))) + +(define-external-format (:latin-1 :latin1 :iso-8859-1 + ;; FIXME: shouldn't ASCII-like things have an + ;; extra typecheck for 7-bitness? + :ascii :us-ascii :ansi_x3.4-1968) + 1 + (setf (sap-ref-8 sap tail) bits) + (code-char byte)) + +(let ((latin-9-table (let ((table (make-string 256))) + (do ((i 0 (1+ i))) + ((= i 256)) + (setf (aref table i) (code-char i))) + (setf (aref table #xa4) (code-char #x20ac)) + (setf (aref table #xa6) (code-char #x0160)) + (setf (aref table #xa8) (code-char #x0161)) + (setf (aref table #xb4) (code-char #x017d)) + (setf (aref table #xb8) (code-char #x017e)) + (setf (aref table #xbc) (code-char #x0152)) + (setf (aref table #xbd) (code-char #x0153)) + (setf (aref table #xbe) (code-char #x0178)) + table)) + (latin-9-reverse-1 (make-array 16 + :element-type '(unsigned-byte 21) + :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0))) + (latin-9-reverse-2 (make-array 16 + :element-type '(unsigned-byte 8) + :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0)))) + (define-external-format (:latin-9 :latin9 :iso-8859-15) + 1 + (setf (sap-ref-8 sap tail) + (if (< bits 256) + (if (= bits (char-code (aref latin-9-table bits))) + bits + (error "cannot encode ~A in latin-9" bits)) + (if (= (aref latin-9-reverse-1 (logand bits 15)) bits) + (aref latin-9-reverse-2 (logand bits 15)) + (error "cannot encode ~A in latin-9" bits)))) + (aref latin-9-table byte))) + +(define-external-format/variable-width (:utf-8 :utf8) + (let ((bits (char-code byte))) + (cond ((< bits #x80) 1) + ((< bits #x800) 2) + ((< bits #x10000) 3) + (t 4))) + (ecase size + (1 (setf (sap-ref-8 sap tail) bits)) + (2 (setf (sap-ref-8 sap tail) (logior #xc0 (ldb (byte 5 6) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 0) bits)))) + (3 (setf (sap-ref-8 sap tail) (logior #xe0 (ldb (byte 4 12) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 0) bits)))) + (4 (setf (sap-ref-8 sap tail) (logior #xf0 (ldb (byte 3 18) bits)) + (sap-ref-8 sap (1+ tail)) (logior #x80 (ldb (byte 6 12) bits)) + (sap-ref-8 sap (+ 2 tail)) (logior #x80 (ldb (byte 6 6) bits)) + (sap-ref-8 sap (+ 3 tail)) (logior #x80 (ldb (byte 6 0) bits))))) + (cond ((< byte #x80) 1) + ((< byte #xe0) 2) + ((< byte #xf0) 3) + (t 4)) + (code-char (ecase size + (1 byte) + (2 (dpb byte (byte 5 6) (sap-ref-8 sap (1+ head)))) + (3 (dpb byte (byte 4 12) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 6) + (sap-ref-8 sap (+ 2 head))))) + (4 (dpb byte (byte 3 18) + (dpb (sap-ref-8 sap (1+ head)) (byte 6 12) + (dpb (sap-ref-8 sap (+ 2 head)) (byte 6 6) + (sap-ref-8 sap (+ 3 head))))))))) ;;;; utility functions (misc routines, etc) @@ -776,7 +1193,8 @@ (input-type nil) (output-type nil) (input-size nil) - (output-size nil)) + (output-size nil) + (character-stream-p (subtypep type 'character))) (when (fd-stream-obuf-sap fd-stream) (push (fd-stream-obuf-sap fd-stream) *available-buffers*) @@ -785,39 +1203,72 @@ (push (fd-stream-ibuf-sap fd-stream) *available-buffers*) (setf (fd-stream-ibuf-sap fd-stream) nil)) + (when (and character-stream-p + (eq (fd-stream-external-format fd-stream) :default)) + (setf (fd-stream-external-format fd-stream) + (intern (or (alien-funcall + (extern-alien "nl_langinfo" + (function c-string int)) + sb!unix:codeset) + "LATIN-1") + "KEYWORD"))) + (dolist (entry *external-formats* + (setf (fd-stream-external-format fd-stream) :latin-1)) + (when (member (fd-stream-external-format fd-stream) (first entry)) + (return))) + (when input-p - (multiple-value-bind (routine type size) - (pick-input-routine target-type) + (multiple-value-bind (routine type size read-n-characters + normalized-external-format) + (pick-input-routine target-type + (fd-stream-external-format fd-stream)) + (when normalized-external-format + (setf (fd-stream-external-format fd-stream) + normalized-external-format)) (unless routine (error "could not find any input routine for ~S" target-type)) (setf (fd-stream-ibuf-sap fd-stream) (next-available-buffer)) (setf (fd-stream-ibuf-length fd-stream) bytes-per-buffer) (setf (fd-stream-ibuf-tail fd-stream) 0) - (if (subtypep type 'character) + (if character-stream-p (setf (fd-stream-in fd-stream) routine (fd-stream-bin fd-stream) #'ill-bin) (setf (fd-stream-in fd-stream) #'ill-in (fd-stream-bin fd-stream) routine)) (when (eql size 1) - (setf (fd-stream-n-bin fd-stream) #'fd-stream-read-n-bytes) + (setf (fd-stream-n-bin fd-stream) + (if character-stream-p + read-n-characters + #'fd-stream-read-n-bytes)) (when (and buffer-p ;; We only create this buffer for streams of type ;; (unsigned-byte 8). Because there's no buffer, the ;; other element-types will dispatch to the appropriate ;; input (output) routine in fast-read-byte. - (equal target-type '(unsigned-byte 8)) - #+nil + (or character-stream-p + (equal target-type '(unsigned-byte 8))) + #+(or) (or (eq type 'unsigned-byte) (eq type :default))) - (setf (ansi-stream-in-buffer fd-stream) - (make-array +ansi-stream-in-buffer-length+ - :element-type '(unsigned-byte 8))))) + (if character-stream-p + (setf (ansi-stream-cin-buffer fd-stream) + (make-array +ansi-stream-in-buffer-length+ + :element-type 'character)) + (setf (ansi-stream-in-buffer fd-stream) + (make-array +ansi-stream-in-buffer-length+ + :element-type '(unsigned-byte 8)))))) (setf input-size size) (setf input-type type))) (when output-p - (multiple-value-bind (routine type size) - (pick-output-routine target-type (fd-stream-buffering fd-stream)) + (multiple-value-bind (routine type size output-bytes + normalized-external-format) + (pick-output-routine target-type + (fd-stream-buffering fd-stream) + (fd-stream-external-format fd-stream)) + (when normalized-external-format + (setf (fd-stream-external-format fd-stream) + normalized-external-format)) (unless routine (error "could not find any output routine for ~S buffered ~S" (fd-stream-buffering fd-stream) @@ -825,15 +1276,17 @@ (setf (fd-stream-obuf-sap fd-stream) (next-available-buffer)) (setf (fd-stream-obuf-length fd-stream) bytes-per-buffer) (setf (fd-stream-obuf-tail fd-stream) 0) - (if (subtypep type 'character) - (setf (fd-stream-out fd-stream) routine - (fd-stream-bout fd-stream) #'ill-bout) - (setf (fd-stream-out fd-stream) - (or (if (eql size 1) - (pick-output-routine 'base-char - (fd-stream-buffering fd-stream))) - #'ill-out) - (fd-stream-bout fd-stream) routine)) + (when character-stream-p + (setf (fd-stream-output-bytes fd-stream) output-bytes)) + (if character-stream-p + (setf (fd-stream-out fd-stream) routine + (fd-stream-bout fd-stream) #'ill-bout) + (setf (fd-stream-out fd-stream) + (or (if (eql size 1) + (pick-output-routine + 'base-char (fd-stream-buffering fd-stream))) + #'ill-out) + (fd-stream-bout fd-stream) routine)) (setf (fd-stream-sout fd-stream) (if (eql size 1) #'fd-sout #'ill-out)) (setf (fd-stream-char-pos fd-stream) 0) @@ -1115,6 +1568,7 @@ (output nil output-p) (element-type 'base-char) (buffering :full) + (external-format :default) timeout file original @@ -1138,6 +1592,7 @@ :delete-original delete-original :pathname pathname :buffering buffering + :external-format external-format :timeout timeout))) (set-fd-stream-routines stream element-type input output input-buffer-p) (when (and auto-close (fboundp 'finalize)) @@ -1201,8 +1656,6 @@ :IF-DOES-NOT-EXIST - one of :ERROR, :CREATE or NIL See the manual for details." - (declare (ignore external-format)) ; FIXME: CHECK-TYPE? WARN-if-not? - ;; Calculate useful stuff. (multiple-value-bind (input output mask) (case direction @@ -1327,6 +1780,7 @@ :input input :output output :element-type element-type + :external-format external-format :file namestring :original original :delete-original delete-original @@ -1389,7 +1843,8 @@ (make-fd-stream 1 :name "standard output" :output t :buffering :line)) (setf *stderr* (make-fd-stream 2 :name "standard error" :output t :buffering :line)) - (let ((tty (sb!unix:unix-open "/dev/tty" sb!unix:o_rdwr #o666))) + (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string)) + (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666))) (if tty (setf *tty* (make-fd-stream tty @@ -1441,7 +1896,9 @@ (string (length object)))) (defun stream-external-format (stream) - (declare (type file-stream stream) (ignore stream)) + (declare (type file-stream stream)) #!+sb-doc - "Return :DEFAULT." - :default) + "Return the actual external format for file-streams, otherwise :DEFAULT." + (if (typep stream 'file-stream) + (fd-stream-external-format stream) + :default)) diff -urN sbcl/src/code/filesys.lisp sbcl.char/src/code/filesys.lisp --- sbcl/src/code/filesys.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/filesys.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -192,8 +192,9 @@ (values absolute (pieces))))) (defun parse-unix-namestring (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end)) + (setf namestr (coerce namestr 'simple-base-string)) (multiple-value-bind (absolute pieces) (split-at-slashes namestr start end) (multiple-value-bind (name type version) (let* ((tail (car (last pieces))) @@ -296,7 +297,7 @@ (t (error "invalid pattern piece: ~S" piece)))))) (apply #'concatenate - 'simple-string + 'simple-base-string (strings)))))) (defun unparse-unix-directory-list (directory) @@ -322,7 +323,7 @@ (pieces "/")) (t (error "invalid directory component: ~S" dir))))) - (apply #'concatenate 'simple-string (pieces)))) + (apply #'concatenate 'simple-base-string (pieces)))) (defun unparse-unix-directory (pathname) (declare (type pathname pathname)) @@ -350,18 +351,18 @@ (when type-supplied (unless name (error "cannot specify the type without a file: ~S" pathname)) - (when (typep type 'simple-base-string) + (when (typep type 'simple-string) (when (position #\. type) (error "type component can't have a #\. inside: ~S" pathname))) (strings ".") (strings (unparse-unix-piece type)))) - (apply #'concatenate 'simple-string (strings)))) + (apply #'concatenate 'simple-base-string (strings)))) (/show0 "filesys.lisp 406") (defun unparse-unix-namestring (pathname) (declare (type pathname pathname)) - (concatenate 'simple-string + (concatenate 'simple-base-string (unparse-unix-directory pathname) (unparse-unix-file pathname))) @@ -445,6 +446,7 @@ (follow-links t)) &body body) `(block nil + (/show0 "about to call %ENUMERATE-MATCHES") (%enumerate-matches (pathname ,pathname) ,verify-existence ,follow-links @@ -455,7 +457,7 @@ ;;; Call FUNCTION on matches. (defun %enumerate-matches (pathname verify-existence follow-links function) - (/noshow0 "entering %ENUMERATE-MATCHES") + (/show0 "entering %ENUMERATE-MATCHES") (when (pathname-type pathname) (unless (pathname-name pathname) (error "cannot supply a type without a name:~% ~S" pathname))) @@ -463,16 +465,16 @@ (member (pathname-type pathname) '(nil :unspecific))) (error "cannot supply a version without a type:~% ~S" pathname)) (let ((directory (pathname-directory pathname))) - (/noshow0 "computed DIRECTORY") + (/show0 "computed DIRECTORY") (if directory (ecase (first directory) (:absolute - (/noshow0 "absolute directory") + (/show0 "absolute directory") (%enumerate-directories "/" (rest directory) pathname verify-existence follow-links nil function)) (:relative - (/noshow0 "relative directory") + (/show0 "relative directory") (%enumerate-directories "" (rest directory) pathname verify-existence follow-links nil function))) @@ -576,16 +578,17 @@ ;;; Call FUNCTION on files. (defun %enumerate-files (directory pathname verify-existence function) (declare (simple-string directory)) - (/noshow0 "entering %ENUMERATE-FILES") + (/show0 "entering %ENUMERATE-FILES") (let ((name (%pathname-name pathname)) (type (%pathname-type pathname)) (version (%pathname-version pathname))) - (/noshow0 "computed NAME, TYPE, and VERSION") + (/show0 "computed NAME, TYPE, and VERSION") (cond ((member name '(nil :unspecific)) (/noshow0 "UNSPECIFIC, more or less") - (when (or (not verify-existence) - (sb!unix:unix-file-kind directory)) - (funcall function directory))) + (let ((directory (coerce directory 'base-string))) + (when (or (not verify-existence) + (sb!unix:unix-file-kind directory)) + (funcall function directory)))) ((or (pattern-p name) (pattern-p type) (eq name :wild) @@ -613,20 +616,20 @@ directory complete-filename)))))) (t - (/noshow0 "default case") + (/show0 "default case") (let ((file (concatenate 'base-string directory name))) - (/noshow "computed basic FILE") + (/show0 "computed basic FILE") (unless (or (null type) (eq type :unspecific)) - (/noshow0 "tweaking FILE for more-or-less-:UNSPECIFIC case") + (/show0 "tweaking FILE for more-or-less-:UNSPECIFIC case") (setf file (concatenate 'base-string file "." type))) (unless (member version '(nil :newest :wild :unspecific)) - (/noshow0 "tweaking FILE for more-or-less-:WILD case") + (/show0 "tweaking FILE for more-or-less-:WILD case") (setf file (concatenate 'base-string file "." (quick-integer-to-string version)))) - (/noshow0 "finished possibly tweaking FILE") + (/show0 "finished possibly tweaking FILE") (when (or (not verify-existence) (sb!unix:unix-file-kind file t)) - (/noshow0 "calling FUNCTION on FILE") + (/show0 "calling FUNCTION on FILE") (funcall function file))))))) (/noshow0 "filesys.lisp 603") @@ -679,6 +682,7 @@ ;;; Convert PATHNAME into a string that can be used with UNIX system ;;; calls, or return NIL if no match is found. Wild-cards are expanded. (defun unix-namestring (pathname-spec &optional (for-input t)) + (/show0 "in UNIX-NAMESTRING") (let* ((namestring (physicalize-pathname (merge-pathnames pathname-spec))) (matches nil)) ; an accumulator for actual matches (when (wild-pathname-p namestring) @@ -716,12 +720,16 @@ #!+sb-doc "Return a pathname which is the truename of the file if it exists, or NIL otherwise. An error of type FILE-ERROR is signaled if pathname is wild." + (/show0 "in PROBE-FILE") (let* ((defaulted-pathname (merge-pathnames pathname (sane-default-pathname-defaults))) (namestring (unix-namestring defaulted-pathname t))) + (/show0 "got NAMESTRING") (when (and namestring (sb!unix:unix-file-kind namestring t)) + (/show0 "got FILE-KIND") (let ((trueishname (sb!unix:unix-resolve-links namestring))) + (/show0 "resolved links") (when trueishname (let* ((*ignore-wildcards* t) (name (sb!unix:unix-simplify-pathname trueishname))) @@ -1040,7 +1048,7 @@ :device (pathname-device pathname) :directory (subseq dir 0 i)))) (unless (probe-file newpath) - (let ((namestring (namestring newpath))) + (let ((namestring (coerce (namestring newpath) 'base-string))) (when verbose (format *standard-output* "~&creating directory: ~A~%" diff -urN sbcl/src/code/fop.lisp sbcl.char/src/code/fop.lisp --- sbcl/src/code/fop.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/fop.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -71,11 +71,12 @@ ;;; of like READ-SEQUENCE specialized for files of (UNSIGNED-BYTE 8), ;;; with an automatic conversion from (UNSIGNED-BYTE 8) into CHARACTER ;;; for each element read -(declaim (ftype (function (stream simple-string &optional index) (values)) read-string-as-bytes)) +(declaim (ftype (function (stream simple-string &optional index) (values)) + read-string-as-bytes read-string-as-words)) (defun read-string-as-bytes (stream string &optional (length (length string))) (dotimes (i length) (setf (aref string i) - (code-char (read-byte stream)))) + (sb!xc:code-char (read-byte stream)))) ;; FIXME: The classic CMU CL code to do this was ;; (READ-N-BYTES FILE STRING START END). ;; It was changed for SBCL because we needed a portable version for @@ -83,6 +84,16 @@ ;; significantly better than the portable version here. If it is, then use ;; it as an alternate definition, protected with #-SB-XC-HOST. (values)) +(defun read-string-as-words (stream string &optional (length (length string))) + #+sb-xc-host (bug "READ-STRING-AS-WORDS called") + (dotimes (i length) + (setf (aref string i) + (sb!xc:code-char (logior + (read-byte stream) + (ash (read-byte stream) 8) + (ash (read-byte stream) 16) + (ash (read-byte stream) 24))))) + (values)) ;;;; miscellaneous fops @@ -123,11 +134,8 @@ #-sb-xc-host (%primitive sb!c:make-other-immediate-type 0 sb!vm:unbound-marker-widetag)) -;;; CMU CL had FOP-CHARACTER as fop 68, but it's not needed in current -;;; SBCL as we have no extended characters, only 1-byte characters. -;;; (Ditto for CMU CL, actually: FOP-CHARACTER was speculative generality.) -(define-fop (fop-short-character 69) - (code-char (read-byte-arg))) +(define-cloned-fops (fop-character 68) (fop-short-character 69) + (code-char (clone-arg))) (define-cloned-fops (fop-struct 48) (fop-small-struct 49) (let* ((size (clone-arg)) @@ -183,7 +191,12 @@ (make-string (* ,n-size 2)))) (done-with-fast-read-byte) (let ((,n-buffer *fasl-symbol-buffer*)) - (read-string-as-bytes *fasl-input-stream* + #+sb-xc-host + (read-string-as-bytes *fasl-input-stream* + ,n-buffer + ,n-size) + #-sb-xc-host + (read-string-as-words *fasl-input-stream* ,n-buffer ,n-size) (push-fop-table (without-package-locks @@ -232,7 +245,7 @@ (fop-uninterned-small-symbol-save 13) (let* ((arg (clone-arg)) (res (make-string arg))) - (read-string-as-bytes *fasl-input-stream* res) + (read-string-as-words *fasl-input-stream* res) (push-fop-table (make-symbol res)))) (define-fop (fop-package 14) @@ -344,12 +357,23 @@ ;;;; fops for loading arrays -(define-cloned-fops (fop-string 37) (fop-small-string 38) +(define-cloned-fops (fop-base-string 37) (fop-small-base-string 38) (let* ((arg (clone-arg)) - (res (make-string arg))) + (res (make-string arg :element-type 'base-char))) (read-string-as-bytes *fasl-input-stream* res) res)) +#+sb-xc-host +(define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (bug "CHARACTER-STRING FOP encountered")) + +#-sb-xc-host +(define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (let* ((arg (clone-arg)) + (res (make-string arg))) + (read-string-as-words *fasl-input-stream* res) + res)) + (define-cloned-fops (fop-vector 39) (fop-small-vector 40) (let* ((size (clone-arg)) (res (make-array size))) @@ -639,7 +663,7 @@ (let* ((kind (pop-stack)) (code-object (pop-stack)) (len (read-byte-arg)) - (sym (make-string len))) + (sym (make-string len :element-type 'base-char))) (read-n-bytes *fasl-input-stream* sym 0 len) (sb!vm:fixup-code-object code-object (read-word-arg) diff -urN sbcl/src/code/host-c-call.lisp sbcl.char/src/code/host-c-call.lisp --- sbcl/src/code/host-c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/host-c-call.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -14,9 +14,8 @@ (define-alien-type-class (c-string :include pointer :include-args (to))) (define-alien-type-translator c-string () - (make-alien-c-string-type :to - (parse-alien-type 'char - (sb!kernel::make-null-lexenv)))) + (make-alien-c-string-type + :to (parse-alien-type 'char (sb!kernel:make-null-lexenv)))) (define-alien-type-method (c-string :unparse) (type) (declare (ignore type)) @@ -24,7 +23,7 @@ (define-alien-type-method (c-string :lisp-rep) (type) (declare (ignore type)) - '(or simple-base-string null (alien (* char)))) + '(or simple-string null (alien (* char)))) (define-alien-type-method (c-string :naturalize-gen) (type alien) (declare (ignore type)) @@ -37,6 +36,37 @@ `(etypecase ,value (null (int-sap 0)) ((alien (* char)) (alien-sap ,value)) - (simple-base-string (vector-sap ,value)))) + (simple-base-string (vector-sap ,value)) + (simple-string (vector-sap (coerce ,value 'simple-base-string))))) + +(/show0 "host-c-call.lisp 42") + +(define-alien-type-class (utf8-string :include pointer :include-args (to))) + +(define-alien-type-translator utf8-string () + (make-alien-utf8-string-type + :to (parse-alien-type 'char (sb!kernel:make-null-lexenv)))) + +(define-alien-type-method (utf8-string :unparse) (type) + (declare (ignore type)) + 'utf8-string) + +(define-alien-type-method (utf8-string :lisp-rep) (type) + (declare (ignore type)) + '(or simple-string null (alien (* char)))) + +(define-alien-type-method (utf8-string :naturalize-gen) (type alien) + (declare (ignore type)) + `(if (zerop (sap-int ,alien)) + nil + (%naturalize-utf8-string ,alien))) + +(define-alien-type-method (utf8-string :deport-gen) (type value) + (declare (ignore type)) + `(etypecase ,value + (null (int-sap 0)) + ((alien (* char)) (alien-sap ,value)) + (simple-base-string (vector-sap ,value)) + (simple-string (vector-sap (%deport-utf8-string ,value))))) (/show0 "host-c-call.lisp end of file") diff -urN sbcl/src/code/interr.lisp sbcl.char/src/code/interr.lisp --- sbcl/src/code/interr.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/interr.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -123,6 +123,16 @@ :datum object :expected-type 'base-string)) +(deferr object-not-vector-nil-error (object) + (error 'type-error + :datum object + :expected-type '(vector nil))) + +(deferr object-not-character-string-error (object) + (error 'type-error + :datum object + :expected-type '(vector character))) + (deferr object-not-bit-vector-error (object) (error 'type-error :datum object @@ -190,10 +200,10 @@ (deferr unbound-symbol-error (symbol) (error 'unbound-variable :name symbol)) -(deferr object-not-base-char-error (object) +(deferr object-not-character-error (object) (error 'type-error :datum object - :expected-type 'base-char)) + :expected-type 'character)) (deferr object-not-sap-error (object) (error 'type-error diff -urN sbcl/src/code/late-format.lisp sbcl.char/src/code/late-format.lisp --- sbcl/src/code/late-format.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/late-format.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -39,7 +39,7 @@ (string (missing-arg) :type simple-string) (start (missing-arg) :type (and unsigned-byte fixnum)) (end (missing-arg) :type (and unsigned-byte fixnum)) - (character (missing-arg) :type base-char) + (character (missing-arg) :type character) (colonp nil :type (member t nil)) (atsignp nil :type (member t nil)) (params nil :type list)) @@ -270,8 +270,11 @@ (etypecase directive (format-directive (let ((expander - (aref *format-directive-expanders* - (char-code (format-directive-character directive)))) + (let ((char (format-directive-character directive))) + (typecase char + (base-char + (aref *format-directive-expanders* (char-code char))) + (character nil)))) (*default-format-error-offset* (1- (format-directive-end directive)))) (declare (type (or null function) expander)) diff -urN sbcl/src/code/late-type.lisp sbcl.char/src/code/late-type.lisp --- sbcl/src/code/late-type.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/late-type.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -2096,22 +2096,22 @@ (if (eq (car dims) '*) (case eltype (bit 'bit-vector) - (base-char 'base-string) + ((base-char) 'base-string) (* 'vector) (t `(vector ,eltype))) (case eltype (bit `(bit-vector ,(car dims))) - (base-char `(base-string ,(car dims))) + ((base-char) `(base-string ,(car dims))) (t `(vector ,eltype ,(car dims))))) (if (eq (car dims) '*) (case eltype (bit 'simple-bit-vector) - (base-char 'simple-base-string) + ((base-char) 'simple-base-string) ((t) 'simple-vector) (t `(simple-array ,eltype (*)))) (case eltype (bit `(simple-bit-vector ,(car dims))) - (base-char `(simple-base-string ,(car dims))) + ((base-char) `(simple-base-string ,(car dims))) ((t) `(simple-vector ,(car dims))) (t `(simple-array ,eltype ,dims)))))) (t @@ -2378,18 +2378,24 @@ (!def-type-translator member (&rest members) (if members - (let (ms numbers) + (let (ms numbers char-codes) (dolist (m (remove-duplicates members)) (typecase m (float (if (zerop m) (push m ms) (push (ctype-of m) numbers))) (real (push (ctype-of m) numbers)) + (character (push (sb!xc:char-code m) char-codes)) (t (push m ms)))) (apply #'type-union (if ms (make-member-type :members ms) *empty-type*) + (if char-codes + (make-character-set-type + :pairs (mapcar (lambda (x) (cons x x)) + (sort char-codes #'<))) + *empty-type*) (nreverse numbers))) *empty-type*)) @@ -2562,6 +2568,7 @@ ((type= type (specifier-type 'simple-string)) 'simple-string) ((type= type (specifier-type 'string)) 'string) ((type= type (specifier-type 'complex)) 'complex) + ((type= type (specifier-type 'standard-char)) 'standard-char) (t `(or ,@(mapcar #'type-specifier (union-type-types type)))))) ;;; Two union types are equal if they are each subtypes of each @@ -2834,6 +2841,89 @@ (type-intersection (cons-type-car-type type1) (cons-type-car-type type2)) cdr-int2))))) + +;;;; CHARACTER-SET types + +(!define-type-class character-set) + +(!def-type-translator character-set + (&optional (pairs '((0 . #.(1- sb!xc:char-code-limit))))) + (make-character-set-type :pairs pairs)) + +(!define-type-method (character-set :negate) (type) + (let ((pairs (character-set-type-pairs type))) + (if (and (= (length pairs) 1) + (= (caar pairs) 0) + (= (cdar pairs) (1- sb!xc:char-code-limit))) + (make-negation-type :type type) + (let ((not-character + (make-negation-type + :type (make-character-set-type + :pairs '((0 . #.(1- sb!xc:char-code-limit))))))) + (type-union + not-character + (make-character-set-type + :pairs (let (not-pairs) + (when (> (caar pairs) 0) + (push (cons 0 (1- (caar pairs))) not-pairs)) + (do* ((tail pairs (cdr tail)) + (high1 (cdar tail)) + (low2 (caadr tail))) + ((null (cdr tail)) + (when (< (cdar tail) (1- sb!xc:char-code-limit)) + (push (cons (1+ (cdar tail)) + (1- sb!xc:char-code-limit)) + not-pairs)) + (nreverse not-pairs)) + (push (cons (1+ high1) (1- low2)) not-pairs))))))))) + +(!define-type-method (character-set :unparse) (type) + (cond + ((type= type (specifier-type 'character)) 'character) + ((type= type (specifier-type 'base-char)) 'base-char) + ((type= type (specifier-type 'extended-char)) 'extended-char) + ((type= type (specifier-type 'standard-char)) 'standard-char) + (t (let ((pairs (character-set-type-pairs type))) + `(member ,@(loop for (low . high) in pairs + append (loop for code from low upto high + collect (sb!xc:code-char code)))))))) + +(!define-type-method (character-set :simple-=) (type1 type2) + (let ((pairs1 (character-set-type-pairs type1)) + (pairs2 (character-set-type-pairs type2))) + (values (equal pairs1 pairs2) t))) + +(!define-type-method (character-set :simple-subtypep) (type1 type2) + (values + (dolist (pair (character-set-type-pairs type1) t) + (unless (position pair (character-set-type-pairs type2) + :test (lambda (x y) (and (>= (car x) (car y)) + (<= (cdr x) (cdr y))))) + (return nil))) + t)) + +(!define-type-method (character-set :simple-union2) (type1 type2) + ;; KLUDGE: the canonizing in the MAKE-CHARACTER-SET-TYPE function + ;; actually does the union for us. It might be a little fragile to + ;; rely on it. + (make-character-set-type + :pairs (merge 'list + (copy-alist (character-set-type-pairs type1)) + (copy-alist (character-set-type-pairs type2)) + #'< :key #'car))) + +(!define-type-method (character-set :simple-intersection2) (type1 type2) + ;; KLUDGE: brute force. + (let (pairs) + (dolist (pair1 (character-set-type-pairs type1) + (make-character-set-type + :pairs (sort pairs #'< :key #'car))) + (dolist (pair2 (character-set-type-pairs type2)) + (cond + ((<= (car pair1) (car pair2) (cdr pair1)) + (push (cons (car pair2) (min (cdr pair1) (cdr pair2))) pairs)) + ((<= (car pair2) (car pair1) (cdr pair2)) + (push (cons (car pair1) (min (cdr pair1) (cdr pair2))) pairs))))))) ;;; Return the type that describes all objects that are in X but not ;;; in Y. If we can't determine this type, then return NIL. diff -urN sbcl/src/code/pprint.lisp sbcl.char/src/code/pprint.lisp --- sbcl/src/code/pprint.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/pprint.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -119,7 +119,7 @@ (defun pretty-out (stream char) (declare (type pretty-stream stream) - (type base-char char)) + (type character char)) (cond ((char= char #\newline) (enqueue-newline stream :literal)) (t diff -urN sbcl/src/code/pred.lisp sbcl.char/src/code/pred.lisp --- sbcl/src/code/pred.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/pred.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -64,6 +64,7 @@ ;; the type it tests for in the Common Lisp type system, and since it's ;; only used in the implementation of a few specialized things.) (def-type-predicate-wrapper double-float-p) + (def-type-predicate-wrapper extended-char-p) (def-type-predicate-wrapper fdefn-p) (def-type-predicate-wrapper fixnump) (def-type-predicate-wrapper floatp) @@ -78,7 +79,7 @@ (def-type-predicate-wrapper ratiop) (def-type-predicate-wrapper realp) (def-type-predicate-wrapper short-float-p) - (def-type-predicate-wrapper sb!kernel:simple-array-p) + (def-type-predicate-wrapper simple-array-p) (def-type-predicate-wrapper simple-bit-vector-p) (def-type-predicate-wrapper simple-base-string-p) (def-type-predicate-wrapper simple-string-p) @@ -127,6 +128,8 @@ '(integer #.(1+ sb!xc:most-positive-fixnum)) 'bignum)) (standard-char 'standard-char) + (base-char 'base-char) + (extended-char 'extended-char) ((member t) 'boolean) (keyword 'keyword) ((or array complex) (type-specifier (ctype-of object))) diff -urN sbcl/src/code/primordial-extensions.lisp sbcl.char/src/code/primordial-extensions.lisp --- sbcl/src/code/primordial-extensions.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/primordial-extensions.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -167,30 +167,15 @@ ;;; producing a symbol in the current package. (eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) (defun symbolicate (&rest things) - (let ((name (case (length things) - ;; Why isn't this just the value in the T branch? - ;; Well, this is called early in cold-init, before - ;; the type system is set up; however, now that we - ;; check for bad lengths, the type system is needed - ;; for calls to CONCATENATE. So we need to make sure - ;; that the calls are transformed away: - (1 (concatenate 'string - (the simple-base-string - (string (car things))))) - (2 (concatenate 'string - (the simple-base-string - (string (car things))) - (the simple-base-string - (string (cadr things))))) - (3 (concatenate 'string - (the simple-base-string - (string (car things))) - (the simple-base-string - (string (cadr things))) - (the simple-base-string - (string (caddr things))))) - (t (apply #'concatenate 'string (mapcar #'string things)))))) - (values (intern name))))) + (let* ((length (reduce #'+ things + :key (lambda (x) (length (string x))))) + (name (make-array length :element-type 'character))) + (let ((index 0)) + (dolist (thing things (values (intern name))) + (let* ((x (string thing)) + (len (length x))) + (replace name x :start1 index) + (incf index len))))))) ;;; like SYMBOLICATE, but producing keywords (defun keywordicate (&rest things) diff -urN sbcl/src/code/print.lisp sbcl.char/src/code/print.lisp --- sbcl/src/code/print.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/print.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -615,10 +615,10 @@ ;;; character has. At characters have at least one bit set, so we can ;;; search for any character with a positive test. (defvar *character-attributes* - (make-array char-code-limit + (make-array 160 ; FIXME :element-type '(unsigned-byte 16) :initial-element 0)) -(declaim (type (simple-array (unsigned-byte 16) (#.char-code-limit)) +(declaim (type (simple-array (unsigned-byte 16) (#.160)) ; FIXME *character-attributes*)) ;;; constants which are a bit-mask for each interesting character attribute @@ -672,17 +672,17 @@ (set-bit #\/ slash-attribute) ;; Mark anything not explicitly allowed as funny. - (dotimes (i char-code-limit) + (dotimes (i 160) ; FIXME (when (zerop (aref *character-attributes* i)) (setf (aref *character-attributes* i) funny-attribute)))) ;;; For each character, the value of the corresponding element is the ;;; lowest base in which that character is a digit. (defvar *digit-bases* - (make-array char-code-limit + (make-array 128 ; FIXME :element-type '(unsigned-byte 8) :initial-element 36)) -(declaim (type (simple-array (unsigned-byte 8) (#.char-code-limit)) +(declaim (type (simple-array (unsigned-byte 8) (#.128)) ; FIXME *digit-bases*)) (dotimes (i 36) (let ((char (digit-char i 36))) @@ -698,7 +698,11 @@ ,(if at-end '(go TEST-SIGN) '(return nil))) (setq current (schar name index) code (char-code current) - bits (aref attributes code)) + bits (cond ; FIXME + ((< code 160) (aref attributes code)) + ((upper-case-p current) uppercase-attribute) + ((lower-case-p current) lowercase-attribute) + (t other-attribute))) (incf index) (go ,tag))) (test (&rest attributes) @@ -713,7 +717,8 @@ attributes)) bits))))) (digitp () - `(< (the fixnum (aref bases code)) base))) + `(and (< code 128) ; FIXME + (< (the fixnum (aref bases code)) base)))) (prog ((len (length name)) (attributes *character-attributes*) @@ -740,7 +745,13 @@ letter-attribute))) (do ((i (1- index) (1+ i))) ((= i len) (return-from symbol-quotep nil)) - (unless (zerop (logand (aref attributes (char-code (schar name i))) + (unless (zerop (logand (let* ((char (schar name i)) + (code (char-code char))) + (cond + ((< code 160) (aref attributes code)) + ((upper-case-p char) uppercase-attribute) + ((lower-case-p char) lowercase-attribute) + (t other-attribute))) mask)) (return-from symbol-quotep t)))) @@ -1251,7 +1262,11 @@ ;;; [CMUC]tradix.press. DO NOT EVEN THINK OF ATTEMPTING TO ;;; UNDERSTAND THIS CODE WITHOUT READING THE PAPER! -(declaim (type (simple-array character (10)) *digits*)) +;;; and, to add to the confusion: this declaration may look wrong, but +;;; in fact it's right. FIXME: it should probably be local to +;;; FLOAT-STRING, 'cause we're probably not ever going to print floats +;;; in bases other than 10; then this declaration can go away. +(declaim (type (simple-array base-char (10)) *digits*)) (defvar *digits* "0123456789") (defun flonum-to-string (x &optional width fdigits scale fmin) diff -urN sbcl/src/code/reader.lisp sbcl.char/src/code/reader.lisp --- sbcl/src/code/reader.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/reader.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -61,21 +61,28 @@ (defmacro get-cat-entry (char rt) ;; KLUDGE: Only give this side-effect-free args. ;; FIXME: should probably become inline function - `(elt (character-attribute-table ,rt) - (char-code ,char))) + `(if (typep ,char 'base-char) + (elt (character-attribute-array ,rt) (char-code ,char)) + (gethash ,char (character-attribute-hash-table ,rt) +char-attr-constituent+))) (defun set-cat-entry (char newvalue &optional (rt *readtable*)) - (setf (elt (character-attribute-table rt) - (char-code char)) - newvalue)) + (if (typep char 'base-char) + (setf (elt (character-attribute-array rt) (char-code char)) newvalue) + ;; FIXME: could REMHASH if we're setting to + ;; +CHAR-ATTR-CONSTITUENT+ + (setf (gethash char (character-attribute-hash-table rt)) newvalue))) ;;; the value actually stored in the character macro table. As per ;;; ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, this can ;;; be either a function or NIL. (eval-when (:compile-toplevel :execute) (sb!xc:defmacro get-raw-cmt-entry (char readtable) - `(svref (character-macro-table ,readtable) - (char-code ,char)))) + `(if (typep ,char 'base-char) + (svref (character-macro-array ,readtable) (char-code ,char)) + ;; Note: DEFAULT here is NIL, not #'UNDEFINED-MACRO-CHAR, so + ;; that everything above the base-char range is a non-macro + ;; constituent by default. + (gethash ,char (character-macro-hash-table ,readtable) nil)))) ;;; the value represented by whatever is stored in the character macro ;;; table. As per ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, @@ -87,10 +94,13 @@ #'read-token))) (defun set-cmt-entry (char new-value-designator &optional (rt *readtable*)) - (setf (svref (character-macro-table rt) - (char-code char)) - (and new-value-designator - (%coerce-callable-to-fun new-value-designator)))) + (if (typep char 'base-char) + (setf (svref (character-macro-array rt) (char-code char)) + (and new-value-designator + (%coerce-callable-to-fun new-value-designator))) + (setf (gethash char (character-macro-hash-table rt)) + (and new-value-designator + (%coerce-callable-to-fun new-value-designator))))) (defun undefined-macro-char (stream char) (unless *read-suppress* @@ -137,7 +147,7 @@ (defun !cold-init-secondary-attribute-table () (setq *secondary-attribute-table* - (make-array char-code-limit :element-type '(unsigned-byte 8) + (make-array base-char-code-limit :element-type '(unsigned-byte 8) :initial-element +char-attr-constituent+)) (!set-secondary-attribute #\: +char-attr-package-delimiter+) (!set-secondary-attribute #\| +char-attr-multiple-escape+) ; |) [for EMACS] @@ -165,17 +175,29 @@ ;;;; readtable operations +(defun shallow-replace/eql-hash-table (to from) + (maphash (lambda (k v) (setf (gethash k to) v)) from)) + (defun copy-readtable (&optional (from-readtable *readtable*) to-readtable) (let ((really-from-readtable (or from-readtable *standard-readtable*)) (really-to-readtable (or to-readtable (make-readtable)))) - (replace (character-attribute-table really-to-readtable) - (character-attribute-table really-from-readtable)) - (replace (character-macro-table really-to-readtable) - (character-macro-table really-from-readtable)) + (replace (character-attribute-array really-to-readtable) + (character-attribute-array really-from-readtable)) + (shallow-replace/eql-hash-table + (character-attribute-hash-table really-to-readtable) + (character-attribute-hash-table really-from-readtable)) + (replace (character-macro-array really-to-readtable) + (character-macro-array really-from-readtable)) + (shallow-replace/eql-hash-table + (character-macro-hash-table really-to-readtable) + (character-macro-hash-table really-from-readtable)) (setf (dispatch-tables really-to-readtable) - (mapcar (lambda (pair) (cons (car pair) - (copy-seq (cdr pair)))) + (mapcar (lambda (pair) + (cons (car pair) + (let ((table (make-hash-table))) + (shallow-replace/eql-hash-table table (cdr pair)) + table))) (dispatch-tables really-from-readtable))) (setf (readtable-case really-to-readtable) (readtable-case really-from-readtable)) @@ -250,17 +272,27 @@ (let ((stream (in-synonym-of stream))) (if (ansi-stream-p stream) (prepare-for-fast-read-char stream - (do ((attribute-table (character-attribute-table *readtable*)) + (do ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table + (character-attribute-hash-table *readtable*)) (char (fast-read-char t) (fast-read-char t))) - ((/= (the fixnum (aref attribute-table (char-code char))) + ((/= (the fixnum + (if (typep char 'base-char) + (aref attribute-array (char-code char)) + (gethash char attribute-hash-table +char-attr-constituent+))) +char-attr-whitespace+) (done-with-fast-read-char) char))) ;; CLOS stream - (do ((attribute-table (character-attribute-table *readtable*)) + (do ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table + (character-attribute-hash-table *readtable*)) (char (read-char stream nil :eof) (read-char stream nil :eof))) ((or (eq char :eof) - (/= (the fixnum (aref attribute-table (char-code char))) + (/= (the fixnum + (if (typep char 'base-char) + (aref attribute-array (char-code char)) + (gethash char attribute-hash-table +char-attr-constituent+))) +char-attr-whitespace+)) (if (eq char :eof) (error 'end-of-file :stream stream) @@ -299,7 +331,7 @@ ;; all constituents (do ((ichar 0 (1+ ichar)) (char)) - ((= ichar #O200)) + ((= ichar base-char-code-limit)) (setq char (code-char ichar)) (when (constituentp char *standard-readtable*) (set-cat-entry char (get-secondary-attribute char)) @@ -622,8 +654,13 @@ ;;;; character classes ;;; Return the character class for CHAR. -(defmacro char-class (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +;;; +;;; FIXME: why aren't these ATT-getting forms using GET-CAT-ENTRY? +;;; Because we've cached the readtable tables? +(defmacro char-class (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if (<= att +char-attr-terminating-macro+) +char-attr-delimiter+ @@ -631,8 +668,10 @@ ;;; Return the character class for CHAR, which might be part of a ;;; rational number. -(defmacro char-class2 (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +(defmacro char-class2 (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if (<= att +char-attr-terminating-macro+) +char-attr-delimiter+ @@ -645,8 +684,10 @@ ;;; Return the character class for a char which might be part of a ;;; rational or floating number. (Assume that it is a digit if it ;;; could be.) -(defmacro char-class3 (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +(defmacro char-class3 (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if possibly-rational (setq possibly-rational @@ -735,7 +776,8 @@ (when *read-suppress* (internal-read-extended-token stream firstchar nil) (return-from read-token nil)) - (let ((attribute-table (character-attribute-table *readtable*)) + (let ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table (character-attribute-hash-table *readtable*)) (package-designator nil) (colons 0) (possibly-rational t) @@ -746,7 +788,7 @@ (seen-multiple-escapes nil)) (reset-read-buffer) (prog ((char firstchar)) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-sign+ (go SIGN)) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-digit-or-expt+ @@ -765,7 +807,7 @@ (unless char (go RETURN-SYMBOL)) (setq possibly-rational t possibly-float t) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-digit-or-expt+ (setq seen-digit-or-expt t) @@ -782,7 +824,7 @@ (setq char (read-char stream nil nil)) (unless char (return (make-integer))) (setq was-possibly-float possibly-float) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-decimal-digit+ (if possibly-float (go LEFTDECIMALDIGIT) @@ -811,7 +853,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-integer))) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-decimal-digit+ (bug "impossible!")) (#.+char-attr-constituent-dot+ (go SYMBOL)) @@ -832,7 +874,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDECIMALDIGIT)) (#.+char-attr-constituent-dot+ (go MIDDLEDOT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) @@ -849,7 +891,7 @@ (setq char (read-char stream nil nil)) (unless char (return (let ((*read-base* 10)) (make-integer)))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) (#.+char-attr-delimiter+ @@ -864,7 +906,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-float stream))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) (#.+char-attr-delimiter+ @@ -878,7 +920,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -888,7 +930,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (%reader-error stream "dot context error")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-dot+ (go DOTS)) (#.+char-attr-delimiter+ (%reader-error stream "dot context error")) @@ -901,7 +943,7 @@ (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) (setq possibly-float t) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-sign+ (go EXPTSIGN)) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) @@ -913,7 +955,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -924,7 +966,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-float stream))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -937,7 +979,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class2 char attribute-table) + (case (char-class2 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RATIODIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -948,7 +990,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-ratio stream))) - (case (char-class2 char attribute-table) + (case (char-class2 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RATIODIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -961,7 +1003,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (%reader-error stream "too many dots")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-dot+ (go DOTS)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -979,7 +1021,7 @@ (ouch-read-buffer char) (setq char (fast-read-char nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-escape+ (done-with-fast-read-char) (go ESCAPE)) (#.+char-attr-delimiter+ (done-with-fast-read-char) @@ -996,7 +1038,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil :eof)) (when (eq char :eof) (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) @@ -1013,7 +1055,7 @@ (ouch-read-buffer nextchar)) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) @@ -1028,7 +1070,7 @@ (ouch-read-buffer char)) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) @@ -1055,7 +1097,7 @@ (setq escapes ()) (setq char (read-char stream nil nil)) (unless char (reader-eof-error stream "after reading a colon")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (%reader-error stream @@ -1070,7 +1112,7 @@ (setq char (read-char stream nil nil)) (unless char (reader-eof-error stream "after reading a colon")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (%reader-error stream @@ -1323,7 +1365,7 @@ ;;;; cruft for dispatch macros (defun make-char-dispatch-table () - (make-array char-code-limit :initial-element #'dispatch-char-error)) + (make-hash-table)) (defun dispatch-char-error (stream sub-char ignore) (declare (ignore ignore)) @@ -1362,9 +1404,7 @@ (dpair (find disp-char (dispatch-tables rt) :test #'char= :key #'car))) (if dpair - (setf (elt (the simple-vector (cdr dpair)) - (char-code sub-char)) - (coerce function 'function)) + (setf (gethash sub-char (cdr dpair)) (coerce function 'function)) (error "~S is not a dispatch char." disp-char)))) (defun get-dispatch-macro-character (disp-char sub-char @@ -1377,14 +1417,7 @@ (dpair (find disp-char (dispatch-tables rt) :test #'char= :key #'car))) (if dpair - (let ((dispatch-fun (elt (the simple-vector (cdr dpair)) - (char-code sub-char)))) - ;; Digits are also initialized in a dispatch table to - ;; #'dispatch-char-error; READ-DISPATCH-CHAR handles them - ;; separately. - CSR, 2002-04-12 - (if (eq dispatch-fun #'dispatch-char-error) - nil - dispatch-fun)) + (values (gethash sub-char (cdr dpair))) (error "~S is not a dispatch char." disp-char)))) (defun read-dispatch-char (stream char) @@ -1408,8 +1441,7 @@ :test #'char= :key #'car))) (if dpair (funcall (the function - (elt (the simple-vector (cdr dpair)) - (char-code sub-char))) + (gethash sub-char (cdr dpair) #'dispatch-char-error)) stream sub-char (if numargp numarg nil)) (%reader-error stream "no dispatch table for dispatch char"))))) @@ -1433,10 +1465,10 @@ (start start) (end (%check-vector-sequence-bounds string start end))) (unless *read-from-string-spares* - (push (internal-make-string-input-stream "" 0 0) - *read-from-string-spares*)) + (push (make-string-input-stream "" 0 0) *read-from-string-spares*)) (let ((stream (pop *read-from-string-spares*))) - (setf (string-input-stream-string stream) string) + (setf (string-input-stream-string stream) + (coerce string '(simple-array character (*)))) (setf (string-input-stream-current stream) start) (setf (string-input-stream-end stream) end) (unwind-protect diff -urN sbcl/src/code/readtable.lisp sbcl.char/src/code/readtable.lisp --- sbcl/src/code/readtable.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/readtable.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -12,7 +12,7 @@ (in-package "SB!IMPL") (sb!xc:deftype attribute-table () - '(simple-array (unsigned-byte 8) (#.sb!xc:char-code-limit))) + '(simple-array (unsigned-byte 8) (#.base-char-code-limit))) ;;; constants for readtable character attributes. These are all as in ;;; the manual. @@ -58,20 +58,22 @@ ;; In order to make READ-TOKEN fast, all this information is stored ;; in the character attribute table by having different varieties of ;; constituents. - (character-attribute-table - (make-array sb!xc:char-code-limit + (character-attribute-array + (make-array base-char-code-limit :element-type '(unsigned-byte 8) :initial-element +char-attr-constituent+) :type attribute-table) + (character-attribute-hash-table (make-hash-table) :type hash-table) ;; The CHARACTER-MACRO-TABLE is a vector of CHAR-CODE-LIMIT ;; functions. One of these functions called with appropriate ;; arguments whenever any non-WHITESPACE character is encountered ;; inside READ-PRESERVING-WHITESPACE. These functions are used to ;; implement user-defined read-macros, system read-macros, and the ;; number-symbol reader. - (character-macro-table - (make-array sb!xc:char-code-limit :initial-element #'undefined-macro-char) - :type (simple-vector #.sb!xc:char-code-limit)) + (character-macro-array + (make-array base-char-code-limit :initial-element #'undefined-macro-char) + :type (simple-vector #.base-char-code-limit)) + (character-macro-hash-table (make-hash-table) :type hash-table) ;; an alist from dispatch characters to vectors of CHAR-CODE-LIMIT ;; functions, for use in defining dispatching macros (like #-macro) (dispatch-tables () :type list) diff -urN sbcl/src/code/room.lisp sbcl.char/src/code/room.lisp --- sbcl/src/code/room.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/room.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -72,7 +72,8 @@ :kind :fixed :length size)))))) -(dolist (code (list complex-base-string-widetag simple-array-widetag +(dolist (code (list complex-character-string-widetag + complex-base-string-widetag simple-array-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (setf (svref *meta-room-info* code) @@ -122,6 +123,11 @@ :kind :string :length 0)) +(setf (svref *meta-room-info* simple-character-string-widetag) + (make-room-info :name 'simple-character-string + :kind :string + :length 2)) + (setf (svref *meta-room-info* simple-array-nil-widetag) (make-room-info :name 'simple-array-nil :kind :fixed @@ -176,11 +182,11 @@ ;;; Return the total size of a vector in bytes, including any pad. #!-sb-fluid (declaim (inline vector-total-size)) (defun vector-total-size (obj info) - (let ((shift (room-info-length info)) - (len (+ (length (the (simple-array * (*)) obj)) - (ecase (room-info-kind info) - (:vector 0) - (:string 1))))) + (let* ((shift (room-info-length info)) + (len (+ (length (the (simple-array * (*)) obj)) + (ecase (room-info-kind info) + (:vector 0) + (:string 1))))) (declare (type (integer -3 3) shift)) (round-to-dualword (+ (* vector-data-offset n-word-bytes) @@ -246,7 +252,7 @@ (size (ecase (room-info-kind info) (:fixed (aver (or (eql (room-info-length info) - (1+ (get-header-data obj))) + (1+ (get-header-data obj))) (floatp obj) (simple-array-nil-p obj))) (round-to-dualword @@ -467,6 +473,7 @@ #.single-float-widetag #.double-float-widetag #.simple-base-string-widetag + #.simple-character-string-widetag #.simple-array-nil-widetag #.simple-bit-vector-widetag #.simple-array-unsigned-byte-2-widetag diff -urN sbcl/src/code/run-program.lisp sbcl.char/src/code/run-program.lisp --- sbcl/src/code/run-program.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/run-program.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -289,12 +289,12 @@ (defun find-a-pty () (dolist (char '(#\p #\q)) (dotimes (digit 16) - (let* ((master-name (format nil "/dev/pty~C~X" char digit)) + (let* ((master-name (coerce (format nil "/dev/pty~C~X" char digit) 'base-string)) (master-fd (sb-unix:unix-open master-name sb-unix:o_rdwr #o666))) (when master-fd - (let* ((slave-name (format nil "/dev/tty~C~X" char digit)) + (let* ((slave-name (coerce (format nil "/dev/tty~C~X" char digit) 'base-string)) (slave-fd (sb-unix:unix-open slave-name sb-unix:o_rdwr #o666))) @@ -347,7 +347,9 @@ (declare (simple-string s)) (let ((n (length s))) ;; Blast the string into place. - (sb-kernel:copy-to-system-area (the simple-string s) + (sb-kernel:copy-to-system-area (the simple-base-string + ;; FIXME + (coerce s 'simple-base-string)) (* sb-vm:vector-data-offset sb-vm:n-word-bits) string-sap 0 @@ -382,6 +384,7 @@ ;;; Is UNIX-FILENAME the name of a file that we can execute? (defun unix-filename-is-executable-p (unix-filename) (declare (type simple-string unix-filename)) + (setf unix-filename (coerce unix-filename 'base-string)) (values (and (eq (sb-unix:unix-file-kind unix-filename) :file) (sb-unix:unix-access unix-filename sb-unix:x_ok)))) @@ -614,7 +617,7 @@ ;;; stream. (defun copy-descriptor-to-stream (descriptor stream cookie) (incf (car cookie)) - (let ((string (make-string 256)) + (let ((string (make-string 256 :element-type 'base-char)) handler) (setf handler (sb-sys:add-fd-handler @@ -683,7 +686,7 @@ ;; Use /dev/null. (multiple-value-bind (fd errno) - (sb-unix:unix-open "/dev/null" + (sb-unix:unix-open #.(coerce "/dev/null" 'base-string) (case direction (:input sb-unix:o_rdonly) (:output sb-unix:o_wronly) @@ -735,7 +738,7 @@ (dotimes (count 256 (error "could not open a temporary file in /tmp")) - (let* ((name (format nil "/tmp/.run-program-~D" count)) + (let* ((name (coerce (format nil "/tmp/.run-program-~D" count) 'base-string)) (fd (sb-unix:unix-open name (logior sb-unix:o_rdwr sb-unix:o_creat diff -urN sbcl/src/code/seq.lisp sbcl.char/src/code/seq.lisp --- sbcl/src/code/seq.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/seq.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -569,6 +569,14 @@ (when (null source-end) (setq source-end (length source-sequence))) (mumble-replace-from-mumble)) +(defun simple-character-string-replace-from-simple-character-string* + (target-sequence source-sequence + target-start target-end source-start source-end) + (declare (type (simple-array character (*)) target-sequence source-sequence)) + (when (null target-end) (setq target-end (length target-sequence))) + (when (null source-end) (setq source-end (length source-sequence))) + (mumble-replace-from-mumble)) + (define-sequence-traverser replace (sequence1 sequence2 &key start1 end1 start2 end2) #!+sb-doc diff -urN sbcl/src/code/stream.lisp sbcl.char/src/code/stream.lisp --- sbcl/src/code/stream.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/stream.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -276,11 +276,11 @@ #!-sb-fluid (declaim (inline ansi-stream-unread-char)) (defun ansi-stream-unread-char (character stream) (let ((index (1- (ansi-stream-in-index stream))) - (buffer (ansi-stream-in-buffer stream))) + (buffer (ansi-stream-cin-buffer stream))) (declare (fixnum index)) (when (minusp index) (error "nothing to unread")) (cond (buffer - (setf (aref buffer index) (char-code character)) + (setf (aref buffer index) character) (setf (ansi-stream-in-index stream) index)) (t (funcall (ansi-stream-misc stream) stream @@ -418,7 +418,7 @@ ;;; the IN-BUFFER for text streams. There is definitely an IN-BUFFER, ;;; and hence must be an N-BIN method. (defun fast-read-char-refill (stream eof-error-p eof-value) - (let* ((ibuf (ansi-stream-in-buffer stream)) + (let* ((ibuf (ansi-stream-cin-buffer stream)) (count (funcall (ansi-stream-n-bin stream) stream ibuf @@ -433,16 +433,17 @@ (funcall (ansi-stream-in stream) stream eof-error-p eof-value)) (t (when (/= start +ansi-stream-in-buffer-extra+) + ;; FIXME AARGH KLUDGE There's no sb!vm:n-character-bits. (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+ - sb!vm:n-byte-bits) + sb!vm:n-byte-bits 4) (* sb!vm:vector-data-offset sb!vm:n-word-bits)) - ibuf (+ (the index (* start sb!vm:n-byte-bits)) + ibuf (+ (the index (* start sb!vm:n-byte-bits 4)) (* sb!vm:vector-data-offset sb!vm:n-word-bits)) - (* count sb!vm:n-byte-bits))) + (* count sb!vm:n-byte-bits 4))) (setf (ansi-stream-in-index stream) (1+ start)) - (code-char (aref ibuf start)))))) + (aref ibuf start))))) ;;; This is similar to FAST-READ-CHAR-REFILL, but we don't have to ;;; leave room for unreading. @@ -1028,7 +1029,7 @@ (:include string-stream (in #'string-inch) (bin #'ill-bin) - (n-bin #'string-stream-read-n-bytes) + (n-bin #'ill-bin) (misc #'string-in-misc) (string (missing-arg) :type simple-string)) (:constructor internal-make-string-input-stream @@ -1233,14 +1234,19 @@ (element-type (string-output-stream-element-type stream)) (result (case element-type - ;; Overwhelmingly common case; can be inlined. + ;; overwhelmingly common case: can be inlined ((character) (make-string length)) + ;; slightly less common cases: inline it anyway + ((base-char standard-char) + (make-string length :element-type 'base-char)) (t (make-string length :element-type element-type))))) ;; For the benefit of the REPLACE transform, let's do this, so ;; that the common case isn't ludicrously expensive. (etypecase result ((simple-array character (*)) (replace result (string-output-stream-string stream))) + (simple-base-string + (replace result (string-output-stream-string stream))) ((simple-array nil (*)) (replace result (string-output-stream-string stream)))) (setf (string-output-stream-index stream) 0 @@ -1263,6 +1269,8 @@ ;;; the CLM, but they are required for the implementation of ;;; WITH-OUTPUT-TO-STRING. +;;; FIXME: need to support (VECTOR BASE-CHAR) and (VECTOR NIL), +;;; ideally without destroying all hope of efficiency. (deftype string-with-fill-pointer () '(and (vector character) (satisfies array-has-fill-pointer-p))) @@ -1292,9 +1300,9 @@ (if (= offset-current end) (let* ((new-length (1+ (* current 2))) (new-workspace (make-string new-length))) - (declare (simple-string new-workspace)) - (%byte-blt workspace start - new-workspace 0 current) + (declare (type (simple-array character (*)) new-workspace)) + (replace new-workspace workspace + :start2 start :end2 offset-current) (setf workspace new-workspace offset-current current) (set-array-header buffer workspace new-length @@ -1322,21 +1330,16 @@ (let* ((new-length (+ (the fixnum (* current 2)) string-len)) (new-workspace (make-string new-length))) (declare (type (simple-array character (*)) new-workspace)) - (%byte-blt workspace dst-start - new-workspace 0 current) - (setf workspace new-workspace) - (setf offset-current current) - (setf offset-dst-end dst-end) - (set-array-header buffer - workspace - new-length - dst-end - 0 - new-length - nil)) + (replace new-workspace workspace + :start2 dst-start :end2 offset-current) + (setf workspace new-workspace + offset-current current + offset-dst-end dst-end) + (set-array-header buffer workspace new-length + dst-end 0 new-length nil)) (setf (fill-pointer buffer) dst-end)) - (%byte-blt string start - workspace offset-current offset-dst-end))) + (replace workspace string + :start1 offset-current :start2 start :end2 end))) dst-end)) (defun fill-pointer-misc (stream operation &optional arg1 arg2) @@ -1525,7 +1528,7 @@ (defun case-frob-upcase-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1550,7 +1553,7 @@ (defun case-frob-downcase-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1583,7 +1586,7 @@ (defun case-frob-capitalize-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1628,7 +1631,7 @@ (defun case-frob-capitalize-aux-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1673,7 +1676,7 @@ (defun case-frob-capitalize-first-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1742,8 +1745,7 @@ (with-array-data ((data seq) (offset-start start) (offset-end end)) (typecase data ((or (simple-array (unsigned-byte 8) (*)) - (simple-array (signed-byte 8) (*)) - simple-string) + (simple-array (signed-byte 8) (*))) (let* ((numbytes (- end start)) (bytes-read (read-n-bytes stream data offset-start numbytes nil))) diff -urN sbcl/src/code/sysmacs.lisp sbcl.char/src/code/sysmacs.lisp --- sbcl/src/code/sysmacs.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/sysmacs.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -105,7 +105,7 @@ (defmacro prepare-for-fast-read-char (stream &body forms) `(let* ((%frc-stream% ,stream) (%frc-method% (ansi-stream-in %frc-stream%)) - (%frc-buffer% (ansi-stream-in-buffer %frc-stream%)) + (%frc-buffer% (ansi-stream-cin-buffer %frc-stream%)) (%frc-index% (ansi-stream-in-index %frc-stream%))) (declare (type index %frc-index%) (type ansi-stream %frc-stream%)) @@ -126,7 +126,7 @@ (prog1 (fast-read-char-refill %frc-stream% ,eof-error-p ,eof-value) (setq %frc-index% (ansi-stream-in-index %frc-stream%)))) (t - (prog1 (code-char (aref %frc-buffer% %frc-index%)) + (prog1 (aref %frc-buffer% %frc-index%) (incf %frc-index%))))) ;;;; And these for the fasloader... diff -urN sbcl/src/code/target-alieneval.lisp sbcl.char/src/code/target-alieneval.lisp --- sbcl/src/code/target-alieneval.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-alieneval.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -146,8 +146,8 @@ ',initial-value ,datap)))) `((symbol-macrolet - ((,symbol (%heap-alien ',info))) - ,@body)))) + ((,symbol (%heap-alien ',info))) + ,@body)))) (:local (/show0 ":LOCAL case") (let ((var (gensym)) diff -urN sbcl/src/code/target-c-call.lisp sbcl.char/src/code/target-c-call.lisp --- sbcl/src/code/target-c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-c-call.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -35,10 +35,14 @@ (define-alien-type-translator void () (parse-alien-type '(values) (sb!kernel:make-null-lexenv))) +;;; FIXME: %NATURALIZE-C-STRING (and the UTF8 siblings below) would +;;; appear to be vulnerable to the lisp string moving from underneath +;;; them if the world undergoes a GC, possibly triggered by another +;;; thread. Ugh. (defun %naturalize-c-string (sap) (declare (type system-area-pointer sap)) (locally - (declare (optimize (speed 3) (safety 0))) + (declare (optimize (speed 3) (safety 0))) (let ((length (loop for offset of-type fixnum upfrom 0 until (zerop (sap-ref-8 sap offset)) finally (return offset)))) @@ -48,3 +52,98 @@ sb!vm:n-word-bits) (* length sb!vm:n-byte-bits)) result)))) + +(defun %naturalize-utf8-string (sap) + (declare (type system-area-pointer sap)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let ((length (do* ((offset 0) + (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset)) + (index 0 (1+ index))) + ((zerop byte) index) + (declare (type fixnum offset index)) + (cond + ;; FIXME: Here, and below, we don't defend + ;; against malformed utf-8 with any degree of + ;; rigour. + ((< byte #x80) (incf offset)) + ((< byte #xe0) (incf offset 2)) + ((< byte #xf0) (incf offset 3)) + (t (incf offset 4)))))) + (let ((result (make-string length :element-type 'character))) + (do* ((offset 0) + (byte (sap-ref-8 sap offset) (sap-ref-8 sap offset)) + (index 0 (1+ index))) + ((>= index length) result) + (declare (type fixnum offset index)) + (setf (char result index) + (cond + ((< byte #x80) + (prog1 (code-char byte) (incf offset))) + ((< byte #xe0) + (prog1 (code-char (dpb byte (byte 5 6) + (sap-ref-8 sap (1+ offset)))) + (incf offset 2))) + ((< byte #xf0) + (prog1 (code-char + (dpb byte (byte 4 12) + (dpb (sap-ref-8 sap (1+ offset)) (byte 6 6) + (sap-ref-8 sap (+ 2 offset))))) + (incf offset 3))) + (t + (prog1 + (code-char + (dpb byte (byte 3 18) + (dpb (sap-ref-8 sap (1+ offset)) (byte 6 12) + (dpb (sap-ref-8 sap (+ 2 offset)) (byte 6 6) + (sap-ref-8 sap (+ 3 offset)))))) + (incf offset 4)))))))))) + +(defun %deport-utf8-string (string) + (declare (type simple-string string)) + (locally + (declare (optimize (speed 3) (safety 0))) + (let ((length (1+ (do* ((offset 0) + (length (length string)) + (index 0 (1+ index))) + ((= index length) offset) + (declare (type fixnum offset)) + (let ((bits (char-code (char string index)))) + (cond + ((< bits #x80) (incf offset 1)) + ((< bits #x800) (incf offset 2)) + ((< bits #x10000) (incf offset 3)) + (t (incf offset 4)))))))) + (let ((vector (make-array length :element-type '(unsigned-byte 8) + :initial-element 0))) + (do* ((offset 0) + (length (length string)) + (index 0 (1+ index))) + ((= index length) vector) + (declare (type fixnum offset)) + (let ((bits (char-code (char string index)))) + (cond + ((< bits #x80) + (setf (aref vector offset) bits) + (incf offset)) + ((< bits #x800) + (setf (aref vector offset) (logior #xc0 (ldb (byte 5 6) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 2)) + ((< bits #x10000) + (setf (aref vector offset) (logior #xe0 (ldb (byte 4 12) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 6) bits))) + (setf (aref vector (+ offset 2)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 3)) + (t + (setf (aref vector offset) (logior #xf0 (ldb (byte 3 18) bits))) + (setf (aref vector (1+ offset)) + (logior #x80 (ldb (byte 6 12) bits))) + (setf (aref vector (+ offset 2)) + (logior #x80 (ldb (byte 6 6) bits))) + (setf (aref vector (+ offset 3)) + (logior #x80 (ldb (byte 6 0) bits))) + (incf offset 4))))))))) diff -urN sbcl/src/code/target-char.lisp sbcl.char/src/code/target-char.lisp --- sbcl/src/code/target-char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-char.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -32,6 +32,27 @@ (deftype char-code () `(integer 0 (,char-code-limit))) +(defvar *character-database*) + +(macrolet ((frob () + (with-open-file (stream (merge-pathnames + (make-pathname + :directory + '(:relative :up :up "output") + :name "ucd" + :type "dat") + sb!xc:*compile-file-pathname*) + :direction :input + :element-type '(unsigned-byte 8)) + (let* ((length (file-length stream)) + (array (make-array length + :element-type '(unsigned-byte 8)))) + (read-sequence array stream) + `(defun !character-database-cold-init () + (setq *character-database* ',array)))))) + (frob)) +#+sb-xc-host (!character-database-cold-init) + ;;; This is the alist of (character-name . character) for characters ;;; with long names. The first name in this list for a given character ;;; is used on typeout and is the preferred form for input. @@ -40,8 +61,10 @@ (dolist (code char-names-list) (destructuring-bind (ccode names) code (dolist (name names) - (results (cons name (code-char ccode)))))) - `(defparameter *char-name-alist* ',(results))))) + (results (cons name ccode))))) + `(defparameter *char-name-alist* + (mapcar (lambda (x) (cons (car x) (code-char (cdr x)))) + ',(results)))))) ;; Note: The *** markers here indicate character names which are ;; required by the ANSI specification of #'CHAR-NAME. For the others, ;; we prefer the ASCII standard name. @@ -78,15 +101,76 @@ (#x1E ("Rs" "^^")) (#x1F ("Us" "^_")) (#x20 ("Space" "Sp")) ; *** See Note above. - (#x7f ("Rubout" "Delete" "Del"))))) ; *** See Note above. + (#x7f ("Rubout" "Delete" "Del")) + (#x80 ("C80")) + (#x81 ("C81")) + (#x82 ("Break-Permitted")) + (#x83 ("No-Break-Permitted")) + (#x84 ("C84")) + (#x85 ("Next-Line")) + (#x86 ("Start-Selected-Area")) + (#x87 ("End-Selected-Area")) + (#x88 ("Character-Tabulation-Set")) + (#x89 ("Character-Tabulation-With-Justification")) + (#x8A ("Line-Tabulation-Set")) + (#x8B ("Partial-Line-Forward")) + (#x8C ("Partial-Line-Backward")) + (#x8D ("Reverse-Linefeed")) + (#x8E ("Single-Shift-Two")) + (#x8F ("Single-Shift-Three")) + (#x90 ("Device-Control-String")) + (#x91 ("Private-Use-One")) + (#x92 ("Private-Use-Two")) + (#x93 ("Set-Transmit-State")) + (#x94 ("Cancel-Character")) + (#x95 ("Message-Waiting")) + (#x96 ("Start-Guarded-Area")) + (#x97 ("End-Guarded-Area")) + (#x98 ("Start-String")) + (#x99 ("C99")) + (#x9A ("Single-Character-Introducer")) + (#x9B ("Control-Sequence-Introducer")) + (#x9C ("String-Terminator")) + (#x9D ("Operating-System-Command")) + (#x9E ("Privacy-Message")) + (#x9F ("Application-Program-Command"))))) ; *** See Note above. ;;;; accessor functions +;; (* 8 186) => 1488 +;; (+ 1488 (ash #x110000 -8)) => 5840 +(defun ucd-index (char) + (let* ((cp (char-code char)) + (cp-high (ash cp -8)) + (page (aref *character-database* (+ 1488 cp-high)))) + (+ 5840 (ash page 10) (ash (ldb (byte 8 0) cp) 2)))) + +(defun ucd-value-0 (char) + (aref *character-database* (ucd-index char))) + +(defun ucd-value-1 (char) + (let ((index (ucd-index char))) + (dpb (aref *character-database* (+ index 3)) + (byte 8 16) + (dpb (aref *character-database* (+ index 2)) + (byte 8 8) + (aref *character-database* (1+ index)))))) + +(defun ucd-general-category (char) + (aref *character-database* (* 8 (ucd-value-0 char)))) + +(defun ucd-decimal-digit (char) + (let ((decimal-digit (aref *character-database* + (+ 3 (* 8 (ucd-value-0 char)))))) + (when (< decimal-digit 10) + decimal-digit))) + (defun char-code (char) #!+sb-doc "Return the integer code of CHAR." + ;; FIXME: do we actually need this? (etypecase char - (base-char (char-code (truly-the base-char char))))) + (character (char-code (truly-the character char))))) (defun char-int (char) #!+sb-doc @@ -156,41 +240,34 @@ "The argument must be a character object. GRAPHIC-CHAR-P returns T if the argument is a printing character (space through ~ in ASCII), otherwise returns NIL." - (and (typep char 'base-char) - (< 31 - (char-code (the base-char char)) - 127))) + (let ((n (char-code char))) + (or (< 31 n 127) + (< 159 n)))) (defun alpha-char-p (char) #!+sb-doc "The argument must be a character object. ALPHA-CHAR-P returns T if the argument is an alphabetic character, A-Z or a-z; otherwise NIL." - (let ((m (char-code char))) - (or (< 64 m 91) (< 96 m 123)))) + (< (ucd-general-category char) 5)) (defun upper-case-p (char) #!+sb-doc "The argument must be a character object; UPPER-CASE-P returns T if the argument is an upper-case character, NIL otherwise." - (< 64 - (char-code char) - 91)) + (= (ucd-value-0 char) 0)) (defun lower-case-p (char) #!+sb-doc "The argument must be a character object; LOWER-CASE-P returns T if the argument is a lower-case character, NIL otherwise." - (< 96 - (char-code char) - 123)) + (= (ucd-value-0 char) 1)) (defun both-case-p (char) #!+sb-doc "The argument must be a character object. BOTH-CASE-P returns T if the argument is an alphabetic character and if the character exists in both upper and lower case. For ASCII, this is the same as ALPHA-CHAR-P." - (let ((m (char-code char))) - (or (< 64 m 91) (< 96 m 123)))) + (< (ucd-value-0 char) 2)) (defun digit-char-p (char &optional (radix 10.)) #!+sb-doc @@ -208,14 +285,17 @@ ;; Also check lower case a - z. ((and (>= (setq m (- m 32)) 10) (< m radix)) m) ;; Else, fail. - (t nil)))) + (t (let ((number (ucd-decimal-digit char))) + (when (and number (< number radix)) + number)))))) (defun alphanumericp (char) #!+sb-doc "Given a character-object argument, ALPHANUMERICP returns T if the argument is either numeric or alphabetic." - (let ((m (char-code char))) - (or (< 47 m 58) (< 64 m 91) (< 96 m 123)))) + (let ((gc (ucd-general-category char))) + (or (< gc 5) + (= gc 12)))) (defun char= (character &rest more-characters) #!+sb-doc @@ -279,8 +359,11 @@ ;;; which loses font, bits, and case info. (defmacro equal-char-code (character) - `(let ((ch (char-code ,character))) - (if (< 96 ch 123) (- ch 32) ch))) + (let ((ch (gensym))) + `(let ((,ch ,character)) + (if (= (ucd-value-0 ,ch) 0) + (ucd-value-1 ,ch) + (char-code ,ch))))) (defun char-equal (character &rest more-characters) #!+sb-doc @@ -354,16 +437,17 @@ (defun char-upcase (char) #!+sb-doc - "Return CHAR converted to upper-case if that is possible." - (if (lower-case-p char) - (code-char (- (char-code char) 32)) + "Return CHAR converted to upper-case if that is possible. Don't convert + lowercase eszet (U+DF)." + (if (= (ucd-value-0 char) 1) + (code-char (ucd-value-1 char)) char)) (defun char-downcase (char) #!+sb-doc "Return CHAR converted to lower-case if that is possible." - (if (upper-case-p char) - (code-char (+ (char-code char) 32)) + (if (= (ucd-value-0 char) 0) + (code-char (ucd-value-1 char)) char)) (defun digit-char (weight &optional (radix 10)) diff -urN sbcl/src/code/target-format.lisp sbcl.char/src/code/target-format.lisp --- sbcl/src/code/target-format.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-format.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -76,8 +76,11 @@ (multiple-value-bind (new-directives new-args) (let* ((character (format-directive-character directive)) (function - (svref *format-directive-interpreters* - (char-code character))) + (typecase character + (base-char + (svref *format-directive-interpreters* + (char-code character))) + (character nil))) (*default-format-error-offset* (1- (format-directive-end directive)))) (unless function diff -urN sbcl/src/code/target-load.lisp sbcl.char/src/code/target-load.lisp --- sbcl/src/code/target-load.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-load.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -80,14 +80,20 @@ :element-type '(unsigned-byte 8)) (load-as-fasl stream verbose print))) (t - (let ((first-line (with-open-file (stream truename :direction :input) - (read-line stream nil))) - (fhsss *fasl-header-string-start-string*)) + (let* ((fhsss *fasl-header-string-start-string*) + (first-line (make-array (length fhsss) + :element-type '(unsigned-byte 8))) + (read-length + (with-open-file (stream truename + :direction :input + :element-type '(unsigned-byte 8)) + (read-sequence first-line stream)))) (cond - ((and first-line - (>= (length (the simple-string first-line)) - (length fhsss)) - (string= first-line fhsss :end1 (length fhsss))) + ((and (= read-length (length fhsss)) + (do ((i 0 (1+ i))) + ((= i read-length) t) + (when (/= (char-code (aref fhsss i)) (aref first-line i)) + (return)))) (internal-load pathname truename if-does-not-exist verbose print :binary)) (t @@ -289,4 +295,9 @@ (defun !loader-cold-init () (/show0 "/!loader-cold-init") (dolist (routine *!initial-assembler-routines*) - (setf (gethash (car routine) *assembler-routines*) (cdr routine)))) + (setf (gethash (car routine) *assembler-routines*) (cdr routine))) + (dolist (symbol *!initial-foreign-symbols*) + (setf (gethash (car symbol) *static-foreign-symbols*) (cdr symbol)))) + +(declaim (ftype (function (string) (unsigned-byte #.sb!vm:n-machine-word-bits)) + foreign-symbol-address-as-integer)) diff -urN sbcl/src/code/target-pathname.lisp sbcl.char/src/code/target-pathname.lisp --- sbcl/src/code/target-pathname.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-pathname.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -44,7 +44,7 @@ (let ((namestring (handler-case (namestring pathname) (error nil)))) (if namestring - (format stream "#P~S" namestring) + (format stream "#P~S" (coerce namestring '(simple-array character (*)))) (print-unreadable-object (pathname stream :type t) (format stream "~@<(with no namestring) ~_:HOST ~S ~_:DEVICE ~S ~_:DIRECTORY ~S ~ @@ -202,12 +202,12 @@ (or (eq thing wild) (eq wild :wild) (typecase thing - (simple-base-string + (simple-string ;; String is matched by itself, a matching pattern or :WILD. (typecase wild (pattern (values (pattern-matches wild thing))) - (simple-base-string + (simple-string (string= thing wild)))) (pattern ;; A pattern is only matched by an identical pattern. @@ -308,7 +308,7 @@ (dolist (x in) (when (check-for pred x) (return t)))) - (simple-base-string + (simple-string (dotimes (i (length in)) (when (funcall pred (schar in i)) (return t)))) @@ -319,7 +319,7 @@ (make-pattern (mapcar (lambda (piece) (typecase piece - (simple-base-string + (simple-string (funcall fun piece)) (cons (case (car piece) @@ -333,7 +333,7 @@ (pattern-pieces thing)))) (list (mapcar fun thing)) - (simple-base-string + (simple-string (funcall fun thing)) (t thing)))) @@ -385,6 +385,7 @@ (declare (type pathname-designator pathname) (type pathname-designator defaults) (values pathname)) + (/show0 "in MERGE-PATHNAMES") (with-pathname (defaults defaults) (let ((pathname (let ((*default-pathname-defaults* defaults)) (pathname pathname)))) @@ -702,7 +703,7 @@ ;;; If NAMESTR begins with a colon-terminated, defined, logical host, ;;; then return that host, otherwise return NIL. (defun extract-logical-host-prefix (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end) (values (or logical-host null))) (let ((colon-pos (position #\: namestr :start start :end end))) @@ -924,7 +925,7 @@ (defun substitute-into (pattern subs diddle-case) (declare (type pattern pattern) (type list subs) - (values (or simple-base-string pattern) list)) + (values (or simple-string pattern) list)) (let ((in-wildcard nil) (pieces nil) (strings nil)) @@ -1157,13 +1158,14 @@ (let ((word (string-upcase word))) (dotimes (i (length word)) (let ((ch (schar word i))) - (unless (or (alpha-char-p ch) (digit-char-p ch) (char= ch #\-)) + (unless (and (typep ch 'standard-char) + (or (alpha-char-p ch) (digit-char-p ch) (char= ch #\-))) (error 'namestring-parse-error :complaint "logical namestring character which ~ is not alphanumeric or hyphen:~% ~S" :args (list ch) :namestring word :offset i)))) - word)) + (coerce word 'base-string))) ;;; Given a logical host or string, return a logical host. If ERROR-P ;;; is NIL, then return NIL when no such host exists. @@ -1257,7 +1259,7 @@ ;;; Break up a logical-namestring, always a string, into its ;;; constituent parts. (defun parse-logical-namestring (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end)) (collect ((directory)) (let ((host nil) @@ -1418,7 +1420,7 @@ (when type-supplied (unless name (error "cannot specify the type without a file: ~S" pathname)) - (when (typep type 'simple-base-string) + (when (typep type 'simple-string) (when (position #\. type) (error "type component can't have a #\. inside: ~S" pathname))) (strings ".") @@ -1524,12 +1526,9 @@ (t (translate-logical-pathname (pathname pathname))))) (defvar *logical-pathname-defaults* - (%make-logical-pathname (make-logical-host :name "BOGUS") - :unspecific - nil - nil - nil - nil)) + (%make-logical-pathname + (make-logical-host :name (logical-word-or-lose "BOGUS")) + :unspecific nil nil nil nil)) (defun load-logical-pathname-translations (host) #!+sb-doc diff -urN sbcl/src/code/target-thread.lisp sbcl.char/src/code/target-thread.lisp --- sbcl/src/code/target-thread.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-thread.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -58,7 +58,7 @@ (declaim (inline waitqueue-data-address mutex-value-address)) (defstruct waitqueue - (name nil :type (or null simple-base-string)) + (name nil :type (or null simple-string)) (lock 0) (data nil)) diff -urN sbcl/src/code/target-type.lisp sbcl.char/src/code/target-type.lisp --- sbcl/src/code/target-type.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-type.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -33,6 +33,7 @@ named-type member-type array-type + character-set-type built-in-classoid cons-type) (values (%typep obj type) t)) @@ -191,6 +192,8 @@ :specialized-element-type etype))) (cons (make-cons-type *universal-type* *universal-type*)) + (character + (specifier-type 'character)) (t (classoid-of x)))) diff -urN sbcl/src/code/target-unithread.lisp sbcl.char/src/code/target-unithread.lisp --- sbcl/src/code/target-unithread.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-unithread.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -37,7 +37,7 @@ ;;;; the higher-level locking operations are based on waitqueues (defstruct waitqueue - (name nil :type (or null simple-base-string)) + (name nil :type (or null simple-string)) (lock 0) (data nil)) diff -urN sbcl/src/code/toplevel.lisp sbcl.char/src/code/toplevel.lisp --- sbcl/src/code/toplevel.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/toplevel.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -421,6 +421,7 @@ (probe-init-files (explicitly-specified-init-file-name &rest default-init-file-names) (declare (type list default-init-file-names)) + (/show0 "in PROBE-INIT-FILES") (if explicitly-specified-init-file-name (or (probe-file explicitly-specified-init-file-name) (startup-error "The file ~S was not found." @@ -431,6 +432,7 @@ ;; shared idiom for creating default names for ;; SYSINITish and USERINITish files (init-file-name (maybe-dir-name basename) + (/show0 "in INIT-FILE-NAME") (and maybe-dir-name (concatenate 'string maybe-dir-name "/" basename)))) (let ((sysinit-truename diff -urN sbcl/src/code/typecheckfuns.lisp sbcl.char/src/code/typecheckfuns.lisp --- sbcl/src/code/typecheckfuns.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/typecheckfuns.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -199,7 +199,11 @@ (member-type-p ctype) (numeric-type-p ctype) (array-type-p ctype) - (cons-type-p ctype)))) + (cons-type-p ctype) + (intersection-type-p ctype) + (union-type-p ctype) + (negation-type-p ctype) + (character-range-type-p ctype)))) ;;; Evaluate (at load/execute time) to a function which checks that ;;; its argument is of the specified type. diff -urN sbcl/src/code/typep.lisp sbcl.char/src/code/typep.lisp --- sbcl/src/code/typep.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/typep.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -117,6 +117,14 @@ (and (consp object) (%%typep (car object) (cons-type-car-type type)) (%%typep (cdr object) (cons-type-cdr-type type)))) + (character-set-type + (and (characterp object) + (let ((code (char-code object)) + (pairs (character-set-type-pairs type))) + (dolist (pair pairs nil) + (destructuring-bind (low . high) pair + (when (<= low code high) + (return t))))))) (unknown-type ;; dunno how to do this ANSIly -- WHN 19990413 #+sb-xc-host (error "stub: %%TYPEP UNKNOWN-TYPE in xcompilation host") diff -urN sbcl/src/code/unix.lisp sbcl.char/src/code/unix.lisp --- sbcl/src/code/unix.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/unix.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -811,7 +811,7 @@ ;;; paths have been converted to absolute paths, so we don't need to ;;; try to handle any more generality than that. (defun unix-resolve-links (pathname) - (declare (type simple-string pathname)) + (declare (type simple-base-string pathname)) (aver (not (relative-unix-pathname? pathname))) (/noshow "entering UNIX-RESOLVE-LINKS") (loop with previous-pathnames = nil do @@ -837,7 +837,7 @@ :from-end t))) (dir (subseq pathname 0 dir-len))) (/noshow dir) - (concatenate 'string dir link)) + (concatenate 'base-string dir link)) link)))) (if (unix-file-kind new-pathname) (setf pathname new-pathname) @@ -853,9 +853,9 @@ (push pathname previous-pathnames)))) (defun unix-simplify-pathname (src) - (declare (type simple-string src)) + (declare (type simple-base-string src)) (let* ((src-len (length src)) - (dst (make-string src-len)) + (dst (make-string src-len :element-type 'base-char)) (dst-len 0) (dots 0) (last-slash nil)) @@ -929,7 +929,8 @@ (position #\/ dst :end last-slash :from-end t))) (if prev-prev-slash (setf dst-len (1+ prev-prev-slash)) - (return-from unix-simplify-pathname "./"))))))) + (return-from unix-simplify-pathname + (coerce "./" 'simple-base-string)))))))) (cond ((zerop dst-len) "./") ((= dst-len src-len) diff -urN sbcl/src/cold/warm.lisp sbcl.char/src/cold/warm.lisp --- sbcl/src/cold/warm.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/cold/warm.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -73,10 +73,13 @@ (make-pathname :directory '(:relative "contrib" :wild-inferiors) :name :wild :type :wild) sys))) + (sb-int:/show "about to set SYS logical pathname translations") (setf (logical-pathname-translations "SYS") `(("SYS:SRC;**;*.*.*" ,src) ("SYS:CONTRIB;**;*.*.*" ,contrib)))) +(sb-int:/show "set SYS logical pathname translations") + ;;; FIXME: CMU CL's pclcom.lisp had extra optional stuff wrapped around ;;; COMPILE-PCL, at least some of which we should probably have too: ;;; @@ -175,7 +178,7 @@ ;; facility, and should be compiled and loaded after ;; our DESCRIBE facility is compiled and loaded. "SRC;PCL;DESCRIBE")) - + (sb-int:/show "at head of DOLIST") (let ((fullname (concatenate 'string "SYS:" stem ".LISP"))) (sb-int:/show "about to compile" fullname) (flet ((report-recompile-restart (stream) diff -urN sbcl/src/compiler/array-tran.lisp sbcl.char/src/compiler/array-tran.lisp --- sbcl/src/compiler/array-tran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/array-tran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -293,8 +293,7 @@ (give-up-ir1-transform "cannot open-code creation of ~S" result-type-spec)) #-sb-xc-host - (unless (csubtypep (ctype-of (sb!vm:saetp-initial-element-default saetp)) - eltype-type) + (unless (ctypep (sb!vm:saetp-initial-element-default saetp) eltype-type) ;; This situation arises e.g. in (MAKE-ARRAY 4 :ELEMENT-TYPE ;; '(INTEGER 1 5)) ANSI's definition of MAKE-ARRAY says "If ;; INITIAL-ELEMENT is not supplied, the consequences of later diff -urN sbcl/src/compiler/assem.lisp sbcl.char/src/compiler/assem.lisp --- sbcl/src/compiler/assem.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/assem.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -27,7 +27,7 @@ ;;; This structure holds the state of the assembler. (defstruct (segment (:copier nil)) ;; the name of this segment (for debugging output and stuff) - (name "unnamed" :type simple-base-string) + (name "unnamed" :type simple-string) ;; Ordinarily this is a vector where instructions are written. If ;; the segment is made invalid (e.g. by APPEND-SEGMENT) then the ;; vector can be replaced by NIL. diff -urN sbcl/src/compiler/dump.lisp sbcl.char/src/compiler/dump.lisp --- sbcl/src/compiler/dump.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/dump.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -192,11 +192,19 @@ (declare (type fasl-output fasl-output)) (unless *cold-load-dump* (let ((handle (gethash x (fasl-output-equal-table fasl-output)))) - (cond (handle - (dump-push handle fasl-output) - t) - (t - nil))))) + (cond + (handle (dump-push handle fasl-output) t) + (t nil))))) +(defun string-check-table (x fasl-output) + (declare (type fasl-output fasl-output) + (type string x)) + (unless *cold-load-dump* + (let ((handle (cdr (assoc + (array-element-type x) + (gethash x (fasl-output-equal-table fasl-output)))))) + (cond + (handle (dump-push handle fasl-output) t) + (t nil))))) ;;; These functions are called after dumping an object to save the ;;; object in the table. The object (also passed in as X) must already @@ -217,7 +225,16 @@ (setf (gethash x (fasl-output-eq-table fasl-output)) handle) (dump-push handle fasl-output))) (values)) - +(defun string-save-object (x fasl-output) + (declare (type fasl-output fasl-output) + (type string x)) + (unless *cold-load-dump* + (let ((handle (dump-pop fasl-output))) + (push (cons (array-element-type x) handle) + (gethash x (fasl-output-equal-table fasl-output))) + (setf (gethash x (fasl-output-eq-table fasl-output)) handle) + (dump-push handle fasl-output))) + (values)) ;;; Record X in File's CIRCULARITY-TABLE unless *COLD-LOAD-DUMP* is ;;; true. This is called on objects that we are about to dump might ;;; have a circular path through them. @@ -370,11 +387,8 @@ (dump-structure x file) (eq-save-object x file)) (array - ;; FIXME: The comment at the head of - ;; DUMP-NON-IMMEDIATE-OBJECT says it's for objects which - ;; we want to save, instead of repeatedly dumping them. - ;; But then we dump arrays here without doing anything - ;; like EQUAL-SAVE-OBJECT. What gives? + ;; DUMP-ARRAY (and its callees) are responsible for + ;; updating the EQ and EQUAL hash tables. (dump-array x file)) (number (unless (equal-check-table x file) @@ -586,7 +600,14 @@ (t (unless *cold-load-dump* (dump-fop 'fop-normal-load file)) - (dump-simple-string (package-name pkg) file) + #+sb-xc-host + (dump-simple-base-string + (coerce (package-name pkg) 'simple-base-string) + file) + #-sb-xc-host + (dump-simple-character-string + (coerce (package-name pkg) '(simple-array character (*))) + file) (dump-fop 'fop-package file) (unless *cold-load-dump* (dump-fop 'fop-maybe-cold-load file)) @@ -717,10 +738,21 @@ (*))) x))) (typecase simple-version + #+sb-xc-host + (simple-string + (unless (string-check-table x file) + (dump-simple-base-string simple-version file) + (string-save-object x file))) + #-sb-xc-host (simple-base-string - (unless (equal-check-table x file) - (dump-simple-string simple-version file) - (equal-save-object x file))) + (unless (string-check-table x file) + (dump-simple-base-string simple-version file) + (string-save-object x file))) + #-sb-xc-host + ((simple-array character (*)) + (unless (string-check-table x file) + (dump-simple-character-string simple-version file) + (string-save-object x file))) (simple-vector (dump-simple-vector simple-version file) (eq-save-object x file)) @@ -889,23 +921,31 @@ ;;; Dump characters and string-ish things. -(defun dump-character (ch file) - (dump-fop 'fop-short-character file) - (dump-byte (char-code ch) file)) - -;;; a helper function shared by DUMP-SIMPLE-STRING and DUMP-SYMBOL -(defun dump-characters-of-string (s fasl-output) - (declare (type string s) (type fasl-output fasl-output)) +(defun dump-character (char file) + (let ((code (sb!xc:char-code char))) + (cond + ((< code 256) + (dump-fop 'fop-short-character file) + (dump-byte code file)) + (t + (dump-fop 'fop-character file) + (dump-word code file))))) + +(defun dump-base-chars-of-string (s fasl-output) + (declare #+sb-xc-host (type simple-string s) + #-sb-xc-host (type simple-base-string s) + (type fasl-output fasl-output)) (dovector (c s) - (dump-byte (char-code c) fasl-output)) + (dump-byte (sb!xc:char-code c) fasl-output)) (values)) + ;;; Dump a SIMPLE-BASE-STRING. -;;; FIXME: should be called DUMP-SIMPLE-BASE-STRING then -(defun dump-simple-string (s file) - (declare (type simple-base-string s)) - (dump-fop* (length s) fop-small-string fop-string file) - (dump-characters-of-string s file) +(defun dump-simple-base-string (s file) + #+sb-xc-host (declare (type simple-string s)) + #-sb-xc-host (declare (type simple-base-string s)) + (dump-fop* (length s) fop-small-base-string fop-base-string file) + (dump-base-chars-of-string s file) (values)) ;;; If we get here, it is assumed that the symbol isn't in the table, @@ -955,7 +995,8 @@ file) (dump-word pname-length file))) - (dump-characters-of-string pname file) + #+sb-xc-host (dump-base-chars-of-string pname file) + #-sb-xc-host (dump-characters-of-string pname file) (unless *cold-load-dump* (setf (gethash s (fasl-output-eq-table file)) diff -urN sbcl/src/compiler/fndb.lisp sbcl.char/src/compiler/fndb.lisp --- sbcl/src/compiler/fndb.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/fndb.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -421,7 +421,7 @@ (movable foldable flushable)) (defknown name-char (string-designator) (or character null) (movable foldable flushable)) -(defknown code-char (char-code) base-char +(defknown code-char (char-code) character ;; By suppressing constant folding on CODE-CHAR when the ;; cross-compiler is running in the cross-compilation host vanilla ;; ANSI Common Lisp, we can use CODE-CHAR expressions to delay until @@ -1192,7 +1192,7 @@ :rename-and-delete :overwrite :append :supersede nil)) (:if-does-not-exist (member :error :create nil)) - (:external-format (member :default))) + (:external-format keyword)) (or stream null)) (defknown rename-file (pathname-designator filename) diff -urN sbcl/src/compiler/generic/early-objdef.lisp sbcl.char/src/compiler/generic/early-objdef.lisp --- sbcl/src/compiler/generic/early-objdef.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/early-objdef.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -136,7 +136,7 @@ return-pc-header ; 00110110 value-cell-header ; 00111010 symbol-header ; 00111110 - base-char ; 01000010 + character ; 01000010 sap ; 01000110 unbound-marker ; 01001010 weak-pointer ; 01001110 @@ -167,6 +167,7 @@ simple-array-unsigned-byte-16 ; 10011110 simple-array-nil ; 10100010 simple-base-string ; 10100110 + simple-character-string simple-bit-vector ; 10101010 simple-vector ; 10101110 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) @@ -195,16 +196,19 @@ simple-array ; 11011110 complex-vector-nil ; 11100010 complex-base-string ; 11100110 + complex-character-string complex-bit-vector ; 11101010 complex-vector ; 11101110 complex-array ; 11110010 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused12 ; 11110110 + #| #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused13 ; 11111010 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused14 ; 11111110 + |# ) ;;; the different vector subtypes diff -urN sbcl/src/compiler/generic/early-type-vops.lisp sbcl.char/src/compiler/generic/early-type-vops.lisp --- sbcl/src/compiler/generic/early-type-vops.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/early-type-vops.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -11,7 +11,7 @@ (in-package "SB!VM") (defparameter *immediate-types* - (list unbound-marker-widetag base-char-widetag)) + (list unbound-marker-widetag character-widetag)) (defparameter *fun-header-widetags* (list funcallable-instance-header-widetag diff -urN sbcl/src/compiler/generic/genesis.lisp sbcl.char/src/compiler/generic/genesis.lisp --- sbcl/src/compiler/generic/genesis.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/genesis.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -444,7 +444,7 @@ type))) (defun make-character-descriptor (data) - (make-other-immediate-descriptor data sb!vm:base-char-widetag)) + (make-other-immediate-descriptor data sb!vm:character-widetag)) (defun descriptor-beyond (des offset type) (let* ((low (logior (+ (logandc2 (descriptor-low des) sb!vm:lowtag-mask) @@ -608,9 +608,10 @@ ;;;; copying simple objects into the cold core -(defun string-to-core (string &optional (gspace *dynamic*)) +(defun base-string-to-core (string &optional (gspace *dynamic*)) #!+sb-doc - "Copy string into the cold core and return a descriptor to it." + "Copy STRING (which must only contain STANDARD-CHARs) into the cold +core and return a descriptor to it." ;; (Remember that the system convention for storage of strings leaves an ;; extra null byte at the end to aid in call-out to C.) (let* ((length (length string)) @@ -808,7 +809,7 @@ (make-fixnum-descriptor 0)) (write-wordindexed symbol sb!vm:symbol-plist-slot *nil-descriptor*) (write-wordindexed symbol sb!vm:symbol-name-slot - (string-to-core name *dynamic*)) + (base-string-to-core name *dynamic*)) (write-wordindexed symbol sb!vm:symbol-package-slot *nil-descriptor*) symbol)) @@ -1194,7 +1195,7 @@ ;; because that's the way CMU CL did it; I'm ;; not sure whether there's an underlying ;; reason. -- WHN 1990826 - (string-to-core "NIL" *dynamic*)) + (base-string-to-core "NIL" *dynamic*)) (write-wordindexed des (+ 1 sb!vm:symbol-package-slot) result) @@ -1279,7 +1280,7 @@ (let* ((cold-package (car cold-package-symbols-entry)) (symbols (cdr cold-package-symbols-entry)) (shadows (package-shadowing-symbols cold-package)) - (documentation (string-to-core (documentation cold-package t))) + (documentation (base-string-to-core (documentation cold-package t))) (internal *nil-descriptor*) (external *nil-descriptor*) (imported-internal *nil-descriptor*) @@ -1359,7 +1360,7 @@ (res *nil-descriptor*)) (dolist (u (package-use-list pkg)) (when (assoc u *cold-package-symbols*) - (cold-push (string-to-core (package-name u)) use))) + (cold-push (base-string-to-core (package-name u)) use))) (let* ((pkg-name (package-name pkg)) ;; Make the package nickname lists for the standard packages ;; be the minimum specified by ANSI, regardless of what value @@ -1380,7 +1381,7 @@ (t (package-nicknames pkg))))) (dolist (warm-nickname warm-nicknames) - (cold-push (string-to-core warm-nickname) cold-nicknames))) + (cold-push (base-string-to-core warm-nickname) cold-nicknames))) (cold-push (number-to-core (truncate (package-internal-symbol-count pkg) 0.8)) @@ -1397,7 +1398,7 @@ (cold-push use res) (cold-push (cold-intern :use) res) - (cold-push (string-to-core (package-name pkg)) res) + (cold-push (base-string-to-core (package-name pkg)) res) res)) ;;;; functions and fdefinition objects @@ -1850,7 +1851,7 @@ (defun foreign-symbols-to-core () (let ((result *nil-descriptor*)) (maphash (lambda (symbol value) - (cold-push (cold-cons (string-to-core symbol) + (cold-push (cold-cons (base-string-to-core symbol) (number-to-core value)) result)) *cold-foreign-symbol-table*) @@ -2098,12 +2099,16 @@ ;;;; cold fops for loading vectors -(clone-cold-fop (fop-string) - (fop-small-string) +(clone-cold-fop (fop-base-string) + (fop-small-base-string) (let* ((len (clone-arg)) (string (make-string len))) (read-string-as-bytes *fasl-input-stream* string) - (string-to-core string))) + (base-string-to-core string))) + +(clone-cold-fop (fop-character-string) + (fop-small-character-string) + (bug "CHARACTER-STRING dumped by cross-compiler.")) (clone-cold-fop (fop-vector) (fop-small-vector) diff -urN sbcl/src/compiler/generic/interr.lisp sbcl.char/src/compiler/generic/interr.lisp --- sbcl/src/compiler/generic/interr.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/interr.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -61,6 +61,10 @@ "Object is not of type STRING.") (object-not-base-string "Object is not of type BASE-STRING.") + (object-not-vector-nil + "Object is not of type (VECTOR NIL).") + (object-not-character-string + "Object is not of type (VECTOR CHARACTER).") (object-not-bit-vector "Object is not of type BIT-VECTOR.") (object-not-array @@ -130,8 +134,8 @@ "Object is not a WEAK-POINTER.") (object-not-instance "Object is not a INSTANCE.") - (object-not-base-char - "Object is not of type BASE-CHAR.") + (object-not-character + "Object is not a CHARACTER.") (nil-fun-returned "A function with declared result type NIL returned.") (nil-array-accessed diff -urN sbcl/src/compiler/generic/late-type-vops.lisp sbcl.char/src/compiler/generic/late-type-vops.lisp --- sbcl/src/compiler/generic/late-type-vops.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/late-type-vops.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -66,7 +66,8 @@ (!define-type-vops simple-string-p check-simple-string nil object-not-simple-string-error - (simple-base-string-widetag simple-array-nil-widetag)) + (simple-character-string-widetag + simple-base-string-widetag simple-array-nil-widetag)) (macrolet ((define-simple-array-type-vops () @@ -83,9 +84,9 @@ *specialized-array-element-type-properties*)))) (define-simple-array-type-vops)) -(!define-type-vops base-char-p check-base-char base-char - object-not-base-char-error - (base-char-widetag)) +(!define-type-vops characterp check-character character + object-not-character-error + (character-widetag)) (!define-type-vops system-area-pointer-p check-system-area-pointer system-area-pointer @@ -109,11 +110,14 @@ (funcallable-instance-header-widetag)) (!define-type-vops array-header-p nil nil nil - (simple-array-widetag complex-base-string-widetag complex-bit-vector-widetag + (simple-array-widetag + complex-character-string-widetag + complex-base-string-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (!define-type-vops stringp check-string nil object-not-string-error - (simple-base-string-widetag complex-base-string-widetag + (simple-character-string-widetag complex-character-string-widetag + simple-base-string-widetag complex-base-string-widetag simple-array-nil-widetag complex-vector-nil-widetag)) (!define-type-vops base-string-p check-base-string nil object-not-base-string-error @@ -127,6 +131,10 @@ object-not-vector-nil-error (simple-array-nil-widetag complex-vector-nil-widetag)) +(!define-type-vops character-string-p check-character-string nil + object-not-character-string-error + (simple-character-string-widetag complex-character-string-widetag)) + (!define-type-vops vectorp check-vector nil object-not-vector-error (complex-vector-widetag . #.(append diff -urN sbcl/src/compiler/generic/primtype.lisp sbcl.char/src/compiler/generic/primtype.lisp --- sbcl/src/compiler/generic/primtype.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/primtype.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -66,7 +66,7 @@ ;;; other primitive immediate types (/show0 "primtype.lisp 68") -(!def-primitive-type base-char (base-char-reg any-reg)) +(!def-primitive-type character (character-reg any-reg)) ;;; primitive pointer types (/show0 "primtype.lisp 73") @@ -304,6 +304,13 @@ (ecase (named-type-name type) ((t *) (values *backend-t-primitive-type* t)) ((nil) (any)))) + (character-set-type + (let ((pairs (character-set-type-pairs type))) + (if (and (= (length pairs) 1) + (= (caar pairs) 0) + (= (cdar pairs) (1- sb!xc:char-code-limit))) + (exactly character) + (part-of character)))) (built-in-classoid (case (classoid-name type) ((complex function instance @@ -311,8 +318,6 @@ (values (primitive-type-or-lose (classoid-name type)) t)) (funcallable-instance (part-of function)) - (base-char - (exactly base-char)) (cons-type (part-of list)) (t diff -urN sbcl/src/compiler/generic/vm-array.lisp sbcl.char/src/compiler/generic/vm-array.lisp --- sbcl/src/compiler/generic/vm-array.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-array.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -68,9 +68,13 @@ ;; (SIMPLE-BASE-STRINGs are stored with an extra ;; trailing #\NULL for convenience in calling out ;; to C.) - :n-pad-elements 1 + :n-pad-elements 1 :complex-typecode #.sb!vm:complex-base-string-widetag :importance 17) + (character ,(code-char 0) 32 simple-character-string + :n-pad-elements 1 + :complex-typecode #.sb!vm:complex-character-string-widetag + :importance 17) (single-float 0.0f0 32 simple-array-single-float :importance 6) (double-float 0.0d0 64 simple-array-double-float diff -urN sbcl/src/compiler/generic/vm-fndb.lisp sbcl.char/src/compiler/generic/vm-fndb.lisp --- sbcl/src/compiler/generic/vm-fndb.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-fndb.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -22,6 +22,7 @@ complex-vector-p base-char-p %standard-char-p %instancep base-string-p simple-base-string-p + character-string-p simple-character-string-p array-header-p simple-array-p simple-array-nil-p vector-nil-p simple-array-unsigned-byte-2-p diff -urN sbcl/src/compiler/generic/vm-tran.lisp sbcl.char/src/compiler/generic/vm-tran.lisp --- sbcl/src/compiler/generic/vm-tran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-tran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -36,6 +36,7 @@ ;;;; character support ;;; In our implementation there are really only BASE-CHARs. +#+nil (define-source-transform characterp (obj) `(base-char-p ,obj)) @@ -48,6 +49,7 @@ (give-up-ir1-transform) `(etypecase string ((simple-array character (*)) (data-vector-ref string index)) + ((simple-array base-char (*)) (data-vector-ref string index)) ((simple-array nil (*)) (data-vector-ref string index)))))) (deftransform hairy-data-vector-ref ((array index) (array t) *) @@ -98,6 +100,8 @@ `(etypecase string ((simple-array character (*)) (data-vector-set string index new-value)) + ((simple-array base-char (*)) + (data-vector-set string index new-value)) ((simple-array nil (*)) (data-vector-set string index new-value)))))) diff -urN sbcl/src/compiler/generic/vm-typetran.lisp sbcl.char/src/compiler/generic/vm-typetran.lisp --- sbcl/src/compiler/generic/vm-typetran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-typetran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -17,9 +17,9 @@ ;;; These type predicates are used to implement simple cases of TYPEP. ;;; They shouldn't be used explicitly. -(define-type-predicate base-char-p base-char) (define-type-predicate base-string-p base-string) (define-type-predicate bignump bignum) +(define-type-predicate character-string-p (vector character)) (define-type-predicate complex-double-float-p (complex double-float)) (define-type-predicate complex-single-float-p (complex single-float)) #!+long-float @@ -93,6 +93,7 @@ (define-type-predicate simple-array-complex-long-float-p (simple-array (complex long-float) (*))) (define-type-predicate simple-base-string-p simple-base-string) +(define-type-predicate simple-character-string-p (simple-array character (*))) (define-type-predicate system-area-pointer-p system-area-pointer) (define-type-predicate unsigned-byte-32-p (unsigned-byte 32)) (define-type-predicate signed-byte-32-p (signed-byte 32)) diff -urN sbcl/src/compiler/ir1tran.lisp sbcl.char/src/compiler/ir1tran.lisp --- sbcl/src/compiler/ir1tran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ir1tran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -239,6 +239,7 @@ ;; can't contain other objects (unless (typep value '(or #-sb-xc-host unboxed-array + #+sb-xc-host (simple-array (unsigned-byte 8) (*)) symbol number character diff -urN sbcl/src/compiler/ppc/array.lisp sbcl.char/src/compiler/ppc/array.lisp --- sbcl/src/compiler/ppc/array.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/array.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -110,10 +110,11 @@ (:results (result :scs ,scs)) (:result-types ,element-type))))) (def-data-vector-frobs simple-base-string byte-index - base-char base-char-reg) + character character-reg) + (def-data-vector-frobs simple-character-string word-index + character character-reg) (def-data-vector-frobs simple-vector word-index * descriptor-reg any-reg) - (def-data-vector-frobs simple-array-unsigned-byte-7 byte-index positive-fixnum unsigned-reg) (def-data-vector-frobs simple-array-unsigned-byte-8 byte-index @@ -134,7 +135,6 @@ (def-data-vector-frobs simple-array-signed-byte-32 word-index signed-num signed-reg)) - ;;; Integer vectors whos elements are smaller than a byte. I.e. bit, 2-bit, ;;; and 4-bit vectors. ;;; diff -urN sbcl/src/compiler/ppc/c-call.lisp sbcl.char/src/compiler/ppc/c-call.lisp --- sbcl/src/compiler/ppc/c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/c-call.lisp 2004-10-27 13:34:21.000000000 +0200 @@ -303,7 +303,7 @@ (:translate foreign-symbol-address) (:policy :fast-safe) (:args) - (:arg-types (:constant simple-base-string)) + (:arg-types (:constant simple-string)) (:info foreign-symbol) (:results (res :scs (sap-reg))) (:result-types system-area-pointer) diff -urN sbcl/src/compiler/ppc/char.lisp sbcl.char/src/compiler/ppc/char.lisp --- sbcl/src/compiler/ppc/char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/char.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -14,35 +14,35 @@ ;;;; Moves and coercions: ;;; Move a tagged char to an untagged representation. -(define-vop (move-to-base-char) +(define-vop (move-to-character) (:args (x :scs (any-reg descriptor-reg))) - (:results (y :scs (base-char-reg))) + (:results (y :scs (character-reg))) (:note "character untagging") (:generator 1 (inst srwi y x n-widetag-bits))) -(define-move-vop move-to-base-char :move - (any-reg descriptor-reg) (base-char-reg)) +(define-move-vop move-to-character :move + (any-reg descriptor-reg) (character-reg)) ;;; Move an untagged char to a tagged representation. -(define-vop (move-from-base-char) - (:args (x :scs (base-char-reg))) +(define-vop (move-from-character) + (:args (x :scs (character-reg))) (:results (y :scs (any-reg descriptor-reg))) (:note "character tagging") (:generator 1 (inst slwi y x n-widetag-bits) - (inst ori y y base-char-widetag))) + (inst ori y y character-widetag))) -(define-move-vop move-from-base-char :move - (base-char-reg) (any-reg descriptor-reg)) +(define-move-vop move-from-character :move + (character-reg) (any-reg descriptor-reg)) -;;; Move untagged base-char values. -(define-vop (base-char-move) +;;; Move untagged character values. +(define-vop (character-move) (:args (x :target y - :scs (base-char-reg) + :scs (character-reg) :load-if (not (location= x y)))) - (:results (y :scs (base-char-reg) + (:results (y :scs (character-reg) :load-if (not (location= x y)))) (:note "character move") (:effects) @@ -50,32 +50,32 @@ (:generator 0 (move y x))) -(define-move-vop base-char-move :move - (base-char-reg) (base-char-reg)) +(define-move-vop character-move :move + (character-reg) (character-reg)) -;;; Move untagged base-char arguments/return-values. -(define-vop (move-base-char-arg) +;;; Move untagged character arguments/return-values. +(define-vop (move-character-arg) (:args (x :target y - :scs (base-char-reg)) + :scs (character-reg)) (fp :scs (any-reg) - :load-if (not (sc-is y base-char-reg)))) + :load-if (not (sc-is y character-reg)))) (:results (y)) (:note "character arg move") (:generator 0 (sc-case y - (base-char-reg + (character-reg (move y x)) - (base-char-stack + (character-stack (storew x fp (tn-offset y)))))) -(define-move-vop move-base-char-arg :move-arg - (any-reg base-char-reg) (base-char-reg)) +(define-move-vop move-character-arg :move-arg + (any-reg character-reg) (character-reg)) -;;; Use standard MOVE-ARG + coercion to move an untagged base-char +;;; Use standard MOVE-ARG + coercion to move an untagged character ;;; to a descriptor passing location. (define-move-vop move-arg :move-arg - (base-char-reg) (any-reg descriptor-reg)) + (character-reg) (any-reg descriptor-reg)) @@ -84,8 +84,8 @@ (define-vop (char-code) (:translate char-code) (:policy :fast-safe) - (:args (ch :scs (base-char-reg) :target res)) - (:arg-types base-char) + (:args (ch :scs (character-reg) :target res)) + (:arg-types character) (:results (res :scs (any-reg))) (:result-types positive-fixnum) (:generator 1 @@ -96,17 +96,17 @@ (:policy :fast-safe) (:args (code :scs (any-reg) :target res)) (:arg-types positive-fixnum) - (:results (res :scs (base-char-reg))) - (:result-types base-char) + (:results (res :scs (character-reg))) + (:result-types character) (:generator 1 (inst srwi res code 2))) -;;; Comparison of base-chars. -(define-vop (base-char-compare) - (:args (x :scs (base-char-reg)) - (y :scs (base-char-reg))) - (:arg-types base-char base-char) +;;; Comparison of characters. +(define-vop (character-compare) + (:args (x :scs (character-reg)) + (y :scs (character-reg))) + (:arg-types character character) (:conditional) (:info target not-p) (:policy :fast-safe) @@ -116,21 +116,21 @@ (inst cmplw x y) (inst b? (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char base-char-compare) +(define-vop (fast-char=/character character-compare) (:translate char=) (:variant :eq :ne)) -(define-vop (fast-char/base-char base-char-compare) +(define-vop (fast-char>/character character-compare) (:translate char>) (:variant :gt :le)) -(define-vop (base-char-compare/c) - (:args (x :scs (base-char-reg))) - (:arg-types base-char (:constant base-char)) +(define-vop (character-compare/c) + (:args (x :scs (character-reg))) + (:arg-types character (:constant character)) (:conditional) (:info target not-p y) (:policy :fast-safe) @@ -140,15 +140,15 @@ (inst cmplwi x (sb!xc:char-code y)) (inst b? (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char/c base-char-compare/c) +(define-vop (fast-char=/character/c character-compare/c) (:translate char=) (:variant :eq :ne)) -(define-vop (fast-char/base-char/c base-char-compare/c) +(define-vop (fast-char>/character/c character-compare/c) (:translate char>) (:variant :gt :le)) diff -urN sbcl/src/compiler/ppc/move.lisp sbcl.char/src/compiler/ppc/move.lisp --- sbcl/src/compiler/ppc/move.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/move.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -17,15 +17,15 @@ (load-symbol y val)) (character (inst lr y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) (define-move-fun (load-number 1) (vop x y) ((immediate zero) (signed-reg unsigned-reg)) (inst lr y (tn-value x))) -(define-move-fun (load-base-char 1) (vop x y) - ((immediate) (base-char-reg)) +(define-move-fun (load-character 1) (vop x y) + ((immediate) (character-reg)) (inst li y (char-code (tn-value x)))) (define-move-fun (load-system-area-pointer 1) (vop x y) @@ -41,7 +41,7 @@ (load-stack-tn y x)) (define-move-fun (load-number-stack 5) (vop x y) - ((base-char-stack) (base-char-reg) + ((character-stack) (character-reg) (sap-stack) (sap-reg) (signed-stack) (signed-reg) (unsigned-stack) (unsigned-reg)) @@ -53,7 +53,7 @@ (store-stack-tn y x)) (define-move-fun (store-number-stack 5) (vop x y) - ((base-char-reg) (base-char-stack) + ((character-reg) (character-stack) (sap-reg) (sap-stack) (signed-reg) (signed-stack) (unsigned-reg) (unsigned-stack)) diff -urN sbcl/src/compiler/ppc/vm.lisp sbcl.char/src/compiler/ppc/vm.lisp --- sbcl/src/compiler/ppc/vm.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/vm.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -141,7 +141,7 @@ ;; The non-descriptor stacks. (signed-stack non-descriptor-stack) ; (signed-byte 32) (unsigned-stack non-descriptor-stack) ; (unsigned-byte 32) - (base-char-stack non-descriptor-stack) ; non-descriptor characters. + (character-stack non-descriptor-stack) ; non-descriptor characters. (sap-stack non-descriptor-stack) ; System area pointers. (single-stack non-descriptor-stack) ; single-floats (double-stack non-descriptor-stack @@ -169,11 +169,11 @@ :alternate-scs (control-stack)) ;; Non-Descriptor characters - (base-char-reg registers + (character-reg registers :locations #.non-descriptor-regs :constant-scs (immediate) :save-p t - :alternate-scs (base-char-stack)) + :alternate-scs (character-stack)) ;; Non-Descriptor SAP's (arbitrary pointers into address space) (sap-reg registers @@ -332,11 +332,19 @@ (immediate-constant "Immed")))) ;;; The loader uses this to convert alien names to the form they -;;; occur in the symbol table. This is ELF, so do nothing. +;;; occur in the symbol table. (defun extern-alien-name (name) - (declare (type simple-base-string name)) - ;; Darwin is non-ELF, and needs a _ prefix - #!+darwin (concatenate 'string "_" name) - ;; The other (ELF) ports currently don't need any prefix - #!-darwin name) + (declare (type string name)) + ;; Darwin is non-ELF, and needs a _ prefix. The other (ELF) ports + ;; currently don't need any prefix. + (flet ((maybe-prefix (name) + #!+darwin (concatenate 'simple-base-string "_" name) + #!-darwin name)) + (typecase name + (simple-base-string (maybe-prefix name)) + (base-string (coerce (maybe-prefix name) 'simple-base-string)) + (t + (handler-case (coerce (maybe-prefix name) 'simple-base-string) + (type-error () + (error "invalid external alien name: ~S" name))))))) diff -urN sbcl/src/compiler/seqtran.lisp sbcl.char/src/compiler/seqtran.lisp --- sbcl/src/compiler/seqtran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/seqtran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -712,16 +712,32 @@ sb!vm:n-byte-bits))) string1)) +;;; FIXME: OAOO (should be shared with above, and probably +;;; automatically generated from SAETP). +(deftransform replace ((string1 string2 &key (start1 0) (start2 0) + end1 end2) + ((simple-array character (*)) + (simple-array character (*)) + &rest t) + * + ;; FIXME: consider replacing this policy test + ;; with some tests for the STARTx and ENDx + ;; indices being valid, conditional on high + ;; SAFETY code. + ;; + ;; FIXME: It turns out that this transform is + ;; critical for the performance of string + ;; streams. Make this more explicit. + :policy (< (max safety space) 3)) + `(sb!impl::simple-character-string-replace-from-simple-character-string* + string1 string2 start1 end1 start2 end2)) + ;;; FIXME: this would be a valid transform for certain excluded cases: ;;; * :TEST 'CHAR= or :TEST #'CHAR= ;;; * :TEST 'EQL or :TEST #'EQL ;;; * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity) -;;; -;;; also, it should be noted that there's nothing much in this -;;; transform (as opposed to the ones for REPLACE and CONCATENATE) -;;; that particularly limits it to SIMPLE-BASE-STRINGs. (deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2) - (simple-base-string simple-base-string &rest t) + (simple-string simple-string &rest t) * :policy (> speed (max space safety))) `(block search @@ -747,7 +763,7 @@ (deftransform concatenate ((rtype &rest sequences) (t &rest (or simple-base-string (simple-array nil (*)))) - simple-base-string + (simple-array base-string (*)) :policy (< safety 3)) (loop for rest-seqs on sequences for n-seq = (gensym "N-SEQ") @@ -771,7 +787,8 @@ (declare (ignore rtype)) (let* (,@lets (res (make-string (truncate (the index (+ ,@all-lengths)) - sb!vm:n-byte-bits)))) + sb!vm:n-byte-bits) + :element-type 'base-char))) (declare (type index ,@all-lengths)) (let (,@(mapcar (lambda (name) `(,name 0)) starts)) (declare (type index ,@starts)) diff -urN sbcl/src/compiler/srctran.lisp sbcl.char/src/compiler/srctran.lisp --- sbcl/src/compiler/srctran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/srctran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -2321,7 +2321,29 @@ (specifier-type `(integer ,lo-res ,hi-res)))))) (defoptimizer (code-char derive-type) ((code)) - (specifier-type 'base-char)) + (let ((type (lvar-type code))) + ;; FIXME: unions of integral ranges? It ought to be easier to do + ;; this, given that CHARACTER-SET is basically an integral range + ;; type. -- CSR, 2004-10-04 + (when (numeric-type-p type) + (let* ((lo (numeric-type-low type)) + (hi (numeric-type-high type)) + (type (specifier-type `(character-set ((,lo . ,hi)))))) + (cond + ;; KLUDGE: when running on the host, we lose a slight amount + ;; of precision so that we don't have to "unparse" types + ;; that formally we can't, such as (CHARACTER-SET ((0 + ;; . 0))). -- CSR, 2004-10-06 + #+sb-xc-host + ((csubtypep type (specifier-type 'standard-char)) type) + #+sb-xc-host + ((csubtypep type (specifier-type 'base-char)) + (specifier-type 'base-char)) + #+sb-xc-host + ((csubtypep type (specifier-type 'extended-char)) + (specifier-type 'extended-char)) + (t #+sb-xc-host (specifier-type 'character) + #-sb-xc-host type)))))) (defoptimizer (values derive-type) ((&rest values)) (make-values-type :required (mapcar #'lvar-type values))) @@ -2962,21 +2984,18 @@ ;;; then the result is definitely false. (deftransform simple-equality-transform ((x y) * * :defun-only t) - (cond ((same-leaf-ref-p x y) - t) - ((not (types-equal-or-intersect (lvar-type x) - (lvar-type y))) - nil) - (t - (give-up-ir1-transform)))) + (cond + ((same-leaf-ref-p x y) t) + ((not (types-equal-or-intersect (lvar-type x) (lvar-type y))) + nil) + (t (give-up-ir1-transform)))) (macrolet ((def (x) `(%deftransform ',x '(function * *) #'simple-equality-transform))) (def eq) - (def char=) - (def equal)) + (def char=)) -;;; This is similar to SIMPLE-EQUALITY-PREDICATE, except that we also +;;; This is similar to SIMPLE-EQUALITY-TRANSFORM, except that we also ;;; try to convert to a type-specific predicate or EQ: ;;; -- If both args are characters, convert to CHAR=. This is better than ;;; just converting to EQ, since CHAR= may have special compilation @@ -2995,23 +3014,42 @@ (y-type (lvar-type y)) (char-type (specifier-type 'character)) (number-type (specifier-type 'number))) - (cond ((same-leaf-ref-p x y) - t) - ((not (types-equal-or-intersect x-type y-type)) - nil) - ((and (csubtypep x-type char-type) - (csubtypep y-type char-type)) - '(char= x y)) - ((or (not (types-equal-or-intersect x-type number-type)) - (not (types-equal-or-intersect y-type number-type))) - '(eq x y)) - ((and (not (constant-lvar-p y)) - (or (constant-lvar-p x) - (and (csubtypep x-type y-type) - (not (csubtypep y-type x-type))))) - '(eql y x)) - (t - (give-up-ir1-transform))))) + (cond + ((same-leaf-ref-p x y) t) + ((not (types-equal-or-intersect x-type y-type)) + nil) + ((and (csubtypep x-type char-type) + (csubtypep y-type char-type)) + '(char= x y)) + ((or (not (types-equal-or-intersect x-type number-type)) + (not (types-equal-or-intersect y-type number-type))) + '(eq x y)) + ((and (not (constant-lvar-p y)) + (or (constant-lvar-p x) + (and (csubtypep x-type y-type) + (not (csubtypep y-type x-type))))) + '(eql y x)) + (t + (give-up-ir1-transform))))) + +;;; similarly to the EQL transform above, we attempt to constant-fold +;;; or convert to a simpler predicate: mostly we have to be careful +;;; with strings. +(deftransform equal ((x y) * *) + "convert to simpler equality predicate" + (let ((x-type (lvar-type x)) + (y-type (lvar-type y)) + (string-type (specifier-type 'string))) + (cond + ((same-leaf-ref-p x y) t) + ((and (csubtypep x-type string-type) + (csubtypep y-type string-type)) + '(string= x y)) + ((and (or (not (types-equal-or-intersect x-type string-type)) + (not (types-equal-or-intersect y-type string-type))) + (not (types-equal-or-intersect x-type y-type))) + nil) + (t (give-up-ir1-transform))))) ;;; Convert to EQL if both args are rational and complexp is specified ;;; and the same for both. diff -urN sbcl/src/compiler/target/array.lisp sbcl.char/src/compiler/target/array.lisp --- sbcl/src/compiler/target/array.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/array.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -1245,10 +1245,10 @@ (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg))) (:arg-types simple-base-string positive-fixnum) - (:results (value :scs (base-char-reg))) - (:result-types base-char) + (:results (value :scs (character-reg))) + (:result-types character) (:generator 5 - (inst mov value + (inst movzx value (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))))) @@ -1259,10 +1259,10 @@ (:args (object :scs (descriptor-reg))) (:info index) (:arg-types simple-base-string (:constant (signed-byte 30))) - (:results (value :scs (base-char-reg))) - (:result-types base-char) + (:results (value :scs (character-reg))) + (:result-types character) (:generator 4 - (inst mov value + (inst movzx value (make-ea :byte :base object :disp (- (+ (* vector-data-offset n-word-bytes) index) other-pointer-lowtag))))) @@ -1272,32 +1272,48 @@ (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) (index :scs (unsigned-reg) :to (:eval 0)) - (value :scs (base-char-reg) :target result)) - (:arg-types simple-base-string positive-fixnum base-char) - (:results (result :scs (base-char-reg))) - (:result-types base-char) + (value :scs (character-reg) :target eax)) + (:arg-types simple-base-string positive-fixnum character) + (:temporary (:sc character-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) (:generator 5 + (move eax value) (inst mov (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - value) - (move result value))) + al-tn) + (move result eax))) -(define-vop (data-vector-set/simple-base-string-c) +(define-vop (data-vector-set-c/simple-base-string) (:translate data-vector-set) (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) - (value :scs (base-char-reg))) + (value :scs (character-reg))) (:info index) - (:arg-types simple-base-string (:constant (signed-byte 30)) base-char) - (:results (result :scs (base-char-reg))) - (:result-types base-char) + (:arg-types simple-base-string (:constant (signed-byte 30)) character) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) (:generator 4 - (inst mov (make-ea :byte :base object - :disp (- (+ (* vector-data-offset n-word-bytes) index) - other-pointer-lowtag)) - value) - (move result value))) + (move eax value) + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + al-tn) + (move result eax))) + +(define-full-reffer data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-ref) + +(define-full-setter data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-set) ;;; signed-byte-8 diff -urN sbcl/src/compiler/target/c-call.lisp sbcl.char/src/compiler/target/c-call.lisp --- sbcl/src/compiler/target/c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/c-call.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -185,7 +185,7 @@ (:translate foreign-symbol-address) (:policy :fast-safe) (:args) - (:arg-types (:constant simple-base-string)) + (:arg-types (:constant simple-string)) (:info foreign-symbol) (:results (res :scs (sap-reg))) (:result-types system-area-pointer) diff -urN sbcl/src/compiler/target/cell.lisp sbcl.char/src/compiler/target/cell.lisp --- sbcl/src/compiler/target/cell.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/cell.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -46,7 +46,7 @@ (make-ea :dword :base object :disp (- (* offset n-word-bytes) lowtag)) (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) ;; Else, value not immediate. (storew value object offset lowtag)))) diff -urN sbcl/src/compiler/target/char.lisp sbcl.char/src/compiler/target/char.lisp --- sbcl/src/compiler/target/char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/char.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -14,110 +14,103 @@ ;;;; moves and coercions ;;; Move a tagged char to an untagged representation. -(define-vop (move-to-base-char) - (:args (x :scs (any-reg control-stack) :target al)) - (:temporary (:sc byte-reg :offset al-offset - :from (:argument 0) :to (:eval 0)) al) - (:ignore al) - (:temporary (:sc byte-reg :offset ah-offset :target y - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (base-char-reg base-char-stack))) +(define-vop (move-to-character) + (:args (x :scs (any-reg descriptor-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (character-reg) + :load-if (not (location= x y)))) (:note "character untagging") (:generator 1 - (move eax-tn x) - (move y ah))) -(define-move-vop move-to-base-char :move - (any-reg control-stack) (base-char-reg base-char-stack)) + (move y x) + (inst shr y n-widetag-bits))) +(define-move-vop move-to-character :move + (any-reg) (character-reg)) ;;; Move an untagged char to a tagged representation. -(define-vop (move-from-base-char) - (:args (x :scs (base-char-reg base-char-stack) :target ah)) - (:temporary (:sc byte-reg :offset al-offset :target y - :from (:argument 0) :to (:result 0)) al) - (:temporary (:sc byte-reg :offset ah-offset - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (any-reg descriptor-reg control-stack))) +(define-vop (move-from-character) + (:args (x :scs (character-reg))) + (:results (y :scs (any-reg descriptor-reg))) (:note "character tagging") (:generator 1 - (move ah x) ; Maybe move char byte. - (inst mov al base-char-widetag) ; x86 to type bits - (inst and eax-tn #xffff) ; Remove any junk bits. - (move y eax-tn))) -(define-move-vop move-from-base-char :move - (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack)) + ;; FIXME: is this inefficient? Is there a better way of writing + ;; it? (fixnum tagging is done with LEA). We can't use SHL + ;; because we either scribble over the source register or briefly + ;; have a non-descriptor in a descriptor register, unless we + ;; introduce a temporary. + (inst imul y x (ash 1 n-widetag-bits)) + (inst or y character-widetag))) +(define-move-vop move-from-character :move + (character-reg) (any-reg descriptor-reg)) -;;; Move untagged base-char values. -(define-vop (base-char-move) +;;; Move untagged character values. +(define-vop (character-move) (:args (x :target y - :scs (base-char-reg) + :scs (character-reg) :load-if (not (location= x y)))) - (:results (y :scs (base-char-reg base-char-stack) + (:results (y :scs (character-reg character-stack) :load-if (not (location= x y)))) (:note "character move") (:effects) (:affected) (:generator 0 (move y x))) -(define-move-vop base-char-move :move - (base-char-reg) (base-char-reg base-char-stack)) +(define-move-vop character-move :move + (character-reg) (character-reg character-stack)) -;;; Move untagged base-char arguments/return-values. -(define-vop (move-base-char-arg) +;;; Move untagged character arguments/return-values. +(define-vop (move-character-arg) (:args (x :target y - :scs (base-char-reg)) + :scs (character-reg)) (fp :scs (any-reg) - :load-if (not (sc-is y base-char-reg)))) + :load-if (not (sc-is y character-reg)))) (:results (y)) (:note "character arg move") (:generator 0 (sc-case y - (base-char-reg + (character-reg (move y x)) - (base-char-stack - (inst mov - (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4))) - x))))) -(define-move-vop move-base-char-arg :move-arg - (any-reg base-char-reg) (base-char-reg)) + (character-stack + ;; copied blindly. No idea if it's right + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) +(define-move-vop move-character-arg :move-arg + (any-reg character-reg) (character-reg)) -;;; Use standard MOVE-ARG + coercion to move an untagged base-char +;;; Use standard MOVE-ARG + coercion to move an untagged character ;;; to a descriptor passing location. (define-move-vop move-arg :move-arg - (base-char-reg) (any-reg descriptor-reg)) + (character-reg) (any-reg descriptor-reg)) ;;;; other operations (define-vop (char-code) (:translate char-code) (:policy :fast-safe) - (:args (ch :scs (base-char-reg base-char-stack))) - (:arg-types base-char) + (:args (ch :scs (character-reg character-stack))) + (:arg-types character) (:results (res :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 1 - (inst movzx res ch))) + (inst mov res ch))) (define-vop (code-char) (:translate code-char) (:policy :fast-safe) - (:args (code :scs (unsigned-reg unsigned-stack) :target eax)) + (:args (code :scs (unsigned-reg unsigned-stack))) (:arg-types positive-fixnum) - (:temporary (:sc unsigned-reg :offset eax-offset :target res - :from (:argument 0) :to (:result 0)) - eax) - (:results (res :scs (base-char-reg))) - (:result-types base-char) + (:results (res :scs (character-reg))) + (:result-types character) (:generator 1 - (move eax code) - (move res al-tn))) + (inst mov res code))) -;;; comparison of BASE-CHARs -(define-vop (base-char-compare) - (:args (x :scs (base-char-reg base-char-stack)) - (y :scs (base-char-reg) - :load-if (not (and (sc-is x base-char-reg) - (sc-is y base-char-stack))))) - (:arg-types base-char base-char) +;;; comparison of CHARACTERs +(define-vop (character-compare) + (:args (x :scs (character-reg character-stack)) + (y :scs (character-reg) + :load-if (not (and (sc-is x character-reg) + (sc-is y character-stack))))) + (:arg-types character character) (:conditional) (:info target not-p) (:policy :fast-safe) @@ -127,21 +120,21 @@ (inst cmp x y) (inst jmp (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char base-char-compare) +(define-vop (fast-char=/character character-compare) (:translate char=) (:variant :e :ne)) -(define-vop (fast-char/base-char base-char-compare) +(define-vop (fast-char>/character character-compare) (:translate char>) (:variant :a :na)) -(define-vop (base-char-compare/c) - (:args (x :scs (base-char-reg base-char-stack))) - (:arg-types base-char (:constant base-char)) +(define-vop (character-compare/c) + (:args (x :scs (character-reg character-stack))) + (:arg-types character (:constant character)) (:conditional) (:info target not-p y) (:policy :fast-safe) @@ -151,14 +144,14 @@ (inst cmp x (sb!xc:char-code y)) (inst jmp (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char/c base-char-compare/c) +(define-vop (fast-char=/character/c character-compare/c) (:translate char=) (:variant :e :ne)) -(define-vop (fast-char/base-char/c base-char-compare/c) +(define-vop (fast-char>/character/c character-compare/c) (:translate char>) (:variant :a :na)) diff -urN sbcl/src/compiler/target/memory.lisp sbcl.char/src/compiler/target/memory.lisp --- sbcl/src/compiler/target/memory.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/memory.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -116,7 +116,7 @@ (make-ea :dword :base object :disp (- (* (+ base offset) n-word-bytes) lowtag)) (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) ;; Else, value not immediate. (storew value object (+ base offset) lowtag)))) diff -urN sbcl/src/compiler/target/move.lisp sbcl.char/src/compiler/target/move.lisp --- sbcl/src/compiler/target/move.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/move.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -24,14 +24,14 @@ (load-symbol y val)) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) (define-move-fun (load-number 1) (vop x y) ((immediate) (signed-reg unsigned-reg)) (inst mov y (tn-value x))) -(define-move-fun (load-base-char 1) (vop x y) - ((immediate) (base-char-reg)) +(define-move-fun (load-character 1) (vop x y) + ((immediate) (character-reg)) (inst mov y (char-code (tn-value x)))) (define-move-fun (load-system-area-pointer 1) (vop x y) @@ -44,7 +44,7 @@ (define-move-fun (load-stack 5) (vop x y) ((control-stack) (any-reg descriptor-reg) - (base-char-stack) (base-char-reg) + (character-stack) (character-reg) (sap-stack) (sap-reg) (signed-stack) (signed-reg) (unsigned-stack) (unsigned-reg)) @@ -52,7 +52,7 @@ (define-move-fun (store-stack 5) (vop x y) ((any-reg descriptor-reg) (control-stack) - (base-char-reg) (base-char-stack) + (character-reg) (character-stack) (sap-reg) (sap-stack) (signed-reg) (signed-stack) (unsigned-reg) (unsigned-stack)) @@ -82,7 +82,7 @@ (inst mov y (+ nil-value (static-symbol-offset val)))) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) (move y x)))) (define-move-vop move :move @@ -121,7 +121,7 @@ (load-symbol y val)) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) (move y x))) ((control-stack) (if (sc-is x immediate) @@ -136,7 +136,7 @@ fp (tn-offset y))) (character (storew (logior (ash (char-code val) n-widetag-bits) - base-char-widetag) + character-widetag) fp (tn-offset y)))) ;; Lisp stack (etypecase val @@ -147,7 +147,7 @@ fp (- (1+ (tn-offset y))))) (character (storew (logior (ash (char-code val) n-widetag-bits) - base-char-widetag) + character-widetag) fp (- (1+ (tn-offset y)))))))) (if (= (tn-offset fp) esp-offset) ;; C-call diff -urN sbcl/src/compiler/target/pred.lisp sbcl.char/src/compiler/target/pred.lisp --- sbcl/src/compiler/target/pred.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/pred.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -50,7 +50,7 @@ (inst cmp x (+ nil-value (static-symbol-offset val)))) (character (inst cmp x (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) ((sc-is x immediate) ; and y not immediate ;; Swap the order to fit the compare instruction. (let ((val (tn-value x))) @@ -63,7 +63,7 @@ (inst cmp y (+ nil-value (static-symbol-offset val)))) (character (inst cmp y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) (t (inst cmp x y))) diff -urN sbcl/src/compiler/target/vm.lisp sbcl.char/src/compiler/target/vm.lisp --- sbcl/src/compiler/target/vm.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target/vm.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -182,7 +182,7 @@ ;; the non-descriptor stacks (signed-stack stack) ; (signed-byte 32) (unsigned-stack stack) ; (unsigned-byte 32) - (base-char-stack stack) ; non-descriptor characters. + (character-stack stack) ; non-descriptor characters. (sap-stack stack) ; System area pointers. (single-stack stack) ; single-floats (double-stack stack :element-size 2) ; double-floats. @@ -228,12 +228,12 @@ :alternate-scs (control-stack)) ;; non-descriptor characters - (base-char-reg registers - :locations #.*byte-regs* - :reserve-locations (#.ah-offset #.al-offset) + (character-reg registers + :locations #.*dword-regs* +; :reserve-locations (#.ah-offset #.al-offset) :constant-scs (immediate) :save-p t - :alternate-scs (base-char-stack)) + :alternate-scs (character-stack)) ;; non-descriptor SAPs (arbitrary pointers into address space) (sap-reg registers @@ -322,11 +322,12 @@ (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) (eval-when (:compile-toplevel :load-toplevel :execute) -(defparameter *byte-sc-names* '(base-char-reg byte-reg base-char-stack)) +(defparameter *byte-sc-names* '(byte-reg)) (defparameter *word-sc-names* '(word-reg)) (defparameter *dword-sc-names* '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack - signed-stack unsigned-stack sap-stack single-stack constant)) + signed-stack unsigned-stack sap-stack single-stack + character-reg character-stack constant)) ;;; added by jrd. I guess the right thing to do is to treat floats ;;; as a separate size... ;;; @@ -450,6 +451,10 @@ ;;; The loader uses this to convert alien names to the form they need in ;;; the symbol table (for example, prepending an underscore). (defun extern-alien-name (name) - (declare (type simple-base-string name)) + (declare (type string name)) ;; ELF ports currently don't need any prefix - name) + (typecase name + (simple-base-string name) + (base-string (coerce name 'simple-base-string)) + (t (handler-case (coerce name 'simple-base-string) + (type-error () (error "invalid external alien name: ~S" name)))))) diff -urN sbcl/src/compiler/target-dump.lisp sbcl.char/src/compiler/target-dump.lisp --- sbcl/src/compiler/target-dump.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target-dump.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -13,6 +13,21 @@ (in-package "SB!FASL") +;;; a helper function shared by DUMP-SIMPLE-CHARACTER-STRING and +;;; DUMP-SYMBOL (in the target compiler: the cross-compiler uses the +;;; portability knowledge and always dumps BASE-STRINGS). +(defun dump-characters-of-string (s fasl-output) + (declare (type string s) (type fasl-output fasl-output)) + (dovector (c s) + (dump-word (char-code c) fasl-output)) + (values)) + +(defun dump-simple-character-string (s file) + (declare (type (simple-array character (*)) s)) + (dump-fop* (length s) fop-small-character-string fop-character-string file) + (dump-characters-of-string s file) + (values)) + ;;; Dump the first N bytes of VEC out to FILE. VEC is some sort of unboxed ;;; vector-like thing that we can BLT from. (defun dump-raw-bytes (vec n fasl-output) diff -urN sbcl/src/compiler/typetran.lisp sbcl.char/src/compiler/typetran.lisp --- sbcl/src/compiler/typetran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/typetran.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -115,7 +115,6 @@ ;;; just expand all its operations into toplevel code? (defun !define-standard-type-predicates () (define-type-predicate arrayp array) - ; (The ATOM predicate is handled separately as (NOT CONS).) (define-type-predicate bit-vector-p bit-vector) (define-type-predicate characterp character) (define-type-predicate compiled-function-p compiled-function) @@ -141,13 +140,11 @@ (define-type-predicate symbolp symbol) (define-type-predicate vectorp vector)) (!define-standard-type-predicates) - -;;;; transforms for type predicates not implemented primitively -;;;; -;;;; See also VM dependent transforms. (define-source-transform atom (x) `(not (consp ,x))) +(define-source-transform base-char-p (x) + `(typep ,x 'base-char)) ;;;; TYPEP source transform @@ -292,6 +289,21 @@ `((typep (cdr ,n-obj) ',(type-specifier cdr-type)))))))))) +(defun source-transform-character-set-typep (object type) + (let ((pairs (character-set-type-pairs type))) + (if (and (= (length pairs) 1) + (= (caar pairs) 0) + (= (cdar pairs) (1- sb!xc:char-code-limit))) + `(characterp ,object) + (once-only ((n-obj object)) + (let ((n-code (gensym "CODE"))) + `(and (characterp ,n-obj) + (let ((,n-code (sb!xc:char-code ,n-obj))) + (or + ,@(loop for pair in pairs + collect + `(<= ,(car pair) ,n-code ,(cdr pair))))))))))) + ;;; Return the predicate and type from the most specific entry in ;;; *TYPE-PREDICATES* that is a supertype of TYPE. (defun find-supertype-predicate (type) @@ -493,6 +505,8 @@ (source-transform-array-typep object type)) (cons-type (source-transform-cons-typep object type)) + (character-set-type + (source-transform-character-set-typep object type)) (t nil)) `(%typep ,object ,spec))) (values nil t))) diff -urN sbcl/src/compiler/x86/array.lisp sbcl.char/src/compiler/x86/array.lisp --- sbcl/src/compiler/x86/array.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/array.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -1245,10 +1245,10 @@ (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg))) (:arg-types simple-base-string positive-fixnum) - (:results (value :scs (base-char-reg))) - (:result-types base-char) + (:results (value :scs (character-reg))) + (:result-types character) (:generator 5 - (inst mov value + (inst movzx value (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))))) @@ -1259,10 +1259,10 @@ (:args (object :scs (descriptor-reg))) (:info index) (:arg-types simple-base-string (:constant (signed-byte 30))) - (:results (value :scs (base-char-reg))) - (:result-types base-char) + (:results (value :scs (character-reg))) + (:result-types character) (:generator 4 - (inst mov value + (inst movzx value (make-ea :byte :base object :disp (- (+ (* vector-data-offset n-word-bytes) index) other-pointer-lowtag))))) @@ -1272,32 +1272,48 @@ (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) (index :scs (unsigned-reg) :to (:eval 0)) - (value :scs (base-char-reg) :target result)) - (:arg-types simple-base-string positive-fixnum base-char) - (:results (result :scs (base-char-reg))) - (:result-types base-char) + (value :scs (character-reg) :target eax)) + (:arg-types simple-base-string positive-fixnum character) + (:temporary (:sc character-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) (:generator 5 + (move eax value) (inst mov (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - value) - (move result value))) + al-tn) + (move result eax))) -(define-vop (data-vector-set/simple-base-string-c) +(define-vop (data-vector-set-c/simple-base-string) (:translate data-vector-set) (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) - (value :scs (base-char-reg))) + (value :scs (character-reg))) (:info index) - (:arg-types simple-base-string (:constant (signed-byte 30)) base-char) - (:results (result :scs (base-char-reg))) - (:result-types base-char) + (:arg-types simple-base-string (:constant (signed-byte 30)) character) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) + (:results (result :scs (character-reg))) + (:result-types character) (:generator 4 - (inst mov (make-ea :byte :base object - :disp (- (+ (* vector-data-offset n-word-bytes) index) - other-pointer-lowtag)) - value) - (move result value))) + (move eax value) + (inst mov (make-ea :byte :base object + :disp (- (+ (* vector-data-offset n-word-bytes) index) + other-pointer-lowtag)) + al-tn) + (move result eax))) + +(define-full-reffer data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-ref) + +(define-full-setter data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-set) ;;; signed-byte-8 diff -urN sbcl/src/compiler/x86/c-call.lisp sbcl.char/src/compiler/x86/c-call.lisp --- sbcl/src/compiler/x86/c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/c-call.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -185,7 +185,7 @@ (:translate foreign-symbol-address) (:policy :fast-safe) (:args) - (:arg-types (:constant simple-base-string)) + (:arg-types (:constant simple-string)) (:info foreign-symbol) (:results (res :scs (sap-reg))) (:result-types system-area-pointer) diff -urN sbcl/src/compiler/x86/cell.lisp sbcl.char/src/compiler/x86/cell.lisp --- sbcl/src/compiler/x86/cell.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/cell.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -46,7 +46,7 @@ (make-ea :dword :base object :disp (- (* offset n-word-bytes) lowtag)) (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) ;; Else, value not immediate. (storew value object offset lowtag)))) diff -urN sbcl/src/compiler/x86/char.lisp sbcl.char/src/compiler/x86/char.lisp --- sbcl/src/compiler/x86/char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/char.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -14,110 +14,103 @@ ;;;; moves and coercions ;;; Move a tagged char to an untagged representation. -(define-vop (move-to-base-char) - (:args (x :scs (any-reg control-stack) :target al)) - (:temporary (:sc byte-reg :offset al-offset - :from (:argument 0) :to (:eval 0)) al) - (:ignore al) - (:temporary (:sc byte-reg :offset ah-offset :target y - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (base-char-reg base-char-stack))) +(define-vop (move-to-character) + (:args (x :scs (any-reg descriptor-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (character-reg) + :load-if (not (location= x y)))) (:note "character untagging") (:generator 1 - (move eax-tn x) - (move y ah))) -(define-move-vop move-to-base-char :move - (any-reg control-stack) (base-char-reg base-char-stack)) + (move y x) + (inst shr y n-widetag-bits))) +(define-move-vop move-to-character :move + (any-reg) (character-reg)) ;;; Move an untagged char to a tagged representation. -(define-vop (move-from-base-char) - (:args (x :scs (base-char-reg base-char-stack) :target ah)) - (:temporary (:sc byte-reg :offset al-offset :target y - :from (:argument 0) :to (:result 0)) al) - (:temporary (:sc byte-reg :offset ah-offset - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (any-reg descriptor-reg control-stack))) +(define-vop (move-from-character) + (:args (x :scs (character-reg))) + (:results (y :scs (any-reg descriptor-reg))) (:note "character tagging") (:generator 1 - (move ah x) ; Maybe move char byte. - (inst mov al base-char-widetag) ; x86 to type bits - (inst and eax-tn #xffff) ; Remove any junk bits. - (move y eax-tn))) -(define-move-vop move-from-base-char :move - (base-char-reg base-char-stack) (any-reg descriptor-reg control-stack)) + ;; FIXME: is this inefficient? Is there a better way of writing + ;; it? (fixnum tagging is done with LEA). We can't use SHL + ;; because we either scribble over the source register or briefly + ;; have a non-descriptor in a descriptor register, unless we + ;; introduce a temporary. + (inst imul y x (ash 1 n-widetag-bits)) + (inst or y character-widetag))) +(define-move-vop move-from-character :move + (character-reg) (any-reg descriptor-reg)) -;;; Move untagged base-char values. -(define-vop (base-char-move) +;;; Move untagged character values. +(define-vop (character-move) (:args (x :target y - :scs (base-char-reg) + :scs (character-reg) :load-if (not (location= x y)))) - (:results (y :scs (base-char-reg base-char-stack) + (:results (y :scs (character-reg character-stack) :load-if (not (location= x y)))) (:note "character move") (:effects) (:affected) (:generator 0 (move y x))) -(define-move-vop base-char-move :move - (base-char-reg) (base-char-reg base-char-stack)) +(define-move-vop character-move :move + (character-reg) (character-reg character-stack)) -;;; Move untagged base-char arguments/return-values. -(define-vop (move-base-char-arg) +;;; Move untagged character arguments/return-values. +(define-vop (move-character-arg) (:args (x :target y - :scs (base-char-reg)) + :scs (character-reg)) (fp :scs (any-reg) - :load-if (not (sc-is y base-char-reg)))) + :load-if (not (sc-is y character-reg)))) (:results (y)) (:note "character arg move") (:generator 0 (sc-case y - (base-char-reg + (character-reg (move y x)) - (base-char-stack - (inst mov - (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4))) - x))))) -(define-move-vop move-base-char-arg :move-arg - (any-reg base-char-reg) (base-char-reg)) + (character-stack + ;; copied blindly. No idea if it's right + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) +(define-move-vop move-character-arg :move-arg + (any-reg character-reg) (character-reg)) -;;; Use standard MOVE-ARG + coercion to move an untagged base-char +;;; Use standard MOVE-ARG + coercion to move an untagged character ;;; to a descriptor passing location. (define-move-vop move-arg :move-arg - (base-char-reg) (any-reg descriptor-reg)) + (character-reg) (any-reg descriptor-reg)) ;;;; other operations (define-vop (char-code) (:translate char-code) (:policy :fast-safe) - (:args (ch :scs (base-char-reg base-char-stack))) - (:arg-types base-char) + (:args (ch :scs (character-reg character-stack))) + (:arg-types character) (:results (res :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 1 - (inst movzx res ch))) + (inst mov res ch))) (define-vop (code-char) (:translate code-char) (:policy :fast-safe) - (:args (code :scs (unsigned-reg unsigned-stack) :target eax)) + (:args (code :scs (unsigned-reg unsigned-stack))) (:arg-types positive-fixnum) - (:temporary (:sc unsigned-reg :offset eax-offset :target res - :from (:argument 0) :to (:result 0)) - eax) - (:results (res :scs (base-char-reg))) - (:result-types base-char) + (:results (res :scs (character-reg))) + (:result-types character) (:generator 1 - (move eax code) - (move res al-tn))) + (inst mov res code))) -;;; comparison of BASE-CHARs -(define-vop (base-char-compare) - (:args (x :scs (base-char-reg base-char-stack)) - (y :scs (base-char-reg) - :load-if (not (and (sc-is x base-char-reg) - (sc-is y base-char-stack))))) - (:arg-types base-char base-char) +;;; comparison of CHARACTERs +(define-vop (character-compare) + (:args (x :scs (character-reg character-stack)) + (y :scs (character-reg) + :load-if (not (and (sc-is x character-reg) + (sc-is y character-stack))))) + (:arg-types character character) (:conditional) (:info target not-p) (:policy :fast-safe) @@ -127,21 +120,21 @@ (inst cmp x y) (inst jmp (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char base-char-compare) +(define-vop (fast-char=/character character-compare) (:translate char=) (:variant :e :ne)) -(define-vop (fast-char/base-char base-char-compare) +(define-vop (fast-char>/character character-compare) (:translate char>) (:variant :a :na)) -(define-vop (base-char-compare/c) - (:args (x :scs (base-char-reg base-char-stack))) - (:arg-types base-char (:constant base-char)) +(define-vop (character-compare/c) + (:args (x :scs (character-reg character-stack))) + (:arg-types character (:constant character)) (:conditional) (:info target not-p y) (:policy :fast-safe) @@ -151,14 +144,14 @@ (inst cmp x (sb!xc:char-code y)) (inst jmp (if not-p not-condition condition) target))) -(define-vop (fast-char=/base-char/c base-char-compare/c) +(define-vop (fast-char=/character/c character-compare/c) (:translate char=) (:variant :e :ne)) -(define-vop (fast-char/base-char/c base-char-compare/c) +(define-vop (fast-char>/character/c character-compare/c) (:translate char>) (:variant :a :na)) diff -urN sbcl/src/compiler/x86/memory.lisp sbcl.char/src/compiler/x86/memory.lisp --- sbcl/src/compiler/x86/memory.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/memory.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -116,7 +116,7 @@ (make-ea :dword :base object :disp (- (* (+ base offset) n-word-bytes) lowtag)) (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) ;; Else, value not immediate. (storew value object (+ base offset) lowtag)))) diff -urN sbcl/src/compiler/x86/move.lisp sbcl.char/src/compiler/x86/move.lisp --- sbcl/src/compiler/x86/move.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/move.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -24,14 +24,14 @@ (load-symbol y val)) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) (define-move-fun (load-number 1) (vop x y) ((immediate) (signed-reg unsigned-reg)) (inst mov y (tn-value x))) -(define-move-fun (load-base-char 1) (vop x y) - ((immediate) (base-char-reg)) +(define-move-fun (load-character 1) (vop x y) + ((immediate) (character-reg)) (inst mov y (char-code (tn-value x)))) (define-move-fun (load-system-area-pointer 1) (vop x y) @@ -44,7 +44,7 @@ (define-move-fun (load-stack 5) (vop x y) ((control-stack) (any-reg descriptor-reg) - (base-char-stack) (base-char-reg) + (character-stack) (character-reg) (sap-stack) (sap-reg) (signed-stack) (signed-reg) (unsigned-stack) (unsigned-reg)) @@ -52,7 +52,7 @@ (define-move-fun (store-stack 5) (vop x y) ((any-reg descriptor-reg) (control-stack) - (base-char-reg) (base-char-stack) + (character-reg) (character-stack) (sap-reg) (sap-stack) (signed-reg) (signed-stack) (unsigned-reg) (unsigned-stack)) @@ -82,7 +82,7 @@ (inst mov y (+ nil-value (static-symbol-offset val)))) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) (move y x)))) (define-move-vop move :move @@ -121,7 +121,7 @@ (load-symbol y val)) (character (inst mov y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag))))) + character-widetag))))) (move y x))) ((control-stack) (if (sc-is x immediate) @@ -136,7 +136,7 @@ fp (tn-offset y))) (character (storew (logior (ash (char-code val) n-widetag-bits) - base-char-widetag) + character-widetag) fp (tn-offset y)))) ;; Lisp stack (etypecase val @@ -147,7 +147,7 @@ fp (- (1+ (tn-offset y))))) (character (storew (logior (ash (char-code val) n-widetag-bits) - base-char-widetag) + character-widetag) fp (- (1+ (tn-offset y)))))))) (if (= (tn-offset fp) esp-offset) ;; C-call diff -urN sbcl/src/compiler/x86/pred.lisp sbcl.char/src/compiler/x86/pred.lisp --- sbcl/src/compiler/x86/pred.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/pred.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -50,7 +50,7 @@ (inst cmp x (+ nil-value (static-symbol-offset val)))) (character (inst cmp x (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) ((sc-is x immediate) ; and y not immediate ;; Swap the order to fit the compare instruction. (let ((val (tn-value x))) @@ -63,7 +63,7 @@ (inst cmp y (+ nil-value (static-symbol-offset val)))) (character (inst cmp y (logior (ash (char-code val) n-widetag-bits) - base-char-widetag)))))) + character-widetag)))))) (t (inst cmp x y))) diff -urN sbcl/src/compiler/x86/vm.lisp sbcl.char/src/compiler/x86/vm.lisp --- sbcl/src/compiler/x86/vm.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/vm.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -182,7 +182,7 @@ ;; the non-descriptor stacks (signed-stack stack) ; (signed-byte 32) (unsigned-stack stack) ; (unsigned-byte 32) - (base-char-stack stack) ; non-descriptor characters. + (character-stack stack) ; non-descriptor characters. (sap-stack stack) ; System area pointers. (single-stack stack) ; single-floats (double-stack stack :element-size 2) ; double-floats. @@ -228,12 +228,12 @@ :alternate-scs (control-stack)) ;; non-descriptor characters - (base-char-reg registers - :locations #.*byte-regs* - :reserve-locations (#.ah-offset #.al-offset) + (character-reg registers + :locations #.*dword-regs* +; :reserve-locations (#.ah-offset #.al-offset) :constant-scs (immediate) :save-p t - :alternate-scs (base-char-stack)) + :alternate-scs (character-stack)) ;; non-descriptor SAPs (arbitrary pointers into address space) (sap-reg registers @@ -322,11 +322,12 @@ (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) (eval-when (:compile-toplevel :load-toplevel :execute) -(defparameter *byte-sc-names* '(base-char-reg byte-reg base-char-stack)) +(defparameter *byte-sc-names* '(byte-reg)) (defparameter *word-sc-names* '(word-reg)) (defparameter *dword-sc-names* '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack - signed-stack unsigned-stack sap-stack single-stack constant)) + signed-stack unsigned-stack sap-stack single-stack + character-reg character-stack constant)) ;;; added by jrd. I guess the right thing to do is to treat floats ;;; as a separate size... ;;; @@ -450,6 +451,10 @@ ;;; The loader uses this to convert alien names to the form they need in ;;; the symbol table (for example, prepending an underscore). (defun extern-alien-name (name) - (declare (type simple-base-string name)) + (declare (type string name)) ;; ELF ports currently don't need any prefix - name) + (typecase name + (simple-base-string name) + (base-string (coerce name 'simple-base-string)) + (t (handler-case (coerce name 'simple-base-string) + (type-error () (error "invalid external alien name: ~S" name)))))) diff -urN sbcl/src/pcl/walk.lisp sbcl.char/src/pcl/walk.lisp --- sbcl/src/pcl/walk.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/pcl/walk.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -553,7 +553,7 @@ ~%ran into STOP while still in REPEAT template"))) ((null repeat-template) (walk-template-handle-repeat-1 - form template (car template) stop-form context env)) + form template (car template) stop-form context env)) (t (recons form (walk-template (car form) (car repeat-template) context env) diff -urN sbcl/src/runtime/backtrace.c sbcl.char/src/runtime/backtrace.c --- sbcl/src/runtime/backtrace.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/backtrace.c 2004-10-27 13:34:22.000000000 +0200 @@ -244,6 +244,7 @@ string = (struct vector *) object; printf("%s, ", (char *) string->data); } else + /* FIXME: broken from (VECTOR NIL) */ printf("(Not simple string??\?), "); } else printf("(Not other pointer??\?), "); diff -urN sbcl/src/runtime/gc-common.c sbcl.char/src/runtime/gc-common.c --- sbcl/src/runtime/gc-common.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/gc-common.c 2004-10-27 13:34:22.000000000 +0200 @@ -779,6 +779,56 @@ } static int +size_character_string(lispobj *where) +{ + struct vector *vector; + int length, nwords; + + /* NOTE: A string contains one more byte of data (a terminating + * '\0' to help when interfacing with C functions) than indicated + * by the length slot. */ + + vector = (struct vector *) where; + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return nwords; +} + +scav_character_string(lispobj *where, lispobj object) +{ + struct vector *vector; + int length, nwords; + + /* NOTE: Strings contain one more byte of data than the length */ + /* slot indicates. */ + + vector = (struct vector *) where; + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return nwords; +} +static lispobj +trans_character_string(lispobj object) +{ + struct vector *vector; + int length, nwords; + + gc_assert(is_lisp_pointer(object)); + + /* NOTE: A string contains one more byte of data (a terminating + * '\0' to help when interfacing with C functions) than indicated + * by the length slot. */ + + vector = (struct vector *) native_pointer(object); + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return copy_large_unboxed_object(object, nwords); +} + +static int size_base_string(lispobj *where) { struct vector *vector; @@ -1550,6 +1600,7 @@ #endif scavtab[SIMPLE_ARRAY_WIDETAG] = scav_boxed; scavtab[SIMPLE_BASE_STRING_WIDETAG] = scav_base_string; + scavtab[SIMPLE_CHARACTER_STRING_WIDETAG] = scav_character_string; scavtab[SIMPLE_BIT_VECTOR_WIDETAG] = scav_vector_bit; scavtab[SIMPLE_ARRAY_NIL_WIDETAG] = scav_vector_nil; scavtab[SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG] = @@ -1625,6 +1676,7 @@ scav_vector_complex_long_float; #endif scavtab[COMPLEX_BASE_STRING_WIDETAG] = scav_boxed; + scavtab[COMPLEX_CHARACTER_STRING_WIDETAG] = scav_boxed; scavtab[COMPLEX_VECTOR_NIL_WIDETAG] = scav_boxed; scavtab[COMPLEX_BIT_VECTOR_WIDETAG] = scav_boxed; scavtab[COMPLEX_VECTOR_WIDETAG] = scav_boxed; @@ -1643,7 +1695,7 @@ #endif scavtab[VALUE_CELL_HEADER_WIDETAG] = scav_boxed; scavtab[SYMBOL_HEADER_WIDETAG] = scav_boxed; - scavtab[BASE_CHAR_WIDETAG] = scav_immediate; + scavtab[CHARACTER_WIDETAG] = scav_immediate; scavtab[SAP_WIDETAG] = scav_unboxed; scavtab[UNBOUND_MARKER_WIDETAG] = scav_immediate; scavtab[INSTANCE_HEADER_WIDETAG] = scav_boxed; @@ -1675,6 +1727,7 @@ #endif transother[SIMPLE_ARRAY_WIDETAG] = trans_boxed; /* but not GENCGC */ transother[SIMPLE_BASE_STRING_WIDETAG] = trans_base_string; + transother[SIMPLE_CHARACTER_STRING_WIDETAG] = trans_character_string; transother[SIMPLE_BIT_VECTOR_WIDETAG] = trans_vector_bit; transother[SIMPLE_VECTOR_WIDETAG] = trans_vector; transother[SIMPLE_ARRAY_NIL_WIDETAG] = trans_vector_nil; @@ -1755,6 +1808,7 @@ trans_vector_complex_long_float; #endif transother[COMPLEX_BASE_STRING_WIDETAG] = trans_boxed; + transother[COMPLEX_CHARACTER_STRING_WIDETAG] = trans_boxed; transother[COMPLEX_BIT_VECTOR_WIDETAG] = trans_boxed; transother[COMPLEX_VECTOR_NIL_WIDETAG] = trans_boxed; transother[COMPLEX_VECTOR_WIDETAG] = trans_boxed; @@ -1766,7 +1820,7 @@ transother[FUNCALLABLE_INSTANCE_HEADER_WIDETAG] = trans_boxed; transother[VALUE_CELL_HEADER_WIDETAG] = trans_boxed; transother[SYMBOL_HEADER_WIDETAG] = trans_boxed; - transother[BASE_CHAR_WIDETAG] = trans_immediate; + transother[CHARACTER_WIDETAG] = trans_immediate; transother[SAP_WIDETAG] = trans_unboxed; transother[UNBOUND_MARKER_WIDETAG] = trans_immediate; transother[WEAK_POINTER_WIDETAG] = trans_weak_pointer; @@ -1805,6 +1859,7 @@ #endif sizetab[SIMPLE_ARRAY_WIDETAG] = size_boxed; sizetab[SIMPLE_BASE_STRING_WIDETAG] = size_base_string; + sizetab[SIMPLE_CHARACTER_STRING_WIDETAG] = size_character_string; sizetab[SIMPLE_BIT_VECTOR_WIDETAG] = size_vector_bit; sizetab[SIMPLE_VECTOR_WIDETAG] = size_vector; sizetab[SIMPLE_ARRAY_NIL_WIDETAG] = size_vector_nil; @@ -1881,6 +1936,7 @@ size_vector_complex_long_float; #endif sizetab[COMPLEX_BASE_STRING_WIDETAG] = size_boxed; + sizetab[COMPLEX_CHARACTER_STRING_WIDETAG] = size_boxed; sizetab[COMPLEX_VECTOR_NIL_WIDETAG] = size_boxed; sizetab[COMPLEX_BIT_VECTOR_WIDETAG] = size_boxed; sizetab[COMPLEX_VECTOR_WIDETAG] = size_boxed; @@ -1895,7 +1951,7 @@ sizetab[FUNCALLABLE_INSTANCE_HEADER_WIDETAG] = size_boxed; sizetab[VALUE_CELL_HEADER_WIDETAG] = size_boxed; sizetab[SYMBOL_HEADER_WIDETAG] = size_boxed; - sizetab[BASE_CHAR_WIDETAG] = size_immediate; + sizetab[CHARACTER_WIDETAG] = size_immediate; sizetab[SAP_WIDETAG] = size_unboxed; sizetab[UNBOUND_MARKER_WIDETAG] = size_immediate; sizetab[WEAK_POINTER_WIDETAG] = size_weak_pointer; diff -urN sbcl/src/runtime/gc-internal.h sbcl.char/src/runtime/gc-internal.h --- sbcl/src/runtime/gc-internal.h 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/gc-internal.h 2004-10-27 13:34:22.000000000 +0200 @@ -94,7 +94,7 @@ /* If thing is an immediate then this is a cons. */ if (is_lisp_pointer(thing) || (fixnump(thing)) - || (widetag_of(thing) == BASE_CHAR_WIDETAG) + || (widetag_of(thing) == CHARACTER_WIDETAG) || (widetag_of(thing) == UNBOUND_MARKER_WIDETAG)) count = 2; else diff -urN sbcl/src/runtime/gencgc.c sbcl.char/src/runtime/gencgc.c --- sbcl/src/runtime/gencgc.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/gencgc.c 2004-10-27 13:34:22.000000000 +0200 @@ -2057,11 +2057,11 @@ /* Is it plausible cons? */ if ((is_lisp_pointer(start_addr[0]) || (fixnump(start_addr[0])) - || (widetag_of(start_addr[0]) == BASE_CHAR_WIDETAG) + || (widetag_of(start_addr[0]) == CHARACTER_WIDETAG) || (widetag_of(start_addr[0]) == UNBOUND_MARKER_WIDETAG)) && (is_lisp_pointer(start_addr[1]) || (fixnump(start_addr[1])) - || (widetag_of(start_addr[1]) == BASE_CHAR_WIDETAG) + || (widetag_of(start_addr[1]) == CHARACTER_WIDETAG) || (widetag_of(start_addr[1]) == UNBOUND_MARKER_WIDETAG))) break; else { @@ -2107,7 +2107,7 @@ } switch (widetag_of(start_addr[0])) { case UNBOUND_MARKER_WIDETAG: - case BASE_CHAR_WIDETAG: + case CHARACTER_WIDETAG: if (gencgc_verbose) FSHOW((stderr, "*Wo3: %x %x %x\n", @@ -2145,6 +2145,7 @@ #endif case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -2160,6 +2161,7 @@ case LONG_FLOAT_WIDETAG: #endif case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: @@ -2248,6 +2250,7 @@ break; case BIGNUM_WIDETAG: case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: @@ -3111,6 +3114,7 @@ case COMPLEX_WIDETAG: case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -3119,7 +3123,7 @@ case FUNCALLABLE_INSTANCE_HEADER_WIDETAG: case VALUE_CELL_HEADER_WIDETAG: case SYMBOL_HEADER_WIDETAG: - case BASE_CHAR_WIDETAG: + case CHARACTER_WIDETAG: case UNBOUND_MARKER_WIDETAG: case INSTANCE_HEADER_WIDETAG: case FDEFN_WIDETAG: @@ -3197,6 +3201,7 @@ case COMPLEX_LONG_FLOAT_WIDETAG: #endif case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: diff -urN sbcl/src/runtime/interr.c sbcl.char/src/runtime/interr.c --- sbcl/src/runtime/interr.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/interr.c 2004-10-27 13:34:22.000000000 +0200 @@ -112,7 +112,7 @@ brief_print(*os_context_register_addr(context, offset)); break; - case sc_BaseCharReg: + case sc_CharacterReg: ch = *os_context_register_addr(context, offset); #ifdef LISP_FEATURE_X86 if (offset&1) diff -urN sbcl/src/runtime/print.c sbcl.char/src/runtime/print.c --- sbcl/src/runtime/print.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/print.c 2004-10-27 13:34:22.000000000 +0200 @@ -218,7 +218,7 @@ type = widetag_of(obj); switch (type) { - case BASE_CHAR_WIDETAG: + case CHARACTER_WIDETAG: c = (obj>>8)&0xff; switch (c) { case '\0': @@ -275,7 +275,7 @@ printf(", unknown type (0x%0x)", type); switch (widetag_of(obj)) { - case BASE_CHAR_WIDETAG: + case CHARACTER_WIDETAG: printf(": "); brief_otherimm(obj); break; @@ -471,191 +471,193 @@ } switch (type) { - case BIGNUM_WIDETAG: - ptr += count; - NEWLINE_OR_RETURN; - printf("0x"); - while (count-- > 0) - printf("%08lx", (unsigned long) *--ptr); - break; - - case RATIO_WIDETAG: - print_slots(ratio_slots, count, ptr); - break; - - case COMPLEX_WIDETAG: - print_slots(complex_slots, count, ptr); - break; - - case SYMBOL_HEADER_WIDETAG: - print_slots(symbol_slots, count, ptr); - break; - - case SINGLE_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%g", ((struct single_float *)native_pointer(obj))->value); - break; - - case DOUBLE_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%g", ((struct double_float *)native_pointer(obj))->value); - break; - + case BIGNUM_WIDETAG: + ptr += count; + NEWLINE_OR_RETURN; + printf("0x"); + while (count-- > 0) + printf("%08lx", (unsigned long) *--ptr); + break; + + case RATIO_WIDETAG: + print_slots(ratio_slots, count, ptr); + break; + + case COMPLEX_WIDETAG: + print_slots(complex_slots, count, ptr); + break; + + case SYMBOL_HEADER_WIDETAG: + print_slots(symbol_slots, count, ptr); + break; + + case SINGLE_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%g", ((struct single_float *)native_pointer(obj))->value); + break; + + case DOUBLE_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%g", ((struct double_float *)native_pointer(obj))->value); + break; + #ifdef LONG_FLOAT_WIDETAG - case LONG_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%Lg", ((struct long_float *)native_pointer(obj))->value); - break; + case LONG_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%Lg", ((struct long_float *)native_pointer(obj))->value); + break; #endif - + #ifdef COMPLEX_SINGLE_FLOAT_WIDETAG - case COMPLEX_SINGLE_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%g", ((struct complex_single_float *)native_pointer(obj))->real); - NEWLINE_OR_RETURN; - printf("%g", ((struct complex_single_float *)native_pointer(obj))->imag); - break; + case COMPLEX_SINGLE_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%g", ((struct complex_single_float *)native_pointer(obj))->real); + NEWLINE_OR_RETURN; + printf("%g", ((struct complex_single_float *)native_pointer(obj))->imag); + break; #endif - + #ifdef COMPLEX_DOUBLE_FLOAT_WIDETAG - case COMPLEX_DOUBLE_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%g", ((struct complex_double_float *)native_pointer(obj))->real); - NEWLINE_OR_RETURN; - printf("%g", ((struct complex_double_float *)native_pointer(obj))->imag); - break; + case COMPLEX_DOUBLE_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%g", ((struct complex_double_float *)native_pointer(obj))->real); + NEWLINE_OR_RETURN; + printf("%g", ((struct complex_double_float *)native_pointer(obj))->imag); + break; #endif - + #ifdef COMPLEX_LONG_FLOAT_WIDETAG - case COMPLEX_LONG_FLOAT_WIDETAG: - NEWLINE_OR_RETURN; - printf("%Lg", ((struct complex_long_float *)native_pointer(obj))->real); - NEWLINE_OR_RETURN; - printf("%Lg", ((struct complex_long_float *)native_pointer(obj))->imag); - break; -#endif - - case SIMPLE_BASE_STRING_WIDETAG: - NEWLINE_OR_RETURN; - cptr = (char *)(ptr+1); - putchar('"'); - while (length-- > 0) - putchar(*cptr++); - putchar('"'); - break; - - case SIMPLE_VECTOR_WIDETAG: - NEWLINE_OR_RETURN; - printf("length = %ld", length); - ptr++; - index = 0; - while (length-- > 0) { - sprintf(buffer, "%d: ", index++); - print_obj(buffer, *ptr++); - } - break; - - case INSTANCE_HEADER_WIDETAG: - NEWLINE_OR_RETURN; - printf("length = %ld", (long) count); - index = 0; - while (count-- > 0) { - sprintf(buffer, "%d: ", index++); - print_obj(buffer, *ptr++); - } - break; - - case SIMPLE_ARRAY_WIDETAG: - case SIMPLE_BIT_VECTOR_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: + case COMPLEX_LONG_FLOAT_WIDETAG: + NEWLINE_OR_RETURN; + printf("%Lg", ((struct complex_long_float *)native_pointer(obj))->real); + NEWLINE_OR_RETURN; + printf("%Lg", ((struct complex_long_float *)native_pointer(obj))->imag); + break; +#endif + + case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: /* FIXME */ + NEWLINE_OR_RETURN; + cptr = (char *)(ptr+1); + putchar('"'); + while (length-- > 0) + putchar(*cptr++); + putchar('"'); + break; + + case SIMPLE_VECTOR_WIDETAG: + NEWLINE_OR_RETURN; + printf("length = %ld", length); + ptr++; + index = 0; + while (length-- > 0) { + sprintf(buffer, "%d: ", index++); + print_obj(buffer, *ptr++); + } + break; + + case INSTANCE_HEADER_WIDETAG: + NEWLINE_OR_RETURN; + printf("length = %ld", (long) count); + index = 0; + while (count-- > 0) { + sprintf(buffer, "%d: ", index++); + print_obj(buffer, *ptr++); + } + break; + + case SIMPLE_ARRAY_WIDETAG: + case SIMPLE_BIT_VECTOR_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: #endif - case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: - case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: #ifdef SIMPLE_ARRAY_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: #endif #ifdef SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: #endif #ifdef SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: #endif #ifdef SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: #endif - case COMPLEX_BASE_STRING_WIDETAG: - case COMPLEX_VECTOR_NIL_WIDETAG: - case COMPLEX_BIT_VECTOR_WIDETAG: - case COMPLEX_VECTOR_WIDETAG: - case COMPLEX_ARRAY_WIDETAG: - break; - - case CODE_HEADER_WIDETAG: - print_slots(code_slots, count-1, ptr); - break; - - case SIMPLE_FUN_HEADER_WIDETAG: - print_slots(fn_slots, 5, ptr); - break; - - case RETURN_PC_HEADER_WIDETAG: - print_obj("code: ", obj - (count * 4)); - break; - - case CLOSURE_HEADER_WIDETAG: - print_slots(closure_slots, count, ptr); - break; - - case FUNCALLABLE_INSTANCE_HEADER_WIDETAG: - print_slots(funcallable_instance_slots, count, ptr); - break; - - case VALUE_CELL_HEADER_WIDETAG: - print_slots(value_cell_slots, 1, ptr); - break; - - case SAP_WIDETAG: - NEWLINE_OR_RETURN; + case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: + case COMPLEX_VECTOR_NIL_WIDETAG: + case COMPLEX_BIT_VECTOR_WIDETAG: + case COMPLEX_VECTOR_WIDETAG: + case COMPLEX_ARRAY_WIDETAG: + break; + + case CODE_HEADER_WIDETAG: + print_slots(code_slots, count-1, ptr); + break; + + case SIMPLE_FUN_HEADER_WIDETAG: + print_slots(fn_slots, 5, ptr); + break; + + case RETURN_PC_HEADER_WIDETAG: + print_obj("code: ", obj - (count * 4)); + break; + + case CLOSURE_HEADER_WIDETAG: + print_slots(closure_slots, count, ptr); + break; + + case FUNCALLABLE_INSTANCE_HEADER_WIDETAG: + print_slots(funcallable_instance_slots, count, ptr); + break; + + case VALUE_CELL_HEADER_WIDETAG: + print_slots(value_cell_slots, 1, ptr); + break; + + case SAP_WIDETAG: + NEWLINE_OR_RETURN; #ifndef alpha - printf("0x%08lx", (unsigned long) *ptr); + printf("0x%08lx", (unsigned long) *ptr); #else - printf("0x%016lx", *(lispobj*)(ptr+1)); + printf("0x%016lx", *(lispobj*)(ptr+1)); #endif - break; - - case WEAK_POINTER_WIDETAG: - print_slots(weak_pointer_slots, 1, ptr); - break; - - case BASE_CHAR_WIDETAG: - case UNBOUND_MARKER_WIDETAG: - NEWLINE_OR_RETURN; - printf("pointer to an immediate?"); - break; - - case FDEFN_WIDETAG: - print_slots(fdefn_slots, count, ptr); - break; - - default: - NEWLINE_OR_RETURN; - printf("Unknown header object?"); - break; + break; + + case WEAK_POINTER_WIDETAG: + print_slots(weak_pointer_slots, 1, ptr); + break; + + case CHARACTER_WIDETAG: + case UNBOUND_MARKER_WIDETAG: + NEWLINE_OR_RETURN; + printf("pointer to an immediate?"); + break; + + case FDEFN_WIDETAG: + print_slots(fdefn_slots, count, ptr); + break; + + default: + NEWLINE_OR_RETURN; + printf("Unknown header object?"); + break; } } } diff -urN sbcl/src/runtime/purify.c sbcl.char/src/runtime/purify.c --- sbcl/src/runtime/purify.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/purify.c 2004-10-27 13:34:22.000000000 +0200 @@ -199,11 +199,11 @@ /* Is it plausible cons? */ if ((is_lisp_pointer(start_addr[0]) || ((start_addr[0] & 3) == 0) /* fixnum */ - || (widetag_of(start_addr[0]) == BASE_CHAR_WIDETAG) + || (widetag_of(start_addr[0]) == CHARACTER_WIDETAG) || (widetag_of(start_addr[0]) == UNBOUND_MARKER_WIDETAG)) && (is_lisp_pointer(start_addr[1]) || ((start_addr[1] & 3) == 0) /* fixnum */ - || (widetag_of(start_addr[1]) == BASE_CHAR_WIDETAG) + || (widetag_of(start_addr[1]) == CHARACTER_WIDETAG) || (widetag_of(start_addr[1]) == UNBOUND_MARKER_WIDETAG))) { break; } else { @@ -247,7 +247,7 @@ } switch (widetag_of(start_addr[0])) { case UNBOUND_MARKER_WIDETAG: - case BASE_CHAR_WIDETAG: + case CHARACTER_WIDETAG: if (pointer_filter_verbose) { fprintf(stderr,"*Wo3: %x %x %x\n", (unsigned int) pointer, (unsigned int) start_addr, *start_addr); @@ -285,6 +285,7 @@ #endif case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -301,6 +302,7 @@ #endif case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: @@ -890,92 +892,96 @@ { switch (widetag_of(header)) { /* FIXME: this needs a reindent */ - case BIGNUM_WIDETAG: - case SINGLE_FLOAT_WIDETAG: - case DOUBLE_FLOAT_WIDETAG: + case BIGNUM_WIDETAG: + case SINGLE_FLOAT_WIDETAG: + case DOUBLE_FLOAT_WIDETAG: #ifdef LONG_FLOAT_WIDETAG - case LONG_FLOAT_WIDETAG: + case LONG_FLOAT_WIDETAG: #endif #ifdef COMPLEX_SINGLE_FLOAT_WIDETAG - case COMPLEX_SINGLE_FLOAT_WIDETAG: + case COMPLEX_SINGLE_FLOAT_WIDETAG: #endif #ifdef COMPLEX_DOUBLE_FLOAT_WIDETAG - case COMPLEX_DOUBLE_FLOAT_WIDETAG: + case COMPLEX_DOUBLE_FLOAT_WIDETAG: #endif #ifdef COMPLEX_LONG_FLOAT_WIDETAG - case COMPLEX_LONG_FLOAT_WIDETAG: + case COMPLEX_LONG_FLOAT_WIDETAG: #endif - case SAP_WIDETAG: - return ptrans_unboxed(thing, header); - - case RATIO_WIDETAG: - case COMPLEX_WIDETAG: - case SIMPLE_ARRAY_WIDETAG: - case COMPLEX_BASE_STRING_WIDETAG: - case COMPLEX_BIT_VECTOR_WIDETAG: - case COMPLEX_VECTOR_NIL_WIDETAG: - case COMPLEX_VECTOR_WIDETAG: - case COMPLEX_ARRAY_WIDETAG: + case SAP_WIDETAG: + return ptrans_unboxed(thing, header); + + case RATIO_WIDETAG: + case COMPLEX_WIDETAG: + case SIMPLE_ARRAY_WIDETAG: + case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: + case COMPLEX_BIT_VECTOR_WIDETAG: + case COMPLEX_VECTOR_NIL_WIDETAG: + case COMPLEX_VECTOR_WIDETAG: + case COMPLEX_ARRAY_WIDETAG: return ptrans_boxed(thing, header, constant); - case VALUE_CELL_HEADER_WIDETAG: - case WEAK_POINTER_WIDETAG: + case VALUE_CELL_HEADER_WIDETAG: + case WEAK_POINTER_WIDETAG: return ptrans_boxed(thing, header, 0); - - case SYMBOL_HEADER_WIDETAG: + + case SYMBOL_HEADER_WIDETAG: return ptrans_boxed(thing, header, 0); - - case SIMPLE_ARRAY_NIL_WIDETAG: + + case SIMPLE_ARRAY_NIL_WIDETAG: return ptrans_vector(thing, 0, 0, 0, constant); - - case SIMPLE_BASE_STRING_WIDETAG: + + case SIMPLE_BASE_STRING_WIDETAG: return ptrans_vector(thing, 8, 1, 0, constant); - - case SIMPLE_BIT_VECTOR_WIDETAG: + + case SIMPLE_CHARACTER_STRING_WIDETAG: + return ptrans_vector(thing, 32, 1, 0, constant); + + case SIMPLE_BIT_VECTOR_WIDETAG: return ptrans_vector(thing, 1, 0, 0, constant); - - case SIMPLE_VECTOR_WIDETAG: + + case SIMPLE_VECTOR_WIDETAG: return ptrans_vector(thing, 32, 0, 1, constant); - - case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: + + case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: return ptrans_vector(thing, 2, 0, 0, constant); - - case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: + + case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: return ptrans_vector(thing, 4, 0, 0, constant); - - case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: + + case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_7_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_7_WIDETAG: #endif return ptrans_vector(thing, 8, 0, 0, constant); - case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_15_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_15_WIDETAG: #endif return ptrans_vector(thing, 16, 0, 0, constant); - - case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: + + case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_29_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_29_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_31_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_31_WIDETAG: #endif return ptrans_vector(thing, 32, 0, 0, constant); - - case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: + + case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: return ptrans_vector(thing, 32, 0, 0, constant); - - case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: + + case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: return ptrans_vector(thing, 64, 0, 0, constant); - + #ifdef SIMPLE_ARRAY_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: #ifdef LISP_FEATURE_X86 return ptrans_vector(thing, 96, 0, 0, constant); #endif @@ -985,17 +991,17 @@ #endif #ifdef SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: return ptrans_vector(thing, 64, 0, 0, constant); #endif #ifdef SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: return ptrans_vector(thing, 128, 0, 0, constant); #endif #ifdef SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: #ifdef LISP_FEATURE_X86 return ptrans_vector(thing, 192, 0, 0, constant); #endif @@ -1004,16 +1010,16 @@ #endif #endif - case CODE_HEADER_WIDETAG: + case CODE_HEADER_WIDETAG: return ptrans_code(thing); - case RETURN_PC_HEADER_WIDETAG: + case RETURN_PC_HEADER_WIDETAG: return ptrans_returnpc(thing, header); - case FDEFN_WIDETAG: + case FDEFN_WIDETAG: return ptrans_fdefn(thing, header); - default: + default: /* Should only come across other pointers to the above stuff. */ gc_abort(); return NIL; @@ -1124,110 +1130,115 @@ /* It's an other immediate. Maybe the header for an unboxed */ /* object. */ switch (widetag_of(thing)) { - case BIGNUM_WIDETAG: - case SINGLE_FLOAT_WIDETAG: - case DOUBLE_FLOAT_WIDETAG: + case BIGNUM_WIDETAG: + case SINGLE_FLOAT_WIDETAG: + case DOUBLE_FLOAT_WIDETAG: #ifdef LONG_FLOAT_WIDETAG - case LONG_FLOAT_WIDETAG: + case LONG_FLOAT_WIDETAG: #endif - case SAP_WIDETAG: + case SAP_WIDETAG: /* It's an unboxed simple object. */ count = HeaderValue(thing)+1; break; - case SIMPLE_VECTOR_WIDETAG: + case SIMPLE_VECTOR_WIDETAG: if (HeaderValue(thing) == subtype_VectorValidHashing) { *addr = (subtype_VectorMustRehash << N_WIDETAG_BITS) | SIMPLE_VECTOR_WIDETAG; } count = 1; break; - - case SIMPLE_ARRAY_NIL_WIDETAG: + + case SIMPLE_ARRAY_NIL_WIDETAG: count = 2; break; - - case SIMPLE_BASE_STRING_WIDETAG: + + case SIMPLE_BASE_STRING_WIDETAG: vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length)+1,8)+2,2); break; - case SIMPLE_BIT_VECTOR_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: + vector = (struct vector *)addr; + count = CEILING(NWORDS(fixnum_value(vector->length)+1,32)+2,2); + break; + + case SIMPLE_BIT_VECTOR_WIDETAG: vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),1)+2,2); break; - case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),2)+2,2); break; - case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),4)+2,2); break; - case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_8_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_7_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_8_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_7_WIDETAG: #endif vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),8)+2,2); break; - - case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: + + case SIMPLE_ARRAY_UNSIGNED_BYTE_16_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_15_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_16_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_15_WIDETAG: #endif vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),16)+2,2); break; - case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_32_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_29_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_30_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_29_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_31_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_32_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_31_WIDETAG: #endif vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),32)+2,2); break; #if N_WORD_BITS == 64 - case SIMPLE_ARRAY_UNSIGNED_BYTE_64_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_64_WIDETAG: #ifdef SIMPLE_ARRAY_SIGNED_BYTE_61_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_61_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_60_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_61_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_60_WIDETAG: #endif #ifdef SIMPLE_ARRAY_SIGNED_BYTE_64_WIDETAG - case SIMPLE_ARRAY_SIGNED_BYTE_64_WIDETAG: - case SIMPLE_ARRAY_UNSIGNED_BYTE_63_WIDETAG: + case SIMPLE_ARRAY_SIGNED_BYTE_64_WIDETAG: + case SIMPLE_ARRAY_UNSIGNED_BYTE_63_WIDETAG: #endif vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),64)+2,2); break; #endif - case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_SINGLE_FLOAT_WIDETAG: vector = (struct vector *)addr; count = CEILING(fixnum_value(vector->length)+2,2); break; - case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_DOUBLE_FLOAT_WIDETAG: #ifdef SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_SINGLE_FLOAT_WIDETAG: #endif vector = (struct vector *)addr; count = fixnum_value(vector->length)*2+2; break; #ifdef SIMPLE_ARRAY_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_LONG_FLOAT_WIDETAG: vector = (struct vector *)addr; #ifdef LISP_FEATURE_X86 count = fixnum_value(vector->length)*3+2; @@ -1239,14 +1250,14 @@ #endif #ifdef SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_DOUBLE_FLOAT_WIDETAG: vector = (struct vector *)addr; count = fixnum_value(vector->length)*4+2; break; #endif #ifdef SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG - case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: + case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: vector = (struct vector *)addr; #ifdef LISP_FEATURE_X86 count = fixnum_value(vector->length)*6+2; @@ -1257,7 +1268,7 @@ break; #endif - case CODE_HEADER_WIDETAG: + case CODE_HEADER_WIDETAG: #ifndef LISP_FEATURE_X86 gc_abort(); /* no code headers in static space */ #else @@ -1265,16 +1276,16 @@ #endif break; - case SIMPLE_FUN_HEADER_WIDETAG: - case RETURN_PC_HEADER_WIDETAG: + case SIMPLE_FUN_HEADER_WIDETAG: + case RETURN_PC_HEADER_WIDETAG: /* We should never hit any of these, 'cause they occur * buried in the middle of code objects. */ gc_abort(); break; #ifdef LISP_FEATURE_X86 - case CLOSURE_HEADER_WIDETAG: - case FUNCALLABLE_INSTANCE_HEADER_WIDETAG: + case CLOSURE_HEADER_WIDETAG: + case FUNCALLABLE_INSTANCE_HEADER_WIDETAG: /* The function self pointer needs special care on the * x86 because it is the real entry point. */ { @@ -1287,20 +1298,20 @@ break; #endif - case WEAK_POINTER_WIDETAG: + case WEAK_POINTER_WIDETAG: /* Weak pointers get preserved during purify, 'cause I * don't feel like figuring out how to break them. */ pscav(addr+1, 2, constant); count = 4; break; - case FDEFN_WIDETAG: + case FDEFN_WIDETAG: /* We have to handle fdefn objects specially, so we * can fix up the raw function address. */ count = pscav_fdefn((struct fdefn *)addr); break; - default: + default: count = 1; break; } diff -urN sbcl/src/runtime/runtime.c sbcl.char/src/runtime/runtime.c --- sbcl/src/runtime/runtime.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/runtime.c 2004-10-27 13:34:22.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #if defined(SVR4) || defined(__linux__) #include @@ -189,6 +190,8 @@ lispobj initial_function; + setlocale(LC_ALL, ""); + /* KLUDGE: os_vm_page_size is set by os_init(), and on some * systems (e.g. Alpha) arch_init() needs need os_vm_page_size, so * it must follow os_init(). -- WHN 2000-01-26 */ diff -urN sbcl/src/runtime/search.c sbcl.char/src/runtime/search.c --- sbcl/src/runtime/search.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/search.c 2004-10-27 13:34:21.000000000 +0200 @@ -47,6 +47,8 @@ if (lowtag_of(symbol->name) == OTHER_POINTER_LOWTAG) { symbol_name = (struct vector *)native_pointer(symbol->name); if (is_valid_lisp_addr((os_vm_address_t)symbol_name) && + /* FIXME: Broken with more than one type of string + (i.e. even broken given (VECTOR NIL) */ widetag_of(symbol_name->header) == SIMPLE_BASE_STRING_WIDETAG && strcmp((char *)symbol_name->data, name) == 0) return 1; diff -urN sbcl/tests/character.pure.lisp sbcl.char/tests/character.pure.lisp --- sbcl/tests/character.pure.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/tests/character.pure.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -54,3 +54,10 @@ (digit-char 4 1) (digit-char 4 37))) (assert (raises-error? (apply (car form) (mapcar 'eval (cdr form))) type-error))) + +(dotimes (i 256) + (let* ((char (code-char i)) + (graphicp (graphic-char-p char)) + (name (char-name char))) + (unless graphicp + (assert name)))) diff -urN sbcl/tests/compiler.test.sh sbcl.char/tests/compiler.test.sh --- sbcl/tests/compiler.test.sh 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/tests/compiler.test.sh 2004-10-27 13:34:22.000000000 +0200 @@ -286,6 +286,12 @@ EOF expect_failed_compile $tmpfilename +# This should be clean +cat > $tmpfilename < #include #include +#include #include "genesis/config.h" @@ -72,6 +73,9 @@ printf("(in-package \"SB!UNIX\")\n\n"); + printf(";;; langinfo\n"); + defconstant("codeset", CODESET); + printf(";;; types, types, types\n"); DEFTYPE("clock-t", clock_t); DEFTYPE("dev-t", dev_t); diff -urN sbcl/tools-for-build/ldso-stubs.lisp sbcl.char/tools-for-build/ldso-stubs.lisp --- sbcl/tools-for-build/ldso-stubs.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/tools-for-build/ldso-stubs.lisp 2004-10-27 13:34:22.000000000 +0200 @@ -198,6 +198,7 @@ "malloc" "memmove" "mkdir" + "nl_langinfo" "open" "opendir" "pipe" diff -urN sbcl/version.lisp-expr sbcl.char/version.lisp-expr --- sbcl/version.lisp-expr 2004-10-27 13:23:55.000000000 +0200 +++ sbcl.char/version.lisp-expr 2004-10-27 13:34:22.000000000 +0200 @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.16.8" +"0.8.16.character_branch"