diff -urNb sbcl/BUGS sbcl.char/BUGS --- sbcl/BUGS 2004-10-29 16:46:49.000000000 +0200 +++ sbcl.char/BUGS 2004-10-29 16:47:30.000000000 +0200 @@ -1153,6 +1153,23 @@ collect `(array ,(sb-vm:saetp-specifier x))))) => NIL, T (when it should be T, T) +308: "Characters without names" + (reported by Bruno Haible sbcl-devel "character names are missing" + 2004-04-19) + (graphic-char-p (code-char 255)) + => NIL + (char-name (code-char 255)) + => NIL + + SBCL is unsure of what to do about characters with codes in the + range 128-255. Currently they are treated as non-graphic, but don't + have names, which is not compliant with the standard. Various fixes + are possible, such as + * giving them names such as NON-ASCII-128; + * reducing CHAR-CODE-LIMIT to 127 (almost certainly unpopular); + * making the characters graphic (makes a certain amount of sense); + * biting the bullet and implementing Unicode (probably quite hard). + 309: "Dubious values for implementation limits" (reported by Bruno Haible sbcl-devel "Incorrect value of multiple-values-limit" 2004-04-19) diff -urNb sbcl/NEWS sbcl.char/NEWS --- sbcl/NEWS 2004-10-29 16:46:49.000000000 +0200 +++ sbcl.char/NEWS 2004-10-29 16:47:30.000000000 +0200 @@ -12,8 +12,6 @@ * minor incompatible change: SB-C::*COMPILER-ERROR-PRINT-FOO* variables are no longer supported: use SB-EXT:*COMPILER-PRINT-VARIABLE-ALIST* instead. - * fixed bug #308: non-graphic characters now all have names, as - required. (reported by Bruno Haible) * bug fix: Cyclic structures and unprintable objects in compiler messages no longer cause errors. (reported by Bruno Haible) * bug fix: READ, READ-PRESERVING-WHITESPACE, READ-DELIMITED-LIST, diff -urNb sbcl/contrib/sb-simple-streams/impl.lisp sbcl.char/contrib/sb-simple-streams/impl.lisp --- sbcl/contrib/sb-simple-streams/impl.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/contrib/sb-simple-streams/impl.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -173,7 +173,7 @@ (index 0) ; current index in current buffer (total 0)) ; total characters (declare (type simple-stream encap) - (type simple-base-string cbuf) + (type simple-string cbuf) (type cons bufs tail) (type sb-int:index index total)) (loop @@ -200,7 +200,7 @@ (do ((list bufs (cdr list))) ((eq list tail)) (let ((buf (car list))) - (declare (type simple-base-string buf)) + (declare (type simple-string buf)) (replace cbuf buf :start1 idx) (incf idx (length buf))))) (return (values (sb-kernel:shrink-vector cbuf total) @@ -210,7 +210,7 @@ (index 0)) (declare (type sb-int:index index)) (dolist (buf bufs) - (declare (type simple-base-string buf)) + (declare (type simple-string buf)) (replace string buf :start1 index) (incf index (length buf))) (return (values string (eq done :eof))))) diff -urNb sbcl/package-data-list.lisp-expr sbcl.char/package-data-list.lisp-expr --- sbcl/package-data-list.lisp-expr 2004-10-29 14:57:26.000000000 +0200 +++ sbcl.char/package-data-list.lisp-expr 2004-10-29 16:47:30.000000000 +0200 @@ -63,6 +63,7 @@ "SAP-ALIEN" "SHORT" "SIGNED" "SLOT" "STRUCT" "UNSIGNED" "UNSIGNED-CHAR" "UNSIGNED-INT" "UNSIGNED-LONG" "UNSIGNED-SHORT" + "UTF8-STRING" "VOID" "WITH-ALIEN")) @@ -910,7 +911,7 @@ ;; various internal defaults "DEFAULT-INIT-CHAR" "*DEFAULT-INIT-CHAR-FORM*" - "*LOAD-SOURCE-DEFAULT-TYPE*" + "*LOAD-SOURCE-DEFAULT-TYPE*" "BASE-CHAR-CODE-LIMIT" ;; hash caches "DEFINE-HASH-CACHE" @@ -1122,13 +1123,14 @@ "ARRAY-TYPE" "ARRAY-TYPE-COMPLEXP" "ARRAY-TYPE-DIMENSIONS" "ARRAY-TYPE-ELEMENT-TYPE" "ARRAY-TYPE-P" "ARRAY-TYPE-SPECIALIZED-ELEMENT-TYPE" - "ASH-INDEX" "ASSERT-ERROR" "BASE-STRING-P" + "ASH-INDEX" "ASSERT-ERROR" "BASE-CHAR-P" "BASE-STRING-P" "BINDING-STACK-POINTER-SAP" "BIT-BASH-COPY" "BIT-INDEX" "BOGUS-ARG-TO-VALUES-LIST-ERROR" "BOOLE-CODE" "BOUNDING-INDICES-BAD-ERROR" "BYTE-SPECIFIER" "%BYTE-BLT" "CALLABLE" "CASE-BODY-ERROR" "CHARACTER-SET" "CHARACTER-SET-TYPE" "CHARACTER-SET-TYPE-PAIRS" + "CHARACTER-STRING-P" "CHARPOS" "CHECK-FOR-CIRCULARITY" "CHECK-TYPE-ERROR" "CLOSED-FLAME" "CODE-COMPONENT" "CODE-COMPONENT-P" "CODE-DEBUG-INFO" @@ -1232,7 +1234,8 @@ "NUMERIC-TYPE-HIGH" "NUMERIC-TYPE-LOW" "NUMERIC-TYPE-P" "OBJECT-NOT-ARRAY-ERROR" "OBJECT-NOT-CHARACTER-ERROR" "OBJECT-NOT-BASE-STRING-ERROR" "OBJECT-NOT-BIGNUM-ERROR" - "OBJECT-NOT-BIT-VECTOR-ERROR" "OBJECT-NOT-COMPLEX-ERROR" + "OBJECT-NOT-BIT-VECTOR-ERROR" + "OBJECT-NOT-CHARACTER-STRING-ERROR" "OBJECT-NOT-COMPLEX-ERROR" "OBJECT-NOT-COMPLEX-FLOAT-ERROR" "OBJECT-NOT-COMPLEX-SINGLE-FLOAT-ERROR" #!+long-float "OBJECT-NOT-COMPLEX-LONG-FLOAT-ERROR" @@ -1298,6 +1301,7 @@ "OBJECT-NOT-SIMPLE-ARRAY-SIGNED-BYTE-8-ERROR" "OBJECT-NOT-SIMPLE-BIT-VECTOR-ERROR" "OBJECT-NOT-SIMPLE-BASE-STRING-ERROR" + "OBJECT-NOT-SIMPLE-CHARACTER-STRING-ERROR" "OBJECT-NOT-SIMPLE-STRING-ERROR" "OBJECT-NOT-SIMPLE-VECTOR-ERROR" "OBJECT-NOT-SINGLE-FLOAT-ERROR" "OBJECT-NOT-STRING-ERROR" @@ -1306,7 +1310,9 @@ "OBJECT-NOT-UNSIGNED-BYTE-32-ERROR" ;; FIXME: 32/64-bit issues "OBJECT-NOT-UNSIGNED-BYTE-64-ERROR" - "OBJECT-NOT-VECTOR-ERROR" "OBJECT-NOT-WEAK-POINTER-ERROR" + "OBJECT-NOT-VECTOR-ERROR" + "OBJECT-NOT-VECTOR-NIL-ERROR" + "OBJECT-NOT-WEAK-POINTER-ERROR" "ODD-KEY-ARGS-ERROR" "OUTPUT-OBJECT" "OUTPUT-UGLY-OBJECT" "PACKAGE-DESIGNATOR" "PACKAGE-DOC-STRING" "PACKAGE-HASHTABLE-SIZE" "PACKAGE-HASHTABLE-FREE" @@ -1353,6 +1359,7 @@ "SIMPLE-ARRAY-SIGNED-BYTE-61-P" "SIMPLE-ARRAY-SIGNED-BYTE-64-P" "SIMPLE-ARRAY-SIGNED-BYTE-8-P" "SIMPLE-BASE-STRING-P" + "SIMPLE-CHARACTER-STRING-P" "SIMPLE-PACKAGE-ERROR" "SIMPLE-UNBOXED-ARRAY" "SINGLE-FLOAT-BITS" "SINGLE-FLOAT-EXPONENT" "SINGLE-FLOAT-INT-EXPONENT" "SINGLE-FLOAT-SIGNIFICAND" @@ -1997,7 +2004,8 @@ "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT" "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG" "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER" - "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" "COMPLEX-WIDETAG" + "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG" + "COMPLEX-CHARACTER-STRING-WIDETAG" "COMPLEX-WIDETAG" "COMPLEX-VECTOR-NIL-WIDETAG" "COMPLEX-VECTOR-WIDETAG" "CONS-CAR-SLOT" "CONS-CDR-SLOT" "CONS-SIZE" "CONSTANT-SC-NUMBER" @@ -2137,6 +2145,7 @@ "SIMPLE-ARRAY-SIGNED-BYTE-8-WIDETAG" "SIMPLE-BIT-VECTOR-WIDETAG" "SIMPLE-BASE-STRING-WIDETAG" + "SIMPLE-CHARACTER-STRING-WIDETAG" "SIMPLE-VECTOR-WIDETAG" "SINGLE-FLOAT-BIAS" "SINGLE-FLOAT-DIGITS" "SINGLE-FLOAT-EXPONENT-BYTE" "SINGLE-FLOAT-HIDDEN-BIT" "SINGLE-FLOAT-NORMAL-EXPONENT-MAX" diff -urNb sbcl/src/code/array.lisp sbcl.char/src/code/array.lisp --- sbcl/src/code/array.lisp 2004-10-28 22:28:17.000000000 +0200 +++ sbcl.char/src/code/array.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -91,8 +91,10 @@ ;; and for all in any reasonable user programs.) ((t) (values #.sb!vm:simple-vector-widetag #.sb!vm:n-word-bits)) - ((base-char standard-char character) + ((base-char standard-char) (values #.sb!vm:simple-base-string-widetag #.sb!vm:n-byte-bits)) + ((character) + (values #.sb!vm:simple-character-string-widetag #.sb!vm:n-word-bits)) ((bit) (values #.sb!vm:simple-bit-vector-widetag 1)) ;; OK, we have to wade into SUBTYPEPing after all. @@ -110,8 +112,10 @@ ;; Pick off some easy common cases. ((t) #.sb!vm:complex-vector-widetag) - ((base-char character) + ((base-char) #.sb!vm:complex-base-string-widetag) + ((character) + #.sb!vm:complex-character-string-widetag) ((nil) #.sb!vm:complex-vector-nil-widetag) ((bit) @@ -120,7 +124,8 @@ (t (pick-vector-type type (nil #.sb!vm:complex-vector-nil-widetag) - (character #.sb!vm:complex-base-string-widetag) + (base-char #.sb!vm:complex-base-string-widetag) + (character #.sb!vm:complex-character-string-widetag) (bit #.sb!vm:complex-bit-vector-widetag) (t #.sb!vm:complex-vector-widetag))))) @@ -148,7 +153,8 @@ (array (allocate-vector type length - (ceiling (* (if (= type sb!vm:simple-base-string-widetag) + (ceiling (* (if (or (= type sb!vm:simple-base-string-widetag) + (= type sb!vm:simple-character-string-widetag)) (1+ length) length) n-bits) @@ -854,7 +860,8 @@ ,@(map 'list (lambda (saetp) `((simple-array ,(sb!vm:saetp-specifier saetp) (*)) - ,(if (eq (sb!vm:saetp-specifier saetp) 'character) + ,(if (or (eq (sb!vm:saetp-specifier saetp) 'character) + (eq (sb!vm:saetp-specifier saetp) 'base-char)) *default-init-char-form* (sb!vm:saetp-initial-element-default saetp)))) (remove-if-not diff -urNb sbcl/src/code/char.lisp sbcl.char/src/code/char.lisp --- sbcl/src/code/char.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/char.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -12,6 +12,8 @@ (in-package "SB!IMPL") -(def!constant sb!xc:char-code-limit 256 +(def!constant sb!xc:char-code-limit #x110000 #!+sb-doc "the upper exclusive bound on values produced by CHAR-CODE") + +(def!constant base-char-code-limit 128) diff -urNb sbcl/src/code/class.lisp sbcl.char/src/code/class.lisp --- sbcl/src/code/class.lisp 2004-10-28 22:28:32.000000000 +0200 +++ sbcl.char/src/code/class.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -939,6 +939,11 @@ :codes (#.sb!vm:character-widetag) :translation (character-set) :prototype-form (code-char 42)) + #+nil + (base-char :enumerable t + :inherits (character) + :codes (#.sb!vm:base-char-widetag) + :prototype-form (code-char 42)) (symbol :codes (#.sb!vm:symbol-header-widetag) :prototype-form '#:mu) @@ -1243,6 +1248,19 @@ :inherits (base-string simple-string string vector simple-array array sequence) :prototype-form (make-array 0 :element-type 'base-char)) + (character-string + :translation (vector character) + :codes (#.sb!vm:complex-character-string-widetag) + :direct-superclasses (string) + :inherits (string vector array sequence) + :prototype-form (make-array 0 :element-type 'character :fill-pointer t)) + (simple-character-string + :translation (simple-array character (*)) + :codes (#.sb!vm:simple-character-string-widetag) + :direct-superclasses (character-string simple-string) + :inherits (character-string simple-string string vector simple-array + array sequence) + :prototype-form (make-array 0 :element-type 'character)) (list :translation (or cons (member nil)) :inherits (sequence)) diff -urNb sbcl/src/code/cold-init.lisp sbcl.char/src/code/cold-init.lisp --- sbcl/src/code/cold-init.lisp 2004-10-29 16:46:52.000000000 +0200 +++ sbcl.char/src/code/cold-init.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -315,7 +315,7 @@ (defun hexstr (thing) (/noshow0 "entering HEXSTR") (let ((addr (get-lisp-obj-address thing)) - (str (make-string 10))) + (str (make-string 10 :element-type 'base-char))) (/noshow0 "ADDR and STR calculated") (setf (char str 0) #\0 (char str 1) #\x) diff -urNb sbcl/src/code/cross-condition.lisp sbcl.char/src/code/cross-condition.lisp --- sbcl/src/code/cross-condition.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/cross-condition.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -31,6 +31,9 @@ (define-condition reference-condition () ((references :initarg :references :reader reference-condition-references))) +;;; KLUDGE +(define-condition type-warning (simple-warning) ()) + (define-condition bug (simple-error) () (:report diff -urNb sbcl/src/code/cross-type.lisp sbcl.char/src/code/cross-type.lisp --- sbcl/src/code/cross-type.lisp 2004-10-28 22:28:32.000000000 +0200 +++ sbcl.char/src/code/cross-type.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -82,6 +82,9 @@ 'fixnum) (t 'integer))) + ((subtypep raw-result 'simple-string) + `(simple-base-string ,(length object))) + ((subtypep raw-result 'string) 'base-string) ((some (lambda (type) (subtypep raw-result type)) '(array character list symbol)) raw-result) @@ -360,6 +363,11 @@ (make-member-type :members (list x))) (number (ctype-of-number x)) + (string + (make-array-type :dimensions (array-dimensions x) + :complexp (not (typep x 'simple-array)) + :element-type (specifier-type 'base-char) + :specialized-element-type (specifier-type 'base-char))) (array (let ((etype (specifier-type (array-element-type x)))) (make-array-type :dimensions (array-dimensions x) @@ -369,9 +377,7 @@ (cons (specifier-type 'cons)) (character (cond ((typep x 'standard-char) - ;; (Note that SBCL doesn't distinguish between BASE-CHAR and - ;; CHARACTER.) - (specifier-type 'base-char)) + (specifier-type 'character)) ((not (characterp x)) nil) (t diff -urNb sbcl/src/code/deftypes-for-target.lisp sbcl.char/src/code/deftypes-for-target.lisp --- sbcl/src/code/deftypes-for-target.lisp 2004-10-28 22:28:33.000000000 +0200 +++ sbcl.char/src/code/deftypes-for-target.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -60,7 +60,8 @@ (sb!xc:deftype atom () '(not cons)) -(sb!xc:deftype base-char () 'character) +(sb!xc:deftype base-char () + '(character-set ((0 . #.(1- base-char-code-limit))))) (sb!xc:deftype extended-char () #!+sb-doc diff -urNb sbcl/src/code/early-format.lisp sbcl.char/src/code/early-format.lisp --- sbcl/src/code/early-format.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/early-format.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -18,9 +18,9 @@ #-sb-xc-host (code-char tab-char-code))) (defvar *format-directive-expanders* - (make-array char-code-limit :initial-element nil)) + (make-array base-char-code-limit :initial-element nil)) (defvar *format-directive-interpreters* - (make-array char-code-limit :initial-element nil)) + (make-array base-char-code-limit :initial-element nil)) (defvar *default-format-error-control-string* nil) (defvar *default-format-error-offset* nil) diff -urNb sbcl/src/code/fd-stream.lisp sbcl.char/src/code/fd-stream.lisp --- sbcl/src/code/fd-stream.lisp 2004-10-29 14:57:35.000000000 +0200 +++ sbcl.char/src/code/fd-stream.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -357,6 +357,8 @@ (fd-stream-obuf-tail stream)) byte)) + + ;;; Do the actual output. If there is space to buffer the string, ;;; buffer it. If the string would normally fit in the buffer, but ;;; doesn't because of other stuff in the buffer, flush the old noise @@ -1112,6 +1114,37 @@ (setf (sap-ref-8 sap tail) bits) (code-char byte)) +(let ((latin-9-table (let ((table (make-string 256))) + (do ((i 0 (1+ i))) + ((= i 256)) + (setf (aref table i) (code-char i))) + (setf (aref table #xa4) (code-char #x20ac)) + (setf (aref table #xa6) (code-char #x0160)) + (setf (aref table #xa8) (code-char #x0161)) + (setf (aref table #xb4) (code-char #x017d)) + (setf (aref table #xb8) (code-char #x017e)) + (setf (aref table #xbc) (code-char #x0152)) + (setf (aref table #xbd) (code-char #x0153)) + (setf (aref table #xbe) (code-char #x0178)) + table)) + (latin-9-reverse-1 (make-array 16 + :element-type '(unsigned-byte 21) + :initial-contents '(#x0160 #x0161 #x0152 #x0153 0 0 0 0 #x0178 0 0 0 #x20ac #x017d #x017e 0))) + (latin-9-reverse-2 (make-array 16 + :element-type '(unsigned-byte 8) + :initial-contents '(#xa6 #xa8 #xbc #xbd 0 0 0 0 #xbe 0 0 0 #xa4 #xb4 #xb8 0)))) + (define-external-format (:latin-9 :latin9 :iso-8859-15) + 1 + (setf (sap-ref-8 sap tail) + (if (< bits 256) + (if (= bits (char-code (aref latin-9-table bits))) + bits + (error "cannot encode ~A in latin-9" bits)) + (if (= (aref latin-9-reverse-1 (logand bits 15)) bits) + (aref latin-9-reverse-2 (logand bits 15)) + (error "cannot encode ~A in latin-9" bits)))) + (aref latin-9-table byte))) + (define-external-format/variable-width (:utf-8 :utf8) (let ((bits (char-code byte))) (cond ((< bits #x80) 1) @@ -1811,7 +1844,8 @@ (make-fd-stream 1 :name "standard output" :output t :buffering :line)) (setf *stderr* (make-fd-stream 2 :name "standard error" :output t :buffering :line)) - (let ((tty (sb!unix:unix-open "/dev/tty" sb!unix:o_rdwr #o666))) + (let* ((ttyname #.(coerce "/dev/tty" 'simple-base-string)) + (tty (sb!unix:unix-open ttyname sb!unix:o_rdwr #o666))) (if tty (setf *tty* (make-fd-stream tty diff -urNb sbcl/src/code/filesys.lisp sbcl.char/src/code/filesys.lisp --- sbcl/src/code/filesys.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/filesys.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -192,8 +192,9 @@ (values absolute (pieces))))) (defun parse-unix-namestring (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end)) + (setf namestr (coerce namestr 'simple-base-string)) (multiple-value-bind (absolute pieces) (split-at-slashes namestr start end) (multiple-value-bind (name type version) (let* ((tail (car (last pieces))) @@ -296,7 +297,7 @@ (t (error "invalid pattern piece: ~S" piece)))))) (apply #'concatenate - 'simple-string + 'simple-base-string (strings)))))) (defun unparse-unix-directory-list (directory) @@ -322,7 +323,7 @@ (pieces "/")) (t (error "invalid directory component: ~S" dir))))) - (apply #'concatenate 'simple-string (pieces)))) + (apply #'concatenate 'simple-base-string (pieces)))) (defun unparse-unix-directory (pathname) (declare (type pathname pathname)) @@ -350,18 +351,18 @@ (when type-supplied (unless name (error "cannot specify the type without a file: ~S" pathname)) - (when (typep type 'simple-base-string) + (when (typep type 'simple-string) (when (position #\. type) (error "type component can't have a #\. inside: ~S" pathname))) (strings ".") (strings (unparse-unix-piece type)))) - (apply #'concatenate 'simple-string (strings)))) + (apply #'concatenate 'simple-base-string (strings)))) (/show0 "filesys.lisp 406") (defun unparse-unix-namestring (pathname) (declare (type pathname pathname)) - (concatenate 'simple-string + (concatenate 'simple-base-string (unparse-unix-directory pathname) (unparse-unix-file pathname))) @@ -445,6 +446,7 @@ (follow-links t)) &body body) `(block nil + (/show0 "about to call %ENUMERATE-MATCHES") (%enumerate-matches (pathname ,pathname) ,verify-existence ,follow-links @@ -455,7 +457,7 @@ ;;; Call FUNCTION on matches. (defun %enumerate-matches (pathname verify-existence follow-links function) - (/noshow0 "entering %ENUMERATE-MATCHES") + (/show0 "entering %ENUMERATE-MATCHES") (when (pathname-type pathname) (unless (pathname-name pathname) (error "cannot supply a type without a name:~% ~S" pathname))) @@ -463,16 +465,16 @@ (member (pathname-type pathname) '(nil :unspecific))) (error "cannot supply a version without a type:~% ~S" pathname)) (let ((directory (pathname-directory pathname))) - (/noshow0 "computed DIRECTORY") + (/show0 "computed DIRECTORY") (if directory (ecase (first directory) (:absolute - (/noshow0 "absolute directory") + (/show0 "absolute directory") (%enumerate-directories "/" (rest directory) pathname verify-existence follow-links nil function)) (:relative - (/noshow0 "relative directory") + (/show0 "relative directory") (%enumerate-directories "" (rest directory) pathname verify-existence follow-links nil function))) @@ -576,16 +578,17 @@ ;;; Call FUNCTION on files. (defun %enumerate-files (directory pathname verify-existence function) (declare (simple-string directory)) - (/noshow0 "entering %ENUMERATE-FILES") + (/show0 "entering %ENUMERATE-FILES") (let ((name (%pathname-name pathname)) (type (%pathname-type pathname)) (version (%pathname-version pathname))) - (/noshow0 "computed NAME, TYPE, and VERSION") + (/show0 "computed NAME, TYPE, and VERSION") (cond ((member name '(nil :unspecific)) (/noshow0 "UNSPECIFIC, more or less") + (let ((directory (coerce directory 'base-string))) (when (or (not verify-existence) (sb!unix:unix-file-kind directory)) - (funcall function directory))) + (funcall function directory)))) ((or (pattern-p name) (pattern-p type) (eq name :wild) @@ -613,20 +616,20 @@ directory complete-filename)))))) (t - (/noshow0 "default case") + (/show0 "default case") (let ((file (concatenate 'base-string directory name))) - (/noshow "computed basic FILE") + (/show0 "computed basic FILE") (unless (or (null type) (eq type :unspecific)) - (/noshow0 "tweaking FILE for more-or-less-:UNSPECIFIC case") + (/show0 "tweaking FILE for more-or-less-:UNSPECIFIC case") (setf file (concatenate 'base-string file "." type))) (unless (member version '(nil :newest :wild :unspecific)) - (/noshow0 "tweaking FILE for more-or-less-:WILD case") + (/show0 "tweaking FILE for more-or-less-:WILD case") (setf file (concatenate 'base-string file "." (quick-integer-to-string version)))) - (/noshow0 "finished possibly tweaking FILE") + (/show0 "finished possibly tweaking FILE") (when (or (not verify-existence) (sb!unix:unix-file-kind file t)) - (/noshow0 "calling FUNCTION on FILE") + (/show0 "calling FUNCTION on FILE") (funcall function file))))))) (/noshow0 "filesys.lisp 603") @@ -679,6 +682,7 @@ ;;; Convert PATHNAME into a string that can be used with UNIX system ;;; calls, or return NIL if no match is found. Wild-cards are expanded. (defun unix-namestring (pathname-spec &optional (for-input t)) + (/show0 "in UNIX-NAMESTRING") (let* ((namestring (physicalize-pathname (merge-pathnames pathname-spec))) (matches nil)) ; an accumulator for actual matches (when (wild-pathname-p namestring) @@ -716,12 +720,16 @@ #!+sb-doc "Return a pathname which is the truename of the file if it exists, or NIL otherwise. An error of type FILE-ERROR is signaled if pathname is wild." + (/show0 "in PROBE-FILE") (let* ((defaulted-pathname (merge-pathnames pathname (sane-default-pathname-defaults))) (namestring (unix-namestring defaulted-pathname t))) + (/show0 "got NAMESTRING") (when (and namestring (sb!unix:unix-file-kind namestring t)) + (/show0 "got FILE-KIND") (let ((trueishname (sb!unix:unix-resolve-links namestring))) + (/show0 "resolved links") (when trueishname (let* ((*ignore-wildcards* t) (name (sb!unix:unix-simplify-pathname trueishname))) @@ -1040,7 +1048,7 @@ :device (pathname-device pathname) :directory (subseq dir 0 i)))) (unless (probe-file newpath) - (let ((namestring (namestring newpath))) + (let ((namestring (coerce (namestring newpath) 'base-string))) (when verbose (format *standard-output* "~&creating directory: ~A~%" diff -urNb sbcl/src/code/fop.lisp sbcl.char/src/code/fop.lisp --- sbcl/src/code/fop.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/fop.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -71,11 +71,12 @@ ;;; of like READ-SEQUENCE specialized for files of (UNSIGNED-BYTE 8), ;;; with an automatic conversion from (UNSIGNED-BYTE 8) into CHARACTER ;;; for each element read -(declaim (ftype (function (stream simple-string &optional index) (values)) read-string-as-bytes)) +(declaim (ftype (function (stream simple-string &optional index) (values)) + read-string-as-bytes read-string-as-words)) (defun read-string-as-bytes (stream string &optional (length (length string))) (dotimes (i length) (setf (aref string i) - (code-char (read-byte stream)))) + (sb!xc:code-char (read-byte stream)))) ;; FIXME: The classic CMU CL code to do this was ;; (READ-N-BYTES FILE STRING START END). ;; It was changed for SBCL because we needed a portable version for @@ -83,6 +84,16 @@ ;; significantly better than the portable version here. If it is, then use ;; it as an alternate definition, protected with #-SB-XC-HOST. (values)) +(defun read-string-as-words (stream string &optional (length (length string))) + #+sb-xc-host (bug "READ-STRING-AS-WORDS called") + (dotimes (i length) + (setf (aref string i) + (sb!xc:code-char (logior + (read-byte stream) + (ash (read-byte stream) 8) + (ash (read-byte stream) 16) + (ash (read-byte stream) 24))))) + (values)) ;;;; miscellaneous fops @@ -123,11 +134,8 @@ #-sb-xc-host (%primitive sb!c:make-other-immediate-type 0 sb!vm:unbound-marker-widetag)) -;;; CMU CL had FOP-CHARACTER as fop 68, but it's not needed in current -;;; SBCL as we have no extended characters, only 1-byte characters. -;;; (Ditto for CMU CL, actually: FOP-CHARACTER was speculative generality.) -(define-fop (fop-short-character 69) - (code-char (read-byte-arg))) +(define-cloned-fops (fop-character 68) (fop-short-character 69) + (code-char (clone-arg))) (define-cloned-fops (fop-struct 48) (fop-small-struct 49) (let* ((size (clone-arg)) @@ -183,9 +191,14 @@ (make-string (* ,n-size 2)))) (done-with-fast-read-byte) (let ((,n-buffer *fasl-symbol-buffer*)) + #+sb-xc-host (read-string-as-bytes *fasl-input-stream* ,n-buffer ,n-size) + #-sb-xc-host + (read-string-as-words *fasl-input-stream* + ,n-buffer + ,n-size) (push-fop-table (without-package-locks (intern* ,n-buffer ,n-size @@ -232,7 +245,7 @@ (fop-uninterned-small-symbol-save 13) (let* ((arg (clone-arg)) (res (make-string arg))) - (read-string-as-bytes *fasl-input-stream* res) + (read-string-as-words *fasl-input-stream* res) (push-fop-table (make-symbol res)))) (define-fop (fop-package 14) @@ -344,12 +357,23 @@ ;;;; fops for loading arrays -(define-cloned-fops (fop-string 37) (fop-small-string 38) +(define-cloned-fops (fop-base-string 37) (fop-small-base-string 38) (let* ((arg (clone-arg)) - (res (make-string arg))) + (res (make-string arg :element-type 'base-char))) (read-string-as-bytes *fasl-input-stream* res) res)) +#+sb-xc-host +(define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (bug "CHARACTER-STRING FOP encountered")) + +#-sb-xc-host +(define-cloned-fops (fop-character-string 161) (fop-small-character-string 162) + (let* ((arg (clone-arg)) + (res (make-string arg))) + (read-string-as-words *fasl-input-stream* res) + res)) + (define-cloned-fops (fop-vector 39) (fop-small-vector 40) (let* ((size (clone-arg)) (res (make-array size))) @@ -639,7 +663,7 @@ (let* ((kind (pop-stack)) (code-object (pop-stack)) (len (read-byte-arg)) - (sym (make-string len))) + (sym (make-string len :element-type 'base-char))) (read-n-bytes *fasl-input-stream* sym 0 len) (sb!vm:fixup-code-object code-object (read-word-arg) diff -urNb sbcl/src/code/host-c-call.lisp sbcl.char/src/code/host-c-call.lisp --- sbcl/src/code/host-c-call.lisp 2004-10-29 14:57:35.000000000 +0200 +++ sbcl.char/src/code/host-c-call.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -14,9 +14,8 @@ (define-alien-type-class (c-string :include pointer :include-args (to))) (define-alien-type-translator c-string () - (make-alien-c-string-type :to - (parse-alien-type 'char - (sb!kernel::make-null-lexenv)))) + (make-alien-c-string-type + :to (parse-alien-type 'char (sb!kernel:make-null-lexenv)))) (define-alien-type-method (c-string :unparse) (type) (declare (ignore type)) @@ -24,7 +23,7 @@ (define-alien-type-method (c-string :lisp-rep) (type) (declare (ignore type)) - '(or simple-base-string null (alien (* char)))) + '(or simple-string null (alien (* char)))) (define-alien-type-method (c-string :naturalize-gen) (type alien) (declare (ignore type)) @@ -37,7 +36,8 @@ `(etypecase ,value (null (int-sap 0)) ((alien (* char)) (alien-sap ,value)) - (simple-base-string (vector-sap ,value)))) + (simple-base-string (vector-sap ,value)) + (simple-string (vector-sap (coerce ,value 'simple-base-string))))) (/show0 "host-c-call.lisp 42") diff -urNb sbcl/src/code/interr.lisp sbcl.char/src/code/interr.lisp --- sbcl/src/code/interr.lisp 2004-10-28 22:28:34.000000000 +0200 +++ sbcl.char/src/code/interr.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -123,6 +123,16 @@ :datum object :expected-type 'base-string)) +(deferr object-not-vector-nil-error (object) + (error 'type-error + :datum object + :expected-type '(vector nil))) + +(deferr object-not-character-string-error (object) + (error 'type-error + :datum object + :expected-type '(vector character))) + (deferr object-not-bit-vector-error (object) (error 'type-error :datum object diff -urNb sbcl/src/code/late-format.lisp sbcl.char/src/code/late-format.lisp --- sbcl/src/code/late-format.lisp 2004-10-28 22:28:35.000000000 +0200 +++ sbcl.char/src/code/late-format.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -39,7 +39,7 @@ (string (missing-arg) :type simple-string) (start (missing-arg) :type (and unsigned-byte fixnum)) (end (missing-arg) :type (and unsigned-byte fixnum)) - (character (missing-arg) :type base-char) + (character (missing-arg) :type character) (colonp nil :type (member t nil)) (atsignp nil :type (member t nil)) (params nil :type list)) @@ -270,8 +270,11 @@ (etypecase directive (format-directive (let ((expander - (aref *format-directive-expanders* - (char-code (format-directive-character directive)))) + (let ((char (format-directive-character directive))) + (typecase char + (base-char + (aref *format-directive-expanders* (char-code char))) + (character nil)))) (*default-format-error-offset* (1- (format-directive-end directive)))) (declare (type (or null function) expander)) diff -urNb sbcl/src/code/late-type.lisp sbcl.char/src/code/late-type.lisp --- sbcl/src/code/late-type.lisp 2004-10-28 22:28:35.000000000 +0200 +++ sbcl.char/src/code/late-type.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -2096,22 +2096,22 @@ (if (eq (car dims) '*) (case eltype (bit 'bit-vector) - ((base-char character) 'base-string) + ((base-char) 'base-string) (* 'vector) (t `(vector ,eltype))) (case eltype (bit `(bit-vector ,(car dims))) - ((base-char character) `(base-string ,(car dims))) + ((base-char) `(base-string ,(car dims))) (t `(vector ,eltype ,(car dims))))) (if (eq (car dims) '*) (case eltype (bit 'simple-bit-vector) - ((base-char character) 'simple-base-string) + ((base-char) 'simple-base-string) ((t) 'simple-vector) (t `(simple-array ,eltype (*)))) (case eltype (bit `(simple-bit-vector ,(car dims))) - ((base-char character) `(simple-base-string ,(car dims))) + ((base-char) `(simple-base-string ,(car dims))) ((t) `(simple-vector ,(car dims))) (t `(simple-array ,eltype ,dims)))))) (t diff -urNb sbcl/src/code/pprint.lisp sbcl.char/src/code/pprint.lisp --- sbcl/src/code/pprint.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/pprint.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -119,7 +119,7 @@ (defun pretty-out (stream char) (declare (type pretty-stream stream) - (type base-char char)) + (type character char)) (cond ((char= char #\newline) (enqueue-newline stream :literal)) (t diff -urNb sbcl/src/code/pred.lisp sbcl.char/src/code/pred.lisp --- sbcl/src/code/pred.lisp 2004-10-28 22:28:35.000000000 +0200 +++ sbcl.char/src/code/pred.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -128,6 +128,8 @@ '(integer #.(1+ sb!xc:most-positive-fixnum)) 'bignum)) (standard-char 'standard-char) + (base-char 'base-char) + (extended-char 'extended-char) ((member t) 'boolean) (keyword 'keyword) ((or array complex) (type-specifier (ctype-of object))) diff -urNb sbcl/src/code/primordial-extensions.lisp sbcl.char/src/code/primordial-extensions.lisp --- sbcl/src/code/primordial-extensions.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/primordial-extensions.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -167,30 +167,15 @@ ;;; producing a symbol in the current package. (eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) (defun symbolicate (&rest things) - (let ((name (case (length things) - ;; Why isn't this just the value in the T branch? - ;; Well, this is called early in cold-init, before - ;; the type system is set up; however, now that we - ;; check for bad lengths, the type system is needed - ;; for calls to CONCATENATE. So we need to make sure - ;; that the calls are transformed away: - (1 (concatenate 'string - (the simple-base-string - (string (car things))))) - (2 (concatenate 'string - (the simple-base-string - (string (car things))) - (the simple-base-string - (string (cadr things))))) - (3 (concatenate 'string - (the simple-base-string - (string (car things))) - (the simple-base-string - (string (cadr things))) - (the simple-base-string - (string (caddr things))))) - (t (apply #'concatenate 'string (mapcar #'string things)))))) - (values (intern name))))) + (let* ((length (reduce #'+ things + :key (lambda (x) (length (string x))))) + (name (make-array length :element-type 'character))) + (let ((index 0)) + (dolist (thing things (values (intern name))) + (let* ((x (string thing)) + (len (length x))) + (replace name x :start1 index) + (incf index len))))))) ;;; like SYMBOLICATE, but producing keywords (defun keywordicate (&rest things) diff -urNb sbcl/src/code/print.lisp sbcl.char/src/code/print.lisp --- sbcl/src/code/print.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/print.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -615,10 +615,10 @@ ;;; character has. At characters have at least one bit set, so we can ;;; search for any character with a positive test. (defvar *character-attributes* - (make-array char-code-limit + (make-array 160 ; FIXME :element-type '(unsigned-byte 16) :initial-element 0)) -(declaim (type (simple-array (unsigned-byte 16) (#.char-code-limit)) +(declaim (type (simple-array (unsigned-byte 16) (#.160)) ; FIXME *character-attributes*)) ;;; constants which are a bit-mask for each interesting character attribute @@ -672,17 +672,17 @@ (set-bit #\/ slash-attribute) ;; Mark anything not explicitly allowed as funny. - (dotimes (i char-code-limit) + (dotimes (i 160) ; FIXME (when (zerop (aref *character-attributes* i)) (setf (aref *character-attributes* i) funny-attribute)))) ;;; For each character, the value of the corresponding element is the ;;; lowest base in which that character is a digit. (defvar *digit-bases* - (make-array char-code-limit + (make-array 128 ; FIXME :element-type '(unsigned-byte 8) :initial-element 36)) -(declaim (type (simple-array (unsigned-byte 8) (#.char-code-limit)) +(declaim (type (simple-array (unsigned-byte 8) (#.128)) ; FIXME *digit-bases*)) (dotimes (i 36) (let ((char (digit-char i 36))) @@ -698,7 +698,11 @@ ,(if at-end '(go TEST-SIGN) '(return nil))) (setq current (schar name index) code (char-code current) - bits (aref attributes code)) + bits (cond ; FIXME + ((< code 160) (aref attributes code)) + ((upper-case-p current) uppercase-attribute) + ((lower-case-p current) lowercase-attribute) + (t other-attribute))) (incf index) (go ,tag))) (test (&rest attributes) @@ -713,7 +717,8 @@ attributes)) bits))))) (digitp () - `(< (the fixnum (aref bases code)) base))) + `(and (< code 128) ; FIXME + (< (the fixnum (aref bases code)) base)))) (prog ((len (length name)) (attributes *character-attributes*) @@ -740,7 +745,13 @@ letter-attribute))) (do ((i (1- index) (1+ i))) ((= i len) (return-from symbol-quotep nil)) - (unless (zerop (logand (aref attributes (char-code (schar name i))) + (unless (zerop (logand (let* ((char (schar name i)) + (code (char-code char))) + (cond + ((< code 160) (aref attributes code)) + ((upper-case-p char) uppercase-attribute) + ((lower-case-p char) lowercase-attribute) + (t other-attribute))) mask)) (return-from symbol-quotep t)))) @@ -1251,7 +1262,11 @@ ;;; [CMUC]tradix.press. DO NOT EVEN THINK OF ATTEMPTING TO ;;; UNDERSTAND THIS CODE WITHOUT READING THE PAPER! -(declaim (type (simple-array character (10)) *digits*)) +;;; and, to add to the confusion: this declaration may look wrong, but +;;; in fact it's right. FIXME: it should probably be local to +;;; FLOAT-STRING, 'cause we're probably not ever going to print floats +;;; in bases other than 10; then this declaration can go away. +(declaim (type (simple-array base-char (10)) *digits*)) (defvar *digits* "0123456789") (defun flonum-to-string (x &optional width fdigits scale fmin) diff -urNb sbcl/src/code/reader.lisp sbcl.char/src/code/reader.lisp --- sbcl/src/code/reader.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/reader.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -61,21 +61,28 @@ (defmacro get-cat-entry (char rt) ;; KLUDGE: Only give this side-effect-free args. ;; FIXME: should probably become inline function - `(elt (character-attribute-table ,rt) - (char-code ,char))) + `(if (typep ,char 'base-char) + (elt (character-attribute-array ,rt) (char-code ,char)) + (gethash ,char (character-attribute-hash-table ,rt) +char-attr-constituent+))) (defun set-cat-entry (char newvalue &optional (rt *readtable*)) - (setf (elt (character-attribute-table rt) - (char-code char)) - newvalue)) + (if (typep char 'base-char) + (setf (elt (character-attribute-array rt) (char-code char)) newvalue) + ;; FIXME: could REMHASH if we're setting to + ;; +CHAR-ATTR-CONSTITUENT+ + (setf (gethash char (character-attribute-hash-table rt)) newvalue))) ;;; the value actually stored in the character macro table. As per ;;; ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, this can ;;; be either a function or NIL. (eval-when (:compile-toplevel :execute) (sb!xc:defmacro get-raw-cmt-entry (char readtable) - `(svref (character-macro-table ,readtable) - (char-code ,char)))) + `(if (typep ,char 'base-char) + (svref (character-macro-array ,readtable) (char-code ,char)) + ;; Note: DEFAULT here is NIL, not #'UNDEFINED-MACRO-CHAR, so + ;; that everything above the base-char range is a non-macro + ;; constituent by default. + (gethash ,char (character-macro-hash-table ,readtable) nil)))) ;;; the value represented by whatever is stored in the character macro ;;; table. As per ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, @@ -87,10 +94,13 @@ #'read-token))) (defun set-cmt-entry (char new-value-designator &optional (rt *readtable*)) - (setf (svref (character-macro-table rt) - (char-code char)) + (if (typep char 'base-char) + (setf (svref (character-macro-array rt) (char-code char)) + (and new-value-designator + (%coerce-callable-to-fun new-value-designator))) + (setf (gethash char (character-macro-hash-table rt)) (and new-value-designator - (%coerce-callable-to-fun new-value-designator)))) + (%coerce-callable-to-fun new-value-designator))))) (defun undefined-macro-char (stream char) (unless *read-suppress* @@ -137,7 +147,7 @@ (defun !cold-init-secondary-attribute-table () (setq *secondary-attribute-table* - (make-array char-code-limit :element-type '(unsigned-byte 8) + (make-array base-char-code-limit :element-type '(unsigned-byte 8) :initial-element +char-attr-constituent+)) (!set-secondary-attribute #\: +char-attr-package-delimiter+) (!set-secondary-attribute #\| +char-attr-multiple-escape+) ; |) [for EMACS] @@ -165,17 +175,29 @@ ;;;; readtable operations +(defun shallow-replace/eql-hash-table (to from) + (maphash (lambda (k v) (setf (gethash k to) v)) from)) + (defun copy-readtable (&optional (from-readtable *readtable*) to-readtable) (let ((really-from-readtable (or from-readtable *standard-readtable*)) (really-to-readtable (or to-readtable (make-readtable)))) - (replace (character-attribute-table really-to-readtable) - (character-attribute-table really-from-readtable)) - (replace (character-macro-table really-to-readtable) - (character-macro-table really-from-readtable)) + (replace (character-attribute-array really-to-readtable) + (character-attribute-array really-from-readtable)) + (shallow-replace/eql-hash-table + (character-attribute-hash-table really-to-readtable) + (character-attribute-hash-table really-from-readtable)) + (replace (character-macro-array really-to-readtable) + (character-macro-array really-from-readtable)) + (shallow-replace/eql-hash-table + (character-macro-hash-table really-to-readtable) + (character-macro-hash-table really-from-readtable)) (setf (dispatch-tables really-to-readtable) - (mapcar (lambda (pair) (cons (car pair) - (copy-seq (cdr pair)))) + (mapcar (lambda (pair) + (cons (car pair) + (let ((table (make-hash-table))) + (shallow-replace/eql-hash-table table (cdr pair)) + table))) (dispatch-tables really-from-readtable))) (setf (readtable-case really-to-readtable) (readtable-case really-from-readtable)) @@ -250,17 +272,27 @@ (let ((stream (in-synonym-of stream))) (if (ansi-stream-p stream) (prepare-for-fast-read-char stream - (do ((attribute-table (character-attribute-table *readtable*)) + (do ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table + (character-attribute-hash-table *readtable*)) (char (fast-read-char t) (fast-read-char t))) - ((/= (the fixnum (aref attribute-table (char-code char))) + ((/= (the fixnum + (if (typep char 'base-char) + (aref attribute-array (char-code char)) + (gethash char attribute-hash-table +char-attr-constituent+))) +char-attr-whitespace+) (done-with-fast-read-char) char))) ;; CLOS stream - (do ((attribute-table (character-attribute-table *readtable*)) + (do ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table + (character-attribute-hash-table *readtable*)) (char (read-char stream nil :eof) (read-char stream nil :eof))) ((or (eq char :eof) - (/= (the fixnum (aref attribute-table (char-code char))) + (/= (the fixnum + (if (typep char 'base-char) + (aref attribute-array (char-code char)) + (gethash char attribute-hash-table +char-attr-constituent+))) +char-attr-whitespace+)) (if (eq char :eof) (error 'end-of-file :stream stream) @@ -299,7 +331,7 @@ ;; all constituents (do ((ichar 0 (1+ ichar)) (char)) - ((= ichar #O200)) + ((= ichar base-char-code-limit)) (setq char (code-char ichar)) (when (constituentp char *standard-readtable*) (set-cat-entry char (get-secondary-attribute char)) @@ -622,8 +654,13 @@ ;;;; character classes ;;; Return the character class for CHAR. -(defmacro char-class (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +;;; +;;; FIXME: why aren't these ATT-getting forms using GET-CAT-ENTRY? +;;; Because we've cached the readtable tables? +(defmacro char-class (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if (<= att +char-attr-terminating-macro+) +char-attr-delimiter+ @@ -631,8 +668,10 @@ ;;; Return the character class for CHAR, which might be part of a ;;; rational number. -(defmacro char-class2 (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +(defmacro char-class2 (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if (<= att +char-attr-terminating-macro+) +char-attr-delimiter+ @@ -645,8 +684,10 @@ ;;; Return the character class for a char which might be part of a ;;; rational or floating number. (Assume that it is a digit if it ;;; could be.) -(defmacro char-class3 (char attable) - `(let ((att (aref ,attable (char-code ,char)))) +(defmacro char-class3 (char attarray atthash) + `(let ((att (if (typep ,char 'base-char) + (aref ,attarray (char-code ,char)) + (gethash ,char ,atthash +char-attr-constituent+)))) (declare (fixnum att)) (if possibly-rational (setq possibly-rational @@ -735,7 +776,8 @@ (when *read-suppress* (internal-read-extended-token stream firstchar nil) (return-from read-token nil)) - (let ((attribute-table (character-attribute-table *readtable*)) + (let ((attribute-array (character-attribute-array *readtable*)) + (attribute-hash-table (character-attribute-hash-table *readtable*)) (package-designator nil) (colons 0) (possibly-rational t) @@ -746,7 +788,7 @@ (seen-multiple-escapes nil)) (reset-read-buffer) (prog ((char firstchar)) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-sign+ (go SIGN)) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-digit-or-expt+ @@ -765,7 +807,7 @@ (unless char (go RETURN-SYMBOL)) (setq possibly-rational t possibly-float t) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-digit-or-expt+ (setq seen-digit-or-expt t) @@ -782,7 +824,7 @@ (setq char (read-char stream nil nil)) (unless char (return (make-integer))) (setq was-possibly-float possibly-float) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-decimal-digit+ (if possibly-float (go LEFTDECIMALDIGIT) @@ -811,7 +853,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-integer))) - (case (char-class3 char attribute-table) + (case (char-class3 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDIGIT)) (#.+char-attr-constituent-decimal-digit+ (bug "impossible!")) (#.+char-attr-constituent-dot+ (go SYMBOL)) @@ -832,7 +874,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go LEFTDECIMALDIGIT)) (#.+char-attr-constituent-dot+ (go MIDDLEDOT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) @@ -849,7 +891,7 @@ (setq char (read-char stream nil nil)) (unless char (return (let ((*read-base* 10)) (make-integer)))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) (#.+char-attr-delimiter+ @@ -864,7 +906,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-float stream))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-expt+ (go EXPONENT)) (#.+char-attr-delimiter+ @@ -878,7 +920,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -888,7 +930,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (%reader-error stream "dot context error")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RIGHTDIGIT)) (#.+char-attr-constituent-dot+ (go DOTS)) (#.+char-attr-delimiter+ (%reader-error stream "dot context error")) @@ -901,7 +943,7 @@ (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) (setq possibly-float t) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-sign+ (go EXPTSIGN)) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) @@ -913,7 +955,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -924,7 +966,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-float stream))) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go EXPTDIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -937,7 +979,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class2 char attribute-table) + (case (char-class2 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RATIODIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) @@ -948,7 +990,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (return (make-ratio stream))) - (case (char-class2 char attribute-table) + (case (char-class2 char attribute-array attribute-hash-table) (#.+char-attr-constituent-digit+ (go RATIODIGIT)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -961,7 +1003,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil nil)) (unless char (%reader-error stream "too many dots")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-constituent-dot+ (go DOTS)) (#.+char-attr-delimiter+ (unread-char char stream) @@ -979,7 +1021,7 @@ (ouch-read-buffer char) (setq char (fast-read-char nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-escape+ (done-with-fast-read-char) (go ESCAPE)) (#.+char-attr-delimiter+ (done-with-fast-read-char) @@ -996,7 +1038,7 @@ (ouch-read-buffer char) (setq char (read-char stream nil :eof)) (when (eq char :eof) (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) @@ -1013,7 +1055,7 @@ (ouch-read-buffer nextchar)) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) @@ -1028,7 +1070,7 @@ (ouch-read-buffer char)) (setq char (read-char stream nil nil)) (unless char (go RETURN-SYMBOL)) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL)) (#.+char-attr-escape+ (go ESCAPE)) (#.+char-attr-multiple-escape+ (go MULT-ESCAPE)) @@ -1055,7 +1097,7 @@ (setq escapes ()) (setq char (read-char stream nil nil)) (unless char (reader-eof-error stream "after reading a colon")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (%reader-error stream @@ -1070,7 +1112,7 @@ (setq char (read-char stream nil nil)) (unless char (reader-eof-error stream "after reading a colon")) - (case (char-class char attribute-table) + (case (char-class char attribute-array attribute-hash-table) (#.+char-attr-delimiter+ (unread-char char stream) (%reader-error stream @@ -1323,7 +1365,7 @@ ;;;; cruft for dispatch macros (defun make-char-dispatch-table () - (make-array char-code-limit :initial-element #'dispatch-char-error)) + (make-hash-table)) (defun dispatch-char-error (stream sub-char ignore) (declare (ignore ignore)) @@ -1362,9 +1404,7 @@ (dpair (find disp-char (dispatch-tables rt) :test #'char= :key #'car))) (if dpair - (setf (elt (the simple-vector (cdr dpair)) - (char-code sub-char)) - (coerce function 'function)) + (setf (gethash sub-char (cdr dpair)) (coerce function 'function)) (error "~S is not a dispatch char." disp-char)))) (defun get-dispatch-macro-character (disp-char sub-char @@ -1377,14 +1417,7 @@ (dpair (find disp-char (dispatch-tables rt) :test #'char= :key #'car))) (if dpair - (let ((dispatch-fun (elt (the simple-vector (cdr dpair)) - (char-code sub-char)))) - ;; Digits are also initialized in a dispatch table to - ;; #'dispatch-char-error; READ-DISPATCH-CHAR handles them - ;; separately. - CSR, 2002-04-12 - (if (eq dispatch-fun #'dispatch-char-error) - nil - dispatch-fun)) + (values (gethash sub-char (cdr dpair))) (error "~S is not a dispatch char." disp-char)))) (defun read-dispatch-char (stream char) @@ -1408,8 +1441,7 @@ :test #'char= :key #'car))) (if dpair (funcall (the function - (elt (the simple-vector (cdr dpair)) - (char-code sub-char))) + (gethash sub-char (cdr dpair) #'dispatch-char-error)) stream sub-char (if numargp numarg nil)) (%reader-error stream "no dispatch table for dispatch char"))))) @@ -1433,10 +1465,10 @@ (start start) (end (%check-vector-sequence-bounds string start end))) (unless *read-from-string-spares* - (push (internal-make-string-input-stream "" 0 0) - *read-from-string-spares*)) + (push (make-string-input-stream "" 0 0) *read-from-string-spares*)) (let ((stream (pop *read-from-string-spares*))) - (setf (string-input-stream-string stream) string) + (setf (string-input-stream-string stream) + (coerce string '(simple-array character (*)))) (setf (string-input-stream-current stream) start) (setf (string-input-stream-end stream) end) (unwind-protect diff -urNb sbcl/src/code/readtable.lisp sbcl.char/src/code/readtable.lisp --- sbcl/src/code/readtable.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/readtable.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -12,7 +12,7 @@ (in-package "SB!IMPL") (sb!xc:deftype attribute-table () - '(simple-array (unsigned-byte 8) (#.sb!xc:char-code-limit))) + '(simple-array (unsigned-byte 8) (#.base-char-code-limit))) ;;; constants for readtable character attributes. These are all as in ;;; the manual. @@ -58,20 +58,22 @@ ;; In order to make READ-TOKEN fast, all this information is stored ;; in the character attribute table by having different varieties of ;; constituents. - (character-attribute-table - (make-array sb!xc:char-code-limit + (character-attribute-array + (make-array base-char-code-limit :element-type '(unsigned-byte 8) :initial-element +char-attr-constituent+) :type attribute-table) + (character-attribute-hash-table (make-hash-table) :type hash-table) ;; The CHARACTER-MACRO-TABLE is a vector of CHAR-CODE-LIMIT ;; functions. One of these functions called with appropriate ;; arguments whenever any non-WHITESPACE character is encountered ;; inside READ-PRESERVING-WHITESPACE. These functions are used to ;; implement user-defined read-macros, system read-macros, and the ;; number-symbol reader. - (character-macro-table - (make-array sb!xc:char-code-limit :initial-element #'undefined-macro-char) - :type (simple-vector #.sb!xc:char-code-limit)) + (character-macro-array + (make-array base-char-code-limit :initial-element #'undefined-macro-char) + :type (simple-vector #.base-char-code-limit)) + (character-macro-hash-table (make-hash-table) :type hash-table) ;; an alist from dispatch characters to vectors of CHAR-CODE-LIMIT ;; functions, for use in defining dispatching macros (like #-macro) (dispatch-tables () :type list) diff -urNb sbcl/src/code/room.lisp sbcl.char/src/code/room.lisp --- sbcl/src/code/room.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/room.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -72,7 +72,8 @@ :kind :fixed :length size)))))) -(dolist (code (list complex-base-string-widetag simple-array-widetag +(dolist (code (list complex-character-string-widetag + complex-base-string-widetag simple-array-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (setf (svref *meta-room-info* code) @@ -122,6 +123,11 @@ :kind :string :length 0)) +(setf (svref *meta-room-info* simple-character-string-widetag) + (make-room-info :name 'simple-character-string + :kind :string + :length 2)) + (setf (svref *meta-room-info* simple-array-nil-widetag) (make-room-info :name 'simple-array-nil :kind :fixed @@ -176,7 +182,7 @@ ;;; Return the total size of a vector in bytes, including any pad. #!-sb-fluid (declaim (inline vector-total-size)) (defun vector-total-size (obj info) - (let ((shift (room-info-length info)) + (let* ((shift (room-info-length info)) (len (+ (length (the (simple-array * (*)) obj)) (ecase (room-info-kind info) (:vector 0) @@ -467,6 +473,7 @@ #.single-float-widetag #.double-float-widetag #.simple-base-string-widetag + #.simple-character-string-widetag #.simple-array-nil-widetag #.simple-bit-vector-widetag #.simple-array-unsigned-byte-2-widetag diff -urNb sbcl/src/code/run-program.lisp sbcl.char/src/code/run-program.lisp --- sbcl/src/code/run-program.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/run-program.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -289,12 +289,12 @@ (defun find-a-pty () (dolist (char '(#\p #\q)) (dotimes (digit 16) - (let* ((master-name (format nil "/dev/pty~C~X" char digit)) + (let* ((master-name (coerce (format nil "/dev/pty~C~X" char digit) 'base-string)) (master-fd (sb-unix:unix-open master-name sb-unix:o_rdwr #o666))) (when master-fd - (let* ((slave-name (format nil "/dev/tty~C~X" char digit)) + (let* ((slave-name (coerce (format nil "/dev/tty~C~X" char digit) 'base-string)) (slave-fd (sb-unix:unix-open slave-name sb-unix:o_rdwr #o666))) @@ -347,7 +347,9 @@ (declare (simple-string s)) (let ((n (length s))) ;; Blast the string into place. - (sb-kernel:copy-to-system-area (the simple-string s) + (sb-kernel:copy-to-system-area (the simple-base-string + ;; FIXME + (coerce s 'simple-base-string)) (* sb-vm:vector-data-offset sb-vm:n-word-bits) string-sap 0 @@ -382,6 +384,7 @@ ;;; Is UNIX-FILENAME the name of a file that we can execute? (defun unix-filename-is-executable-p (unix-filename) (declare (type simple-string unix-filename)) + (setf unix-filename (coerce unix-filename 'base-string)) (values (and (eq (sb-unix:unix-file-kind unix-filename) :file) (sb-unix:unix-access unix-filename sb-unix:x_ok)))) @@ -614,7 +617,7 @@ ;;; stream. (defun copy-descriptor-to-stream (descriptor stream cookie) (incf (car cookie)) - (let ((string (make-string 256)) + (let ((string (make-string 256 :element-type 'base-char)) handler) (setf handler (sb-sys:add-fd-handler @@ -683,7 +686,7 @@ ;; Use /dev/null. (multiple-value-bind (fd errno) - (sb-unix:unix-open "/dev/null" + (sb-unix:unix-open #.(coerce "/dev/null" 'base-string) (case direction (:input sb-unix:o_rdonly) (:output sb-unix:o_wronly) @@ -735,7 +738,7 @@ (dotimes (count 256 (error "could not open a temporary file in /tmp")) - (let* ((name (format nil "/tmp/.run-program-~D" count)) + (let* ((name (coerce (format nil "/tmp/.run-program-~D" count) 'base-string)) (fd (sb-unix:unix-open name (logior sb-unix:o_rdwr sb-unix:o_creat diff -urNb sbcl/src/code/seq.lisp sbcl.char/src/code/seq.lisp --- sbcl/src/code/seq.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/seq.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -569,6 +569,14 @@ (when (null source-end) (setq source-end (length source-sequence))) (mumble-replace-from-mumble)) +(defun simple-character-string-replace-from-simple-character-string* + (target-sequence source-sequence + target-start target-end source-start source-end) + (declare (type (simple-array character (*)) target-sequence source-sequence)) + (when (null target-end) (setq target-end (length target-sequence))) + (when (null source-end) (setq source-end (length source-sequence))) + (mumble-replace-from-mumble)) + (define-sequence-traverser replace (sequence1 sequence2 &key start1 end1 start2 end2) #!+sb-doc diff -urNb sbcl/src/code/stream.lisp sbcl.char/src/code/stream.lisp --- sbcl/src/code/stream.lisp 2004-10-29 14:57:35.000000000 +0200 +++ sbcl.char/src/code/stream.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -433,14 +433,15 @@ (funcall (ansi-stream-in stream) stream eof-error-p eof-value)) (t (when (/= start +ansi-stream-in-buffer-extra+) + ;; FIXME AARGH KLUDGE There's no sb!vm:n-character-bits. (bit-bash-copy ibuf (+ (* +ansi-stream-in-buffer-extra+ - sb!vm:n-byte-bits) + sb!vm:n-byte-bits 4) (* sb!vm:vector-data-offset sb!vm:n-word-bits)) - ibuf (+ (the index (* start sb!vm:n-byte-bits)) + ibuf (+ (the index (* start sb!vm:n-byte-bits 4)) (* sb!vm:vector-data-offset sb!vm:n-word-bits)) - (* count sb!vm:n-byte-bits))) + (* count sb!vm:n-byte-bits 4))) (setf (ansi-stream-in-index stream) (1+ start)) (aref ibuf start))))) @@ -1028,7 +1029,7 @@ (:include string-stream (in #'string-inch) (bin #'ill-bin) - (n-bin #'string-stream-read-n-bytes) + (n-bin #'ill-bin) (misc #'string-in-misc) (string (missing-arg) :type simple-string)) (:constructor internal-make-string-input-stream @@ -1233,14 +1234,19 @@ (element-type (string-output-stream-element-type stream)) (result (case element-type - ;; Overwhelmingly common case; can be inlined. + ;; overwhelmingly common case: can be inlined ((character) (make-string length)) + ;; slightly less common cases: inline it anyway + ((base-char standard-char) + (make-string length :element-type 'base-char)) (t (make-string length :element-type element-type))))) ;; For the benefit of the REPLACE transform, let's do this, so ;; that the common case isn't ludicrously expensive. (etypecase result ((simple-array character (*)) (replace result (string-output-stream-string stream))) + (simple-base-string + (replace result (string-output-stream-string stream))) ((simple-array nil (*)) (replace result (string-output-stream-string stream)))) (setf (string-output-stream-index stream) 0 @@ -1263,6 +1269,8 @@ ;;; the CLM, but they are required for the implementation of ;;; WITH-OUTPUT-TO-STRING. +;;; FIXME: need to support (VECTOR BASE-CHAR) and (VECTOR NIL), +;;; ideally without destroying all hope of efficiency. (deftype string-with-fill-pointer () '(and (vector character) (satisfies array-has-fill-pointer-p))) @@ -1292,9 +1300,9 @@ (if (= offset-current end) (let* ((new-length (1+ (* current 2))) (new-workspace (make-string new-length))) - (declare (simple-string new-workspace)) - (%byte-blt workspace start - new-workspace 0 current) + (declare (type (simple-array character (*)) new-workspace)) + (replace new-workspace workspace + :start2 start :end2 offset-current) (setf workspace new-workspace offset-current current) (set-array-header buffer workspace new-length @@ -1322,21 +1330,16 @@ (let* ((new-length (+ (the fixnum (* current 2)) string-len)) (new-workspace (make-string new-length))) (declare (type (simple-array character (*)) new-workspace)) - (%byte-blt workspace dst-start - new-workspace 0 current) - (setf workspace new-workspace) - (setf offset-current current) - (setf offset-dst-end dst-end) - (set-array-header buffer - workspace - new-length - dst-end - 0 - new-length - nil)) + (replace new-workspace workspace + :start2 dst-start :end2 offset-current) + (setf workspace new-workspace + offset-current current + offset-dst-end dst-end) + (set-array-header buffer workspace new-length + dst-end 0 new-length nil)) (setf (fill-pointer buffer) dst-end)) - (%byte-blt string start - workspace offset-current offset-dst-end))) + (replace workspace string + :start1 offset-current :start2 start :end2 end))) dst-end)) (defun fill-pointer-misc (stream operation &optional arg1 arg2) @@ -1525,7 +1528,7 @@ (defun case-frob-upcase-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1550,7 +1553,7 @@ (defun case-frob-downcase-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1583,7 +1586,7 @@ (defun case-frob-capitalize-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1628,7 +1631,7 @@ (defun case-frob-capitalize-aux-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) @@ -1673,7 +1676,7 @@ (defun case-frob-capitalize-first-sout (stream str start end) (declare (type case-frob-stream stream) - (type simple-base-string str) + (type simple-string str) (type index start) (type (or index null) end)) (let* ((target (case-frob-stream-target stream)) diff -urNb sbcl/src/code/target-format.lisp sbcl.char/src/code/target-format.lisp --- sbcl/src/code/target-format.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-format.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -76,8 +76,11 @@ (multiple-value-bind (new-directives new-args) (let* ((character (format-directive-character directive)) (function + (typecase character + (base-char (svref *format-directive-interpreters* (char-code character))) + (character nil))) (*default-format-error-offset* (1- (format-directive-end directive)))) (unless function diff -urNb sbcl/src/code/target-load.lisp sbcl.char/src/code/target-load.lisp --- sbcl/src/code/target-load.lisp 2004-10-29 14:57:35.000000000 +0200 +++ sbcl.char/src/code/target-load.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -295,4 +295,9 @@ (defun !loader-cold-init () (/show0 "/!loader-cold-init") (dolist (routine *!initial-assembler-routines*) - (setf (gethash (car routine) *assembler-routines*) (cdr routine)))) + (setf (gethash (car routine) *assembler-routines*) (cdr routine))) + (dolist (symbol *!initial-foreign-symbols*) + (setf (gethash (car symbol) *static-foreign-symbols*) (cdr symbol)))) + +(declaim (ftype (function (string) (unsigned-byte #.sb!vm:n-machine-word-bits)) + foreign-symbol-address-as-integer)) diff -urNb sbcl/src/code/target-pathname.lisp sbcl.char/src/code/target-pathname.lisp --- sbcl/src/code/target-pathname.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-pathname.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -44,7 +44,7 @@ (let ((namestring (handler-case (namestring pathname) (error nil)))) (if namestring - (format stream "#P~S" namestring) + (format stream "#P~S" (coerce namestring '(simple-array character (*)))) (print-unreadable-object (pathname stream :type t) (format stream "~@<(with no namestring) ~_:HOST ~S ~_:DEVICE ~S ~_:DIRECTORY ~S ~ @@ -202,12 +202,12 @@ (or (eq thing wild) (eq wild :wild) (typecase thing - (simple-base-string + (simple-string ;; String is matched by itself, a matching pattern or :WILD. (typecase wild (pattern (values (pattern-matches wild thing))) - (simple-base-string + (simple-string (string= thing wild)))) (pattern ;; A pattern is only matched by an identical pattern. @@ -308,7 +308,7 @@ (dolist (x in) (when (check-for pred x) (return t)))) - (simple-base-string + (simple-string (dotimes (i (length in)) (when (funcall pred (schar in i)) (return t)))) @@ -319,7 +319,7 @@ (make-pattern (mapcar (lambda (piece) (typecase piece - (simple-base-string + (simple-string (funcall fun piece)) (cons (case (car piece) @@ -333,7 +333,7 @@ (pattern-pieces thing)))) (list (mapcar fun thing)) - (simple-base-string + (simple-string (funcall fun thing)) (t thing)))) @@ -385,6 +385,7 @@ (declare (type pathname-designator pathname) (type pathname-designator defaults) (values pathname)) + (/show0 "in MERGE-PATHNAMES") (with-pathname (defaults defaults) (let ((pathname (let ((*default-pathname-defaults* defaults)) (pathname pathname)))) @@ -702,7 +703,7 @@ ;;; If NAMESTR begins with a colon-terminated, defined, logical host, ;;; then return that host, otherwise return NIL. (defun extract-logical-host-prefix (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end) (values (or logical-host null))) (let ((colon-pos (position #\: namestr :start start :end end))) @@ -924,7 +925,7 @@ (defun substitute-into (pattern subs diddle-case) (declare (type pattern pattern) (type list subs) - (values (or simple-base-string pattern) list)) + (values (or simple-string pattern) list)) (let ((in-wildcard nil) (pieces nil) (strings nil)) @@ -1157,13 +1158,14 @@ (let ((word (string-upcase word))) (dotimes (i (length word)) (let ((ch (schar word i))) - (unless (or (alpha-char-p ch) (digit-char-p ch) (char= ch #\-)) + (unless (and (typep ch 'standard-char) + (or (alpha-char-p ch) (digit-char-p ch) (char= ch #\-))) (error 'namestring-parse-error :complaint "logical namestring character which ~ is not alphanumeric or hyphen:~% ~S" :args (list ch) :namestring word :offset i)))) - word)) + (coerce word 'base-string))) ;;; Given a logical host or string, return a logical host. If ERROR-P ;;; is NIL, then return NIL when no such host exists. @@ -1257,7 +1259,7 @@ ;;; Break up a logical-namestring, always a string, into its ;;; constituent parts. (defun parse-logical-namestring (namestr start end) - (declare (type simple-base-string namestr) + (declare (type simple-string namestr) (type index start end)) (collect ((directory)) (let ((host nil) @@ -1418,7 +1420,7 @@ (when type-supplied (unless name (error "cannot specify the type without a file: ~S" pathname)) - (when (typep type 'simple-base-string) + (when (typep type 'simple-string) (when (position #\. type) (error "type component can't have a #\. inside: ~S" pathname))) (strings ".") @@ -1524,12 +1526,9 @@ (t (translate-logical-pathname (pathname pathname))))) (defvar *logical-pathname-defaults* - (%make-logical-pathname (make-logical-host :name "BOGUS") - :unspecific - nil - nil - nil - nil)) + (%make-logical-pathname + (make-logical-host :name (logical-word-or-lose "BOGUS")) + :unspecific nil nil nil nil)) (defun load-logical-pathname-translations (host) #!+sb-doc diff -urNb sbcl/src/code/target-thread.lisp sbcl.char/src/code/target-thread.lisp --- sbcl/src/code/target-thread.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-thread.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -58,7 +58,7 @@ (declaim (inline waitqueue-data-address mutex-value-address)) (defstruct waitqueue - (name nil :type (or null simple-base-string)) + (name nil :type (or null simple-string)) (lock 0) (data nil)) diff -urNb sbcl/src/code/target-unithread.lisp sbcl.char/src/code/target-unithread.lisp --- sbcl/src/code/target-unithread.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/target-unithread.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -37,7 +37,7 @@ ;;;; the higher-level locking operations are based on waitqueues (defstruct waitqueue - (name nil :type (or null simple-base-string)) + (name nil :type (or null simple-string)) (lock 0) (data nil)) diff -urNb sbcl/src/code/toplevel.lisp sbcl.char/src/code/toplevel.lisp --- sbcl/src/code/toplevel.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/toplevel.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -421,6 +421,7 @@ (probe-init-files (explicitly-specified-init-file-name &rest default-init-file-names) (declare (type list default-init-file-names)) + (/show0 "in PROBE-INIT-FILES") (if explicitly-specified-init-file-name (or (probe-file explicitly-specified-init-file-name) (startup-error "The file ~S was not found." @@ -431,6 +432,7 @@ ;; shared idiom for creating default names for ;; SYSINITish and USERINITish files (init-file-name (maybe-dir-name basename) + (/show0 "in INIT-FILE-NAME") (and maybe-dir-name (concatenate 'string maybe-dir-name "/" basename)))) (let ((sysinit-truename diff -urNb sbcl/src/code/typecheckfuns.lisp sbcl.char/src/code/typecheckfuns.lisp --- sbcl/src/code/typecheckfuns.lisp 2004-10-28 22:28:37.000000000 +0200 +++ sbcl.char/src/code/typecheckfuns.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -203,7 +203,7 @@ (intersection-type-p ctype) (union-type-p ctype) (negation-type-p ctype) - (character-set-type-p ctype)))) + (character-range-type-p ctype)))) ;;; Evaluate (at load/execute time) to a function which checks that ;;; its argument is of the specified type. diff -urNb sbcl/src/code/unix.lisp sbcl.char/src/code/unix.lisp --- sbcl/src/code/unix.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/code/unix.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -811,7 +811,7 @@ ;;; paths have been converted to absolute paths, so we don't need to ;;; try to handle any more generality than that. (defun unix-resolve-links (pathname) - (declare (type simple-string pathname)) + (declare (type simple-base-string pathname)) (aver (not (relative-unix-pathname? pathname))) (/noshow "entering UNIX-RESOLVE-LINKS") (loop with previous-pathnames = nil do @@ -837,7 +837,7 @@ :from-end t))) (dir (subseq pathname 0 dir-len))) (/noshow dir) - (concatenate 'string dir link)) + (concatenate 'base-string dir link)) link)))) (if (unix-file-kind new-pathname) (setf pathname new-pathname) @@ -853,9 +853,9 @@ (push pathname previous-pathnames)))) (defun unix-simplify-pathname (src) - (declare (type simple-string src)) + (declare (type simple-base-string src)) (let* ((src-len (length src)) - (dst (make-string src-len)) + (dst (make-string src-len :element-type 'base-char)) (dst-len 0) (dots 0) (last-slash nil)) @@ -929,7 +929,8 @@ (position #\/ dst :end last-slash :from-end t))) (if prev-prev-slash (setf dst-len (1+ prev-prev-slash)) - (return-from unix-simplify-pathname "./"))))))) + (return-from unix-simplify-pathname + (coerce "./" 'simple-base-string)))))))) (cond ((zerop dst-len) "./") ((= dst-len src-len) diff -urNb sbcl/src/cold/warm.lisp sbcl.char/src/cold/warm.lisp --- sbcl/src/cold/warm.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/cold/warm.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -73,10 +73,13 @@ (make-pathname :directory '(:relative "contrib" :wild-inferiors) :name :wild :type :wild) sys))) + (sb-int:/show "about to set SYS logical pathname translations") (setf (logical-pathname-translations "SYS") `(("SYS:SRC;**;*.*.*" ,src) ("SYS:CONTRIB;**;*.*.*" ,contrib)))) +(sb-int:/show "set SYS logical pathname translations") + ;;; FIXME: CMU CL's pclcom.lisp had extra optional stuff wrapped around ;;; COMPILE-PCL, at least some of which we should probably have too: ;;; @@ -175,7 +178,7 @@ ;; facility, and should be compiled and loaded after ;; our DESCRIBE facility is compiled and loaded. "SRC;PCL;DESCRIBE")) - + (sb-int:/show "at head of DOLIST") (let ((fullname (concatenate 'string "SYS:" stem ".LISP"))) (sb-int:/show "about to compile" fullname) (flet ((report-recompile-restart (stream) diff -urNb sbcl/src/compiler/array-tran.lisp sbcl.char/src/compiler/array-tran.lisp --- sbcl/src/compiler/array-tran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/array-tran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -293,8 +293,7 @@ (give-up-ir1-transform "cannot open-code creation of ~S" result-type-spec)) #-sb-xc-host - (unless (csubtypep (ctype-of (sb!vm:saetp-initial-element-default saetp)) - eltype-type) + (unless (ctypep (sb!vm:saetp-initial-element-default saetp) eltype-type) ;; This situation arises e.g. in (MAKE-ARRAY 4 :ELEMENT-TYPE ;; '(INTEGER 1 5)) ANSI's definition of MAKE-ARRAY says "If ;; INITIAL-ELEMENT is not supplied, the consequences of later diff -urNb sbcl/src/compiler/assem.lisp sbcl.char/src/compiler/assem.lisp --- sbcl/src/compiler/assem.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/assem.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -27,7 +27,7 @@ ;;; This structure holds the state of the assembler. (defstruct (segment (:copier nil)) ;; the name of this segment (for debugging output and stuff) - (name "unnamed" :type simple-base-string) + (name "unnamed" :type simple-string) ;; Ordinarily this is a vector where instructions are written. If ;; the segment is made invalid (e.g. by APPEND-SEGMENT) then the ;; vector can be replaced by NIL. diff -urNb sbcl/src/compiler/dump.lisp sbcl.char/src/compiler/dump.lisp --- sbcl/src/compiler/dump.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/dump.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -192,11 +192,19 @@ (declare (type fasl-output fasl-output)) (unless *cold-load-dump* (let ((handle (gethash x (fasl-output-equal-table fasl-output)))) - (cond (handle - (dump-push handle fasl-output) - t) - (t - nil))))) + (cond + (handle (dump-push handle fasl-output) t) + (t nil))))) +(defun string-check-table (x fasl-output) + (declare (type fasl-output fasl-output) + (type string x)) + (unless *cold-load-dump* + (let ((handle (cdr (assoc + (array-element-type x) + (gethash x (fasl-output-equal-table fasl-output)))))) + (cond + (handle (dump-push handle fasl-output) t) + (t nil))))) ;;; These functions are called after dumping an object to save the ;;; object in the table. The object (also passed in as X) must already @@ -217,7 +225,16 @@ (setf (gethash x (fasl-output-eq-table fasl-output)) handle) (dump-push handle fasl-output))) (values)) - +(defun string-save-object (x fasl-output) + (declare (type fasl-output fasl-output) + (type string x)) + (unless *cold-load-dump* + (let ((handle (dump-pop fasl-output))) + (push (cons (array-element-type x) handle) + (gethash x (fasl-output-equal-table fasl-output))) + (setf (gethash x (fasl-output-eq-table fasl-output)) handle) + (dump-push handle fasl-output))) + (values)) ;;; Record X in File's CIRCULARITY-TABLE unless *COLD-LOAD-DUMP* is ;;; true. This is called on objects that we are about to dump might ;;; have a circular path through them. @@ -370,11 +387,8 @@ (dump-structure x file) (eq-save-object x file)) (array - ;; FIXME: The comment at the head of - ;; DUMP-NON-IMMEDIATE-OBJECT says it's for objects which - ;; we want to save, instead of repeatedly dumping them. - ;; But then we dump arrays here without doing anything - ;; like EQUAL-SAVE-OBJECT. What gives? + ;; DUMP-ARRAY (and its callees) are responsible for + ;; updating the EQ and EQUAL hash tables. (dump-array x file)) (number (unless (equal-check-table x file) @@ -586,7 +600,14 @@ (t (unless *cold-load-dump* (dump-fop 'fop-normal-load file)) - (dump-simple-string (package-name pkg) file) + #+sb-xc-host + (dump-simple-base-string + (coerce (package-name pkg) 'simple-base-string) + file) + #-sb-xc-host + (dump-simple-character-string + (coerce (package-name pkg) '(simple-array character (*))) + file) (dump-fop 'fop-package file) (unless *cold-load-dump* (dump-fop 'fop-maybe-cold-load file)) @@ -717,10 +738,21 @@ (*))) x))) (typecase simple-version + #+sb-xc-host + (simple-string + (unless (string-check-table x file) + (dump-simple-base-string simple-version file) + (string-save-object x file))) + #-sb-xc-host (simple-base-string - (unless (equal-check-table x file) - (dump-simple-string simple-version file) - (equal-save-object x file))) + (unless (string-check-table x file) + (dump-simple-base-string simple-version file) + (string-save-object x file))) + #-sb-xc-host + ((simple-array character (*)) + (unless (string-check-table x file) + (dump-simple-character-string simple-version file) + (string-save-object x file))) (simple-vector (dump-simple-vector simple-version file) (eq-save-object x file)) @@ -889,23 +921,31 @@ ;;; Dump characters and string-ish things. -(defun dump-character (ch file) +(defun dump-character (char file) + (let ((code (sb!xc:char-code char))) + (cond + ((< code 256) (dump-fop 'fop-short-character file) - (dump-byte (char-code ch) file)) + (dump-byte code file)) + (t + (dump-fop 'fop-character file) + (dump-word code file))))) -;;; a helper function shared by DUMP-SIMPLE-STRING and DUMP-SYMBOL -(defun dump-characters-of-string (s fasl-output) - (declare (type string s) (type fasl-output fasl-output)) +(defun dump-base-chars-of-string (s fasl-output) + (declare #+sb-xc-host (type simple-string s) + #-sb-xc-host (type simple-base-string s) + (type fasl-output fasl-output)) (dovector (c s) - (dump-byte (char-code c) fasl-output)) + (dump-byte (sb!xc:char-code c) fasl-output)) (values)) + ;;; Dump a SIMPLE-BASE-STRING. -;;; FIXME: should be called DUMP-SIMPLE-BASE-STRING then -(defun dump-simple-string (s file) - (declare (type simple-base-string s)) - (dump-fop* (length s) fop-small-string fop-string file) - (dump-characters-of-string s file) +(defun dump-simple-base-string (s file) + #+sb-xc-host (declare (type simple-string s)) + #-sb-xc-host (declare (type simple-base-string s)) + (dump-fop* (length s) fop-small-base-string fop-base-string file) + (dump-base-chars-of-string s file) (values)) ;;; If we get here, it is assumed that the symbol isn't in the table, @@ -955,7 +995,8 @@ file) (dump-word pname-length file))) - (dump-characters-of-string pname file) + #+sb-xc-host (dump-base-chars-of-string pname file) + #-sb-xc-host (dump-characters-of-string pname file) (unless *cold-load-dump* (setf (gethash s (fasl-output-eq-table file)) diff -urNb sbcl/src/compiler/fndb.lisp sbcl.char/src/compiler/fndb.lisp --- sbcl/src/compiler/fndb.lisp 2004-10-29 14:57:41.000000000 +0200 +++ sbcl.char/src/compiler/fndb.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -421,7 +421,7 @@ (movable foldable flushable)) (defknown name-char (string-designator) (or character null) (movable foldable flushable)) -(defknown code-char (char-code) base-char +(defknown code-char (char-code) character ;; By suppressing constant folding on CODE-CHAR when the ;; cross-compiler is running in the cross-compilation host vanilla ;; ANSI Common Lisp, we can use CODE-CHAR expressions to delay until diff -urNb sbcl/src/compiler/generic/early-objdef.lisp sbcl.char/src/compiler/generic/early-objdef.lisp --- sbcl/src/compiler/generic/early-objdef.lisp 2004-10-28 22:28:42.000000000 +0200 +++ sbcl.char/src/compiler/generic/early-objdef.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -167,6 +167,7 @@ simple-array-unsigned-byte-16 ; 10011110 simple-array-nil ; 10100010 simple-base-string ; 10100110 + simple-character-string simple-bit-vector ; 10101010 simple-vector ; 10101110 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) @@ -195,16 +196,19 @@ simple-array ; 11011110 complex-vector-nil ; 11100010 complex-base-string ; 11100110 + complex-character-string complex-bit-vector ; 11101010 complex-vector ; 11101110 complex-array ; 11110010 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused12 ; 11110110 + #| #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused13 ; 11111010 #!+#.(cl:if (cl:= 32 sb!vm:n-word-bits) '(and) '(or)) unused14 ; 11111110 + |# ) ;;; the different vector subtypes diff -urNb sbcl/src/compiler/generic/genesis.lisp sbcl.char/src/compiler/generic/genesis.lisp --- sbcl/src/compiler/generic/genesis.lisp 2004-10-28 22:28:42.000000000 +0200 +++ sbcl.char/src/compiler/generic/genesis.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -608,9 +608,10 @@ ;;;; copying simple objects into the cold core -(defun string-to-core (string &optional (gspace *dynamic*)) +(defun base-string-to-core (string &optional (gspace *dynamic*)) #!+sb-doc - "Copy string into the cold core and return a descriptor to it." + "Copy STRING (which must only contain STANDARD-CHARs) into the cold +core and return a descriptor to it." ;; (Remember that the system convention for storage of strings leaves an ;; extra null byte at the end to aid in call-out to C.) (let* ((length (length string)) @@ -808,7 +809,7 @@ (make-fixnum-descriptor 0)) (write-wordindexed symbol sb!vm:symbol-plist-slot *nil-descriptor*) (write-wordindexed symbol sb!vm:symbol-name-slot - (string-to-core name *dynamic*)) + (base-string-to-core name *dynamic*)) (write-wordindexed symbol sb!vm:symbol-package-slot *nil-descriptor*) symbol)) @@ -1194,7 +1195,7 @@ ;; because that's the way CMU CL did it; I'm ;; not sure whether there's an underlying ;; reason. -- WHN 1990826 - (string-to-core "NIL" *dynamic*)) + (base-string-to-core "NIL" *dynamic*)) (write-wordindexed des (+ 1 sb!vm:symbol-package-slot) result) @@ -1279,7 +1280,7 @@ (let* ((cold-package (car cold-package-symbols-entry)) (symbols (cdr cold-package-symbols-entry)) (shadows (package-shadowing-symbols cold-package)) - (documentation (string-to-core (documentation cold-package t))) + (documentation (base-string-to-core (documentation cold-package t))) (internal *nil-descriptor*) (external *nil-descriptor*) (imported-internal *nil-descriptor*) @@ -1359,7 +1360,7 @@ (res *nil-descriptor*)) (dolist (u (package-use-list pkg)) (when (assoc u *cold-package-symbols*) - (cold-push (string-to-core (package-name u)) use))) + (cold-push (base-string-to-core (package-name u)) use))) (let* ((pkg-name (package-name pkg)) ;; Make the package nickname lists for the standard packages ;; be the minimum specified by ANSI, regardless of what value @@ -1380,7 +1381,7 @@ (t (package-nicknames pkg))))) (dolist (warm-nickname warm-nicknames) - (cold-push (string-to-core warm-nickname) cold-nicknames))) + (cold-push (base-string-to-core warm-nickname) cold-nicknames))) (cold-push (number-to-core (truncate (package-internal-symbol-count pkg) 0.8)) @@ -1397,7 +1398,7 @@ (cold-push use res) (cold-push (cold-intern :use) res) - (cold-push (string-to-core (package-name pkg)) res) + (cold-push (base-string-to-core (package-name pkg)) res) res)) ;;;; functions and fdefinition objects @@ -1850,7 +1851,7 @@ (defun foreign-symbols-to-core () (let ((result *nil-descriptor*)) (maphash (lambda (symbol value) - (cold-push (cold-cons (string-to-core symbol) + (cold-push (cold-cons (base-string-to-core symbol) (number-to-core value)) result)) *cold-foreign-symbol-table*) @@ -2098,12 +2099,16 @@ ;;;; cold fops for loading vectors -(clone-cold-fop (fop-string) - (fop-small-string) +(clone-cold-fop (fop-base-string) + (fop-small-base-string) (let* ((len (clone-arg)) (string (make-string len))) (read-string-as-bytes *fasl-input-stream* string) - (string-to-core string))) + (base-string-to-core string))) + +(clone-cold-fop (fop-character-string) + (fop-small-character-string) + (bug "CHARACTER-STRING dumped by cross-compiler.")) (clone-cold-fop (fop-vector) (fop-small-vector) diff -urNb sbcl/src/compiler/generic/interr.lisp sbcl.char/src/compiler/generic/interr.lisp --- sbcl/src/compiler/generic/interr.lisp 2004-10-28 22:28:42.000000000 +0200 +++ sbcl.char/src/compiler/generic/interr.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -61,6 +61,10 @@ "Object is not of type STRING.") (object-not-base-string "Object is not of type BASE-STRING.") + (object-not-vector-nil + "Object is not of type (VECTOR NIL).") + (object-not-character-string + "Object is not of type (VECTOR CHARACTER).") (object-not-bit-vector "Object is not of type BIT-VECTOR.") (object-not-array diff -urNb sbcl/src/compiler/generic/late-type-vops.lisp sbcl.char/src/compiler/generic/late-type-vops.lisp --- sbcl/src/compiler/generic/late-type-vops.lisp 2004-10-28 22:28:42.000000000 +0200 +++ sbcl.char/src/compiler/generic/late-type-vops.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -66,7 +66,8 @@ (!define-type-vops simple-string-p check-simple-string nil object-not-simple-string-error - (simple-base-string-widetag simple-array-nil-widetag)) + (simple-character-string-widetag + simple-base-string-widetag simple-array-nil-widetag)) (macrolet ((define-simple-array-type-vops () @@ -109,11 +110,14 @@ (funcallable-instance-header-widetag)) (!define-type-vops array-header-p nil nil nil - (simple-array-widetag complex-base-string-widetag complex-bit-vector-widetag + (simple-array-widetag + complex-character-string-widetag + complex-base-string-widetag complex-bit-vector-widetag complex-vector-widetag complex-array-widetag complex-vector-nil-widetag)) (!define-type-vops stringp check-string nil object-not-string-error - (simple-base-string-widetag complex-base-string-widetag + (simple-character-string-widetag complex-character-string-widetag + simple-base-string-widetag complex-base-string-widetag simple-array-nil-widetag complex-vector-nil-widetag)) (!define-type-vops base-string-p check-base-string nil object-not-base-string-error @@ -127,6 +131,10 @@ object-not-vector-nil-error (simple-array-nil-widetag complex-vector-nil-widetag)) +(!define-type-vops character-string-p check-character-string nil + object-not-character-string-error + (simple-character-string-widetag complex-character-string-widetag)) + (!define-type-vops vectorp check-vector nil object-not-vector-error (complex-vector-widetag . #.(append diff -urNb sbcl/src/compiler/generic/vm-array.lisp sbcl.char/src/compiler/generic/vm-array.lisp --- sbcl/src/compiler/generic/vm-array.lisp 2004-10-28 22:28:43.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-array.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -64,13 +64,17 @@ (nil #:mu 0 simple-array-nil :complex-typecode #.sb!vm:complex-vector-nil-widetag :importance 0) - (character ,(code-char 0) 8 simple-base-string + (base-char ,(code-char 0) 8 simple-base-string ;; (SIMPLE-BASE-STRINGs are stored with an extra ;; trailing #\NULL for convenience in calling out ;; to C.) :n-pad-elements 1 :complex-typecode #.sb!vm:complex-base-string-widetag :importance 17) + (character ,(code-char 0) 32 simple-character-string + :n-pad-elements 1 + :complex-typecode #.sb!vm:complex-character-string-widetag + :importance 17) (single-float 0.0f0 32 simple-array-single-float :importance 6) (double-float 0.0d0 64 simple-array-double-float diff -urNb sbcl/src/compiler/generic/vm-fndb.lisp sbcl.char/src/compiler/generic/vm-fndb.lisp --- sbcl/src/compiler/generic/vm-fndb.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-fndb.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -22,6 +22,7 @@ complex-vector-p base-char-p %standard-char-p %instancep base-string-p simple-base-string-p + character-string-p simple-character-string-p array-header-p simple-array-p simple-array-nil-p vector-nil-p simple-array-unsigned-byte-2-p diff -urNb sbcl/src/compiler/generic/vm-tran.lisp sbcl.char/src/compiler/generic/vm-tran.lisp --- sbcl/src/compiler/generic/vm-tran.lisp 2004-10-28 22:28:43.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-tran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -49,6 +49,7 @@ (give-up-ir1-transform) `(etypecase string ((simple-array character (*)) (data-vector-ref string index)) + ((simple-array base-char (*)) (data-vector-ref string index)) ((simple-array nil (*)) (data-vector-ref string index)))))) (deftransform hairy-data-vector-ref ((array index) (array t) *) @@ -99,6 +100,8 @@ `(etypecase string ((simple-array character (*)) (data-vector-set string index new-value)) + ((simple-array base-char (*)) + (data-vector-set string index new-value)) ((simple-array nil (*)) (data-vector-set string index new-value)))))) diff -urNb sbcl/src/compiler/generic/vm-typetran.lisp sbcl.char/src/compiler/generic/vm-typetran.lisp --- sbcl/src/compiler/generic/vm-typetran.lisp 2004-10-28 22:28:43.000000000 +0200 +++ sbcl.char/src/compiler/generic/vm-typetran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -19,6 +19,7 @@ ;;; They shouldn't be used explicitly. (define-type-predicate base-string-p base-string) (define-type-predicate bignump bignum) +(define-type-predicate character-string-p (vector character)) (define-type-predicate complex-double-float-p (complex double-float)) (define-type-predicate complex-single-float-p (complex single-float)) #!+long-float @@ -92,6 +93,7 @@ (define-type-predicate simple-array-complex-long-float-p (simple-array (complex long-float) (*))) (define-type-predicate simple-base-string-p simple-base-string) +(define-type-predicate simple-character-string-p (simple-array character (*))) (define-type-predicate system-area-pointer-p system-area-pointer) (define-type-predicate unsigned-byte-32-p (unsigned-byte 32)) (define-type-predicate signed-byte-32-p (signed-byte 32)) diff -urNb sbcl/src/compiler/ppc/array.lisp sbcl.char/src/compiler/ppc/array.lisp --- sbcl/src/compiler/ppc/array.lisp 2004-10-28 22:28:47.000000000 +0200 +++ sbcl.char/src/compiler/ppc/array.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -111,9 +111,10 @@ (:result-types ,element-type))))) (def-data-vector-frobs simple-base-string byte-index character character-reg) + (def-data-vector-frobs simple-character-string word-index + character character-reg) (def-data-vector-frobs simple-vector word-index * descriptor-reg any-reg) - (def-data-vector-frobs simple-array-unsigned-byte-7 byte-index positive-fixnum unsigned-reg) (def-data-vector-frobs simple-array-unsigned-byte-8 byte-index @@ -134,7 +135,6 @@ (def-data-vector-frobs simple-array-signed-byte-32 word-index signed-num signed-reg)) - ;;; Integer vectors whos elements are smaller than a byte. I.e. bit, 2-bit, ;;; and 4-bit vectors. ;;; diff -urNb sbcl/src/compiler/ppc/c-call.lisp sbcl.char/src/compiler/ppc/c-call.lisp --- sbcl/src/compiler/ppc/c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/ppc/c-call.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -303,7 +303,7 @@ (:translate foreign-symbol-address) (:policy :fast-safe) (:args) - (:arg-types (:constant simple-base-string)) + (:arg-types (:constant simple-string)) (:info foreign-symbol) (:results (res :scs (sap-reg))) (:result-types system-area-pointer) diff -urNb sbcl/src/compiler/ppc/char.lisp sbcl.char/src/compiler/ppc/char.lisp --- sbcl/src/compiler/ppc/char.lisp 2004-10-28 22:28:48.000000000 +0200 +++ sbcl.char/src/compiler/ppc/char.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -46,6 +46,7 @@ (:affected) (:generator 0 (move y x))) + (define-move-vop character-move :move (character-reg) (character-reg)) @@ -63,9 +64,11 @@ (move y x)) (character-stack (storew x fp (tn-offset y)))))) + (define-move-vop move-character-arg :move-arg (any-reg character-reg) (character-reg)) + ;;; Use standard MOVE-ARG + coercion to move an untagged character ;;; to a descriptor passing location. (define-move-vop move-arg :move-arg diff -urNb sbcl/src/compiler/ppc/vm.lisp sbcl.char/src/compiler/ppc/vm.lisp --- sbcl/src/compiler/ppc/vm.lisp 2004-10-28 22:28:49.000000000 +0200 +++ sbcl.char/src/compiler/ppc/vm.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -332,11 +332,19 @@ (immediate-constant "Immed")))) ;;; The loader uses this to convert alien names to the form they -;;; occur in the symbol table. This is ELF, so do nothing. +;;; occur in the symbol table. (defun extern-alien-name (name) - (declare (type simple-base-string name)) - ;; Darwin is non-ELF, and needs a _ prefix - #!+darwin (concatenate 'string "_" name) - ;; The other (ELF) ports currently don't need any prefix - #!-darwin name) + (declare (type string name)) + ;; Darwin is non-ELF, and needs a _ prefix. The other (ELF) ports + ;; currently don't need any prefix. + (flet ((maybe-prefix (name) + #!+darwin (concatenate 'simple-base-string "_" name) + #!-darwin name)) + (typecase name + (simple-base-string (maybe-prefix name)) + (base-string (coerce (maybe-prefix name) 'simple-base-string)) + (t + (handler-case (coerce (maybe-prefix name) 'simple-base-string) + (type-error () + (error "invalid external alien name: ~S" name))))))) diff -urNb sbcl/src/compiler/seqtran.lisp sbcl.char/src/compiler/seqtran.lisp --- sbcl/src/compiler/seqtran.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/seqtran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -712,16 +712,32 @@ sb!vm:n-byte-bits))) string1)) +;;; FIXME: OAOO (should be shared with above, and probably +;;; automatically generated from SAETP). +(deftransform replace ((string1 string2 &key (start1 0) (start2 0) + end1 end2) + ((simple-array character (*)) + (simple-array character (*)) + &rest t) + * + ;; FIXME: consider replacing this policy test + ;; with some tests for the STARTx and ENDx + ;; indices being valid, conditional on high + ;; SAFETY code. + ;; + ;; FIXME: It turns out that this transform is + ;; critical for the performance of string + ;; streams. Make this more explicit. + :policy (< (max safety space) 3)) + `(sb!impl::simple-character-string-replace-from-simple-character-string* + string1 string2 start1 end1 start2 end2)) + ;;; FIXME: this would be a valid transform for certain excluded cases: ;;; * :TEST 'CHAR= or :TEST #'CHAR= ;;; * :TEST 'EQL or :TEST #'EQL ;;; * :FROM-END NIL (or :FROM-END non-NIL, with a little ingenuity) -;;; -;;; also, it should be noted that there's nothing much in this -;;; transform (as opposed to the ones for REPLACE and CONCATENATE) -;;; that particularly limits it to SIMPLE-BASE-STRINGs. (deftransform search ((pattern text &key (start1 0) (start2 0) end1 end2) - (simple-base-string simple-base-string &rest t) + (simple-string simple-string &rest t) * :policy (> speed (max space safety))) `(block search @@ -747,7 +763,7 @@ (deftransform concatenate ((rtype &rest sequences) (t &rest (or simple-base-string (simple-array nil (*)))) - simple-base-string + (simple-array base-string (*)) :policy (< safety 3)) (loop for rest-seqs on sequences for n-seq = (gensym "N-SEQ") @@ -771,7 +787,8 @@ (declare (ignore rtype)) (let* (,@lets (res (make-string (truncate (the index (+ ,@all-lengths)) - sb!vm:n-byte-bits)))) + sb!vm:n-byte-bits) + :element-type 'base-char))) (declare (type index ,@all-lengths)) (let (,@(mapcar (lambda (name) `(,name 0)) starts)) (declare (type index ,@starts)) diff -urNb sbcl/src/compiler/srctran.lisp sbcl.char/src/compiler/srctran.lisp --- sbcl/src/compiler/srctran.lisp 2004-10-29 16:46:52.000000000 +0200 +++ sbcl.char/src/compiler/srctran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -2939,9 +2939,7 @@ ;;;; character operations -(deftransform char-equal ((a b) - ((character-set ((0 . 255))) - (character-set ((0 . 255))))) +(deftransform char-equal ((a b) (base-char base-char)) "open code" '(let* ((ac (char-code a)) (bc (char-code b)) @@ -2949,31 +2947,21 @@ (or (zerop sum) (when (eql sum #x20) (let ((sum (+ ac bc))) - (or (and (> sum 161) (< sum 213)) - (and (> sum 415) (< sum 461)) - (and (> sum 463) (< sum 477)))))))) + (and (> sum 161) (< sum 213))))))) -(deftransform char-upcase ((x) ((character-set ((0 . 255))))) +(deftransform char-upcase ((x) (base-char)) "open code" '(let ((n-code (char-code x))) - (if (or (and (> n-code #o140) ; Octal 141 is #\a. + (if (and (> n-code #o140) ; Octal 141 is #\a. (< n-code #o173)) ; Octal 172 is #\z. - (and (> n-code #o337) - (< n-code #o367)) - (and (> n-code #o367) - (< n-code #o377))) (code-char (logxor #x20 n-code)) x))) -(deftransform char-downcase ((x) ((character-set ((0 . 255))))) +(deftransform char-downcase ((x) (base-char)) "open code" '(let ((n-code (char-code x))) - (if (or (and (> n-code 64) ; 65 is #\A. + (if (and (> n-code 64) ; 65 is #\A. (< n-code 91)) ; 90 is #\Z. - (and (> n-code 191) - (< n-code 215)) - (and (> n-code 215) - (< n-code 223))) (code-char (logxor #x20 n-code)) x))) @@ -2996,21 +2984,18 @@ ;;; then the result is definitely false. (deftransform simple-equality-transform ((x y) * * :defun-only t) - (cond ((same-leaf-ref-p x y) - t) - ((not (types-equal-or-intersect (lvar-type x) - (lvar-type y))) + (cond + ((same-leaf-ref-p x y) t) + ((not (types-equal-or-intersect (lvar-type x) (lvar-type y))) nil) - (t - (give-up-ir1-transform)))) + (t (give-up-ir1-transform)))) (macrolet ((def (x) `(%deftransform ',x '(function * *) #'simple-equality-transform))) (def eq) - (def char=) - (def equal)) + (def char=)) -;;; This is similar to SIMPLE-EQUALITY-PREDICATE, except that we also +;;; This is similar to SIMPLE-EQUALITY-TRANSFORM, except that we also ;;; try to convert to a type-specific predicate or EQ: ;;; -- If both args are characters, convert to CHAR=. This is better than ;;; just converting to EQ, since CHAR= may have special compilation @@ -3029,8 +3014,8 @@ (y-type (lvar-type y)) (char-type (specifier-type 'character)) (number-type (specifier-type 'number))) - (cond ((same-leaf-ref-p x y) - t) + (cond + ((same-leaf-ref-p x y) t) ((not (types-equal-or-intersect x-type y-type)) nil) ((and (csubtypep x-type char-type) @@ -3047,6 +3032,25 @@ (t (give-up-ir1-transform))))) +;;; similarly to the EQL transform above, we attempt to constant-fold +;;; or convert to a simpler predicate: mostly we have to be careful +;;; with strings. +(deftransform equal ((x y) * *) + "convert to simpler equality predicate" + (let ((x-type (lvar-type x)) + (y-type (lvar-type y)) + (string-type (specifier-type 'string))) + (cond + ((same-leaf-ref-p x y) t) + ((and (csubtypep x-type string-type) + (csubtypep y-type string-type)) + '(string= x y)) + ((and (or (not (types-equal-or-intersect x-type string-type)) + (not (types-equal-or-intersect y-type string-type))) + (not (types-equal-or-intersect x-type y-type))) + nil) + (t (give-up-ir1-transform))))) + ;;; Convert to EQL if both args are rational and complexp is specified ;;; and the same for both. (deftransform = ((x y) * *) diff -urNb sbcl/src/compiler/target-dump.lisp sbcl.char/src/compiler/target-dump.lisp --- sbcl/src/compiler/target-dump.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/target-dump.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -13,6 +13,21 @@ (in-package "SB!FASL") +;;; a helper function shared by DUMP-SIMPLE-CHARACTER-STRING and +;;; DUMP-SYMBOL (in the target compiler: the cross-compiler uses the +;;; portability knowledge and always dumps BASE-STRINGS). +(defun dump-characters-of-string (s fasl-output) + (declare (type string s) (type fasl-output fasl-output)) + (dovector (c s) + (dump-word (char-code c) fasl-output)) + (values)) + +(defun dump-simple-character-string (s file) + (declare (type (simple-array character (*)) s)) + (dump-fop* (length s) fop-small-character-string fop-character-string file) + (dump-characters-of-string s file) + (values)) + ;;; Dump the first N bytes of VEC out to FILE. VEC is some sort of unboxed ;;; vector-like thing that we can BLT from. (defun dump-raw-bytes (vec n fasl-output) diff -urNb sbcl/src/compiler/typetran.lisp sbcl.char/src/compiler/typetran.lisp --- sbcl/src/compiler/typetran.lisp 2004-10-28 22:28:40.000000000 +0200 +++ sbcl.char/src/compiler/typetran.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -115,7 +115,6 @@ ;;; just expand all its operations into toplevel code? (defun !define-standard-type-predicates () (define-type-predicate arrayp array) - ; (The ATOM predicate is handled separately as (NOT CONS).) (define-type-predicate bit-vector-p bit-vector) (define-type-predicate characterp character) (define-type-predicate compiled-function-p compiled-function) @@ -142,12 +141,10 @@ (define-type-predicate vectorp vector)) (!define-standard-type-predicates) -;;;; transforms for type predicates not implemented primitively -;;;; -;;;; See also VM dependent transforms. - (define-source-transform atom (x) `(not (consp ,x))) +(define-source-transform base-char-p (x) + `(typep ,x 'base-char)) ;;;; TYPEP source transform diff -urNb sbcl/src/compiler/x86/array.lisp sbcl.char/src/compiler/x86/array.lisp --- sbcl/src/compiler/x86/array.lisp 2004-10-28 22:28:50.000000000 +0200 +++ sbcl.char/src/compiler/x86/array.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -1248,7 +1248,7 @@ (:results (value :scs (character-reg))) (:result-types character) (:generator 5 - (inst mov value + (inst movzx value (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))))) @@ -1262,7 +1262,7 @@ (:results (value :scs (character-reg))) (:result-types character) (:generator 4 - (inst mov value + (inst movzx value (make-ea :byte :base object :disp (- (+ (* vector-data-offset n-word-bytes) index) other-pointer-lowtag))))) @@ -1272,32 +1272,48 @@ (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) (index :scs (unsigned-reg) :to (:eval 0)) - (value :scs (character-reg) :target result)) + (value :scs (character-reg) :target eax)) (:arg-types simple-base-string positive-fixnum character) + (:temporary (:sc character-reg :offset eax-offset :target result + :from (:argument 2) :to (:result 0)) + eax) (:results (result :scs (character-reg))) (:result-types character) (:generator 5 + (move eax value) (inst mov (make-ea :byte :base object :index index :scale 1 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - value) - (move result value))) + al-tn) + (move result eax))) -(define-vop (data-vector-set/simple-base-string-c) +(define-vop (data-vector-set-c/simple-base-string) (:translate data-vector-set) (:policy :fast-safe) (:args (object :scs (descriptor-reg) :to (:eval 0)) (value :scs (character-reg))) (:info index) (:arg-types simple-base-string (:constant (signed-byte 30)) character) + (:temporary (:sc unsigned-reg :offset eax-offset :target result + :from (:argument 1) :to (:result 0)) + eax) (:results (result :scs (character-reg))) (:result-types character) (:generator 4 + (move eax value) (inst mov (make-ea :byte :base object :disp (- (+ (* vector-data-offset n-word-bytes) index) other-pointer-lowtag)) - value) - (move result value))) + al-tn) + (move result eax))) + +(define-full-reffer data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-ref) + +(define-full-setter data-vector-ref/simple-character-string + simple-character-string vector-data-offset other-pointer-lowtag + (character-reg) character data-vector-set) ;;; signed-byte-8 diff -urNb sbcl/src/compiler/x86/c-call.lisp sbcl.char/src/compiler/x86/c-call.lisp --- sbcl/src/compiler/x86/c-call.lisp 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/compiler/x86/c-call.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -185,7 +185,7 @@ (:translate foreign-symbol-address) (:policy :fast-safe) (:args) - (:arg-types (:constant simple-base-string)) + (:arg-types (:constant simple-string)) (:info foreign-symbol) (:results (res :scs (sap-reg))) (:result-types system-area-pointer) diff -urNb sbcl/src/compiler/x86/char.lisp sbcl.char/src/compiler/x86/char.lisp --- sbcl/src/compiler/x86/char.lisp 2004-10-28 22:28:50.000000000 +0200 +++ sbcl.char/src/compiler/x86/char.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -15,36 +15,32 @@ ;;; Move a tagged char to an untagged representation. (define-vop (move-to-character) - (:args (x :scs (any-reg control-stack) :target al)) - (:temporary (:sc byte-reg :offset al-offset - :from (:argument 0) :to (:eval 0)) al) - (:ignore al) - (:temporary (:sc byte-reg :offset ah-offset :target y - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (character-reg character-stack))) + (:args (x :scs (any-reg descriptor-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (character-reg) + :load-if (not (location= x y)))) (:note "character untagging") (:generator 1 - (move eax-tn x) - (move y ah))) + (move y x) + (inst shr y n-widetag-bits))) (define-move-vop move-to-character :move - (any-reg control-stack) (character-reg character-stack)) + (any-reg) (character-reg)) ;;; Move an untagged char to a tagged representation. (define-vop (move-from-character) - (:args (x :scs (character-reg character-stack) :target ah)) - (:temporary (:sc byte-reg :offset al-offset :target y - :from (:argument 0) :to (:result 0)) al) - (:temporary (:sc byte-reg :offset ah-offset - :from (:argument 0) :to (:result 0)) ah) - (:results (y :scs (any-reg descriptor-reg control-stack))) + (:args (x :scs (character-reg))) + (:results (y :scs (any-reg descriptor-reg))) (:note "character tagging") (:generator 1 - (move ah x) ; Maybe move char byte. - (inst mov al character-widetag) ; x86 to type bits - (inst and eax-tn #xffff) ; Remove any junk bits. - (move y eax-tn))) + ;; FIXME: is this inefficient? Is there a better way of writing + ;; it? (fixnum tagging is done with LEA). We can't use SHL + ;; because we either scribble over the source register or briefly + ;; have a non-descriptor in a descriptor register, unless we + ;; introduce a temporary. + (inst imul y x (ash 1 n-widetag-bits)) + (inst or y character-widetag))) (define-move-vop move-from-character :move - (character-reg character-stack) (any-reg descriptor-reg control-stack)) + (character-reg) (any-reg descriptor-reg)) ;;; Move untagged character values. (define-vop (character-move) @@ -74,9 +70,10 @@ (character-reg (move y x)) (character-stack - (inst mov - (make-ea :byte :base fp :disp (- (* (1+ (tn-offset y)) 4))) - x))))) + ;; copied blindly. No idea if it's right + (if (= (tn-offset fp) esp-offset) + (storew x fp (tn-offset y)) ; c-call + (storew x fp (- (1+ (tn-offset y))))))))) (define-move-vop move-character-arg :move-arg (any-reg character-reg) (character-reg)) @@ -95,21 +92,17 @@ (:results (res :scs (unsigned-reg))) (:result-types positive-fixnum) (:generator 1 - (inst movzx res ch))) + (inst mov res ch))) (define-vop (code-char) (:translate code-char) (:policy :fast-safe) - (:args (code :scs (unsigned-reg unsigned-stack) :target eax)) + (:args (code :scs (unsigned-reg unsigned-stack))) (:arg-types positive-fixnum) - (:temporary (:sc unsigned-reg :offset eax-offset :target res - :from (:argument 0) :to (:result 0)) - eax) (:results (res :scs (character-reg))) (:result-types character) (:generator 1 - (move eax code) - (move res al-tn))) + (inst mov res code))) ;;; comparison of CHARACTERs (define-vop (character-compare) diff -urNb sbcl/src/compiler/x86/vm.lisp sbcl.char/src/compiler/x86/vm.lisp --- sbcl/src/compiler/x86/vm.lisp 2004-10-28 22:28:51.000000000 +0200 +++ sbcl.char/src/compiler/x86/vm.lisp 2004-10-29 16:47:30.000000000 +0200 @@ -229,8 +229,8 @@ ;; non-descriptor characters (character-reg registers - :locations #.*byte-regs* - :reserve-locations (#.ah-offset #.al-offset) + :locations #.*dword-regs* +; :reserve-locations (#.ah-offset #.al-offset) :constant-scs (immediate) :save-p t :alternate-scs (character-stack)) @@ -322,11 +322,12 @@ (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) (eval-when (:compile-toplevel :load-toplevel :execute) -(defparameter *byte-sc-names* '(character-reg byte-reg character-stack)) +(defparameter *byte-sc-names* '(byte-reg)) (defparameter *word-sc-names* '(word-reg)) (defparameter *dword-sc-names* '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack - signed-stack unsigned-stack sap-stack single-stack constant)) + signed-stack unsigned-stack sap-stack single-stack + character-reg character-stack constant)) ;;; added by jrd. I guess the right thing to do is to treat floats ;;; as a separate size... ;;; @@ -450,6 +451,10 @@ ;;; The loader uses this to convert alien names to the form they need in ;;; the symbol table (for example, prepending an underscore). (defun extern-alien-name (name) - (declare (type simple-base-string name)) + (declare (type string name)) ;; ELF ports currently don't need any prefix - name) + (typecase name + (simple-base-string name) + (base-string (coerce name 'simple-base-string)) + (t (handler-case (coerce name 'simple-base-string) + (type-error () (error "invalid external alien name: ~S" name)))))) diff -urNb sbcl/src/runtime/backtrace.c sbcl.char/src/runtime/backtrace.c --- sbcl/src/runtime/backtrace.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/backtrace.c 2004-10-29 16:47:30.000000000 +0200 @@ -244,6 +244,7 @@ string = (struct vector *) object; printf("%s, ", (char *) string->data); } else + /* FIXME: broken from (VECTOR NIL) */ printf("(Not simple string??\?), "); } else printf("(Not other pointer??\?), "); diff -urNb sbcl/src/runtime/gc-common.c sbcl.char/src/runtime/gc-common.c --- sbcl/src/runtime/gc-common.c 2004-10-28 22:28:56.000000000 +0200 +++ sbcl.char/src/runtime/gc-common.c 2004-10-29 16:47:30.000000000 +0200 @@ -779,6 +779,56 @@ } static int +size_character_string(lispobj *where) +{ + struct vector *vector; + int length, nwords; + + /* NOTE: A string contains one more byte of data (a terminating + * '\0' to help when interfacing with C functions) than indicated + * by the length slot. */ + + vector = (struct vector *) where; + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return nwords; +} + +scav_character_string(lispobj *where, lispobj object) +{ + struct vector *vector; + int length, nwords; + + /* NOTE: Strings contain one more byte of data than the length */ + /* slot indicates. */ + + vector = (struct vector *) where; + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return nwords; +} +static lispobj +trans_character_string(lispobj object) +{ + struct vector *vector; + int length, nwords; + + gc_assert(is_lisp_pointer(object)); + + /* NOTE: A string contains one more byte of data (a terminating + * '\0' to help when interfacing with C functions) than indicated + * by the length slot. */ + + vector = (struct vector *) native_pointer(object); + length = fixnum_value(vector->length) + 1; + nwords = CEILING(NWORDS(length, 32) + 2, 2); + + return copy_large_unboxed_object(object, nwords); +} + +static int size_base_string(lispobj *where) { struct vector *vector; @@ -1550,6 +1600,7 @@ #endif scavtab[SIMPLE_ARRAY_WIDETAG] = scav_boxed; scavtab[SIMPLE_BASE_STRING_WIDETAG] = scav_base_string; + scavtab[SIMPLE_CHARACTER_STRING_WIDETAG] = scav_character_string; scavtab[SIMPLE_BIT_VECTOR_WIDETAG] = scav_vector_bit; scavtab[SIMPLE_ARRAY_NIL_WIDETAG] = scav_vector_nil; scavtab[SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG] = @@ -1625,6 +1676,7 @@ scav_vector_complex_long_float; #endif scavtab[COMPLEX_BASE_STRING_WIDETAG] = scav_boxed; + scavtab[COMPLEX_CHARACTER_STRING_WIDETAG] = scav_boxed; scavtab[COMPLEX_VECTOR_NIL_WIDETAG] = scav_boxed; scavtab[COMPLEX_BIT_VECTOR_WIDETAG] = scav_boxed; scavtab[COMPLEX_VECTOR_WIDETAG] = scav_boxed; @@ -1675,6 +1727,7 @@ #endif transother[SIMPLE_ARRAY_WIDETAG] = trans_boxed; /* but not GENCGC */ transother[SIMPLE_BASE_STRING_WIDETAG] = trans_base_string; + transother[SIMPLE_CHARACTER_STRING_WIDETAG] = trans_character_string; transother[SIMPLE_BIT_VECTOR_WIDETAG] = trans_vector_bit; transother[SIMPLE_VECTOR_WIDETAG] = trans_vector; transother[SIMPLE_ARRAY_NIL_WIDETAG] = trans_vector_nil; @@ -1755,6 +1808,7 @@ trans_vector_complex_long_float; #endif transother[COMPLEX_BASE_STRING_WIDETAG] = trans_boxed; + transother[COMPLEX_CHARACTER_STRING_WIDETAG] = trans_boxed; transother[COMPLEX_BIT_VECTOR_WIDETAG] = trans_boxed; transother[COMPLEX_VECTOR_NIL_WIDETAG] = trans_boxed; transother[COMPLEX_VECTOR_WIDETAG] = trans_boxed; @@ -1805,6 +1859,7 @@ #endif sizetab[SIMPLE_ARRAY_WIDETAG] = size_boxed; sizetab[SIMPLE_BASE_STRING_WIDETAG] = size_base_string; + sizetab[SIMPLE_CHARACTER_STRING_WIDETAG] = size_character_string; sizetab[SIMPLE_BIT_VECTOR_WIDETAG] = size_vector_bit; sizetab[SIMPLE_VECTOR_WIDETAG] = size_vector; sizetab[SIMPLE_ARRAY_NIL_WIDETAG] = size_vector_nil; @@ -1881,6 +1936,7 @@ size_vector_complex_long_float; #endif sizetab[COMPLEX_BASE_STRING_WIDETAG] = size_boxed; + sizetab[COMPLEX_CHARACTER_STRING_WIDETAG] = size_boxed; sizetab[COMPLEX_VECTOR_NIL_WIDETAG] = size_boxed; sizetab[COMPLEX_BIT_VECTOR_WIDETAG] = size_boxed; sizetab[COMPLEX_VECTOR_WIDETAG] = size_boxed; diff -urNb sbcl/src/runtime/gencgc.c sbcl.char/src/runtime/gencgc.c --- sbcl/src/runtime/gencgc.c 2004-10-28 22:28:57.000000000 +0200 +++ sbcl.char/src/runtime/gencgc.c 2004-10-29 16:47:30.000000000 +0200 @@ -2145,6 +2145,7 @@ #endif case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -2160,6 +2161,7 @@ case LONG_FLOAT_WIDETAG: #endif case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: @@ -2248,6 +2250,7 @@ break; case BIGNUM_WIDETAG: case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: @@ -3111,6 +3114,7 @@ case COMPLEX_WIDETAG: case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -3197,6 +3201,7 @@ case COMPLEX_LONG_FLOAT_WIDETAG: #endif case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: diff -urNb sbcl/src/runtime/print.c sbcl.char/src/runtime/print.c --- sbcl/src/runtime/print.c 2004-10-28 22:28:59.000000000 +0200 +++ sbcl.char/src/runtime/print.c 2004-10-29 16:47:30.000000000 +0200 @@ -536,6 +536,7 @@ #endif case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: /* FIXME */ NEWLINE_OR_RETURN; cptr = (char *)(ptr+1); putchar('"'); @@ -599,6 +600,7 @@ case SIMPLE_ARRAY_COMPLEX_LONG_FLOAT_WIDETAG: #endif case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: diff -urNb sbcl/src/runtime/purify.c sbcl.char/src/runtime/purify.c --- sbcl/src/runtime/purify.c 2004-10-28 22:28:59.000000000 +0200 +++ sbcl.char/src/runtime/purify.c 2004-10-29 16:47:30.000000000 +0200 @@ -285,6 +285,7 @@ #endif case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -301,6 +302,7 @@ #endif case SIMPLE_ARRAY_NIL_WIDETAG: case SIMPLE_BASE_STRING_WIDETAG: + case SIMPLE_CHARACTER_STRING_WIDETAG: case SIMPLE_BIT_VECTOR_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_2_WIDETAG: case SIMPLE_ARRAY_UNSIGNED_BYTE_4_WIDETAG: @@ -912,6 +914,7 @@ case COMPLEX_WIDETAG: case SIMPLE_ARRAY_WIDETAG: case COMPLEX_BASE_STRING_WIDETAG: + case COMPLEX_CHARACTER_STRING_WIDETAG: case COMPLEX_BIT_VECTOR_WIDETAG: case COMPLEX_VECTOR_NIL_WIDETAG: case COMPLEX_VECTOR_WIDETAG: @@ -931,6 +934,9 @@ case SIMPLE_BASE_STRING_WIDETAG: return ptrans_vector(thing, 8, 1, 0, constant); + case SIMPLE_CHARACTER_STRING_WIDETAG: + return ptrans_vector(thing, 32, 1, 0, constant); + case SIMPLE_BIT_VECTOR_WIDETAG: return ptrans_vector(thing, 1, 0, 0, constant); @@ -1152,6 +1158,11 @@ count = CEILING(NWORDS(fixnum_value(vector->length)+1,8)+2,2); break; + case SIMPLE_CHARACTER_STRING_WIDETAG: + vector = (struct vector *)addr; + count = CEILING(NWORDS(fixnum_value(vector->length)+1,32)+2,2); + break; + case SIMPLE_BIT_VECTOR_WIDETAG: vector = (struct vector *)addr; count = CEILING(NWORDS(fixnum_value(vector->length),1)+2,2); diff -urNb sbcl/src/runtime/search.c sbcl.char/src/runtime/search.c --- sbcl/src/runtime/search.c 2004-10-26 22:40:32.000000000 +0200 +++ sbcl.char/src/runtime/search.c 2004-10-29 16:47:30.000000000 +0200 @@ -47,6 +47,8 @@ if (lowtag_of(symbol->name) == OTHER_POINTER_LOWTAG) { symbol_name = (struct vector *)native_pointer(symbol->name); if (is_valid_lisp_addr((os_vm_address_t)symbol_name) && + /* FIXME: Broken with more than one type of string + (i.e. even broken given (VECTOR NIL) */ widetag_of(symbol_name->header) == SIMPLE_BASE_STRING_WIDETAG && strcmp((char *)symbol_name->data, name) == 0) return 1; diff -urNb sbcl/tests/compiler.test.sh sbcl.char/tests/compiler.test.sh --- sbcl/tests/compiler.test.sh 2004-10-28 22:29:18.000000000 +0200 +++ sbcl.char/tests/compiler.test.sh 2004-10-29 16:47:30.000000000 +0200 @@ -286,6 +286,12 @@ EOF expect_failed_compile $tmpfilename +# This should be clean +cat > $tmpfilename <