diff options
114 files changed, 5934 insertions, 1677 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9108b5a..d42585a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -48,10 +48,8 @@ jobs: run: xvfb-run -a make check TESTSUITEFLAGS="--verbose" - name: Build Developer Documentation run: cd doc && make devdoc - # FIXME: Will try to perform an out-of-tree build which will not - # work without manual intervention due to Scintilla. -# - name: make distcheck -# run: xvfb-run -a make distcheck + - name: make distcheck + run: xvfb-run -a make distcheck - name: Build Source Tarball run: make dist @@ -61,7 +59,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -79,16 +77,14 @@ jobs: ./configure --with-interface=ncurses --enable-debug --enable-html-manual - run: make - - run: make install + - run: sudo make install # NOTE: The test suite must be run in verbose mode because if it fails # we won't be able to analyze testsuite.log. - name: Run Test Suite run: make check TESTSUITEFLAGS="--verbose" - name: Build Developer Documentation run: cd doc && make devdoc - # FIXME: Will try to perform an out-of-tree build which will not - # work without manual intervention due to Scintilla. 
-# - run: make distcheck + - run: make distcheck - name: Build Source Tarball run: make dist @@ -105,7 +101,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -116,7 +112,7 @@ jobs: - name: Install Build Dependencies run: > pacman -S --noconfirm --needed - base-devel autotools mingw-w64-i686-toolchain + base-devel mingw-w64-i686-autotools mingw-w64-i686-toolchain mingw-w64-i686-glib2 mingw-w64-i686-pdcurses groff mingw-w64-i686-doxygen @@ -135,8 +131,10 @@ jobs: run: make check TESTSUITEFLAGS="--verbose" - name: Build Developer Documentation run: cd doc && make devdoc - # FIXME: Will try to perform an out-of-tree build which will not - # work without manual intervention due to Scintilla. -# - run: make distcheck + - name: make distcheck + env: + DISTCHECK_CONFIGURE_FLAGS: --with-interface=pdcurses-gui + PDCURSES_CFLAGS: -I/mingw32/include/pdcurses/ + run: make distcheck - name: Build Source Tarball run: make dist diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 2f14dbf..0af6612 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -47,20 +47,34 @@ jobs: cp debian-temp/sciteco-gtk_*.deb sciteco-gtk_nightly_${{matrix.os}}_amd64.deb cp debian-temp/sciteco-common_*.deb sciteco-common_nightly_${{matrix.os}}_all.deb - - name: Archive Debian/Ubuntu Packages - uses: pyTooling/Actions/releaser@v0.4.6 + - name: Build AppImages + # Should always be on the oldest supported Ubuntu + if: matrix.os == 'ubuntu-20.04' + run: | + cd AppImage + wget -O pkg2appimage.AppImage \ + "https://github.com/AppImageCommunity/pkg2appimage/releases/download/continuous/pkg2appimage--x86_64.AppImage" + chmod +x pkg2appimage.AppImage + ./pkg2appimage.AppImage curses.yml + mv out/*.AppImage ../sciteco-curses_nightly_x86_64.AppImage + 
./pkg2appimage.AppImage gtk.yml + mv out/*.AppImage ../sciteco-gtk_nightly_x86_64.AppImage + chmod a+x *.AppImage + + - name: Archive Debian/Ubuntu Packages and AppImages + uses: pyTooling/Actions/releaser@v1.0.5 with: token: ${{ secrets.GITHUB_TOKEN }} tag: nightly - files: ./*.deb + files: ./*.deb ./*.AppImage macos: - runs-on: macos-11 + runs-on: macos-12 steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -71,8 +85,9 @@ jobs: # the system libncurses ABI breaks. # However, Homebrew installs ncurses as a keg and it will refer to a # non-standard $TERMINFO. This could be worked around. + # The macOS Groff version appears to be outdated. - name: Install Build Dependencies - run: brew install autoconf automake libtool glib dylibbundler + run: brew install autoconf automake libtool glib groff dylibbundler # Required by pyTooling/Actions/releaser - name: Set up Python uses: actions/setup-python@v4.3.0 @@ -97,7 +112,7 @@ jobs: # but the more we get rid off, the better. run: | autoreconf -i - ./configure --with-interface=ncurses --enable-static-executables + ./configure --with-interface=ncurses --enable-static-executables --enable-html-manual - name: make run: make -j 2 @@ -124,12 +139,36 @@ jobs: --root temp-install --install-location / \ sciteco-curses_nightly_macos_x86_64.pkg - name: Archive Mac OS Distribution (ncurses) - uses: pyTooling/Actions/releaser/composite@v0.4.6 + uses: pyTooling/Actions/releaser/composite@v1.0.5 with: token: ${{ secrets.GITHUB_TOKEN }} tag: nightly files: ./*.pkg + # The website is published on Mac OS only because we cannot tweak the + # ./configure flags on Ubuntu where Debian packages are built. + # FIXME: This could be done without a gh-pages branch, see + # https://github.com/actions/starter-workflows/blob/main/pages/static.yml + # This however should be in its own workflow and we'd have to rebuild + # SciTECO and everything. + # FIXME: Also build cheat-sheet.pdf automatically? 
+ - run: make install + - name: Install lowdown (Markdown processor) + run: brew install lowdown + - name: Generate website + run: cd www && sciteco -m build.tes + - name: Publish Website + run: | + cd www + touch .nojekyll + git init + cp ../.git/config ./.git/config + git add .nojekyll *.html + git config --local user.email "Website@GitHubActions" + git config --local user.name "GitHub Actions" + git commit -a -m "update ${{ github.sha }}" + git push -u origin +HEAD:gh-pages + win32-curses: runs-on: windows-2019 @@ -143,7 +182,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -154,7 +193,7 @@ jobs: - name: Install Build Dependencies run: > pacman -S --noconfirm --needed - base-devel autotools mingw-w64-i686-toolchain + base-devel mingw-w64-i686-autotools mingw-w64-i686-toolchain mingw-w64-i686-glib2 mingw-w64-i686-pdcurses groff @@ -175,9 +214,7 @@ jobs: LDFLAGS: -flto run: | autoreconf -i - mkdir -p build-wingui/contrib build-wincon/contrib - cp -r contrib/{scintilla,lexilla,scinterm} build-wingui/contrib - cp -r contrib/{scintilla,lexilla,scinterm} build-wincon/contrib + mkdir build-wingui build-wincon (cd build-wingui ../configure --with-interface=pdcurses-gui --enable-html-manual --program-prefix=g \ PDCURSES_LIBS="-lpdcurses_wingui -lgdi32 -lcomdlg32 -lwinmm") @@ -216,7 +253,7 @@ jobs: for f in *.exe; do ../contrib/mingw-bundledlls --copy $f; done zip -9 -r ../sciteco-pdcurses_nightly_win32.zip . 
- name: Archive Windows Distribution (PDCurses) - uses: pyTooling/Actions/releaser/composite@v0.4.6 + uses: pyTooling/Actions/releaser/composite@v1.0.5 with: token: ${{ secrets.GITHUB_TOKEN }} tag: nightly @@ -239,7 +276,7 @@ jobs: steps: - name: Git Clone - uses: actions/checkout@v2 + uses: actions/checkout@v4.1.6 with: submodules: true @@ -250,7 +287,7 @@ jobs: - name: Install Build Dependencies run: > pacman -S --noconfirm --needed - base-devel autotools mingw-w64-i686-toolchain + base-devel mingw-w64-i686-autotools mingw-w64-i686-toolchain mingw-w64-i686-glib2 mingw-w64-i686-gtk3 groff @@ -312,7 +349,7 @@ jobs: cp ../win32/loaders.cache lib/gdk-pixbuf-2.0/2.10.0/ zip -9 -r ../sciteco-gtk3_nightly_win32.zip . - name: Archive Windows Distribution (GTK+ 3) - uses: pyTooling/Actions/releaser/composite@v0.4.6 + uses: pyTooling/Actions/releaser/composite@v1.0.5 with: token: ${{ secrets.GITHUB_TOKEN }} tag: nightly diff --git a/AppImage/curses.yml b/AppImage/curses.yml new file mode 100755 index 0000000..2989a39 --- /dev/null +++ b/AppImage/curses.yml @@ -0,0 +1,31 @@ +app: sciteco-curses + +# We currently use paths hardcoded at build-time. +# Alternatively, it would be possible to customize the AppRun script or +# add a wrapper that sets $SCITECOPATH. +union: true + +ingredients: + packages: + - sciteco-curses + dist: focal + sources: + - deb http://archive.ubuntu.com/ubuntu/ focal main universe +# ppas: +# - robin-haberkorn/sciteco + script: + - wget -c "https://github.com/rhaberkorn/sciteco/releases/download/nightly/sciteco-common_nightly_ubuntu-20.04_all.deb" + - wget -c "https://github.com/rhaberkorn/sciteco/releases/download/nightly/sciteco-curses_nightly_ubuntu-20.04_amd64.deb" + post_script: + - dpkg -I sciteco-curses*.deb | grep "Version:" | cut -d':' -f2 | cut -d'+' -f1 | sed 's/^[ ]*//g' >VERSION + +script: + # This is currently not installed by sciteco-curses. 
+ # FIXME: There should perhaps be a unique name in the desktop file, so it does not conflict with the Gtk version. + - wget -O sciteco-curses.desktop -c "https://raw.githubusercontent.com/rhaberkorn/sciteco/master/src/sciteco.desktop" + - sed -i -e 's@gsciteco@sciteco@g' sciteco-curses.desktop + - echo 'Terminal=true' >>sciteco-curses.desktop + - wget -O sciteco.png -c "https://raw.githubusercontent.com/rhaberkorn/sciteco/master/ico/sciteco-256.png" + # Thinning: These documentation files are pointless. + # SciTECO comes with its own online help system. + - rm -rf ./usr/share/doc ./usr/share/man diff --git a/AppImage/gtk.yml b/AppImage/gtk.yml new file mode 100755 index 0000000..d49e308 --- /dev/null +++ b/AppImage/gtk.yml @@ -0,0 +1,33 @@ +app: sciteco-gtk + +# We currently use paths hardcoded at build-time. +# Alternatively, it would be possible to customize the AppRun script or +# add a wrapper that sets $SCITECOPATH. +union: true + +ingredients: + packages: + - sciteco-gtk + exclude: + # pkg2appimage blacklists Gtk, Pango and other libs from the GNOME stack, + # so excluding glib as well should actually improve portability. + - libglib2.0-0 + dist: focal + sources: + - deb http://archive.ubuntu.com/ubuntu/ focal main universe +# ppas: +# - robin-haberkorn/sciteco + script: + - wget -c "https://github.com/rhaberkorn/sciteco/releases/download/nightly/sciteco-common_nightly_ubuntu-20.04_all.deb" + - wget -c "https://github.com/rhaberkorn/sciteco/releases/download/nightly/sciteco-gtk_nightly_ubuntu-20.04_amd64.deb" + post_script: + - dpkg -I sciteco-gtk*.deb | grep "Version:" | cut -d':' -f2 | cut -d'+' -f1 | sed 's/^[ ]*//g' >VERSION + +script: + # FIXME: There should perhaps be a unique name in the desktop file, so it does not conflict with the Curses version. 
+ - mv ./usr/share/applications/sciteco.desktop ./sciteco-gtk.desktop + - cp ./usr/share/icons/hicolor/256x256/apps/sciteco.png ./sciteco.png + - rm -rf ./usr/share/icons + # Thinning: These documentation files are pointless. + # SciTECO comes with its own online help system. + - rm -rf ./usr/share/doc ./usr/share/man @@ -6,6 +6,143 @@ using a prebuilt binary) are included. Entries marked with "(!)" might break macro portability compared to the preceding release. +Version 2.1.0 +~~~~~~~~~~~~~ + +7413b9c Fixed memory leak when replacing command lines. + Even cursor movements via function keys (fnkeys.tes) were constantly leaking memory. +820f716 improved support for braces within loops: warn about unclosed braces and fixed breaking from within braces +7adcfbf, +9a25dfd, +(!)b36ff25 Pattern match characters support ^Q/^R now as well. + This makes it possible, albeit cumbersome, to escape pattern match characters. +5395a7d check the memory limit and allow interruptions when loading files +973e50d FreeBSD/jemalloc: fixed recovery after hitting memory limit +b3ae8e2 fixed memory limiting if the process' memory usage is larger than 2GB and overflow checking +f188d74 fixed rubbing out (some) string building constructs at the beginning of the command line argument +dcaeb77 Inhibit some immediate editing commands after ^Q/^R string building constructs. + This makes it easier to insert ^W (23) by typing ^Q^W. + ^Q^U was coincidentally already working previously. +c288762 Allow OSC-52 clipboards on all terminal emulators. + For Urxvt support, have a look at https://gist.github.com/rhaberkorn/d7406420b69841ebbcab97548e38b37d +49e1f09 syntax errors are reported with "echoed" characters, ie. as purely printable characters +(!)2b5b2a4 ^W^W and ^V^V can be typed completely with upcarets now and they case fold all expansions + of ^EQq, ^EUq and so on. For instance you can now type EUq^W^W^EQq$ to + upper case Q-Register q. 
+(!)fcf962e Ctrl+^ (30) is no longer translated to a single caret in string building (refs #20) +5b3906f, +8fbd0b3 "Special" Q-Registers now support EQq...$ (load) and E%q...$ (save) commands. + You can for instance directly load the clipboard from file contents. +b7b9840 check that local register is not edited at the end of macro calls +3db9368 improved HTML lexer (html.tes) +e307e56 netbsd-curses: fixed the default escape delay. + Recent versions of netbsd-curses are indeed fully supported now. +84cc45e PDCurses/WinGUI: significantly reduced flickering +a06bcea, +6f16667, +8744502 Curses: added support for cool Unicode icons (refs #5). + This requires "Nerd Fonts" and 0,512ED in the profile. +52d73e8 GTK: support setting and getting clipboards containing null bytes +c739742 GTK: allow disabling client-side decorations by setting $GTK_CSD=0. + This is the same variable used by gtk3-nocsd. +19d3f12 GTK: Fixes icons in tabbed and st (when embedding SciTECO via --xembed). +0e6e059 GTK: ignore the keyboard layout wherever possible (refs #5) +bc859a0 fixed rubout of empty forward kill (FK) +638f63c, +966d3ef minor search optimization - search commands will remove the buffer gap less often +ded9a02 test suite: enable the recursion overflow test case everywhere +2fd3664, +c2057eb, +(!!)abb5d23 Function key macros have been reworked into a more generic key macro feature, + allowing you to repurpose "international" characters on your keyboard. + You could also simply remap them to their latin counterparts, so you don't have + to change keyboard layouts often (on Curses, as GTK does that automatically). + The function key macro prefix changed from ^F to ^K. + Also, you don't have to and should no longer set 0,64ED. +cc63f3b improved file name autocompletion +c59b33c, +4789e39, +4ca4ba2, +f55f50e, +73d574b, +(!)6857807 The SciTECO parser is Unicode-based now (refs #5).
+ All language constructs are still exclusively based on ASCII, + but Unicode (UTF-8) is accepted everywhere, where an arbitrary character + is expected (as in string arguments and Q-Register names). + SciTECO macros must be in valid UTF-8, which may break existing macros that + embed arbitrary bytes. +2f632e1 disable unused Scintilla features at build time, slightly reducing binary size +e5d49eb Production builds will be slightly faster due to omission of runtime assertions. +b729ee4 sample.teco_ini: Codepage guessing (refs #5) +dd36439 sample.teco_ini: fixed opening files with glob characters in their names. + You are advised to merge changes into your ~/.teco_ini as after every upgrade. +c4a1c3a, +c222fa3 added an improvised lexer for styling Git commit, tag and merge messages. +bed2f36 <f,tXq>: fixed for very large character ranges +9241075 improved 8-bit cleanliness test cases and added Unicode test cases (refs #5) +4f23187 Added raw ANSI mode via ED flags to facilitate 8-bit clean editing (refs #5). + There is also a `--8bit` and `-8` command line argument. +41ab5cf Xq and ]q inherit the document encoding from the source document (refs #5) +403c1cd search patterns are now expected to be in UTF-8 and the document's encoding is taken into account (refs #5) +d714bb4, +d556aee, +b31b887 the ^EUq string building escape construct now respects the encoding (can insert bytes or codepoints) (refs #5) +850fa38, +(!)b85edaa <I> and <n^U> commands checks input codepoint ranges depending on the encoding (refs #5) +a747cff conditionals, eg. <n"A>, now check for Unicode codepoints (refs #5) +1d8f702 grosciteco: added the "aq", "ha" and "ti" glyphs (refs #22). + This fixes building on newer Groff versions, but may not have been an issue in v2.0.0. +db5ab17 grosciteco: support Unicode (refs #5) +33124e3, +e384e4f Implemented <EE> and <^E> commands for configuring and converting between encodings and translating between + glyph and byte offsets (refs #5). 
+ Support for handling single-byte encodings is still incomplete, but you can edit + raw binary files. +62ccd35 win32: fixed opening and saving UTF-8 filenames (refs #5) +4a4ffd9 win32: convert command line to UTF-8 (refs #5) +90bad24 allow Unicode characters in command line arguments (refs #5) +2a05075, +745a76a, +359e257, +4dadac8, +e466218, +34683e8, +7507ad3, +7c59256, +f79a6f6, +c71ed30, +(!)4c6b681 Input and displaying of Unicode characters is now possible (refs #5). + Furthermore you can edit and navigate Unicode (UTF-8) documents. + TECO addresses refer to glyphs now in Unicode documents, instead of bytes. + This uses heuristics and can be slow in certain corner cases. + Care must be taken when interacting with raw Scintilla-messages (ES) as they + are always byte-addressed. You must use the new ^E command in these situations + to translate between buffer addresses. +fdc185b fixed retrieval of characters with codes larger than 127 (<A>, <nQ> commands) +ee9cf43, +e2c7c11 fully support out of tree builds +fbaa927, +07d9cdf, +78e7f30, +bbcf801 Updated Scintilla to v5.5.2, Scinterm to v5.1 and Lexilla to HEAD (rel-5-2-7-211-ge0f045a0). + This adds a troff/nroff lexer (for manpages among other things). +83398b3 fixed expressions like `1,(2)` or `(1),(2)`: they are reported as two numbers now +21c5be3 Fixed and improved the power (^*) operator. + It did not handle corner cases and was inefficient.
+f2b070c fnkeys.tes: cursor movement now preserves the column as in most text editors +e37c2d6 fnkeys.tes: support zooming via F9/F10 on Gtk +23fb4d2 grosciteco.tes(1) manpage: fixed formatting of list of troff macros +5a8c5e8, +27fba8b, +c3e25ca, +33721ec several fixes in the sciteco(1) and sciteco(7) man pages +37eef3c avoid Groff warnings due to `\` escapes +22906ff fixed Clang warnings about one-bit-wide boolean integers (-Wsingle-bit-bitfield-constant-conversion) +7c1c6e2 fixed <EC$> assertions: specifying empty command strings was undefined +f1fec09 fixed ]$ and ]~ (pop from Q-Reg stack to special Q-Registers). + The idiom [$ FG...$ ]$ to change the working directory temporarily now works. + Similarly you can now write [~ ^U~...$ ]~ to change the clipboard temporarily. + Version 2.0.0 ~~~~~~~~~~~~~ @@ -3,7 +3,7 @@ Installation Instructions Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. -Copyright (C) 2013-2023 Robin Haberkorn +Copyright (C) 2013-2024 Robin Haberkorn Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright @@ -47,7 +47,7 @@ installed by the user manually: * Scintilla (v5.3.0 or later): http://www.scintilla.org/ * When choosing the Curses interface: - * Scinterm (v4.0 or later): + * Scinterm (v5.2 or later): http://foicica.com/scinterm/ * Lexilla (v5.0.0 or later, optional): https://www.scintilla.org/Lexilla.html @@ -137,31 +137,6 @@ Windows releases need special configuration options. Building these packages is automated by the `./distribute` script (a standalone Makefile script). -Out of Tree Builds -================== - -Like any Autoconf/Automake-based project, SciTECO can generally -be built out-of-tree (in a different directory than its sources). -This is often useful, especially when cross-compiling.
-Unfortunately, the Scintilla (and related libraries') build-system -does not support out-of-tree builds, so even though Scintilla has been tied -into SciTECO's build system for convenience, out-of-tree builds -cannot be performed directly as Scintilla would still be built -in SciTECO's source directory by default. - -However, you may copy Scintilla, Scinterm and Lexilla into -SciTECO's build directory. -An out-of-tree build of SciTECO can thus be achieved using the -following steps (supposing that the build directory will be -a subdirectory of the Git repository called `build-dir`): - - $ mkdir -p build-dir/contrib - $ cp -r contrib/{scintilla,scinterm,lexilla} build-dir/contrib/ - $ cd build-dir - $ ../configure - $ make - ... - For more details on building Autoconf-based projects refer to the remainder of this document which covers installing Autoconf-based projects in general. @@ -0,0 +1,5 @@ +News +==== + +<span class="nf nf-md-new_box"></span> +Check out the new [v2.1.0 release](https://github.com/rhaberkorn/sciteco/releases/tag/v2.1.0). @@ -0,0 +1 @@ +NEWS
\ No newline at end of file @@ -4,7 +4,6 @@ Overview ======== -<img align="left" alt="SciTECO" src="ico/sciteco-48.png"/> SciTECO is an interactive TECO dialect, similar to Video TECO. It also adds features from classic TECO-11, as well as unique new ideas. @@ -17,8 +16,10 @@ as far as possible. For instance, moving the cursor to the right can be done imm When you delete a character from the end of the command line macro (called rubout), the side-effects of that character which may be a command or part of a command, are undone. -SciTECO uses the [Scintilla](http://www.scintilla.org/) editor component and supports -GTK+ 3 as well as Curses frontends (using [Scinterm](http://foicica.com/scinterm/)). + + +SciTECO uses the [Scintilla](https://www.scintilla.org/) editor component and supports +GTK+ 3 as well as Curses frontends (using [Scinterm](https://foicica.com/scinterm/)). The Curses frontend is verified to work with [ncurses](https://www.gnu.org/software/ncurses/), [NetBSD Curses](https://github.com/sabotage-linux/netbsd-curses), @@ -53,7 +54,8 @@ Features the command stream. This also enables navigating with function keys (e.g. cursor keys) as demonstrated by the standard library `fnkeys.tes`. - Function key macros can be context-sensitive, too. + In fact, all keys with printable representation and control keys can be remapped using + key macros - and they can be context-sensitive as well! * Many TECO-11 features, like that most commands have a colon-modified form, string-building characters, exotic match characters... * Interactivity: Immediate searching (similar to search-as-you-type) and @@ -73,8 +75,13 @@ Features * Munging: Macros may be munged, that is executed in batch mode. In other words, SciTECO can be used for scripting. By default, a profile is munged. -* 8-bit clean: SciTECO can be used to edit binary files if automatic EOL conversion - is turned off (`16,0ED`). 
+* Full Unicode (UTF-8) support: The document is still represented as a random-accessible + codepoint sequence. + While SciTECO macros can be written in plain ASCII - even in its printable subset - + Unicode glyphs can be used everywhere, where a single character is accepted, for instance + in single letter Q-Register names. +* 8-bit clean: SciTECO can be used to edit binary files if the encoding is changed to + ANSI and automatic EOL conversion is turned off (easiest with `--8bit`). * Self-documenting: An integrated indexed help system allows browsing formatted documentation about commands, macros and concepts within SciTECO (`?` command). Macro packages can be documented with the `tedoc` tool, generating man pages. @@ -104,35 +111,40 @@ Download There are prebuilt binary packages and source bundles for your convenience: -* [Download Archive at Sourceforge](https://sourceforge.net/projects/sciteco/files/) * [Github Releases](https://github.com/rhaberkorn/sciteco/releases) +* [Download Archive at Sourceforge](https://sourceforge.net/projects/sciteco/files/) * [Ubuntu PPA](https://launchpad.net/~robin-haberkorn/+archive/sciteco) +* [Arch User Repository](https://aur.archlinux.org/packages/sciteco-git) * Users of OpenWrt may try to install the [`sciteco` package of this feed](https://github.com/rhaberkorn/nanonote-ports). * Yocto/OpenEmbedded users should try the [`sciteco` package from this layer](https://github.com/rhaberkorn/meta-rhaberkorn). * [Chocolatey package](https://community.chocolatey.org/packages/SciTECO) for Windows users. -* The official [SciTECO homepage](http://sciteco.sf.net/) is a live demo. -These releases may be quite outdated, so you may also try out the -[nightly builds](https://github.com/rhaberkorn/sciteco/releases/tag/nightly) - +These releases may be quite outdated and not all of them are provided or tested by the author. 
+So you may also try out the [nightly builds](https://github.com/rhaberkorn/sciteco/releases/tag/nightly) - they represent the repository's HEAD commit but may well be unstable. -Both ncurses and Gtk+ packages are provided both for Ubuntu and Windows. +Both ncurses and Gtk+ packages are provided for Ubuntu, generic Linux +(in the form of [AppImages](https://appimage.org/)) and Windows. For [Mac OS X](https://github.com/rhaberkorn/sciteco/wiki/Mac-OS-Support), we currently only provide *experimental* ncurses builds. If everything fails, you can try building from source. -See [`INSTALL`](INSTALL) for more details. +See [`INSTALL`](https://github.com/rhaberkorn/sciteco/blob/master/INSTALL) for more details. Additional Documentation ======================== * Online manpages: - [__sciteco__(1)](http://sciteco.sf.net/manuals/sciteco.1.html), - [__sciteco__(7)](http://sciteco.sf.net/manuals/sciteco.7.html), - [__grosciteco.tes__(1)](http://sciteco.sf.net/manuals/grosciteco.tes.1.html), - [__tedoc.tes__(1)](http://sciteco.sf.net/manuals/tedoc.tes.1.html) + [__sciteco__(1)](https://rhaberkorn.github.io/sciteco/sciteco.1.html), + [__sciteco__(7)](https://rhaberkorn.github.io/sciteco/sciteco.7.html), + [__grosciteco.tes__(1)](https://rhaberkorn.github.io/sciteco/grosciteco.tes.1.html), + [__tedoc.tes__(1)](https://rhaberkorn.github.io/sciteco/tedoc.tes.1.html) +* [Cheat Sheet and Language Overview](https://sciteco.sf.net/manuals/cheat-sheet.pdf). + This can be printed on an A4 sheet of paper. * [Wiki at Github](https://github.com/rhaberkorn/sciteco/wiki) -* A [short presentation](http://sciteco.sf.net/manuals/presentation.pdf) - (in German!) hold at [Netz39](http://www.netz39.de/). +* A [short presentation](https://sciteco.sf.net/manuals/presentation.pdf) + (in German!) held at [Netz39](https://www.netz39.de/). + +<p align="center"><img alt="SciTECO icon" src="https://github.com/rhaberkorn/sciteco/raw/master/ico/sciteco-48.png"/></p> @@ -4,8 +4,16 @@ Tasks: "edit" hook.
Known Bugs: - * Using fnkeys.tes still flickers on PDCurses/WinGUI. - Apparently a PDCurses bug. + * Editing very large files, or at least files with very long lines, is painstakingly slow. + Try for instance openrussian-custom-2023-10-09.sql. + For some strange reason, this affects both Curses and GTK. + In UTF-8 mode, this doesn't even load anytime soon. + * PDCurses/WinGUI: Pressing Ctrl+Shift+6 on a US keyboard layout + inserts "6^^". This looks like a PDCurses/WinGUI bug. + https://github.com/Bill-Gray/PDCursesMod/issues/323 + * PDCurses/WinGUI: There is still some flickering, but it got better + since key macros update the command line only once. + https://github.com/Bill-Gray/PDCursesMod/issues/322 * Win32: Interrupting <EC> will sometimes hang. Affects both PDCurses/WinGUI and Gtk. In this case you have to kill the subprocess using the task manager. @@ -16,17 +24,14 @@ Known Bugs: after hitting the OOM limit, eg. after <%a>. Apparently an effect of HAVE_MORECORE (sbrk()) - some allocation is always left at the end. - * S<LF>^ES^N<$ does not find the first line that does not begin with "<" - ^ES is apparently not greedy. - * fnkeys.tes: Cursor movements will swallow all preceding braced - expressions - there should be more checks. + * S<LF>^ES^N<$ does not find the first line that does not begin with "<". + This is because \s+ backtracks and can match shorter sequences. + Perhaps ^ES should always be translated to \s++ (possessive quantifier)? * Colors are still wrong in Linux console even if TERM=linux-16color when using Solarized. Affects e.g. the message line which uses the reverse of STYLE_DEFAULT. Perhaps we must call init_color() before initializing color pairs (currently done by Scinterm). - * Scinterm: The underline attribute is not applied properly - even on Urxvt where it obviously works. * session.save should save and reset ^R. Perhaps ^R should be mapped to a Q-Reg to allow [^R. Currently, saving the buffer session fails if ^R != 10.
@@ -42,17 +47,15 @@ Known Bugs: and b) the file mode and ownership of re-created files can be preserved. We should fall back silently to an (inefficient) memory copy or temporary file strategy if this is detected. - * crashes on large files: S^EM^X$ (regexp: .*) - Happens because the Glib regex engine is based on a recursive + * Crashes on large files: S^EM^X$ (regexp: (.)+) + Happens because the Glib regex engine is based on a recursive (backtracking) Perl regex library. - The Homebrew and MinGW versions of glib no longer suffer from this. + I can provoke the problem only on Ubuntu 20.04. This is apparently impossible to fix as long as we do not have control over the regex engine build. - We should therefore switch the underlying Regex engine. - Oniguruma looks promising and is also packed for Ubuntu (libonig2). - It would also directly allow globbing by tweaking the syntax. - TRE also looks promising and is smaller than Oniguruma. - GRegEx (PCRE) could still be supported as a fallback. + We need something based on a non-backtracking Thompson's NFA with Unicode (UTF-8), see + https://swtch.com/~rsc/regexp/ + Basically only RE2 would check all the boxes. * It is still possible to crash SciTECO using recursive functions, since they map to the C program's call stack. It is perhaps best to use another stack of @@ -67,25 +70,30 @@ Known Bugs: automatically. Unfortunately, neither Linux, nor FreeBSD allow the re-linking based on file descriptors. On Linux this fails because the - file's link count will be 0; on BSD, /dev/fd/X is on a different - volume and cannot be linked to its old path. - Thus, we'd have to copy the file. + file's link count will be 0; + On BSD linkat(AT_EMPTY_PATH) requires root privileges and does not + work with unlinked files anyway. * Windows NT has hard links as well, but they don't work with file handles either. 
However, it could be possible to call CreateFile(FILE_FLAG_DELETE_ON_CLOSE) on the savepoint file, ensuring cleanup even on abnormal program termination. + However, this flag cannot be cleared once we restore a save point, + so we'd have to copy its contents just like in the UNIX case. There is also MoveFileEx(file, NULL, MOVEFILE_DELAY_UNTIL_REBOOT). * Windows has file system forks, but they can be orphaned just like ordinary files but are harder to locate and clean up manually. * Setting window title is broken on ncurses/XTerm. + The necessary capabilities are usually not in the Terminfo database. Perhaps do some XTerm magic here. We can also restore window titles on exit using XTerm. + * The XTerm OSC-52 clipboard feature appears to garble Unicode characters. + This is apparently an XTerm bug, probably due to 8-bit-uncleanliness. + It was verified by `printf "\e]52;c;?\a"` on the command line. * Glib (error) messages are not integrated with SciTECO's logging system. - * Out-of-tree builds are broken. - See contrib/scintilla.am. * Gtk on Unix: On ^Z, we do not suspend properly. The window is still shown. + This would be a useful feature especially with --xembed on st. Perhaps we should try to catch SIGTSTP? This does not work with g_unix_signal_add(), though, so any workaround would be tricky. @@ -97,7 +105,7 @@ Known Bugs: the window has really been hidden/unrealized. Even if everything worked, it might well be annoying if you accidentally suspend your instance while not being - connected to a terminal. + connected to a terminal. Although this could be checked at runtime. Suspension from the command-line has therefore been disabled on Gtk for the time being. * Many Scintilla commands <ES> can easily crash the editor. @@ -113,6 +121,34 @@ Known Bugs: (and with --no-profile) under Mac OS terminal emulators. This does not happen under Linux with Darling. 
See https://github.com/rhaberkorn/sciteco/issues/12 + * File name autocompletion should take glob patterns into account. + The simple reason is that if a filename really contains glob characters + and you are trying to open it with EB, you might end up not being + able to autocomplete it if a previous autocompletion inserted + escaped glob patterns. + Unfortunately, this would be very tricky to do right. + * The git.blame macro is broken, at least on Git v2.45.2 and v2.25.1. Compare + cat sample.teco_ini | git blame --incremental --contents - -- sample.teco_ini | grep -E '^[a-f0-9]{40}' + (which is wrong and does not even contain all commits) with + git blame --incremental --contents sample.teco_ini -- sample.teco_ini | grep -E '^[a-f0-9]{40}' + which is correct. Without --incremental even the formatting is broken. + This could well be a Git bug. + * Margins, indentations and the like are not configured on the unnamed + buffer by sample.teco_ini. + And this is probably correct. + However when saving a new unnamed file for the first time, + nothing will change either and it's tricky to apply the correct + settings. You have to EF and EB the file after the initial + save to get everything set up correctly. + Or manually run 0M#ED, but this cannot be rubbed out. + We need a "save" ED hook to get this right. + For instance, you could check whether the margin is set up + as a sign of whether lexing has already been applied. + On the other hand, this Save also can't be properly rubbed out and + will not restore the original margins and styling unless we add native + commands for ALL of the Scintilla messages involved. + Automatically running EF EB...$ in the "save" hook could + also have unwanted side effects. Features: * Auto-indention could be implemented via context-sensitive @@ -171,51 +207,57 @@ Features: and the return type. * Having a separate number parser state will simplify number syntax highlighting.
- * Function key masking flag for the beginning of the command + * Key macro masking flag for the beginning of the command line. May be useful e.g. for solarized's F5 key (i.e. function key macros that need to terminate the command line as they cannot be rubbed out properly). - * fnkeys.tec could preserve the column more reliably when - moving up and down by encoding a character offset into the - command line. E.g. (100-3C) would tell us that we have to add - 3 to the real column when moving up/down because the current - line is too short. - * Function key macros should behave more like regular macros: - If inserting a character results in an error, the entire - macro should be rubbed out. This means it would be OK to - let commands in function key macros fail and would fix, e.g. - ^FCLOSE. - * Function key macros could support special escape sequences + * Key macros could support special escape sequences that allow us to modify the parser state reliably. E.g. one construct could expand to the current string argument's termination character (which may not be Escape). - In combination with a special function key macro state + In combination with a special key macro state effective only in the start state of the string building state machine, perhaps only in insertion commands, this could be used to make the cursor movement keys work in insertion commands by automatically terminating the command. Even more simple, the function key flag could be effective only when the termination character is $. - * Function key handling should always be enabled. This was - configurable because of the way escape was handled in ncurses. - Now that escape is always immediate, there is little benefit - in having this still configurable. In fact if turned off, - SciTECO would try to execute escape sequences. - The ED flag could still exist and tell whether the function - key macros are used at all (i.e. this is how Gtk behaves currently). + * Support more function keys. 
+ We can define more function keys via define_key(3NCURSES). + Unfortunately they are not really standardized - st and urxvt for instance + have different escape sequences for Ctrl+Up or Alt+Up. + It seems they can be looked up with tigetstr() and + then passed to define_key(). + Alternatively call use_extended_names(TRUE) and look up the + key codes with key_defined(). + At the very least PDCurses and Gtk could support many more + keys and Alt and Ctrl modifiers. + See also https://stackoverflow.com/questions/31379824/how-to-get-control-characters-for-ctrlleft-from-terminfo-in-zsh + https://gist.github.com/rkumar/1237091 * Mouse support. Not that hard to implement. Mouse events - use a pseudo function key macro as in Curses. + use a pseudo key macro as in Curses. Using some special command, macros can query the current mouse state (this maps to an Interface method). + This should be configurable via an ED flag as it changes + the behavior of the terminal. + * GTK currently allows mouse scrolling as it is not under control + of SciTECO. + Once we got mouse support in Curses, we could either also + automatically scroll or (better) expose the scroll events + as key macros. * Support loading from stdin (--stdin) and writing to the current buffer to stdout on exit (--stdout). This will make it easy to write command line filters, - We will need flags like --8-bit-clean and --quiet with + We will need flags like --quiet with single-letter forms to make it possible to write hash-bang lines like #!...sciteco -q8iom Command line arguments should then also be handled differently, passing them in an array or single string register, so they no longer affect the unnamed buffer. + * Once we've got --stdout, it makes sense to ship a version of + tecat written in SciTECO. + This is useful as a git diff textconv filter. + See https://gist.github.com/rhaberkorn/6534ecf1b05de6216d0a9c33f31ab5f8 * For third-party macro authors, it is useful to know the standard library path (e.g.
to install new lexers). There could be a --print-path option, or with the --quiet @@ -249,17 +291,28 @@ Features: of EI that considers $SCITECOPATH. Current use of EI (insert without string building) will have to move, but it might vanish anyway once we can disable string building - with a special character. - * ::S for string "comparisons" (anchored search). + with a special character, eg. you could write I^C instead. + * <I> doesn't have string building enabled in classic TECO. + Changing this would perhaps be a change too radical. + Also, we would then need a string-building variant like <:I>. + * ::S for string "comparisons" (anchored search) and + ::FS for anchored search-replace. This is supposed to be an alias for .,.:FB which would be .,.:S in SciTECO. Apparanetly, the bounded search is still incompatible in SciTECO, as it is allowed to match beyond the bounds. Either the semantics of m,n:S should be changed or an FB command with classic TECO semantics should be - introduced.. + introduced. * ^S (-(length) of last referenced string), ^Y as .+^S,. * ^Q convert line arg into character arg * ^A, T and stdio in general + * nA returned -1 in case of invalid positions (similar to SciTECO's ^E) + instead of failing. + * ^W was an immediate action command to repaint the screen. + This could be a regular command to allow refreshing in long loops. + Video TECO had ET for the same purpose. + TECO 10 had a ^W regular command for case folding all strings, + but I don't think it's worth supporting. * Search for beginning of string; i.e. a version of S that leaves dot before the search string, similar to FK (request of N.M.). @@ -289,10 +342,8 @@ Features: * instead of 0EB to show the list of buffers, there should perhaps be a special TAB-completion (^G mode?) that completes only buffers in the ring. It should also display the numeric buffer ids. 
- * properly support Unicode encodings and the character-based model - * link against libncursesw if possible - * translate documents to Unicode strings - * a position refers to a character/codepoint + * Gtk: Unicode IME support for asiatic languages. + Have a look how it is done in Scintilla. * Progress indication in commandline cursor: Perhaps blinking or invisible? * Command to free Q-Register (remove from table). @@ -380,6 +431,7 @@ Features: all of SciTECO can be run against nodejs as a runtime. I'm not aware of any (working) alternatives, like cross-compiling for the JVM. + See also https://gist.github.com/VitoVan/92ba4f2b68fec31cda803119686295e5 * Windows supports virtual terminals now. See: https://docs.microsoft.com/en-us/windows/console/classic-vs-vt Perhaps we should transition from PDCurses to something using @@ -387,28 +439,6 @@ Features: * Improve the message line so it can log multiple messages. Especially important on GUI platforms and Win32 so we can get rid of the attached console window. - * On Unix/X11, it may be possible to draw the Gtk+ UI into the - same window as the current terminal. - SciTECO now supports Xembed via the --xembed option. - Urxvt and many other terminal emulators have $WINDOWID. - But the two won't work together. Urxvt and XTerm are apparently - not real Xembed hosts. - * Currently, you cannot pass UTF-8 parameters to SciTECO macros. - This is not critical since we don't support Unicode anyway. - Sooner or later however we should use g_win32_get_command_line(). - * Some platforms like MinGW and some Linux flavours have - native Scintilla packages. - Perhaps it makes sense to be able to build against them - using --with-scintilla. - * Write a Scintilla/Lexilla lexer for Roff - * There is an Urxvt extension 52-osc for implementing the - xterm-like clipboard control sequences. - If may therefore make sense to support that even without checking - the xterm version and update documentation accordingly. 
- * Write standard library module for spell checking. - This can eg. use hunspell/aspell/ispell `-a` mode. - See my SciTE implementation on how to do this. - The resulting macro can be added to a save hook. * A dirtify-hook would be useful and could be used for spell checking. Naturally it could only be exected at the end of executing interactive commands) and it should be triggered @@ -432,11 +462,12 @@ Features: A new private key has already been registered on Launchpad and Github. We just need to integrate with CI. See also https://github.com/marketplace/actions/import-gpg - * AppImage for Linux * 64-bit Windows builds * Mac OS Arm64 builds either separately or via universal binary. See https://codetinkering.com/switch-homebrew-arm-x86/ Target flag: `-target arm64-apple-macos11` + * Get into AppImageHub. + * Get meta-rhaberkorn into https://layers.openembedded.org * Linux: Relocatable binaries instead of hardcoding the library path. This makes it possible to run builds installed via `make install DESTDIR=...` and will aid in creating AppImages. @@ -449,6 +480,81 @@ Features: or grep -n results. * <:EF> for saving and closing a buffer, similar to <:EX>. * Bash completions. + * Case-sensitive search command (modifier or flag). + * FreeBSD: rctl(8) theoretically allows setting up per-process actions + when exceeding the memory limit. + This however requires special system settings. + * Auto-completions customization via external programs. + This among other things could be used to integrate LSPs-driven + autocompletions. + * Whereever we take buffer positions (nJ; n,mD; nQ...), + negative numbers could refer to the end of the buffer or + Q-Register string. + * Support extended operators like in TECO-64: + https://github.com/fpjohnston/TECO-64/blob/master/doc/oper.md + However, instead of introducing a separate parser state, better + use operators like ~=, ~< etc. 
+ * It should be possible to disable auto-completions of one-character + register names, so that we can map the indentation macro to M<TAB>. + * Add a configure-switch for LTO (--enable-lto). + * There should be a string building character for including + a character by code. Currently, there is only ^EUq where + q must be set earlier. + This would be useful when searching in binary files or + to include Unicode characters by code point. + Unfortunately its syntax cannot depend on the string argument's + encoding, as that could confuse parse-only mode. + Perhaps ^E!xxxx or overload ^Qx0123. + TECO-11 has ^Ennn (octal), but only for searching? + * There should be a string building construct for escaping + search patterns. + Since string building is performed immediately before + search pattern translation, you cannot currently + search for a Q-Register verbatim. + * Tweak the Makefile lexer for Automake support. + In the simplest case, just add the *.am file extension. + * Add an fnkeys.tes alternative where moving cursor keys + leaves you in the insert (I) command. + That will behave very similarly to classical editors. + * Lexing via SciTECO macros? + They would have to be in their own parser instance since Scintilla + could ask us to restyle at any time and within string arguments, + which would confuse the parser as it is. + Also, parsers are not fully embeddable right now. + At the same time, it would need access to the view/document it's + supposed to style. Tricky, but not impossible. + * Support external lexers. + Eg. @ES/SCI_SETILEXER/scintillua:APL/ + automatically loads libscintillua.so or scintillua.dll, caches + the library handle and creates the "APL" lexer. + There would also have to be some kind of SCI_NAMEDSTYLE wrapper, + so we can look up style ids by name (specifically for Scintillua). + * ^^ in string building expanding to a single caret is not + consistent. Perhaps we should allow only ^Q^ as a way to insert a + single caret?
+ * Support for non-ANSI single byte encodings is still incomplete. + You can set them with EE and they will be correctly + displayed (on Gtk at least), but there is no way + to insert text in these codepages, at least outside + of the latin range of course. + There are two ways this could be implemented: + * Either all sorts of commands automatically iconv + from/to the configured encoding. + * Or we iconv once to UTF-8 when loading the file + and iconv back when saving. + This is probably easier but means, you have to + somehow specify the codepage on EB as you cannot + change it later on. + We could say that nEB...$ specifies the code page + if the string argument is nonempty. + On the other hand, iconv uses symbolic identifiers. + * Perhaps the Unicode "icons" should be configurable via TECO. + In the easiest case there could simply be 2 Q-Reg namespaces: + ^F... for filenames and ^E... for extensions. + The numeric part could be used to store the codepoints. + * Scinterm: The underline and italic styles are not applied properly + even on Urxvt where they obviously work. + https://github.com/orbitalquark/scinterm/issues/22 Optimizations: * Use SC_DOCUMENTOPTION_STYLES_NONE in batch mode. @@ -458,7 +564,8 @@ Optimizations: requested explicitly per ED flag or command line option. * teco_interface_cmdline_update() should be called only once after inserting an entire command line macro. - * There is should be a common error code for null-byte tests. + * There should be a common error code for null-byte tests + instead of TECO_ERROR_FAILED. * teco_string_append() could be optimized by ORing a padding into the realloc() size (e.g. 0xFF). However, this has not proven effective on Linux/glibc @@ -468,24 +575,59 @@ Optimizations: * commonly used (special) Q-Registers could be cached, saving the q-reg table lookup * refactor search commands (create proper base class) - * Add a configure-switch for LTO (--enable-lto). 
* undo__teco_interface_ssm() could always include the check for teco_current_doc_must_undo(). - * Define TECO_DESTRUCTOR depending on NDEBUG instead - of ifdefing all __attribute__((destructors)). - This also avoids warning for symbols referenced only in - the constructors. + * Avoid Scintilla Undo actions entirely. + This will make undo token creation of deletions harder, + since we must first query the text to be reinserted again, + but will probably save lots of memory. + * Newer GCC and C23 features: + * Perhaps teco_bool_t usage could be simplified using + __attribute__((hardbool)). + * Use `#elifdef` instead of `#elif defined`. + * Use `[[gnu::foo]]` instead of `__attribute__((foo))`. + * The TECO_FOR_EACH() hack could be simplified at least marginally + using __VA_OPT__(). + * Parsing might be optimized with libc's Unicode handling + functions and/or custom optimized versions. + * The new Scintilla IDocumentEditable interface could be + used to speed up Q-Register string accesses. + That is, when it actually supports anything useful. + This is a provisional feature and supported only via C++, + so we would need a small wrapper to call it from C world. + * Perhaps replace glib with gnulib. Could improve + portability and reduce binary size on platforms where + I have to ship the glib shared library. + Also we could control memory allocations more tightly. + We could also make use of the fnmatch and canonicalize + modules. + On the other hand, this imports tons of sh*t into the + repository and chains us to Autotools. + * Does it make sense to import glib-2.0.m4? + * According to ChatGPT (sic) the glibc and jemalloc malloc_usable_size() + do not change during the lifetime of an object, + although this is an implementation detail. + Perhaps we should support these system allocators by default + if malloc_usable_size() and the original functions as __malloc() + are defined. + But how to even test for glibc's ptmalloc? + Linux could use musl as well for instance. 
Documentation: - * Code docs (Doxygen). It's slowly getting better... + * Doxygen docs could be deployed on Github pages + automatically. * The ? command could be extended to support looking up help terms at dot in the current document (e.g. if called ?$). Furthermore, womanpages could contain "hypertext" links to help topics using special Troff markup and grosciteco support. * The command reference should include an overview. - * Write a cheat sheet. Either on www.cheatography.com, or - using Groff and include with SciTECO. * Write some tutorials for the Wiki, e.g. about paragraph reflowing... Object-oriented SciTECO ideoms etc. ;-) - * Record a "video" on https://asciinema.org/ + * What to do with `--xembed`: tabbed, st + when used as the git editor, etc. + * Perhaps there should be a Getting Started document, + that is automatically opened by sample.teco_ini. + * The HTML manuals lack monospaced fonts. + This is partly because an.tmac removes the Courier family + in nroff mode, but it still doesn't work if you undo this. diff --git a/bootstrap.am b/bootstrap.am index a48df7a..8bce720 100644 --- a/bootstrap.am +++ b/bootstrap.am @@ -29,7 +29,6 @@ SUBST_MACRO = EB$<\e \ <FS@PACKAGE_NAME^Q@\e@PACKAGE_NAME@\e;>J \ <FS@PACKAGE_VERSION^Q@\e@PACKAGE_VERSION@\e;>J \ <FS@PACKAGE_URL^Q@\e@PACKAGE_URL@\e;>J \ - <FS@PACKAGE_URL_DEV^Q@\e@PACKAGE_URL_DEV@\e;>J \ <FS@bindir^Q@\e$(bindir)\e;>J \ <FS@libexecdir^Q@\e$(libexecdir)\e;>J \ <FS@scitecodatadir^Q@\e$(scitecodatadir)\e;>J \ diff --git a/configure.ac b/configure.ac index f672553..d9dd7e0 100644 --- a/configure.ac +++ b/configure.ac @@ -2,14 +2,10 @@ # Process this file with autoconf to produce a configure script. 
AC_PREREQ([2.65]) -AC_INIT([SciTECO], [2.0.0], +AC_INIT([SciTECO], [2.1.0], [robin.haberkorn@googlemail.com], [sciteco], - [http://sciteco.sf.net/]) -PACKAGE_URL_DEV=https://github.com/rhaberkorn/sciteco -AC_SUBST(PACKAGE_URL_DEV) -AC_DEFINE_UNQUOTED(PACKAGE_URL_DEV, ["$PACKAGE_URL_DEV"], - [Package development homepage.]) + [https://github.com/rhaberkorn/sciteco]) AC_CONFIG_MACRO_DIR(m4) AC_CONFIG_AUX_DIR(config) AM_INIT_AUTOMAKE @@ -23,13 +19,19 @@ AC_CANONICAL_HOST AX_CHECK_ENABLE_DEBUG AM_CONDITIONAL(DEBUG, [test x$ax_enable_debug != xno]) +if [[ x$ax_enable_debug = xno ]]; then + # glib does not look at NDEBUG + AC_DEFINE(G_DISABLE_ASSERT, 1, [Disable g_assert()]) +fi # Use the user provided CXXFLAGS for Scintilla as well. # This makes sure that the same optimizations are applied # to SciTECO and Scintilla. # We cannot pass CXXFLAGS directly to Scintilla since # we modify it during library checking. -SCINTILLA_CXXFLAGS="$CXXFLAGS" +# Additionally, we can disable unused Scintilla features. +# See https://scintilla.org/ScintillaDoc.html#BuildingScintilla +SCINTILLA_CXXFLAGS="$CXXFLAGS -DNO_CXX11_REGEX -DSCI_DISABLE_PROVISIONAL" AC_SUBST(SCINTILLA_CXXFLAGS) # Automake build flag substitutions. @@ -45,7 +47,12 @@ canonicalize() { } # Checks for programs. -LT_INIT +# +# Disable shared libraries by default (--disable-shared). +# We don't install any library, so this __should__ not matter. +# In reality Libtool builds unnecessary wrapper binaries on win32 (MinGW) without this. +# These wrapper binaries do not handle UTF-8 properly and break the test suite. +LT_INIT([disable-shared]) # FIXME: Check for -std=gnu11? AC_PROG_CC AC_PROG_SED @@ -125,19 +132,20 @@ PKG_CHECK_MODULES(LIBGLIB, [glib-2.0 >= 2.44], [ LIBS="$LIBS $LIBGLIB_LIBS" ]) -# Required by Scintilla. 
-AX_PTHREAD([], [ - AC_MSG_ERROR([pthread not found!]) -]) -SCINTILLA_CXXFLAGS="$SCINTILLA_CXXFLAGS $PTHREAD_CFLAGS" -LIBS="$LIBS $PTHREAD_LIBS" - case $host in *-mingw*) AC_CHECK_HEADERS([windows.h], , [ AC_MSG_ERROR([Missing Windows headers!]) ]) ;; +*) + # Required by Scintilla. + AX_PTHREAD([], [ + AC_MSG_ERROR([pthread not found!]) + ]) + SCINTILLA_CXXFLAGS="$SCINTILLA_CXXFLAGS $PTHREAD_CFLAGS" + LIBS="$LIBS $PTHREAD_LIBS" + ;; esac # Checks for typedefs, structures, and compiler characteristics. @@ -190,8 +198,8 @@ AM_CPPFLAGS="$AM_CPPFLAGS -D'SCITECODATADIR=\"\$(scitecodatadir)\"' \ # Scintilla (some platforms ship with it). AC_ARG_WITH(scintilla, AS_HELP_STRING([--with-scintilla=PATH], - [Specify Scintilla's path [default=./contrib/scintilla]]), - [SCINTILLA_PATH=$withval], [SCINTILLA_PATH=./contrib/scintilla]) + [Specify Scintilla's path [default=SRCDIR/contrib/scintilla]]), + [SCINTILLA_PATH=$withval], [SCINTILLA_PATH=$srcdir/contrib/scintilla]) SCINTILLA_PATH=`canonicalize $SCINTILLA_PATH` AC_SUBST(SCINTILLA_PATH) @@ -199,15 +207,15 @@ CPPFLAGS="$CPPFLAGS -I$SCINTILLA_PATH/include" AC_ARG_WITH(scinterm, AS_HELP_STRING([--with-scinterm=PATH], - [Specify Scinterm's path [default=./contrib/scinterm]]), - [SCINTERM_PATH=$withval], [SCINTERM_PATH=./contrib/scinterm]) + [Specify Scinterm's path [default=SRCDIR/contrib/scinterm]]), + [SCINTERM_PATH=$withval], [SCINTERM_PATH=$srcdir/contrib/scinterm]) SCINTERM_PATH=`canonicalize $SCINTERM_PATH` AC_SUBST(SCINTERM_PATH) AC_ARG_WITH(lexilla, AS_HELP_STRING([--with-lexilla=PATH], - [Specify Lexilla's path [default=./contrib/lexilla]]), - [LEXILLA_PATH=$withval], [LEXILLA_PATH=./contrib/lexilla]) + [Specify Lexilla's path [default=SRCDIR/contrib/lexilla]]), + [LEXILLA_PATH=$withval], [LEXILLA_PATH=$srcdir/contrib/lexilla]) AM_CONDITIONAL(LEXILLA, [test x$LEXILLA_PATH != xno]) if [[ x$LEXILLA_PATH != xno ]]; then LEXILLA_PATH=`canonicalize $LEXILLA_PATH` @@ -225,11 +233,16 @@ case $INTERFACE in *curses*) case 
$INTERFACE in ncurses | netbsd-curses) - PKG_CHECK_MODULES(NCURSES, [ncurses], [ + # The widechar version of ncurses is necessary for Unicode + # support even when not using widechar APIs. + PKG_CHECK_MODULES(NCURSES, [ncursesw], [ CFLAGS="$CFLAGS $NCURSES_CFLAGS" CXXFLAGS="$CXXFLAGS $NCURSES_CFLAGS" LIBS="$LIBS $NCURSES_LIBS" ], [ + # This should be sufficient to detect non-widechar + # ncurses versions as well, although we could also check + # for an "ncurses" package. AC_CHECK_LIB(ncurses, initscr, , [ AC_MSG_ERROR([libncurses missing!]) ]) diff --git a/contrib/lexilla b/contrib/lexilla -Subproject 43ea736569d52ba6cf7e7325cf39009409e7282 +Subproject e0f045a027be35ad720355dd9affa851cfa4d4f diff --git a/contrib/scinterm b/contrib/scinterm -Subproject ebb79cb1d0103ec58f94e83d2d278f91cddc107 +Subproject bbedf0215c9b0d3049925d3886d983f6d778581 diff --git a/contrib/scintilla b/contrib/scintilla -Subproject de2ed1d4bfc55dcca30f74790a3b4f7931b0803 +Subproject df71b092506451a227c81361986668ce69bcc09 diff --git a/contrib/scintilla.am b/contrib/scintilla.am index 6c75e65..9ba728b 100644 --- a/contrib/scintilla.am +++ b/contrib/scintilla.am @@ -1,31 +1,32 @@ # We use the existing Scintilla/Lexilla Makefile build system to # avoid redundancies and maintainance overhead. # These build systems produce static libraries (*.a). -# This however has two diadvantages: -# 1.) Out-of-tree builds are not straight forward and require -# manually cloning the submodules into the build system. -# `make distcheck` is consequently also broken. -# 2.) We cannot add plain static libraries to libtool convenience -# libraries using LIBADD. -# Therefore, we cannot wrap Scintilla and Lexilla into a convenience -# library. -# That's why this file must instead be included everywhere where -# scintilla.a and liblexilla.a are referenced and they must be added -# to programs using LDADD. 
+# This however has the following disadvantages: +# +# * We cannot add plain static libraries to libtool convenience +# libraries using LIBADD. +# Therefore, we cannot wrap Scintilla and Lexilla into a convenience +# library. +# That's why this file must instead be included everywhere where +# scintilla.a and liblexilla.a are referenced and they must be added +# to programs using LDADD. + +LIBSCINTILLA = @abs_top_builddir@/contrib/scintilla/bin/scintilla.a if INTERFACE_GTK -MAKE_SCINTILLA = $(MAKE) -C @SCINTILLA_PATH@/gtk \ +MAKE_SCINTILLA = $(MAKE) -C @top_builddir@/contrib/scintilla/bin \ + -f @SCINTILLA_PATH@/gtk/makefile \ + srcdir=@SCINTILLA_PATH@/gtk \ + COMPLIB=$(LIBSCINTILLA) \ GTK3=yes CONFIGFLAGS='@LIBGTK_CFLAGS@' \ CXXFLAGS='@SCINTILLA_CXXFLAGS@' else -# NOTE: The VPATH hack allows us to keep Scinterm separate from -# the Scintilla repo and avoid using recursive submodules. -# -# FIXME: There is currently no way to override the standard optimization -# flags of Scinterm, so we pass them in CURSES_FLAGS. -MAKE_SCINTILLA = $(MAKE) -C @SCINTILLA_PATH@/bin -f @SCINTERM_PATH@/Makefile \ - VPATH=@SCINTERM_PATH@ \ - CURSES_FLAGS='@PDCURSES_CFLAGS@ @XCURSES_CFLAGS@ @NCURSES_CFLAGS@ @SCINTILLA_CXXFLAGS@' +MAKE_SCINTILLA = $(MAKE) -C @top_builddir@/contrib/scintilla/bin \ + -f @SCINTERM_PATH@/Makefile \ + srcdir=@SCINTERM_PATH@ basedir=@SCINTILLA_PATH@ \ + scintilla=$(LIBSCINTILLA) \ + CXXFLAGS='@SCINTILLA_CXXFLAGS@' \ + CURSES_FLAGS='@PDCURSES_CFLAGS@ @XCURSES_CFLAGS@ @NCURSES_CFLAGS@' endif # Pass toolchain configuration to Scintilla. @@ -38,21 +39,34 @@ MAKE_SCINTILLA += CC='@CC@' CXX='@CXX@' \ # If it's up to date, the additional recursive # make call does not hurt. .PHONY: make-scintilla +# FIXME: deps.mak cannot be generated when building out-of-tree. +# Also this would draw in a Python dependency. +# It needs to be removed in clean-local-scintilla to appease `make distcheck`. 
make-scintilla: - $(MAKE_SCINTILLA) ../bin/scintilla.a + mkdir -p @top_builddir@/contrib/scintilla/bin + touch @top_builddir@/contrib/scintilla/bin/deps.mak + $(MAKE_SCINTILLA) $(LIBSCINTILLA) # scintilla.a itself is not phony. # This avoids unnecessary relinking if it is # up to date. # Also note the ; which defines this recipe as # empty. -@SCINTILLA_PATH@/bin/scintilla.a : make-scintilla; +$(LIBSCINTILLA) : make-scintilla; .PHONY: clean-local-scintilla clean-local-scintilla: + mkdir -p @top_builddir@/contrib/scintilla/bin $(MAKE_SCINTILLA) clean + test "@abs_top_srcdir@" = "@abs_top_builddir@" || \ + $(RM) -f @top_builddir@/contrib/scintilla/bin/deps.mak + +LIBLEXILLA = @abs_top_builddir@/contrib/lexilla/bin/liblexilla.a MAKE_LEXILLA = $(MAKE) -C @LEXILLA_PATH@/src \ + DIR_O=@abs_top_builddir@/contrib/lexilla/bin \ + DIR_BIN=@abs_top_builddir@/contrib/lexilla/bin \ + SCINTILLA_INCLUDE=@SCINTILLA_PATH@/include \ CXXFLAGS='@SCINTILLA_CXXFLAGS@' \ CC='@CC@' CXX='@CXX@' \ AR='@AR@' RANLIB='@RANLIB@' @@ -65,12 +79,14 @@ MAKE_LEXILLA += BASE_FLAGS='' .PHONY: make-lexilla make-lexilla: - $(MAKE_LEXILLA) ../bin/liblexilla.a + mkdir -p @top_builddir@/contrib/lexilla/bin + $(MAKE_LEXILLA) $(LIBLEXILLA) -@LEXILLA_PATH@/bin/liblexilla.a : make-lexilla; +$(LIBLEXILLA) : make-lexilla; .PHONY: clean-local-lexilla clean-local-lexilla: + mkdir -p @top_builddir@/contrib/lexilla/bin $(MAKE_LEXILLA) clean # NOTE: using a separate `clean-local-scintilla` diff --git a/debian/changelog b/debian/changelog index a9cf041..e2c55c3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,14 @@ -sciteco (2.0.0-1) unstable; urgency=low +sciteco (2.1.0-0) unstable; urgency=low + + * new upstream version v2.1.0 + + -- Robin Haberkorn <robin.haberkorn@googlemail.com> Wed, 16 Oct 2024 18:29:15 +0300 + +sciteco (2.0.0-8) unstable; urgency=low * new upstream version v2.0.0 - -- Robin Haberkorn <robin.haberkorn@googlemail.com> Sun, 30 May 2021 15:19:54 +0200 + -- Robin Haberkorn
<robin.haberkorn@googlemail.com> Tue, 20 Jun 2023 05:50:30 +0300 sciteco (0.6.4-5) unstable; urgency=low diff --git a/debian/control b/debian/control index 5d9a15a..a5aeca0 100644 --- a/debian/control +++ b/debian/control @@ -3,17 +3,17 @@ Section: editors Priority: optional Maintainer: Robin Haberkorn <robin.haberkorn@googlemail.com> Build-Depends: debhelper (>= 10), dh-exec, g++ (>= 4:5.0), libglib2.0-dev (>= 2.44), - ncurses-base, ncurses-term, libncurses5-dev, + ncurses-term, libncurses-dev, libgtk-3-dev (>= 3.12), xvfb, groff-base -Standards-Version: 3.9.2 +Standards-Version: 4.5.0 Homepage: http://sciteco.sf.net/ Vcs-Browser: https://github.com/rhaberkorn/sciteco Vcs-Git: git://github.com/rhaberkorn/sciteco.git Package: sciteco-curses Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, sciteco-common (= ${binary:Version}) +Depends: ${shlibs:Depends}, ${misc:Depends}, sciteco-common (= ${source:Version}) Description: Scintilla-based Text Editor and Corrector (curses) SciTECO is an interactive TECO dialect, similar to Video TECO. It also adds features from classic Standard TECO-11, @@ -25,7 +25,7 @@ Description: Scintilla-based Text Editor and Corrector (curses) Package: sciteco-gtk Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, sciteco-common (= ${binary:Version}) +Depends: ${shlibs:Depends}, ${misc:Depends}, sciteco-common (= ${source:Version}) Description: Scintilla-based Text Editor and Corrector (GTK+) SciTECO is an interactive TECO dialect, similar to Video TECO. 
It also adds features from classic Standard TECO-11, diff --git a/debian/copyright b/debian/copyright index f95f495..b03b1b0 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,20 +1,21 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: SciTECO Upstream-Contact: robin.haberkorn@googlemail.com -Source: https://sourceforge.net/projects/sciteco +Source: https://github.com/rhaberkorn/sciteco -Files: debian/* src/* lib/* doc/* tests/* ico/* win32/* *.ac *.am *.teco_ini -Copyright: Copyright 2013-2023 Robin Haberkorn <robin.haberkorn@googlemail.com> -License: GPL-3+ +Files: * +Copyright: Copyright 2012-2024 Robin Haberkorn <robin.haberkorn@googlemail.com> +License: GPL-3 /usr/share/common-licenses/GPL-3 Files: contrib/dlmalloc/*.c contrib/dlmalloc/*.h Copyright: Public Domain -License: CC0 +License: CC0-1.0 + /usr/share/common-licenses/CC0-1.0 Files: contrib/rb3ptr/*.c contrib/rb3ptr/*.h Copyright: Copyright 2019 Jens Stimpfle -License: +License: MIT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to @@ -34,7 +35,7 @@ License: Files: contrib/scintilla/* contrib/lexilla/* Copyright: Copyright 1998-2021 Neil Hodgson <neilh@scintilla.org> -License: +License: MIT-Hodgson License for Lexilla, Scintilla, and SciTE . Copyright 1998-2021 by Neil Hodgson <neilh@scintilla.org> @@ -58,7 +59,7 @@ License: Files: contrib/scinterm/* Copyright: Copyright 2012-2020 Mitchell <mitchell.att.foicica.com> -License: +License: MIT The MIT License . Copyright (c) 2012-2020 Mitchell @@ -80,3 +81,28 @@ License: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +Files: contrib/mingw-bundledlls +Copyright: Copyright 2015 Martin Preisler +License: MIT + The MIT License + . + Copyright (c) 2015 Martin Preisler + . + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
diff --git a/debian/rules b/debian/rules index a8a0368..fb1d65e 100755 --- a/debian/rules +++ b/debian/rules @@ -29,15 +29,11 @@ endif %: dh $@ -build build-arch build-indep: build-curses-stamp build-gtk-stamp - dh_testdir +build build-arch build-indep: build-curses-stamp build-gtk-stamp; build-curses-stamp: dh_testdir rm -rf build-curses - mkdir -p build-curses/contrib - cp -r contrib/scintilla contrib/scinterm contrib/lexilla \ - build-curses/contrib/ dh_auto_configure -Bbuild-curses -- \ --with-interface=ncurses dh_auto_build -Bbuild-curses @@ -49,9 +45,6 @@ build-curses-stamp: build-gtk-stamp: dh_testdir rm -rf build-gtk - mkdir -p build-gtk/contrib - cp -r contrib/scintilla contrib/lexilla \ - build-gtk/contrib/ dh_auto_configure -Bbuild-gtk -- \ --program-prefix=g \ --with-interface=gtk @@ -66,6 +59,38 @@ install: xvfb-run -a dh install -Bbuild-gtk -psciteco-gtk dh install -Bbuild-curses -psciteco-curses -psciteco-common +# NOTE: This skips the dh_install* commands of `dh binary-arch` +# which would ignore `-p...`. 
+define custom_binary_arch + dh_testroot -a $(1) + dh_prep -a $(1) + dh_bugfiles -a $(1) + dh_ucf -a $(1) + dh_lintian -a $(1) + dh_icons -a $(1) + dh_perl -a $(1) + dh_usrlocal -a $(1) + dh_link -a $(1) + dh_strip_nondeterminism -a $(1) + dh_compress -a $(1) + dh_fixperms -a $(1) + dh_missing -a $(1) + dh_strip -a $(1) + dh_makeshlibs -a $(1) + dh_shlibdeps -a $(1) + dh_installdeb -a $(1) + dh_gencontrol -a $(1) + dh_md5sums -a $(1) + dh_builddeb -a $(1) +endef + +binary-arch: + $(call custom_binary_arch,-O-Bbuild-gtk -O-psciteco-gtk) + $(call custom_binary_arch,-O-Bbuild-curses -O-psciteco-curses) + +binary-indep: + dh binary-indep -Bbuild-curses + clean: dh clean -Bbuild-gtk dh clean -Bbuild-curses diff --git a/debian/sciteco-gtk.install b/debian/sciteco-gtk.install index 8907904..bafa0d8 100755 --- a/debian/sciteco-gtk.install +++ b/debian/sciteco-gtk.install @@ -12,4 +12,4 @@ ico/sciteco-32.png => usr/share/icons/hicolor/32x32/apps/sciteco.png ico/sciteco-48.png => usr/share/icons/hicolor/48x48/apps/sciteco.png ico/sciteco-256.png => usr/share/icons/hicolor/256x256/apps/sciteco.png -src/sciteco.desktop => usr/share/applications +src/sciteco.desktop usr/share/applications diff --git a/doc/Makefile.am b/doc/Makefile.am index 9e32c19..fac7b1b 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -61,7 +61,7 @@ CLEANFILES += $(women_DATA) $@ $< %.intermediate : % sciteco.tmac - @GROFF@ -wall -Z -Tascii -t -man -M@srcdir@ -msciteco $< >$@ + @GROFF@ -wall -Z -Kutf-8 -Tutf8 -t -man -M@srcdir@ -msciteco $< >$@ man_MANS = grosciteco.tes.1 EXTRA_DIST = grosciteco.tes.1.in @@ -95,6 +95,10 @@ SUFFIXES += .htbl .html .htbl.html: @GROFF@ -wall -Thtml -man $< >$@ +# FIXME: We may want to build the cheat sheet automatically. +# This would require a full Groff installation, though. 
+EXTRA_DIST += cheat-sheet.mm + # # Doxygen processing (do not install or distribute) # diff --git a/doc/cheat-sheet.mm b/doc/cheat-sheet.mm new file mode 100644 index 0000000..ea0d893 --- /dev/null +++ b/doc/cheat-sheet.mm @@ -0,0 +1,821 @@ +\# pdfmom --roff -U -P-pa4 -rW=19c -rO=1c -rL=31c -mm -mhdtbl -mpdfpic cheat-sheet.mm >cheat-sheet.pdf +.PGNH +. +.pdfinfo /Title SciTECO Cheat Sheet +.pdfinfo /Author Robin Haberkorn +. +.SP 0.5c +. +.\" allow \fC instead of \f(CR. +.ftr C CR +. +.ad c +\#.B "SciTECO Cheat Sheet" +\#.SP +Overview of \fBSciTECO\fP as an editor. +A full language description can be found in +.pdfhref W -D https://rhaberkorn.github.io/sciteco/sciteco.7.html -A . \fBsciteco\fP(7) +.br +. +.\" subscripts +.ds < \v'+.3m\s'\En[.s]*7u/10u'-.1m' +.ds > \v'+.1m\s0-.3m' +. +.\" FIXME: For switching between CR and I fonts +.de CI +. if \\n[.$] .fnt@switch \fC \fI \\$@ +.. +. +.\" For drawing with foreground ($1) and background ($2) color. +.\" Adapted from the BOX macro in Groff manual "Drawing Requests". +.\" The $ is added to standardize the height of all boxes +.\" (as it stretches above and below the base line). +.\" NOTE: This does not work in arguments to .TD! +.ds FILLSTR \ +\R!@wd \w'\\$3$'-\w'$'!\ +\h'.1m'\ +\h'-.1m'\v'(.1m - \\n[rsb]u)'\ +\M[\\$2]\ +\D'P 0 -(\\n[rst]u - \\n[rsb]u + .2m) \ + (\\n[@wd]u + .2m) 0 \ + 0 (\\n[rst]u - \\n[rsb]u + .2m) \ + -(\\n[@wd]u + .2m) 0'\ +\h'.1m'\v'-(.1m - \\n[rsb]u)'\ +\M[]\ +\m[\\$1]\\$3\m[]\ +\h'.1m' +. +.\" Control char: monospaced font and inverted colors +.ds CTRL \fC\\*[FILLSTR white black "\\$*"]\fP +.ds $ \\*[CTRL $] +.ds $$ \\*[CTRL $$] +. +.ds t*hl +.ds t*vl +.nr t*csp 0 +.ds t*bc black +.ds t*fgc black +.ds t*bgc white +.ds t*hal l +. +.de TBLX +. ds TBLX-TITLE \\$1 +. shift +. TBL cols=2 \\$* +. TR fgc=white bgc=black fst=B +. TD colspan=2 "\\*[TBLX-TITLE]" +. nr TRX 0 1 +.. +.de TRX +. ie \\n+[TRX]%2 .TR bgc=grey90 \\$* +. el .TR \\$* +.. 
+.\" FIXME: Does not work when tables are automatically +.\" deferred to the next column or page. +.\" It's unclear what controls the spacing between tables. +.am ETB +. sp -0.1c +.. +. +.\" Legend +.TBL cols=4 width='10% 40% 10% 40%' +. TR fgc=white bgc=black fst=B +. TD colspan=4 "Legend" +. TR bgc=grey90 +. TD +\*$ +. TD +String delimiter (Escape key) +\# Mention @ modifier? +\# Or that you can also press Delete if fnkeys.tes is loaded. +. TD +\*($$ +. TD +Command-line termination (2\(muEscape key) +. TR +. TD +. CI X +. TD +Regular command. +They are case-insensitive. +. TD +. I n +. TD +Some integer, often optional (1 or 0 by default). +You can write \fC-\fP instead of \fC-1\fP. +. TR bgc=grey90 +. TD +. I text +. TD +Arbitrary \fItext\fP. +. TD +. I q +. TD +A named storage area called a Q-register. +Use any case insensitive single character to name the register. +There are also two letter names initiated by \fC#\fP and long names in \fC[\fP...\fC]\fP braces. +. TR +. TD +\*[CTRL ^\f(CIX\fP]\fR +. TD +Ctrl+\fIX\fP, but can also be typed with a caret (\fC^\fP). +. TD +\*[CTRL LF] +. TD +Line Feed, ie. Enter/Return key +. TR bgc=grey90 +. TD colspan=4 +For instance: +\fC-C\fP \(== \fC-1C\fP \(== \fCR\fP \(DI +\fCQa\fP \(== \fCQ[A]\fP \(DI +\fCQ#ab\fP \(== \fCQ[AB]\fP \(DI +Ctrl+I \(== \*[CTRL ^I] \(== \*[CTRL TAB] \(DI +Ctrl+J \(== \*[CTRL ^J] \(== \*[CTRL LF] +.br +Undo (Rubout): Backspace, \*[CTRL ^W], \*[CTRL ^U] \(DI +Redo (Rubin): First \*[CTRL ^G], then Backspace, \*[CTRL ^W]... +.ETB +. +.\" Automatically move tables to the beginning of the next column. +.am tbl@top-hook +. t*hm +.. +.\" Automatically move tables to the beginning of the next page. +.am pg@end-of-text +. t*EM +.. +. +.MC (u;(\nW-0.5c)/3) 0.25c \" 3 columns +. +.TBLX "Exiting" width='30% 70%' +. TRX +. TD +\fCEX\fP\*($$ +. TD +Exit, but only if no buffer is \(lqdirty\(rq (unsaved) +. TRX +. TD +\fC-EX\fP\*($$ +. TD +Exit even if buffer is \(lqdirty\(rq, ie. discarding all unsaved changes. 
+. TRX +. TD +\fC:EX\fP\*($$ +. TD +Exit, saving all \(lqdirty\(rq buffers. +.ETB +. +.TBLX "Files" width='30% 70%' +. TRX +. TD +\fCEB\fIfile\fR\*$ +. TD +Edit buffer or open new \fIfile\fP (glob pattern). +Files that do not yet exist on disk will not be created until you save them. +. TRX +. TD +\fCEB*.c\*$ +. TD +Open all files with extension \fCc\fP. +. TRX +. TD +\fCEB\fP\*$ +. TD +Edit the unnamed buffer. +. TRX +. TD +. CI 0EB +. TD +Show buffer ring/list. +You can specify a filename afterwards to open a file. +. TRX +. TD +\fIn\fCEB\fR\*$ +.br +.CI "" n U* +. TD +Select \fIn\fP-th buffer in ring. +. TRX +. TD +\fC%*\*$ +. TD +Select next buffer in ring. +. TRX +. TD +\fC-%*\*$ +. TD +Select previous buffer in ring. +. TRX +. TD +. CI EJU* +. TD +Select last buffer in ring. +. TRX +. TD +\fCEW\fP\*$ +. TD +Write (save) current buffer under its current name. +Does not work on the unnamed buffer. +. TRX +. TD +\fCEW\fIfile\fR\*$ +. TD +Save current buffer under new name \fIfile\fP (Save As). +. TRX +. TD +. CI EF +. TD +Finish (close) current buffer. +. TRX +. TD +. CI -EF +. TD +Finish (close) current buffer, discarding all unsaved changes. +. TRX +. TD +\fCFG\fIpath\fR\*$ +. TD +Go to folder \fIpath\fP, ie. change working directory. +. TRX +. TD +. CI 0EE +. TD +Set single byte ASCII mode. +. TRX +. TD colspan=2 +\fBTip:\fP You can use the Tab-key for autocompleting filenames and paths. +.ETB +. +.NCOL +. +.TBLX "Text Insertion" width='30% 70%' +. TRX +. TD +\fCI\fItext\*$ +. TD +Insert \fItext\fP into buffer. +. TRX +. TD +. CI I ... ^^ +. TD +Insert single caret (\fC^\fP). +. TRX +. TD +\fCI\fI...\*[CTRL ^Q$] +. TD +Insert \*$ (ASCII 27). +. TRX +. TD +\*[CTRL TAB]\fItext\*$ +. TD +Insert \fItext\fP with leading tab/indentation. +See also +.pdfhref W -D https://github.com/rhaberkorn/sciteco/wiki/Useful-Macros#indent-code-block -A . \fIn\fCM#it\fP +.ETB +. +.TBLX "Text Deletion" width='30% 70%' +. TRX +. TD +. CI D +. TD +Delete next character. +. TRX +.
TD +. CI "" n D +. TD +Delete next \fIn\fP characters. +.ig END +. TRX +. TD +. CI V +. TD +Delete next word. +.END +. TRX +. TD +. CI "" n V +. TD +Delete next \fIn\fP words. +. TRX +. TD +. CI "" n Y +. TD +Delete previous \fIn\fP words. +. TRX +. TD +\fCFK\*[CTRL LF$] +. TD +Delete remainder of line. +.ig END +. TRX +. TD +. CI K +. TD +Kill (delete) from current position to beginning of next line. +.END +. TRX +. TD +. CI 0K +. TD +Kill (delete) to beginning of current line. +. TRX +. TD +. CI 0KK +. TD +Kill (delete) entire line +. TRX +. TD +. CI "" n K +. TD +Kill (delete) next \fIn\fP lines. +. TRX +. TD +. CI HK +. TD +Kill (delete) whole buffer. +.ETB +. +.TBLX "Copy & Paste" width='30% 70%' +. TRX +. TD +. CI X q +. TD +Copy from current position until beginning of next line into Q-Register \fIq\fP. +. TRX +. TD +. CI "" n X q +. TD +Copy next \fIn\fP lines into Q-Register \fIq\fP. +. TRX +. TD +. CI "" n :X q +. TD +Append next \fIn\fP lines to Q-Register \fIq\fP. +. TRX +. TD +. CI "" n X q\|n K +. TD +Cut next \fIn\fP lines into Q-Register \fIq\fP. +. TRX +. TD +. CI HX q +. TD +Copy whole buffer into Q-Register \fIq\fP. +. TRX +. TD +. CI X\(ti +. TD +Copy line into clipboard. See also +.pdfhref W -D https://github.com/rhaberkorn/sciteco/wiki/Useful-Macros#copypaste-from-clipboard -A . \fCM#xc\fP +. TRX +. TD +. CI G q +. TD +Get (paste) Q-Register \fIq\fP at current position. +. TRX +. TD +\fCI\fI...\*[CTRL ^E]\fCQ\fIq +. TD +Paste Q-Register \fIq\fP while inserting text. +.ig END +. TRX +. TD +\fCE%\fIq\|file\*$ +. TD +Save Q-Register \fIq\fP into \fIfile\fP. +.END +. TRX +. TD +\fCEQ\fIq\*$ +. TD +Edit Q-Register \fIq\fP as a text buffer. +.ig END +. TRX +. TD +\fCEQ\fIq\|file\*$ +. TD +Read \fIfile\fP into Q-Register \fIq\fP. +.END +.ETB +. +.NCOL +. +.TBLX "Cursor Movement" width='30% 70%' +. TRX +. TD +. CI C +. TD +Move one character forward. +. TRX +. TD +. CI "" n C +. TD +Move \fIn\fP characters forward. +. TRX +. TD +. CI R +. 
TD +Move one character backwards (reverse). +. TRX +. TD +. CI "" n R +. TD +Move \fIn\fP characters backwards (reverse). +. TRX +. TD +. CI W +. TD +Move to the beginning of next word. +. TRX +. TD +. CI L +. TD +Move to the beginning of next line. +. TRX +. TD +. CI "" n L +. TD +Move forward \fIn\fP lines. +. TRX +. TD +. CI 0L +. TD +Move to the beginning of current line. +. TRX +. TD +. CI LR +. TD +Move to end of current line. +. TRX +. TD +. CI "" n B +. TD +Move backwards \fIn\fP lines. +. TRX +. TD +. CI J n L +. TD +Go to beginning of line \fIn\fP+1. +. TRX +. TD +. CI J +. TD +Jump to beginning of buffer. +. TRX +. TD +. CI ZJ +. TD +Jump to end of buffer. +. TRX +. TD colspan=2 +\fBTip:\fP Enable the \fCfnkeys.tes\fP module in \fC.teco_ini\fP +to move around with cursor keys! +.ETB +. +.TBLX "External Programs" width='42% 58%' +. TRX +. TD +\fCEC\fIcommand\*$ +. TD +Insert output of \fIcommand\fP. +. TRX +. TD +\fIn\fCEC\fIcommand\*$ +. TD +Filter next \fIn\fP lines through \fIcommand\fP. +. TRX +. TD +\fCHEC\fIcommand\*$ +. TD +Filter whole buffer through \fIcommand\fP. +. TRX +. TD +\fIn\fCECsort\*$ +. TD +Sort next \fIn\fP lines (UNIX). +.ETB +. +.TBLX "Macros" width='42% 58%' +. TRX +. TD +\fC@\*[CTRL ^U]\fIq\fP{\fImacro\fP} +. TD +Define \fImacro\fP in Q-Register \fIq\fP. +. TRX +. TD +. CI M q +. TD +Call macro in Q-Register \fIq\fP. +. TRX +. TD +\*($$\fC*\fIq +. TD +Discard command-line, storing it in \fIq\fP. +.ETB +. +.PGNH +.\" FIXME: We shouldn't have to reinitialize the column mode. +.1C \" Backpage +.SP 0.5c +. +.MC (u;(\nW-0.5c)/3) 0.25c \" 3 columns +. +.TBLX "Search & Replace" width='45% 55%' +. TRX +. TD +\fCS\fItext\*$ +. TD +Search for next occurrence of \fItext\fP. +. TRX +. TD +\fC-S\fItext\*$ +. TD +Search for previous occurrence of \fItext\fP. +. TRX +. TD +\fIn\fCS\fItext\*$ +. TD +Search for \fIn\fP-th occurrence of \fItext\fP. +. TRX +. TD +\fCS\*$ +. TD +Repeat last search (pattern from Q-Register \fC_\fP). +. TRX +.
TD +\fCN\fItext\*$ +. TD +Search for next occurrence of \fItext\fP across all buffers. +. TRX +. TD +\fCFR\fIfrom\*$\fIto\*$ +. TD +Find next occurrence of \fIfrom\fP and replace it with \fIto\fP. +. TRX +. TD +\fCFR\*($$ +. TD +Repeat the last search-replace operation (\fC_\fP and \fC-\fP). +. TRX +. TD +\fC<FR\fIfrom\*$\fIto\*$\fC;>\fP +. TD +Find and replace all occurrences in buffer beginning at current position. +. TRX +. TD +\fCFK\fItext\*$ +. TD +Find and kill (delete) up to first occurrence of \fItext\fP. +. TRX +. TD +\fCFD\fItext\*$ +. TD +Find and delete first occurrence of \fItext\fP. +.ETB +. +.TBLX "Control Flow" width='45% 55%' +. TRX +. TD +. CI < commands > +. TD +Repeat \fIcommands\fP infinitely. +. TRX +. TD +. CI "" n < commands > +. TD +Repeat \fIcommands\fP \fIn\fP times. +. TRX +. TD +. CI "" n ; +. TD +Break from loop if \fIn\fP is false (non-negative). +. TRX +. TD colspan=2 +For instance, to add \fC#\fP in front of the next 10 lines: +\fC0L10<I#\*$L>\fP +.ETB +. +.TBLX "Help" width='45% 55%' +. TRX +. TD +\fC?\fItopic\*$ +. TD +Search help for \fItopic\fP (may be command). +.ig END \" not yet supported +. TRX +. TD +\fC?\*$ +. TD +Search help by word at current position in buffer. +.END +. TRX +. TD colspan=2 +\fBTip:\fP You can use the Tab-key for autocompleting topics. +.ETB +. +.NCOL +. +.TBLX "Search Patterns" width='40% 60%' +. TRX +. TD +\*[CTRL ^X] +. TD +Matches any character. +. TRX +. TD +\*[CTRL ^E]\fCS +. TD +Matches any non-empty sequence of whitespace characters. +. TRX +. TD +\*[CTRL ^E]\fCA +. TD +Matches any alphabetic character. +. TRX +. TD +\*[CTRL ^E]\fCD +. TD +Matches any digit. +. TRX +. TD +\*[CTRL ^N]\fIclass\fP +. TD +Matches any character not in \fIclass\fP. +. TRX +. TD +\*[CTRL ^E]\fCM\fIpattern\fP +. TD +Matches many occurrences of \fIpattern\fP. +. TRX +. TD +\*[CTRL ^E]\fCG\fIq\fP +. TD +Matches any character in Q-Register \fIq\fP. +. TRX +. TD +\*[CTRL ^E]\fC[\fIp\*<1\*>\fP,\fIp\*<2\*>\fP,\fI...\fP] +.
TD +Matches \fIp\*<1\*>\fP or \fIp\*<2\*>\fP. +. TRX +. TD colspan=2 +To remove all trailing whitespace characters, you could type: +.br +\fCJ<FR\*[CTRL LF^E]M\*[CTRL ^E][\0,\*[CTRL TAB]]\*[CTRL $LF$];> +.ETB +. +.TBLX "String Building" width='40% 60%' +. TRX +. TD +\*[CTRL ^E]\fCQ\fIq +. TD +Expand to string contents of Q-Register \fIq\fP. +. TRX +. TD +\*[CTRL ^E]\fC\\\fIq +. TD +Expand to integer contents of Q-Register \fIq\fP. +. TRX +. TD +\*[CTRL ^E]\fCU\fIq +. TD +Expand to character represented by codepoint in Q-Register \fIq\fP. +. TRX +. TD +\*[CTRL ^Q]\fIx +. TD +Quote (escape) the following character \fIx\fP. +. TRX +. TD +\*[CTRL ^Q^Q] +. TD +Expands to \*[CTRL ^Q]. +.ETB +. +.\" Perhaps add the rubout/rubin code as well? +.\" If so, it would rather belong on page 1. +.TBLX "Command-line Editing" width='40% 60%' +. TRX +. TD +. CI { +. TD +Edit current command-line. +. TRX +. TD +. CI } +. TD +Replace command-line with edited version. +. TRX +. TD +. CI {HK} +. TD +Undo the entire command-line. +.ig END +. TRX +. TD +\*[CTRL ^W] +. TD +Rub out word or command. +. TRX +. TD +\*[CTRL ^U] +. TD +Rub out string argument. +.END +. TRX +. TD +\*[CTRL ^G^W] +. TD +Rub in word. +Also try Shift+Delete if \fCfnkeys.tes\fP is loaded. +.ETB +. +.sp |(u;\nL-7.7c) +.PDFPIC -I -5c ../ico/sciteco-256.pdf 5c +. +.NCOL +. +.TBLX "Arithmetics" width='40% 60%' +. TRX +. TD +. CI "" n U q +. TD +Assign number \fIn\fP to Q-Register \fIq\fP. +. TRX +. TD +. CI -U q +. TD +Assign -1 to Q-Register \fIq\fP. +. TRX +. TD +. CI Q q +. TD +Query (get) integer from Q-Register \fIq\fP. +. TRX +. TD +. CI "" n % q +. TD +Add \fIn\fP to Q-Register \fIq\fP and return new value. +. TRX +. TD +. CI % q +. TD +Increase Q-Register \fIq\fP and return new value. +. TRX +. TD +. CI -% q +. TD +Decrease Q-Register \fIq\fP and return new value. +. TRX +. TD +\*[CTRL ^^]\fIx +. TD +Codepoint of character \fIx\fP. +. TRX +. TD +. CI "" n A +. 
TD +Get codepoint \fIn\fP characters after current position. +. TRX +. TD +\fC\\ +. TD +Parse and retrieve integer at current position in buffer. +. TRX +. TD +\fIn\fC\\ +. TD +Insert integer \fIn\fP into buffer at current position. +. TRX +. TD +\fC\\+\fIn\fP\\V +. TD +Add \fIn\fP to number at current position in buffer. +. TRX +. TD +\fIn\*[CTRL ^_] +. TD +Binary negate \fIn\fP \(em negate TECO boolean. +. TRX +. TD +. CI "" n = +. TD +Show value of \fIn\fP in message line. +. TRX +. TD colspan=2 +Q-Registers consist of 2 cells: strings and integers. +These are independent. +Setting a number does not change the string part! +.ETB +. +.TBLX "Syntax Highlighting (lexers.tes)" width='60% 40%' +. TRX +. TD +. CI M[lexer.set. name ] +. TD +Set lexer (syntax highlighting) for language \fIname\fP. +. TRX +. TD colspan=2 +\fBTip:\fP You can use the Tab-key for autocompleting long Q-Register +names (and therefore Lexer names). +.ETB +\# EOF
\ No newline at end of file diff --git a/doc/grosciteco.tes b/doc/grosciteco.tes index cad7081..0caf3a5 100755 --- a/doc/grosciteco.tes +++ b/doc/grosciteco.tes @@ -11,8 +11,9 @@ LR 0X[output-woman] 2LR 0X[input] HK :Q[getopt.t]"< EU[getopt.t]Q[output-woman].tec ' EBN[input] +0EE !* Groff intermediate code is always ASCII *! -! skip whitespace characters ! +!* skip whitespace characters *! @#sw{ <0A-^^ "N 1; ' :C;> } @@ -49,30 +50,35 @@ EBN[input] Q.n<.-Z"= 1; ' 0A-10"= 1; ' D> ].n } +!* style last N glyphs/characters *! 1U[default-style] @[style]{ [.l U.l Q[font]-Q[default-style]"N - .-Q.lESSTARTSTYLING Q[font],Q.lESSETSTYLING + .-Q.lESSTARTSTYLING Q[font],Q.lESSETSTYLING ' ].l } [topics] -!* - * Special characters - * FIXME: Use UTF8 characters once available - *! -[glyphs.**]* +!* special characters *! +[glyphs.**]∗ [glyphs.\-]- [glyphs.aa]' +[glyphs.aq]' [glyphs.dq]" -[glyphs.hy]- -[glyphs.la]< -[glyphs.ra]> -[glyphs.lq]" -[glyphs.rq]" +[glyphs.hy]‐ +[glyphs.ha]^ +[glyphs.em]— +[glyphs.bu]• +[glyphs.la]⟨ +[glyphs.ra]⟩ +[glyphs.lq]“ +[glyphs.rq]” [glyphs.rs]\ +[glyphs.ti]~ [glyphs.+]+ +[glyphs.->]→ +[glyphs.tm]™ !* process formatter commands *! @[format]{ < @@ -103,16 +109,16 @@ EBN[input] * a table). Only the line+column should no longer change. * Either store line+column or use markers. *! - [* EB 0:M[move] .U.d ]* + [* EB 0:M[move] U.d ]* :EU[topics]\.d: C :X[topics] L F< !cmd.xXsciteco_tt! - [* EB 0:M[move] .U[ttstart] ]* + [* EB 0:M[move] U[ttstart] ]* L F< !cmd.xXsciteco_tt_end! [* EB 0:M[move] - .-Q[ttstart]< + -Q[ttstart]< Q[ttstart]ESSTARTSTYLING Q[ttstart]ESGETSTYLEAT+16,1ESSETSTYLING %[ttstart]> ]* @@ -194,22 +200,27 @@ EBN[input] :M#sw :M#gi/Q[res.h]+Q[pos.h]U.[to.h] :M#gi/Q[res.v]+Q[pos.v]U.[to.v] :M#sc [* EB Q.[to.h]-Q[pos.h]"= - ! vertical line ! + !* vertical line *! 
Q.[to.v]-Q[pos.v]"< Q[pos.v]U.v Q.[to.v]U[pos.v] | Q.[to.v]U.v ' - 1:M[move] I+ %[pos.v] + 1:M[move] %[pos.v] .-Z"= I R ' + -A-9472"= 0A-9472"= I┬ | I┐ ' + | 0A-9472"= I┌ | I╷ ' ' Q.v-Q[pos.v]< - 1:M[move] I| %[pos.v] + 1:M[move] %[pos.v] .-Z"= I R ' + -A-9472"= 0A-9472"= I┼ | I┤ ' + | 0A-9472"= I├ | I│ ' ' > - 1:M[move] I+ + 1:M[move] .-Z"= I R ' + -A-9472"= 0A-9472"= I┴ | I┘ ' + | 0A-9472"= I└ | I╵ ' ' Q.[to.v]U[pos.v] | - ! horizontal line ! + !* horizontal line + FIXME: works only if horizonal lines are drawn first. *! Q.[to.h]-Q[pos.h]"< Q[pos.h]U.h Q.[to.h]U[pos.h] | Q.[to.h]U.h ' - 1:M[move] I+ %[pos.h] - Q.h-Q[pos.h]< - 1:M[move] I- %[pos.h] + Q.h-Q[pos.h]+1< + 1:M[move] I─ %[pos.h] > - 1:M[move] I+ Q.[to.h]U[pos.h] ' ]* F< @@ -221,15 +232,24 @@ EBN[input] :Q.w%[pos.h] :M#sc F< !cmd.C! - :M#sw .(:M#sa).X.w + :M#sw 0A-^^u"= + C 16 :M#giU.w + | + .(:M#sa).X.w 0Q[glyphs.Q.w]U.w + ' [* EB 1:M[move] - G[glyphs.Q.w] 1:M[style] ]* :M#sc F< + Q.wI 1:M[style] ]* :M#sc F< !cmd.c! :M#sw 0AU.w C [* EB 1:M[move] G[glyphs.U.w] 1:M[style] ]* :M#sc F< + !cmd.N! + :M#sw :M#giU.w + [* EB 1:M[move] + Q.wI 1:M[style] ]* :M#sc F< + !cmd.n! :M#sw :M#gi :M#gi :M#sc F< @@ -268,7 +288,8 @@ Q*U* * TODO: The size can still be improved by using SCI_SETSTYLINGEX * if appropriate. *! -EB J 0U#cs 0U#cd +EB 0EE !* operate in single-byte mode *! +J 0U#cs 0U#cd < .ESGETSTYLEATUs Qs"< Qs= ' .-Z"< Qs-Q#cs"= C F< ' ' diff --git a/doc/grosciteco.tes.1.in b/doc/grosciteco.tes.1.in index 39941db..d264afc 100644 --- a/doc/grosciteco.tes.1.in +++ b/doc/grosciteco.tes.1.in @@ -95,7 +95,6 @@ and provides the following \fBtroff\fP macros that can be called by documentation authors: . .TP -.TQ \fB.SCITECO_TOPIC \fItopics...\fR .SCITECO_TOPIC .SCITECO_TOPIC SCITECO_TOPIC Defines one or more topics in \*(ST's help system. 
diff --git a/doc/htbl.tes b/doc/htbl.tes index 2bc5056..9cd5100 100755 --- a/doc/htbl.tes +++ b/doc/htbl.tes @@ -66,7 +66,7 @@ k q.[row]u.[frows] q.[col]u.[fcols] 1u.[row] 1u.[col] .u.#bd .,.+3:ST{^J"S .,S^JT}.-3X.[data.\.[row].\.[col]] C 1 | - .,.+2:S\^^"S + .,.+2:S\^^"S 0U.[format.\.[row].\.[col].rowspan] Q.[row]-1U.#cr <Q.[format.\.#cr.\.[col].rowspan]">1;' -%.#cr> diff --git a/doc/sciteco.1.in b/doc/sciteco.1.in index 1ebb4de..82c1a47 100644 --- a/doc/sciteco.1.in +++ b/doc/sciteco.1.in @@ -19,6 +19,7 @@ Scintilla-based \fBT\fPext \fBE\fPditor and \fBCO\fPrrector .OP "-e|--eval" macro .OP "-m|--mung" .OP "--no-profile" +.OP "-8|--8bit" .RI [ "UI option .\|.\|." ] .OP "--" .RI [ script ] @@ -191,6 +192,14 @@ munging an empty file. This is useful to fix up a broken profile script. This option has no effect when a file is explicitly munged with .BR \-\-mung . +.IP "\fB-8\fR, \fB--8bit\fR" +.SCITECO_TOPIC "-8" "--8bit" +Use raw single-byte ANSI encoding by default and disable automatic EOL conversion, +which optimizes \*(ST for 8-bit cleanliness. +It is equivalent to executing \(lq16,4ED\(rq, but since it is executed +very early at startup, all Q-Registers and the unnamed buffer will +already be in ANSI encoding. +This option is also useful when munging the profile macro. .IP "\fIUI options .\|.\|.\fP" Some graphical user interfaces, notably GTK+, provide additional command line options. @@ -305,6 +314,10 @@ for details. environment before initializing Curses, so these variables can be modified in the profile macro. . +.LP +On GTK+, you may turn off the infamous client-side window decorations +by setting the environment variable \fBGTK_CSD\fP to \(lq0\(rq. +. . .SH SIGNALS .
@@ -392,21 +405,14 @@ It may also be used as a template for Language reference: .BR sciteco (7) .TP -The \fBtroff\fP post-processor for \*(ST, including -information on how to write \(lqwomanpages\(rq: +The \fBtroff\fP post-processor for \*(ST, including information on how to write \(lqwomanpages\(rq: .BR grosciteco.tes (1) .TP -A \fBtroff\fP pre-processor commonly used to generate -man-pages: +A \fBtroff\fP pre-processor commonly used to generate man-pages: .BR tedoc.tes (1) .TP -Homepage: -.UR @PACKAGE_URL@ -\*(ST at Sourceforge -.UE -.TP Development home, bug tracker and wiki: -.UR @PACKAGE_URL_DEV@ +.UR @PACKAGE_URL@ \*(ST at GitHub .UE .TP diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template index 2ad16d3..81deac0 100644 --- a/doc/sciteco.7.template +++ b/doc/sciteco.7.template @@ -86,17 +86,17 @@ regular commands for command-line editing. .SH KEY TRANSLATION . When the user presses a key or key-combination it is first translated -to an ASCII character. -All immediate editing commands and regular \*(ST commands operate on -a language based solely on -.B ASCII -characters. +to a UTF-8 string. The rules for translating keys are as follows: .RS .IP 1. 4 Keys with a printable representation (letters, digits and special -characters) are translated to their printable representation. -Shift-combinations automatically result in upper-case letters. +characters) are translated to their printable representation +according to the current keyboard layout and modifier keys. +On the Gtk UI, \*(ST tries to automatically take ANSI letter +values in situations where the parser accepts only ANSI +characters. +On Curses, you might need key macros to achieve the same. .IP 2. .SCITECO_TOPIC ctrl Control-combinations (e.g. CTRL+A) are translated to control @@ -104,7 +104,9 @@ codes, that is a code smaller than 32. The control code can be calculated by stripping the seventh bit from the upper-case letter's ASCII code. 
So for instance, the upper or lower case A (65) will be translated -to code 1, B to code 2, ecetera. +to code 1, B (66) to code 2, ecetera. +\*(ST will always use latin letters regardless of the current +keyboard layout. \*(ST echos control codes as Caret followed by the corresponding upper case letter, so you seldomly need to know a control codes actual numeric code. @@ -126,138 +128,164 @@ there is often an equivalent typed with the caret character (e.g. \(lq^I\(rq). .IP 4. A selection of other keys without printable representation (called -function keys) are translated to user-definable character sequences. -This feature is called function key macros and explained in the -next subsection. +function keys) are looked up as key macros, allowing user-definable character +sequences to be inserted, including immediate editing commands. +If there is no matching key macro, nothing is inserted. +The key macro feature is explained in the next subsection. +.IP 5. +All keys with printable representations are also looked up +as key macros, allowing them to be remapped just like function keys. +Otherwise the corresponding UTF-8 strings are inserted into the command stream. +.IP 6. +The result of key macro lookups or the default printable representations +are processed as immediate editing commands in a context-sensitive manner +(see section +.BR "COMMANDLINE EDITING" ). +By default they are inserted into the command line macro and are +immediately executed. .RE . -.SS Function Key Macros -. -.SCITECO_TOPIC "function key" +.LP +While \*(ST handles keys with arbitrary Unicode representations, +all immediate editing commands and regular \*(ST commands operate on +a language based solely on +.B ASCII +codes, which is a subset of Unicode. +\# This is because we cannot assume the presence of any particular non-ANSI +\# symbol on a user's keyboard. 
+\# Immediate editing commands do not operate directly on function keys +\# because we didn't want to introduce an UI-independent representation +\# of function keys - it would also complicate insertion of immediate +\# editing commands from key macros. +Since the \*(ST parser is Unicode-aware, this does not exclude +using Unicode glyphs wherever a single character is expected, +ie. \fB^^\fIx\fR and \fBU\fIq\fR works with arbitrary Unicode glyphs. +All \*(ST macros must be in valid UTF-8. +. +.SS Key Macros +. +.SCITECO_TOPIC "key macro" ^K By default function keys except Escape, Backspace and Return are ignored by \*(ST. -By setting bit 6 of the \fBED\fP flag variable, function key handling -is enabled: -.SCITECO_TT -.EX -0,64ED -.SCITECO_TT_END -.EE -This is usually performed in the editor profile. -With certain interfaces (curses) after enabling function keys, -the Escape key might only be handled after a short delay. +With certain interfaces (curses) the Escape key might only be handled +after a short delay. This is because it might be used by the terminal to transmit Escape Sequences. This delay is minimized by \*(ST, so using the escape key should not be a problem even on ncurses/UNIX. +If the default delay is too small, it can be tweaked with the +.B ESCDELAY +environment variable and if necessary a key macro can be +defined as an escape surrogate as described in this section +.RB ( fnkeys.tes +defines the delete key as an escape surrogate for instance). .LP -Enabling function keys also enables Function Key Macros. -These are Q-Register strings inserted into the command stream -(before immediate editing command handling) when certain function -keys (or combinations) are pressed. -The following list of Function Key Macro registers are supported: +To make use of function keys or to remap all other keys, +special Q-Register strings can be defined that are inserted into the command stream +before immediate editing command handling. 
+The following list of key macro registers are supported: +. .TP 9 -.SCITECO_TOPIC ^FDOWN -.B ^FDOWN +.SCITECO_TOPIC ^KDOWN +.B ^KDOWN .TQ -.SCITECO_TOPIC ^FUP -.B ^FUP +.SCITECO_TOPIC ^KUP +.B ^KUP Inserted when the down/up cursor keys are pressed. .TP -.SCITECO_TOPIC ^FLEFT -.B ^FLEFT +.SCITECO_TOPIC ^KLEFT +.B ^KLEFT .TQ -.SCITECO_TOPIC ^FSLEFT -.B ^FSLEFT +.SCITECO_TOPIC ^KSLEFT +.B ^KSLEFT Inserted when the left or shift-left cursor keys are pressed. .TP -.SCITECO_TOPIC ^FRIGHT -.B ^FRIGHT +.SCITECO_TOPIC ^KRIGHT +.B ^KRIGHT .TQ -.SCITECO_TOPIC ^FSRIGHT -.B ^FSRIGHT +.SCITECO_TOPIC ^KSRIGHT +.B ^KSRIGHT Inserted when the right or shift-right cursor keys are pressed. .TP -.SCITECO_TOPIC ^FHOME -.B ^FHOME +.SCITECO_TOPIC ^KHOME +.B ^KHOME .TQ -.SCITECO_TOPIC ^FSHOME -.B ^FSHOME +.SCITECO_TOPIC ^KSHOME +.B ^KSHOME Inserted when the Home or shift-Home keys are pressed. .TP -.SCITECO_TOPIC ^FF -.BI ^FF x -Inserted when the Fx-key is pressed +.SCITECO_TOPIC ^KF +.BI ^KF x +Inserted when the F\fIx\fP-key is pressed .RI ( x is a number between 0 and 63). .TP -.SCITECO_TOPIC ^FDC -.B ^FDC +.SCITECO_TOPIC ^KDC +.B ^KDC .TQ -.SCITECO_TOPIC ^FSDC -.B ^FSDC +.SCITECO_TOPIC ^KSDC +.B ^KSDC Inserted when the Delete or shift-Delete key is pressed. .TP -.SCITECO_TOPIC ^FIC -.B ^FIC +.SCITECO_TOPIC ^KIC +.B ^KIC .TQ -.SCITECO_TOPIC ^FSIC -.B ^FSIC +.SCITECO_TOPIC ^KSIC +.B ^KSIC Inserted when the Insert or shift-Insert key is pressed. .TP -.SCITECO_TOPIC ^FPPAGE -.B ^FPPAGE +.SCITECO_TOPIC ^KPPAGE +.B ^KPPAGE .TQ -.SCITECO_TOPIC ^FNPAGE -.B ^FNPAGE +.SCITECO_TOPIC ^KNPAGE +.B ^KNPAGE Inserted when the Page-Up or Page-Down key is pressed. .TP -.SCITECO_TOPIC ^FPRINT -.B ^FPRINT +.SCITECO_TOPIC ^KPRINT +.B ^KPRINT .TQ -.SCITECO_TOPIC ^FSPRINT -.B ^FSPRINT +.SCITECO_TOPIC ^KSPRINT +.B ^KSPRINT Inserted when the Print or shift-Print key is pressed. 
.TP -.SCITECO_TOPIC ^FA1 -.B ^FA1 +.SCITECO_TOPIC ^KA1 +.B ^KA1 .TQ -.SCITECO_TOPIC ^FA3 -.B ^FA3 +.SCITECO_TOPIC ^KA3 +.B ^KA3 .TQ -.SCITECO_TOPIC ^FB2 -.B ^FB2 +.SCITECO_TOPIC ^KB2 +.B ^KB2 .TQ -.SCITECO_TOPIC ^FC1 -.B ^FC1 +.SCITECO_TOPIC ^KC1 +.B ^KC1 .TQ -.SCITECO_TOPIC ^FC3 -.B ^FC3 +.SCITECO_TOPIC ^KC3 +.B ^KC3 Inserted when the numeric key pad's upper left key (7), upper right key (9), central key (5), lower left key (1), or lower right key (3) is pressed and num-lock is disabled. The key-pad's cursor keys are handled like the regular cursor keys. .TP -.SCITECO_TOPIC ^FEND -.B ^FEND +.SCITECO_TOPIC ^KEND +.B ^KEND .TQ -.SCITECO_TOPIC ^FSEND -.B ^FSEND +.SCITECO_TOPIC ^KSEND +.B ^KSEND Inserted when the End or shift-End key is pressed. .TP -.SCITECO_TOPIC ^FHELP -.B ^FHELP +.SCITECO_TOPIC ^KHELP +.B ^KHELP .TQ -.SCITECO_TOPIC ^FSHELP -.B ^FSHELP +.SCITECO_TOPIC ^KSHELP +.B ^KSHELP Inserted when the Help or shift-Help key is pressed. .TQ -.SCITECO_TOPIC ^FCLOSE -.B ^FCLOSE -ISCITECO_TOPIC when the Close key has been pressed. +.SCITECO_TOPIC ^KCLOSE +.B ^KCLOSE Inserted when the Close key has been pressed. More importantly, this key is emulated in some GUIs (notably GTK+) when the user tries to close \*(ST's @@ -266,34 +294,50 @@ This allows customizing \*(ST's behaviour when program termination is requested (e.g. only quit if there are no unsaved buffers). The close key is also special because -it has a default action if function key macros are -disabled or the \(lq^FCLOSE\(rq macro is undefined: +it has a default action if the \(lq^KCLOSE\(rq macro is undefined: It unconditionally quits \*(ST. The default action is \fBnot\fP performed when -\(lq^FCLOSE\(rq has merely been masked out in the +\(lq^KCLOSE\(rq has merely been masked out in the current parser state (see below). +.TP +.BI ^K x +Any other key with printable representation and all control codes +are looked up with a \(lq^K\(rq prefix. +\fIx\fP can usually only be a single Unicode glyph. 
+\# Although the result of IMEs is looked up in Gtk, which I suppose +\# can be multiple codepoints. +If undefined, \fIx\fP is inserted unmodified. +\# NOTE: Since all function key macros are longer than 2 +\# characters, there shouldn't be any namespace collisions. . .LP -\(lq^F\(rq corresponds to CTRL+F in the above list but +\(lq^K\(rq corresponds to CTRL+K (ASCII code 11) in the above list but might be typed with a caret due to string building characters in long Q-Register names. The names are all derived from key definitions of the curses library \(em not all of them may be supported on any particular user interface. .LP -By default function key macros are effective everywhere \(em -pressing a function key has the same effect as processing -the characters of the corresponding function key macro as +The result of key macro expansion differs from +consecutive key presses in that they are considered an unity. +If insertion of a single character fails (raises an error), +the entire macro expansion is automatically rubbed out. +.LP +By default key macros are effective everywhere \(em +pressing a key has the same effect as processing +the characters of the corresponding key macro as immediate editing commands (or self-inserting characters). -However function key macros that rewrite the current command line +However key macros that rewrite the current command line will only work correctly from specific \*(ST parser states. -\*(ST therefore allows you to mask function key macros in +Another common use of key macros would be to define +aliases of \*(ST commands for non-latin keys on Curses. +\*(ST therefore allows you to mask key macros in specific parser states by evaluating the Q-Register's numeric -part, thus allowing you to control \fIwhere\fP a function key +part, thus allowing you to control \fIwhere\fP a key macro is effective. 
The numeric part represents a bitmask of states where -function keys are \fIdisabled\fP (so the default value 0 -enables that function key everywhere). +key macros are \fIdisabled\fP (so the default value 0 +enables that key macro everywhere). \*(ST defines the following state flags: .IP 1 4 Bit 0 represents the \(lqstart\(rq state where \*(ST accepts the @@ -302,23 +346,27 @@ This is the state you will want command line editing macros to be enabled in. .IP 2 Bit 1 represents any string argument. +.IP 4 +Bit 2 represents any case insensitive syntactic character. +This is the state you might want to use for translating +non-latin characters to their latin equivalent. .LP All other bits/flags represent any other parser state. Consequently, setting the register to the inverse of a bitmask of state flags enables the corresponding macro only for the specified states. -For instance, to enable the \(lq^FRIGHT\(rq function key macro +For instance, to enable the \(lq^KRIGHT\(rq key macro only in the \(lqstart\(rq state, you could set: .SCITECO_TT .EX -1^_U[^FRIGHT] +1^_U[^KRIGHT] .SCITECO_TT_END .EE .LP -A set of useful Function Key Macros are provided in the +A set of useful key macros for function keys is provided in the standard library .BR fnkeys.tes . -It demonstrates how Function Key Macros may be used to define +It demonstrates how key macros may be used to define alternate Escape keys (so the delay issue is not experienced), or do insertion and command-line editing using function keys. . @@ -1069,11 +1117,27 @@ Every document has a current position called dot (after the \(lq.\(rq command that returns it). A document may contain any sequence of bytes but positions refer to characters that might not correspond to individual -bytes depending on the document's encoding. +bytes depending on the document's encoding (see \fBEE\fP command). +The \fB^E\fP command can be used to translate between byte +and character/glyph positions. 
Consequently when querying the code at a character position or inserting characters by code, the code may be an Unicode codepoint instead of byte-sized integer. -Currently however, \*(ST will only handle ASCII files. +.LP +Currently, \*(ST supports buffers in UTF-8 and single-byte +ANSI encodings, that can also be used for editing raw binary files. +\# You can configure other single-byte code pages with EE, +\# but there isn't yet any way to insert characters. +UTF-8 is the default codepage for new buffers and Q-Registers +unless the 2nd \fBED\fP flag bit is set. +You can also specify \fB--8bit\fP to optimize \*(ST for +8-bit cleanliness. +While navigation in documents with single-byte encodings +takes place in constant time, \*(ST uses heuristics in +UTF-8 documents for translating between byte and character +offsets which are slower especially when \(lqjumping\(rq +into very large lines. +\# But there are optimizations for R, C and A... .LP .SCITECO_TOPIC "EOL translation" To simplify working with files using different end of line @@ -1461,16 +1525,27 @@ The existence of a clipboard register can thus be checked in macros to determine whether getting and modifying that particular clipboard is supported natively. .br -.SCITECO_TOPIC xterm +.SCITECO_TOPIC OSC-52 xterm \*(ST does \fBnot\fP generally support clipboards on ncurses, -but has special support when used with a sufficiently recent version -of \fBxterm\fP(1). -Since the operability of XTerm clipboards cannot be tested +but has special support for OSC-52 escape sequences, as were +introduced by sufficiently recent versions of +.BR xterm (1) +and have since been adopted by several other terminal emulators. +Since the operability of OSC-52 clipboards cannot be tested automatically, users will have to set the flag 256 of the -\fBED\fP flags if and only if their XTerm is configured for allowing +\fBED\fP flags if and only if their terminal emulator is properly +configured. 
+.BR xterm (1) +for instance must be configured for allowing the \fISetSelection\fP and \fIGetSelection\fP window operations. -\*(ST will still check whether XTerm is actually used in -a particular session. +If running under +.BR xterm (1), +\*(ST will still check whether the XTerm version is sufficient. +.SCITECO_TOPIC Kitty +Other terminal emulators like Kitty may ask for permission to read the +clipboard (\fBread-clipboard-ask\fP). +This is not supported by \*(ST and must be disabled +(use \fBread-clipboard\fP instead). .SCITECO_TOPIC xclip If native clipboard support is unavailable, users may still fall back to using external tools like \fBxclip\fP(1) @@ -1493,11 +1568,11 @@ the original clipboard contents, though. The numeric parts of the clipboard registers are currently not used by \*(ST. .TP -.BI ^F key -Function key registers as documented in section -\fBKEY TRANSLATION\fP. -Their string-content represents a function key macro -and their numeric part is a function key mask. +.BI ^K key +Key macro registers as documented in section +.BR "KEY TRANSLATION" . +Their string-content represents a key macro +and their numeric part is a key macro mask. None of those registers are automatically initialized on startup. .TP @@ -1571,7 +1646,8 @@ contents of the search register you could write: [_ Sfoo$ ]_ .SCITECO_TT_END .EE -. +To copy the string and numeric contents of register \(lqA\(rq to \(lqB\(rq, +you could write \(lq[a ]b\(rq. . .SH STRING-BUILDING CHARACTERS .SCITECO_TOPIC "string building" @@ -1588,7 +1664,9 @@ stages: .IP 1. 4 Carets followed by characters are translated to control codes, so \(lq^a\(rq and \(lq^A\(rq are equivalent to CTRL+A (code 1). -A double caret \(lq^^\(rq is translated to a single caret. +\# FIXME: Should we change the double-caret behavior? +A double caret \(lq^^\(rq is translated to a single caret, +but Ctrl+caret (code 30) is not translated at all. This caret-handling is independent of the caret-handling in command names. .IP 2. 
@@ -1616,22 +1694,29 @@ thus refers to the corresponding control code: Escape character \fIc\fP. The character is not handled as a string building or string termination character, so for instance \(lq^Q^Q\(rq translates to \(lq^Q\(rq. +Furthermore, some immediate editing commands are inhibited right after \fB^Q\fR, +so you can type \(lq^Q^U\(rq and \(lq^Q^W\(rq, which translate to control codes +21 and 23. .TP .SCITECO_TOPIC ^V^V ^Vc lower .B ^V^V .TQ .BI ^V c -Translates all following characters into lower case. +Translates all following characters, including the expansions of \fB^EQ\fP, +\fB^EU\fP etc., into lower case. When \fB^V\fP is not followed by \fB^V\fP, a single character \fIc\fP is lower-cased. +\# Which is pretty pointless nowadays. .TP .SCITECO_TOPIC ^W^W ^Wc .B ^W^W .TQ .BI ^W c Analogous to \fB^V\fP, but upper-cases characters. +Since \fB^W\fP is an immediate editing command, this can practically be typed +only with upcarets in interactive mode. .TP -.SCITECO_TOPIC ^E\\ ^E\\q +.SCITECO_TOPIC ^E\[rs] ^E\[rs]q .BI ^E\(rs q Expands to the formatted number stored in the numeric part of Q-Register \fIq\fP. @@ -1648,6 +1733,12 @@ Expands to the character whose code is stored in the numeric part of Q-Register \fIq\fP. For instance if register \(lqA\(rq contains the code 66, \(lq^EUa\(rq expands to the character \(lqB\(rq. +The interpretation of this code depends on the context. +Within inserts and searches (\fBI\fP, \fBS\fP, etc.) bytes or Unicode codepoints +are expected depending on the buffer's encoding. +Operations on registers (\fBEU\fP) similarily consult the +register's encoding. +Everything else expects Unicode codepoints. .TP .SCITECO_TOPIC ^EQ ^EQq .BI ^EQ q @@ -1698,6 +1789,14 @@ The following pattern match constructs are supported for matching one character in different character classes (caret-notations refer to the corresponding control characters): .TP +.BI ^Q c +.TQ +.BI ^R c +Escape character \fIc\fP. 
+Since these are interpreted as string building characters as well, +you may have to type two or three \fB^Q\fP in a row to escape a +pattern match character. +.TP .SCITECO_TOPIC ^S ^EB .B ^S .TQ @@ -2067,17 +2166,17 @@ For instance the following macro inserts \fIn\fP tab characters .TP .SCITECO_TOPIC """A" .IB n \(dqA -Applies if \fIn\fP is the code of an alphabetic character. +Applies if \fIn\fP is the Unicode codepoint of an alphabetic character. .TP .SCITECO_TOPIC """C" .IB n \(dqC -Applies if \fIn\fP is the code of a symbol constituent. +Applies if \fIn\fP is the Unicode codepoint of a symbol constituent. Like in pattern matching, a symbol constituent is defined as an alpha-numeric character, dot, dollar or underscore. .TP .SCITECO_TOPIC """D" .IB n \(dqD -Applies if \fIn\fP is the code of a digit character (0 to 9). +Applies if \fIn\fP is the Unicode codepoint of a digit character. The current radix is insignificant. .TP .SCITECO_TOPIC """I" @@ -2150,16 +2249,16 @@ will commonly write: .TP .SCITECO_TOPIC """R" .IB n \(dqR -Applies if \fIn\fP is the code of an alpha-numeric character. +Applies if \fIn\fP is the Unicode codepoint of an alpha-numeric character. .TP .SCITECO_TOPIC """V" .IB n \(dqV -Applies if \fIn\fP is the code of a lower-case alphabetic +Applies if \fIn\fP is the Unicode codepoint of a lower-case alphabetic character. .TP .SCITECO_TOPIC """W" .IB n \(dqW -Applies if \fIn\fP is the code of a upper-case alphabetic +Applies if \fIn\fP is the Unicode codepoint of an upper-case alphabetic character. .LP There are also a number of flow-control commands like @@ -2219,6 +2318,7 @@ This manual mentions differences on several occasions. . .SH SEE ALSO . +.\" FIXME: The URLs do not format in FreeBSD's man or in woman pages. 
.TP Program invocation and options: .BR sciteco (1) @@ -2228,16 +2328,19 @@ Scintilla messages and other documentation: Scintilla .UE .TP -Scinterm manual, documenting the mapping of -\(lqRGB\(rq values to terminal colors on curses user interfaces: +Scinterm manual, documenting the mapping of \(lqRGB\(rq values to terminal colors on curses user interfaces: .UR http://foicica.com/scinterm/manual.html Scinterm manual .UE .TP -Gtk+ 3 documentation, containg details about -its CSS support and syntax: -.UR https://developer.gnome.org/gtk3/stable/GtkCssProvider.html -GtkCssProvider +Suitable terminal fonts for icon support in Curses (see \fBED\fP flags): +.UR https://www.nerdfonts.com/ +Nerd Fonts +.UE +.TP +Gtk+ 3 documentation, containing details about its CSS support and syntax: +.UR https://docs.gtk.org/gtk3/css-overview.html +Overview of CSS in GTK .UE . . diff --git a/freebsd/Makefile b/freebsd/Makefile new file mode 100644 index 0000000..72e4480 --- /dev/null +++ b/freebsd/Makefile @@ -0,0 +1,92 @@ +PORTNAME= sciteco +DISTVERSION= 2.1.0 +CATEGORIES= editors textproc devel +MASTER_SITES= https://github.com/rhaberkorn/${PORTNAME}/releases/download/v${DISTVERSION}/ \ + SOURCEFORGE/${PORTNAME}/v${DISTVERSION}/ + +MAINTAINER= robin.haberkorn@googlemail.com +COMMENT= Scintilla-based Text Editor and Corrector +WWW= https://github.com/rhaberkorn/sciteco + +LICENSE= GPLv3+ +LICENSE_FILE= ${WRKSRC}/COPYING + +FLAVORS= curses gtk +FLAVOR?= ${FLAVORS:[1]} +curses_PKGNAMESUFFIX=-curses +gtk_PKGNAMESUFFIX=-gtk + +# As SciTECO uses itself during the build process, +# it makes sense to compile it running under a dummy XServer. +# This is both faster and works in headless environments as well. 
+gtk_BUILD_DEPENDS=Xvfb:x11-servers/xorg-server@xvfb \ + xauth:x11/xauth mcookie:devel/util-linux + +USES= gmake pkgconfig compiler:c11 compiler:c++17-lang gnome groff +USE_GNOME= glib20 + +GNU_CONFIGURE= yes +CONFIGURE_ARGS= +CONFIGURE_OUTSOURCE=yes + +MAKEFILE= GNUmakefile +TEST_TARGET= check + +# NOTE: Unlike on Debian, we cannot build a sciteco-common package. +# FreeBSD does not yet support subpackages. +# Therefore both flavors will install totally independent +# (partially redundant) files. +.if ${FLAVOR} == gtk +DATADIR= ${PREFIX}/share/gsciteco +.endif + +SUB_FILES= pkg-message + +.if ${FLAVOR} == curses +USES+= ncurses +CONFIGURE_ARGS+=--with-interface=ncurses +PLIST_SUB+= GTK="@comment " PROGRAM_PREFIX="" +.elif ${FLAVOR} == gtk +USES+= desktop-file-utils +# FIXME: To appease QA checks, we would have to +# USE_GNOME+=cairo gdkpixbuf2 and +# gtk_LIB_DEPENDS+=libharfbuzz.so:print/harfbuzz +USE_GNOME+= gtk30 +CONFIGURE_ARGS+=--with-interface=gtk --program-prefix=g \ + --with-scitecodatadir="${DATADIR}" +PLIST_SUB+= GTK="" PROGRAM_PREFIX=g +.endif + +OPTIONS_DEFINE= LEXILLA MALLOC_REPLACEMENT TECO_INTEGER_32 +OPTIONS_DEFAULT=LEXILLA +OPTIONS_SUB= yes + +LEXILLA_DESC=Build with Lexilla lexer support (larger) +MALLOC_REPLACEMENT_DESC=Force replacement of system malloc() +TECO_INTEGER_32_DESC=Use 32-bit TECO integers + +LEXILLA_CONFIGURE_OFF=--without-lexilla +MALLOC_REPLACEMENT_CONFIGURE_ON=--enable-malloc-replacement +TECO_INTEGER_32_CONFIGURE_ON=--with-teco-integer=32 + +.include <bsd.port.pre.mk> + +.if ${FLAVOR} == gtk +# Run under Xvfb (see above). +# This is done only now, since USES=gmake would overwrite MAKE_CMD. 
+MAKE_CMD="${SCRIPTDIR}/xvfb-run.sh" ${GMAKE} +.endif + +post-install: +.for SZ in 16 32 48 256 + ${MKDIR} ${STAGEDIR}${PREFIX}/share/icons/hicolor/${SZ}x${SZ}/apps + ${INSTALL_DATA} ${WRKSRC}/ico/sciteco-${SZ}.png \ + ${STAGEDIR}${PREFIX}/share/icons/hicolor/${SZ}x${SZ}/apps/sciteco.png +.endfor + # To appease `make check-plist`: + ${RM} ${STAGEDIR}${DATADIR}/*.png + ${MKDIR} ${STAGEDIR}${PREFIX}/share/applications + ${INSTALL_DATA} ${WRKSRC}/src/sciteco.desktop \ + ${STAGEDIR}${PREFIX}/share/applications/sciteco.desktop + +.include <bsd.port.post.mk> diff --git a/freebsd/distinfo b/freebsd/distinfo new file mode 100644 index 0000000..d8aace8 --- /dev/null +++ b/freebsd/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1729096188 +SHA256 (sciteco-2.1.0.tar.gz) = f03dfe6c0e0a19e3144b25e2f50883b9b958422af8249833cfc49f076f88e361 +SIZE (sciteco-2.1.0.tar.gz) = 3966407 diff --git a/freebsd/files/pkg-message.in b/freebsd/files/pkg-message.in new file mode 100644 index 0000000..b91ee06 --- /dev/null +++ b/freebsd/files/pkg-message.in @@ -0,0 +1,9 @@ +[ +{ type: install + message: <<XYZZY +You are recommended to copy %%DATADIR%%/sample.teco_ini to ~/.teco_ini +and edit this file afterwards with SciTECO. +XYZZY +} +] + diff --git a/freebsd/pkg-descr b/freebsd/pkg-descr new file mode 100644 index 0000000..530d978 --- /dev/null +++ b/freebsd/pkg-descr @@ -0,0 +1,5 @@ +SciTECO is an interactive TECO dialect, similar to Video TECO. +It also adds features from classic Standard TECO-11, +and incorporates many unique new ideas. +It is geared towards UNIX-like operating systems but also +natively supports Microsoft Windows NT. 
diff --git a/freebsd/pkg-plist b/freebsd/pkg-plist new file mode 100644 index 0000000..5539757 --- /dev/null +++ b/freebsd/pkg-plist @@ -0,0 +1,110 @@ +bin/%%PROGRAM_PREFIX%%grosciteco.tes +bin/%%PROGRAM_PREFIX%%sciteco +bin/%%PROGRAM_PREFIX%%tedoc.tes +share/man/man1/%%PROGRAM_PREFIX%%grosciteco.tes.1.gz +share/man/man1/%%PROGRAM_PREFIX%%sciteco.1.gz +share/man/man1/%%PROGRAM_PREFIX%%tedoc.tes.1.gz +share/man/man7/%%PROGRAM_PREFIX%%sciteco.7.gz +%%DATADIR%%/lib/color.tes +%%DATADIR%%/lib/colors/solarized.tes +%%DATADIR%%/lib/colors/terminal.tes +%%DATADIR%%/lib/fnkeys.tes +%%DATADIR%%/lib/getopt.tes +%%DATADIR%%/lib/lexer.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/abaqus.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/ada.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/asciidoc.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/asl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/asm.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/ave.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/avs.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/awk.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/baan.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/bash.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/batch.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/blitzbasic.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/c.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/caml.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/ch.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/cmake.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/cobol.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/cpp.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/cs.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/d.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/devicetree.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/diff.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/docbook.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/eiffel.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/f77.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/f95.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/flagship.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/flash.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/freebasic.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/gap.tes 
+%%LEXILLA%%%%DATADIR%%/lib/lexers/git.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/go.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/gob.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/html.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/idl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/inno.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/java.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/js.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/kix.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/lisp.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/lout.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/lua.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/make.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/mako.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/markdown.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/matlab.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/mmixal.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/octave.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/oscript.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/pascal.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/perl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/php.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/pike.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/pov.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/powerpro.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/purebasic.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/python.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/r.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/rc.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/rebol.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/rust.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/scheme.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/specman.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/spice.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/swift.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/systemverilog.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/tacl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/tal.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/tcl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/test.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/troff.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/vala.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/vb.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/verilog.tes 
+%%LEXILLA%%%%DATADIR%%/lib/lexers/vhdl.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/vxml.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/xml.tes +%%LEXILLA%%%%DATADIR%%/lib/lexers/yaml.tes +%%DATADIR%%/lib/lexers/woman.tes +%%DATADIR%%/lib/session.tes +%%DATADIR%%/lib/string.tes +%%DATADIR%%/lib/women/grosciteco.tes.1.woman +%%DATADIR%%/lib/women/grosciteco.tes.1.woman.tec +%%DATADIR%%/lib/women/sciteco.1.woman +%%DATADIR%%/lib/women/sciteco.1.woman.tec +%%DATADIR%%/lib/women/sciteco.7.woman +%%DATADIR%%/lib/women/sciteco.7.woman.tec +%%DATADIR%%/lib/women/tedoc.tes.1.woman +%%DATADIR%%/lib/women/tedoc.tes.1.woman.tec +%%DATADIR%%/sample.teco_ini +%%DATADIR%%/sciteco.tmac +%%GTK%%%%DATADIR%%/fallback.css +%%GTK%%share/icons/hicolor/16x16/apps/sciteco.png +%%GTK%%share/icons/hicolor/32x32/apps/sciteco.png +%%GTK%%share/icons/hicolor/48x48/apps/sciteco.png +%%GTK%%share/icons/hicolor/256x256/apps/sciteco.png +%%GTK%%share/applications/sciteco.desktop diff --git a/freebsd/scripts/xvfb-run.sh b/freebsd/scripts/xvfb-run.sh new file mode 100755 index 0000000..b4fd5a0 --- /dev/null +++ b/freebsd/scripts/xvfb-run.sh @@ -0,0 +1,118 @@ +#!/bin/sh +# --- T2-COPYRIGHT-NOTE-BEGIN --- +# This copyright note is auto-generated by ./scripts/Create-CopyPatch. +# +# T2 SDE: package/.../xorg-server/xvfb-run.sh +# Copyright (C) 2005 The T2 SDE Project +# Copyright (C) XXXX - 2005 Debian +# +# More information can be found in the files COPYING and README. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. A copy of the +# GNU General Public License can be found in the file COPYING. 
+# --- T2-COPYRIGHT-NOTE-END --- + +# $Id: xvfb-run 2166 2005-01-27 07:54:19Z branden $ +# from: http://necrotic.deadbeast.net/xsf/XFree86/trunk/debian/local/xvfb-run + +# This script starts an instance of Xvfb, the "fake" X server, runs a command +# with that server available, and kills the X server when done. The return +# value of the command becomes the return value of this script. +# +# If anyone is using this to build a Debian package, make sure the package +# Build-Depends on xvfb, xbase-clients, and xfonts-base. +# +# This script has been modified by Robin Haberkorn to remove dependencies on +# GNU `fmt` and `getopt` for BSD compatibility. + +set -e + +PROGNAME=xvfb-run +SERVERNUM=99 +AUTHFILE= +ERRORFILE=/dev/null +STARTWAIT=3 +XVFBARGS="-screen 0 640x480x8" +LISTENTCP="-nolisten tcp" +XAUTHPROTO=. + +# Display a message, wrapping lines at the terminal width. +message () { + echo "$PROGNAME: $*" | fmt +} + +# Display an error message. +error () { + message "error: $*" >&2 +} + +# Find a free server number by looking at .X*-lock files in /tmp. +find_free_servernum() { + # Sadly, the "local" keyword is not POSIX. Leave the next line commented in + # the hope Debian Policy eventually changes to allow it in /bin/sh scripts + # anyway. + #local i + + i=$SERVERNUM + while [ -f /tmp/.X$i-lock ]; do + i=$(($i + 1)) + done + echo $i +} + +SERVERNUM=$(find_free_servernum) + +if [ -z "$*" ]; then + error "need a command to run" + exit 2 +fi + +if ! which xauth >/dev/null; then + error "xauth command not found" + exit 3 +fi + +# If the user did not specify an X authorization file to use, set up a temporary +# directory to house one. +if [ -z "$AUTHFILE" ]; then + XVFB_RUN_TMPDIR="${TMPDIR:-/tmp}/$PROGNAME.$$" + if ! mkdir -p -m 700 "$XVFB_RUN_TMPDIR"; then + error "temporary directory $XVFB_RUN_TMPDIR already exists" + exit 4 + fi + AUTHFILE=$(mktemp -p "$XVFB_RUN_TMPDIR" Xauthority) +fi + +# Start Xvfb. 
+MCOOKIE=$(mcookie) +XAUTHORITY=$AUTHFILE xauth add ":$SERVERNUM" "$XAUTHPROTO" "$MCOOKIE" \ + >"$ERRORFILE" 2>&1 +XAUTHORITY=$AUTHFILE Xvfb ":$SERVERNUM" $XVFBARGS $LISTENTCP >"$ERRORFILE" \ + 2>&1 & +XVFBPID=$! +sleep "$STARTWAIT" + +# Start the command and save its exit status. +set +e +DISPLAY=:$SERVERNUM XAUTHORITY=$AUTHFILE "$@" 2>&1 +RETVAL=$? +set -e + +# Kill Xvfb now that the command has exited. +kill $XVFBPID + +# Clean up. +XAUTHORITY=$AUTHFILE xauth remove ":$SERVERNUM" >"$ERRORFILE" 2>&1 +if [ -n "$XVFB_RUN_TMPDIR" ]; then + if ! rm -r "$XVFB_RUN_TMPDIR"; then + error "problem while cleaning up temporary directory" + exit 5 + fi +fi + +# Return the executed command's exit status. +exit $RETVAL + +# vim:set ai et sts=4 sw=4 tw=80: diff --git a/lib/Makefile.am b/lib/Makefile.am index a458692..ac45c76 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -99,6 +99,8 @@ dist_lexer_DATA += lexers/verilog.tes \ lexers/python.tes \ lexers/yaml.tes \ lexers/markdown.tes \ - lexers/asciidoc.tes + lexers/asciidoc.tes \ + lexers/troff.tes \ + lexers/git.tes endif diff --git a/lib/colors/solarized.tes b/lib/colors/solarized.tes index 7479c31..25a256a 100644 --- a/lib/colors/solarized.tes +++ b/lib/colors/solarized.tes @@ -138,5 +138,5 @@ Q[solarized.light]"T :M[solarized.light] | :M[solarized.dark] ' * register `solarized.toggle` as the function key macro for F5. * This terminates the command line as it cannot be rubbed out. *! -@[F5]{(M[solarized.toggle])} -1U[F5] +@[F5]{(M[solarized.toggle])} +1U[F5] diff --git a/lib/fnkeys.tes b/lib/fnkeys.tes index d0c0e66..fd6b332 100644 --- a/lib/fnkeys.tes +++ b/lib/fnkeys.tes @@ -1,99 +1,126 @@ -! Here we define some reasonable default function key macros. - Their corresponding command-line editing macros begin with ! - -! edit command line to move <n> chars, updating existing move ops ! +!* + * Here we define some reasonable default function key macros. + * Their corresponding command-line editing macros begin with . 
+ *! + +!* + * Edit command line to move <n> chars, updating existing move ops. + * The command line at the time of call looks like: (123C)(M[FOO + *! @#c{{U.c <-A-("=-D1;' -D> - ."> - -A-)"= -S(\U.v .,Z-2D 0A-C"=2DQ.v|2D-Q.v'%.c | I(Q.c ' - | I(Q.c ' - "> Q.c\IC) | -Q.c\IR) ' + .-4"< Oins ' -2A-C"N -2A-R"N Oins ' ' -A-)"N Oins ' + <R -A-("=1;'> + \U.v .-1,Z-2D 0A-R"=-'Q.v%.c 2D + !ins! + I( Q.c"> Q.c\IC | -Q.c\IR ' I) }} -! Make DELETE an ESCAPE surrogate. - Macro is enabled everywhere. ! -@[DC]{} - -! Make SHIFT+DELETE a rubout/re-insert key. - This reverses the ^G modifier for BACKSPACE. - The macro is enabled everywhere. ! -@[SDC]{} - -! Command line editing macros. - They are enabled only in the start state (i.e. they - have no effect in string arguments, etc.). ! +!* + * Make DELETE an ESCAPE surrogate. + * Macro is enabled everywhere. + *! +@[DC]{} + +!* + * Make SHIFT+DELETE a rubout/re-insert key. + * This reverses the ^G modifier for BACKSPACE. + * The macro is enabled everywhere. + *! +@[SDC]{} + +!* + * Command line editing macros. + * They are enabled only in the start state (i.e. they + * have no effect in string arguments, etc.). + *! 
@[HOME]{ - .ESLINEFROMPOSITIONESPOSITIONFROMLINEU.p + ESLINEFROMPOSITIONESPOSITIONFROMLINE:U.p Q.pU.l <Q.l-."U 1; ' Q.l-.AU.c Q.c- "N Q.c-9"N Q.lU.p 1; '' %.l> + Q.pESGETCOLUMN,4EJ Q.p-.M#c } -@[HOME]{(M[HOME]} -1U[HOME] +@[HOME]{(M[HOME]} +1U[HOME] @[END]{ - .ESLINEFROMPOSITIONESGETLINEENDPOSITION-.M#c + ESLINEFROMPOSITIONESGETLINEENDPOSITIONU.p + Q.pESGETCOLUMN,4EJ + Q.p:-.M#c } -@[END]{(M[END]} -1U[END] +@[END]{(M[END]} +1U[END] @[NPAGE]{ - .ESLINEFROMPOSITION+(ESLINESONSCREEN) + 0,4EJ + ESLINEFROMPOSITION+(ESLINESONSCREEN) ESPOSITIONFROMLINEU.p - Q.p"< Z | Q.p '-.M#c + Q.p"< Z | Q.p: '-.M#c } -@[NPAGE]{(M[NPAGE]} -1U[NPAGE] +@[NPAGE]{(M[NPAGE]} +1U[NPAGE] @[PPAGE]{ - .ESLINEFROMPOSITION-(ESLINESONSCREEN)U.l - Q.l"< 0 | Q.lESPOSITIONFROMLINE '-.M#c + 0,4EJ + ESLINEFROMPOSITION-(ESLINESONSCREEN)U.l + Q.l"< 0 | Q.lESPOSITIONFROMLINE: '-.M#c } -@[PPAGE]{(M[PPAGE]} -1U[PPAGE] +@[PPAGE]{(M[PPAGE]} +1U[PPAGE] @[LEFT]{ - ."=0|-1'M#c + ."=0|.-1'U.p + Q.pESGETCOLUMN,4EJ + Q.p-.M#c } -@[LEFT]{(M[LEFT]} -1U[LEFT] +@[LEFT]{(M[LEFT]} +1U[LEFT] @[SLEFT]{ - 0,0,.ESWORDSTARTPOSITIONESWORDSTARTPOSITION-.M#c + 0,0,ESWORDSTARTPOSITIONESWORDSTARTPOSITIONU.p + Q.pESGETCOLUMN,4EJ + Q.p:-.M#c } -@[SLEFT]{(M[SLEFT]} -1U[SLEFT] +@[SLEFT]{(M[SLEFT]} +1U[SLEFT] @[RIGHT]{ - .-Z"=0|1'M#c + .-Z"=.|.+1'U.p + Q.pESGETCOLUMN,4EJ + Q.p-.M#c } -@[RIGHT]{(M[RIGHT]} -1U[RIGHT] +@[RIGHT]{(M[RIGHT]} +1U[RIGHT] @[SRIGHT]{ - 0,0,.ESWORDENDPOSITIONESWORDENDPOSITION-.M#c + 0,0,ESWORDENDPOSITIONESWORDENDPOSITIONU.p + Q.pESGETCOLUMN,4EJ + Q.p:-.M#c } -@[SRIGHT]{(M[SRIGHT]} -1U[SRIGHT] +@[SRIGHT]{(M[SRIGHT]} +1U[SRIGHT] @[UP]{ - .ESGETCOLUMN - (.ESLINEFROMPOSITION-1) - ESFINDCOLUMN-.M#c + 4EJ(ESLINEFROMPOSITION-1)ESFINDCOLUMN:-.M#c } -@[UP]{(M[UP]} -1U[UP] +@[UP]{(M[UP]} +1U[UP] @[DOWN]{ - .ESGETCOLUMN - (.ESLINEFROMPOSITION+1) - ESFINDCOLUMN-.M#c + 4EJ(ESLINEFROMPOSITION+1)ESFINDCOLUMN:-.M#c } -@[DOWN]{(M[DOWN]} -1U[DOWN] - -@[CLOSE]{(EX)} -1U[CLOSE] - -! enable function key (macro) support ! 
-0,64ED +@[DOWN]{(M[DOWN]} +1U[DOWN] + +@[CLOSE]{(EX)} +1U[CLOSE] + +!* + * Zoom with F9/F10 if function keys are enabled. + * This is automatically rubbed out. + *! +@[F9]{(ESZOOMOUT{-13D}} +1U[F9] +@[F10]{(ESZOOMIN{-12D}} +1U[F10] diff --git a/lib/lexer.tes b/lib/lexer.tes Binary files differindex 82d8bd9..7381e62 100644 --- a/lib/lexer.tes +++ b/lib/lexer.tes diff --git a/lib/lexers/cpp.tes b/lib/lexers/cpp.tes index d1acce4..5c3ce4a 100644 --- a/lib/lexers/cpp.tes +++ b/lib/lexers/cpp.tes @@ -12,7 +12,7 @@ :EN*.hppQ*"S -1 ' :EN*.hxxQ*"S -1 ' :EN*.ippQ*"S -1 ' - :EN*.mmQ*"S -1 ' + !*:EN*.mmQ*"S -1 '*! :EN*.smaQ*"S -1 ' :EN*.inoQ* } diff --git a/lib/lexers/git.tes b/lib/lexers/git.tes new file mode 100644 index 0000000..3162f78 --- /dev/null +++ b/lib/lexers/git.tes @@ -0,0 +1,18 @@ +!* Git commit and rebase messages *! + +@[lexer.test.git]{ + :EN*/COMMIT_EDITMSGQ*"S -1 ' + :EN*/TAG_EDITMSGQ*"S -1 ' + :EN*/MERGE_MSGQ*"S -1 ' + :EN*/git-rebase-todoQ* +} + +@[lexer.set.git]{ + :M[color.comment],1M[color.set] + + .U.p + J< .-Z"= 1; ' + 0A-#"= ESSTARTSTYLING 1,(Q.lESLINELENGTH)ESSETSTYLING ' + :L; %.l> + Q.pJ +} diff --git a/lib/lexers/html.tes b/lib/lexers/html.tes index b09012d..61a8448 100644 --- a/lib/lexers/html.tes +++ b/lib/lexers/html.tes @@ -1,4 +1,4 @@ -! AUTO-GENERATED FROM SCITE PROPERTY SET ! +!* HTML and embedded scripting languages *! @[lexer.test.html]{ :EN*.htmlQ*"S -1 ' @@ -78,21 +78,66 @@ __dir__ __file__ __function__ __line__ __method__ __namespace__ __sleep __wakeup 5ESSETKEYWORDS ELEMENT DOCTYPE ATTLIST ENTITY NOTATION + !* HTML *! + :M[color.keyword],1M[color.set] :M[color.number],5M[color.set] :M[color.string],6M[color.set] - :M[color.string],7M[color.set] + :M[color.string2],7M[color.set] + :M[color.comment],9M[color.set] + :M[color.variable],10M[color.set] + :M[color.preproc],17M[color.set] + !* Embedded Javascript *! 
:M[color.comment],42M[color.set] :M[color.comment],43M[color.set] :M[color.comment],44M[color.set] :M[color.number],45M[color.set] :M[color.keyword],47M[color.set] :M[color.string],48M[color.set] - :M[color.string],49M[color.set] + :M[color.string2],49M[color.set] :M[color.operator],50M[color.set] + !* ASP Javascript *! + :M[color.comment],57M[color.set] + :M[color.comment],58M[color.set] + :M[color.comment],59M[color.set] + :M[color.number],60M[color.set] + :M[color.keyword],62M[color.set] + :M[color.string],63M[color.set] + :M[color.string2],64M[color.set] + :M[color.operator],65M[color.set] + !* Embedded VBScript *! :M[color.comment],72M[color.set] + :M[color.number],73M[color.set] + :M[color.keyword],74M[color.set] + :M[color.string],75M[color.set] + !* ASP VBScript *! :M[color.comment],82M[color.set] + :M[color.number],83M[color.set] + :M[color.keyword],84M[color.set] + :M[color.string],85M[color.set] + !* Embedded Python *! :M[color.comment],92M[color.set] + :M[color.number],93M[color.set] + :M[color.string],94M[color.set] + :M[color.string2],95M[color.set] + :M[color.string],97M[color.set] + :M[color.string2],98M[color.set] + :M[color.operator],101M[color.set] + !* ASP Python *! :M[color.comment],107M[color.set] + :M[color.number],108M[color.set] + :M[color.string],109M[color.set] + :M[color.string2],110M[color.set] + :M[color.string],112M[color.set] + :M[color.string2],113M[color.set] + :M[color.operator],116M[color.set] + !* PHP *! + :M[color.string],119M[color.set] + :M[color.string2],120M[color.set] + :M[color.keyword],121M[color.set] + :M[color.number],122M[color.set] + :M[color.variable],123M[color.set] :M[color.comment],124M[color.set] :M[color.comment],125M[color.set] + :M[color.variable],126M[color.set] + :M[color.operator],127M[color.set] } diff --git a/lib/lexers/troff.tes b/lib/lexers/troff.tes new file mode 100644 index 0000000..a10d3e9 --- /dev/null +++ b/lib/lexers/troff.tes @@ -0,0 +1,85 @@ +!* troff/nroff *! 
+ +@[lexer.test.troff]{ + :EN*.groffQ*"S -1 ' + :EN*.roffQ*"S -1 ' + :EN*.meQ*"S -1 ' + :EN*.mmQ*"S -1 ' + :EN*.msQ*"S -1 ' + :EN*.momQ*"S -1 ' + :EN*.manQ*"S -1 ' + :EN*.mdocQ*"S -1 ' + :EN*.tmacQ*"S -1 ' + :EN*.[12345678]Q* +} + +!* Heirloom Troff specific requests *! +[lexer.troff.heirloom] + bleedat breakchar brnl brpnl + cropat dch dwh + errprint + fallback fdeferlig feature fkern flig fp fps fspacewidth + hidechar hylang hylen hypp + index + kernafter kernbefore kernpair + lc_ctype lds letadj lhang lnr lnrf lpfx + mediasize minss + nhychar nrf + padj papersize psbb pshape + recursionlimit rhang + sentchar spacewidth + track transchar trimat + unwatch unwatchn + watch watchlength watchn + xflag +!* Neatroff-specific requests *! +[lexer.troff.neatroff] + << >> cl co co+ co< co> eos ff ffsc fmap + hycost hydash hystop in2 kn ochar pmll ssh ti2 + +@[lexer.set.troff]{ + ESSETILEXERtroff + !* Predefined requests (derived from Groff) *! + 0ESSETKEYWORDS + ab ad af aln als am am1 ami ami1 as as1 asciify + backtrace bd blm box boxa bp br brp break + c2 cc ce cf cflags ch char chop class close color composite continue cp cs cu + da de de1 defcolor dei dei1 device devicem di do ds ds1 dt + ec ecr ecs el em eo ev evc ex + fam fc fchar fcolor fi fl fp fschar fspecial ft ftr fzoom + gcolor + hc hcode hla hlm hpf hpfa hpfcode hw hy hym hys + ie if ig . in it itc + kern + lc length linetabs linetabs lf lg ll lsm ls lt + mc mk mso + na ne nf nh nm nn nop nr nroff ns nx + open opena os output + pc pev pi pl pm pn pnr po ps psbb pso ptr pvs pvs + rchar rd return rfschar rj rm rn rnn rr rs rt + schar shc shift sizes so sp special spreadwarn ss sty substring sv sy + ta tc ti tkf tl tm tm1 tmc tr trf trin trnt troff + uf ul unformat + vpt vs + warn warnscale wh while write writec writem + Q[lexer.troff.heirloom] Q[lexer.troff.neatroff] + !* Flow control requests/commands with conditionals *! + 1ESSETKEYWORDSif ie while + !* Flow control requests/commands without conditionals *! 
+ 2ESSETKEYWORDSel nop + !* Requests and commands, initiating ignore blocks *! + 3ESSETKEYWORDSig + !* Requests and commands with end-macros. + Mom macros alias MAC to de. *! + 4ESSETKEYWORDSam am1 de de1 MAC + + :M[color.keyword],1M[color.set] + :M[color.preproc],2M[color.set] + :M[color.number],3M[color.set] + :M[color.operator],4M[color.set] + :M[color.string],5M[color.set] + :M[color.comment],6M[color.set] + !*:M[color.comment],7M[color.set]*! + 7U.i 20<:M[color.variable],%.iM[color.set]> + :M[color.preproc2],17M[color.set] +} diff --git a/sample.teco_ini b/sample.teco_ini index 95e56a6..2935d95 100644 --- a/sample.teco_ini +++ b/sample.teco_ini @@ -16,6 +16,9 @@ EMQ[$SCITECOPATH]/session.tes :Q*+1Oedit 32,0ED + !* non-UTF-8 documents are assumed to be in latin1 (8859-1) *! + EE"N 1024<:C; -A"T 1EE 1;'> J ' + M[lexer.auto] ! Set up margins ! @@ -30,6 +33,7 @@ EMQ[$SCITECOPATH]/session.tes !edit! ! Add code here to execute when a document is edited ! + ESGETCOLUMN,4EJ !close! @@ -49,12 +53,15 @@ EMQ[$SCITECOPATH]/session.tes The size unit is 1pt/100 ! ! [lexer.font]Monospace 1300U[lexer.font] ! -! Uncomment to enable default keyboard macros and function keys ! -! EMQ[$SCITECOPATH]/fnkeys.tes ! +! Enable default function key macros ! +EMQ[$SCITECOPATH]/fnkeys.tes -! Uncomment if XTerm allows clipboard operations ! +! Uncomment if terminal supports OSC-52 clipboards ! ! 0,256ED ! +! Uncomment to enable Unicode icons in the Curses UI ! +! 0,512ED ! + ! Uncomment to tweak the memory limit ! ! 500*1000*1000,2EJ ! @@ -69,7 +76,7 @@ Z"= | [session.path] ! disables session saving ! 
[.f - <:L;R 0X.f [* EBQ.f ]* L> + <:L;R 0X.f [* EBN.f ]* L> ].f -EF ' diff --git a/src/Makefile.am b/src/Makefile.am index b9aca8a..5b2572e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -67,10 +67,9 @@ noinst_PROGRAMS = sciteco-minimal sciteco_minimal_SOURCES = symbols-scintilla.c symbols-scilexer.c : sciteco-minimal$(EXEEXT) endif -sciteco_minimal_LDADD = libsciteco-base.la \ - @SCINTILLA_PATH@/bin/scintilla.a +sciteco_minimal_LDADD = libsciteco-base.la $(LIBSCINTILLA) if LEXILLA -sciteco_minimal_LDADD += @LEXILLA_PATH@/bin/liblexilla.a +sciteco_minimal_LDADD += $(LIBLEXILLA) endif # Scintilla is unfortunately still written in C++, so we must force # Automake to use the C++ linker when linking the binaries. @@ -99,10 +98,10 @@ CLEANFILES = $(BUILT_SOURCES) \ symbols-scintilla.c : @SCINTILLA_PATH@/include/Scintilla.h \ symbols-extract.tes - $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \ + $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \ -p "SCI_" -n teco_symbol_list_scintilla $@ $< symbols-scilexer.c : @LEXILLA_PATH@/include/SciLexer.h \ symbols-extract.tes - $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \ + $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \ -p "SCE_" -n teco_symbol_list_scilexer $@ $< diff --git a/src/cmdline.c b/src/cmdline.c index 58d48b4..816816c 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,6 +52,7 @@ #include "eol.h" #include "error.h" #include "qreg.h" +#include "glob.h" #include "cmdline.h" #if defined(HAVE_MALLOC_TRIM) && !defined(HAVE_DECL_MALLOC_TRIM) @@ -81,12 +82,12 @@ static teco_string_t teco_last_cmdline = {NULL, 0}; * @param error A GError. 
* @return FALSE to throw a GError */ -gboolean +static gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error) { const teco_string_t src = {(gchar *)data, len}; - teco_string_t old_cmdline = {NULL, 0}; - guint repl_pc = 0; + g_auto(teco_string_t) old_cmdline = {NULL, 0}; + gsize repl_pc = 0; teco_cmdline.machine.macro_pc = teco_cmdline.pc = teco_cmdline.effective_len; @@ -109,8 +110,6 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) /* * Parse/execute characters, one at a time so * undo tokens get emitted for the corresponding characters. - * - * FIXME: The inner loop should be factored out. */ while (teco_cmdline.pc < teco_cmdline.effective_len) { g_autoptr(GError) tmp_error = NULL; @@ -125,7 +124,8 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) teco_qreg_t *cmdline_reg = teco_qreg_table_find(&teco_qreg_table_globals, "\e", 1); teco_string_t new_cmdline; - if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, error)) + if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, + NULL, error)) return FALSE; /* @@ -160,6 +160,7 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) teco_string_clear(&teco_cmdline.str); teco_cmdline.str = old_cmdline; + memset(&old_cmdline, 0, sizeof(old_cmdline)); teco_cmdline.machine.macro_pc = teco_cmdline.pc = repl_pc; /* rubout cmdline replacement command */ @@ -179,55 +180,65 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) return TRUE; } +static gboolean +teco_cmdline_rubin(GError **error) +{ + if (!teco_cmdline.str.len) + return TRUE; + + const gchar *start, *end, *next; + start = teco_cmdline.str.data+teco_cmdline.effective_len; + end = teco_cmdline.str.data+teco_cmdline.str.len; + next = g_utf8_find_next_char(start, end) ? : end; + return teco_cmdline_insert(start, next-start, error); +} + +/** + * Process key press or expansion of key macro. 
+ * + * Should be called only with the results of a single keypress. + * They are considered an unity and in case of errors, we + * rubout the entire sequence (unless there was a $$ return in the + * middle). + * + * @param data Key presses in UTF-8. + * @param len Length of data. + * @param error A GError. + * @return FALSE if error was set. + * If TRUE was returned, there could still have been an error, + * but it has already been handled. + */ gboolean -teco_cmdline_keypress_c(gchar key, GError **error) +teco_cmdline_keypress(const gchar *data, gsize len, GError **error) { + const teco_string_t str = {(gchar *)data, len}; teco_machine_t *machine = &teco_cmdline.machine.parent; - g_autoptr(GError) tmp_error = NULL; + + if (!teco_string_validate_utf8(&str)) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 sequence"); + return FALSE; + } /* - * Cleanup messages,etc... + * Cleanup messages, etc... */ teco_interface_msg_clear(); - /* - * Process immediate editing commands, inserting - * characters as necessary into the command line. - */ - if (!machine->current->process_edit_cmd_cb(machine, NULL, key, &tmp_error)) { - if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { - /* - * Return from top-level macro, results - * in command line termination. - * The return "arguments" are currently - * ignored. - */ - g_assert(machine->current == &teco_state_start); + gsize start_pc = teco_cmdline.effective_len; - teco_interface_popup_clear(); + for (guint i = 0; i < len; i = g_utf8_next_char(data+i) - data) { + gunichar chr = g_utf8_get_char(data+i); + g_autoptr(GError) tmp_error = NULL; - if (teco_quit_requested) { - /* cought by user interface */ - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); - return FALSE; - } + /* + * Process immediate editing commands, inserting + * characters as necessary into the command line. 
+ */ + if (machine->current->process_edit_cmd_cb(machine, NULL, chr, &tmp_error)) + continue; - teco_undo_clear(); - /* also empties all Scintilla undo buffers */ - teco_ring_set_scintilla_undo(TRUE); - teco_view_set_scintilla_undo(teco_qreg_view, TRUE); - /* - * FIXME: Reset main machine? - */ - teco_goto_table_clear(&teco_cmdline.machine.goto_table); - teco_expressions_clear(); - g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len); - - teco_string_clear(&teco_last_cmdline); - teco_last_cmdline = teco_cmdline.str; - memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str)); - teco_cmdline.effective_len = 0; - } else { + if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { /* * NOTE: Error message already displayed in * teco_cmdline_insert(). @@ -237,29 +248,76 @@ teco_cmdline_keypress_c(gchar key, GError **error) * is thrown. They must be executed so * as if the character had never been * inserted. + * Actually we rub out the entire command line + * up until the insertion point. */ - teco_undo_pop(teco_cmdline.pc); - teco_cmdline.effective_len = teco_cmdline.pc; + teco_undo_pop(start_pc); + teco_cmdline.effective_len = start_pc; /* program counter could be messed up */ teco_cmdline.machine.macro_pc = teco_cmdline.effective_len; - } #ifdef HAVE_MALLOC_TRIM + /* + * Undo stacks can grow very large - sometimes large enough to + * make the system swap and become unresponsive. + * This shrinks the program break after lots of memory has + * been freed, reducing the virtual memory size and aiding + * in recovering from swapping issues. + * + * This is particularily important with some memory limiting backends + * after hitting the memory limit* as otherwise the program's resident + * size won't shrink and it would be impossible to recover. 
+ */ + if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT)) + malloc_trim(0); +#endif + + break; + } + /* - * Undo stacks can grow very large - sometimes large enough to - * make the system swap and become unresponsive. - * This shrinks the program break after lots of memory has - * been freed, reducing the virtual memory size and aiding - * in recovering from swapping issues. - * - * This is particularily important with some memory limiting backends - * after hitting the memory limit* as otherwise the program's resident - * size won't shrink and it would be impossible to recover. + * Return from top-level macro, results + * in command line termination. + * The return "arguments" are currently + * ignored. + */ + g_assert(machine->current == &teco_state_start); + + teco_interface_popup_clear(); + + if (teco_quit_requested) { + /* caught by user interface */ + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); + return FALSE; + } + + teco_undo_clear(); + /* also empties all Scintilla undo buffers */ + teco_ring_set_scintilla_undo(TRUE); + teco_view_set_scintilla_undo(teco_qreg_view, TRUE); + /* + * FIXME: Reset main machine? */ - if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN) || - g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT)) - malloc_trim(0); + teco_goto_table_clear(&teco_cmdline.machine.goto_table); + teco_expressions_clear(); + g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len); + + teco_string_clear(&teco_last_cmdline); + teco_last_cmdline = teco_cmdline.str; + memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str)); + teco_cmdline.effective_len = 0; + +#ifdef HAVE_MALLOC_TRIM + /* see above */ + malloc_trim(0); #endif + + /* + * Continue with the other keys, + * but we obviously can't rub out beyond the return if any + * error occurs later on. 
+ */ + start_pc = 0; } /* @@ -269,33 +327,40 @@ teco_cmdline_keypress_c(gchar key, GError **error) return TRUE; } -gboolean -teco_cmdline_fnmacro(const gchar *name, GError **error) +teco_keymacro_status_t +teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error) { g_assert(name != NULL); + if (name_len < 0) + name_len = strlen(name); + /* * NOTE: It should be safe to allocate on the stack since * there are only a limited number of possible function key macros. */ - gchar macro_name[1 + strlen(name)]; - macro_name[0] = TECO_CTL_KEY('F'); - memcpy(macro_name+1, name, sizeof(macro_name)-1); + gchar macro_name[1 + name_len]; + macro_name[0] = TECO_CTL_KEY('K'); + memcpy(macro_name+1, name, name_len); - teco_qreg_t *macro_reg; - - if (teco_ed & TECO_ED_FNKEYS && - (macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name)))) { + teco_qreg_t *macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name)); + if (macro_reg) { teco_int_t macro_mask; if (!macro_reg->vtable->get_integer(macro_reg, ¯o_mask, error)) - return FALSE; + return TECO_KEYMACRO_ERROR; - if (macro_mask & teco_cmdline.machine.parent.current->fnmacro_mask) - return TRUE; + /* + * FIXME: This does not work with Q-Register specs embedded into string arguments. + * There should be a keymacro_mask_cb() instead. + */ + if (!((teco_cmdline.machine.parent.current->keymacro_mask | + teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) & ~macro_mask)) + return TECO_KEYMACRO_UNDEFINED; g_auto(teco_string_t) macro_str = {NULL, 0}; - return macro_reg->vtable->get_string(macro_reg, ¯o_str.data, ¯o_str.len, error) && - teco_cmdline_keypress(macro_str.data, macro_str.len, error); + return macro_reg->vtable->get_string(macro_reg, ¯o_str.data, ¯o_str.len, NULL, error) && + teco_cmdline_keypress(macro_str.data, macro_str.len, error) + ? 
TECO_KEYMACRO_SUCCESS : TECO_KEYMACRO_ERROR; } /* @@ -303,28 +368,34 @@ teco_cmdline_fnmacro(const gchar *name, GError **error) * except "CLOSE" which quits the application * (this may loose unsaved data but is better than * not doing anything if the user closes the window). - * NOTE: Doing the check here is less efficient than - * doing it in the UI implementations, but defines - * the default actions centrally. - * Also, fnmacros are only handled after key presses. */ - if (!strcmp(name, "CLOSE")) { + if (name_len == 5 && !strncmp(name, "CLOSE", name_len)) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); - return FALSE; + return TECO_KEYMACRO_ERROR; } - return TRUE; + return TECO_KEYMACRO_UNDEFINED; +} + +static void +teco_cmdline_rubout(void) +{ + const gchar *p; + p = g_utf8_find_prev_char(teco_cmdline.str.data, + teco_cmdline.str.data+teco_cmdline.effective_len); + if (p) { + teco_cmdline.effective_len = p - teco_cmdline.str.data; + teco_undo_pop(teco_cmdline.effective_len); + } } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_cmdline_cleanup(void) { teco_machine_main_clear(&teco_cmdline.machine); teco_string_clear(&teco_cmdline.str); teco_string_clear(&teco_last_cmdline); } -#endif /* * Commandline key processing. 
@@ -337,7 +408,7 @@ teco_cmdline_cleanup(void) */ gboolean -teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { switch (key) { case '\n': /* insert EOL sequence */ @@ -407,23 +478,30 @@ teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gch } teco_interface_popup_clear(); - return teco_cmdline_insert(&key, sizeof(key), error); + + gchar buf[6]; + gsize len = g_unichar_to_utf8(key, buf); + return teco_cmdline_insert(buf, len, error); } gboolean -teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { + /* + * Auto case folding is for syntactic characters, + * so this could be done by working only with a-z and A-Z. + * However, it's also not speed critical. + */ if (teco_ed & TECO_ED_AUTOCASEFOLD) - /* will not modify non-letter keys */ - key = g_ascii_islower(key) ? g_ascii_toupper(key) - : g_ascii_tolower(key); + key = g_unichar_islower(key) ? g_unichar_toupper(key) + : g_unichar_tolower(key); return teco_state_process_edit_cmd(ctx, parent_ctx, key, error); } gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error) + gunichar key, GError **error) { teco_state_t *current = ctx->parent.current; @@ -460,20 +538,15 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * * get the default behaviour of teco_state_process_edit_cmd(). * This may not be a real-life issue serious enough to maintain * a result string even in parse-only mode. 
- * - * FIXME: Does not properly rubout string-building commands at the - * start of the string argument -- ctx->result->len is not - * a valid indicator of argument emptyness. - * Since it chains to teco_state_process_edit_cmd() we will instead - * rubout the entire command. */ if (ctx->result && ctx->result->len > 0) { gboolean is_wordchar = teco_string_contains(&wchars, teco_cmdline.str.data[teco_cmdline.effective_len-1]); teco_cmdline_rubout(); if (ctx->parent.current != current) { /* rub out string building command */ - while (ctx->result->len > 0 && ctx->parent.current != current) + do teco_cmdline_rubout(); + while (ctx->parent.current != current); return TRUE; } @@ -572,8 +645,29 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * } gboolean +teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, + gunichar key, GError **error) +{ + /* + * Allow insertion of characters that would otherwise be interpreted as + * immediate editing commands after ^Q/^R. 
+ */ + switch (key) { + //case TECO_CTL_KEY('G'): + case TECO_CTL_KEY('W'): + case TECO_CTL_KEY('U'): + teco_interface_popup_clear(); + + gchar c = key; + return teco_cmdline_insert(&c, sizeof(c), error); + } + + return teco_state_process_edit_cmd(parent_ctx, NULL, key, error); +} + +gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar chr, GError **error) + gunichar chr, GError **error) { g_assert(ctx->machine_qregspec != NULL); /* We downcast since teco_machine_qregspec_t is private in qreg.c */ @@ -582,7 +676,7 @@ teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *c } gboolean -teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -590,7 +684,7 @@ teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_ } gboolean -teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -626,7 +720,7 @@ teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *par } gboolean -teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t 
*stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -696,8 +790,8 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars); teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped); if (unambiguous && ctx->expectstring.nesting == 1) - teco_string_append_c(&new_chars_escaped, - ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error); } @@ -707,7 +801,61 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t } gboolean -teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) +{ + teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; + teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; + + /* + * NOTE: We don't just define teco_state_stringbuilding_start_process_edit_cmd(), + * as it would be hard to subclass/overwrite for different main machine states. 
+ */ + if (!stringbuilding_current->is_start) + return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error); + + switch (key) { + case '\t': { /* autocomplete file name */ + if (teco_cmdline.modifier_enabled) + break; + + if (teco_interface_popup_is_shown()) { + /* cycle through popup pages */ + teco_interface_popup_show(); + return TRUE; + } + + if (teco_string_contains(&ctx->expectstring.string, '\0')) + /* null-byte not allowed in file names */ + return TRUE; + + /* + * We do not support autocompleting glob patterns. + * + * FIXME: What if the last autocompletion inserted escaped glob + * characters? + * Perhaps teco_file_auto_complete() should natively support glob patterns. + */ + if (teco_globber_is_pattern(ctx->expectstring.string.data)) + return TRUE; + + g_auto(teco_string_t) new_chars, new_chars_escaped; + gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars); + g_autofree gchar *pattern_escaped = teco_globber_escape_pattern(new_chars.data); + teco_machine_stringbuilding_escape(stringbuilding_ctx, pattern_escaped, strlen(pattern_escaped), &new_chars_escaped); + if (unambiguous && ctx->expectstring.nesting == 1) + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? 
'}' : ctx->expectstring.machine.escape_char); + + return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error); + } + } + + /* ^W should behave like in commands accepting files */ + return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error); +} + +gboolean +teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -745,11 +893,12 @@ teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t * } } - return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error); + /* ^W should behave like in commands accepting files */ + return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error); } gboolean -teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { g_assert(ctx->expectqreg != NULL); /* @@ -761,7 +910,7 @@ teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t } gboolean -teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { switch (key) { case '\t': { /* autocomplete Q-Register name */ @@ -796,7 +945,7 @@ teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_ } gboolean -teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar 
key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = teco_machine_qregspec_get_stringbuilding(ctx); teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -836,7 +985,7 @@ teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_m } gboolean -teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -881,7 +1030,7 @@ teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa } gboolean -teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -926,7 +1075,7 @@ teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_mac } gboolean -teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -973,7 +1122,7 @@ teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren } gboolean -teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) 
+teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -1004,8 +1153,8 @@ teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren gboolean unambiguous = teco_help_auto_complete(ctx->expectstring.string.data, &new_chars); teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped); if (unambiguous && ctx->expectstring.nesting == 1) - teco_string_append_c(&new_chars_escaped, - ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); return new_chars_escaped.len ? teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error) : TRUE; } @@ -1028,7 +1177,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg return &teco_state_start; if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, error)) + !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/cmdline.h b/src/cmdline.h index 85e657a..f4b84e4 100644 --- a/src/cmdline.h +++ b/src/cmdline.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,7 +46,7 @@ typedef struct { gsize effective_len; /** Program counter within the command-line macro */ - guint pc; + gsize pc; /** * Specifies whether the immediate editing modifier @@ 
-60,35 +60,30 @@ typedef struct { extern teco_cmdline_t teco_cmdline; -gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error); +gboolean teco_cmdline_keypress(const gchar *data, gsize len, GError **error); -static inline gboolean -teco_cmdline_rubin(GError **error) -{ - return teco_cmdline.effective_len >= teco_cmdline.str.len || - teco_cmdline_insert(teco_cmdline.str.data + teco_cmdline.effective_len, 1, error); -} +typedef enum { + TECO_KEYMACRO_ERROR = 0, /**< GError occurred */ + TECO_KEYMACRO_SUCCESS, /**< key macro found and inserted */ + TECO_KEYMACRO_UNDEFINED /**< no key macro found */ +} teco_keymacro_status_t; -gboolean teco_cmdline_keypress_c(gchar key, GError **error); +teco_keymacro_status_t teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error); static inline gboolean -teco_cmdline_keypress(const gchar *str, gsize len, GError **error) +teco_cmdline_keymacro_c(gchar key, GError **error) { - for (guint i = 0; i < len; i++) - if (!teco_cmdline_keypress_c(str[i], error)) - return FALSE; + switch (teco_cmdline_keymacro(&key, sizeof(key), error)) { + case TECO_KEYMACRO_ERROR: + return FALSE; + case TECO_KEYMACRO_SUCCESS: + break; + case TECO_KEYMACRO_UNDEFINED: + return teco_cmdline_keypress(&key, sizeof(key), error); + } return TRUE; } -gboolean teco_cmdline_fnmacro(const gchar *name, GError **error); - -static inline void -teco_cmdline_rubout(void) -{ - if (teco_cmdline.effective_len) - teco_undo_pop(--teco_cmdline.effective_len); -} - extern gboolean teco_quit_requested; /* diff --git a/src/core-commands.c b/src/core-commands.c index 4d5b378..0cde7e0 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,7 +45,7 @@ #include "goto-commands.h" #include 
"core-commands.h" -static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error); +static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error); /* * NOTE: This needs some extra code in teco_state_start_input(). @@ -129,7 +129,8 @@ teco_state_start_dot(teco_machine_main_t *ctx, GError **error) { if (!teco_expressions_eval(FALSE, error)) return; - teco_expressions_push(teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } /*$ Z size @@ -145,7 +146,8 @@ teco_state_start_zed(teco_machine_main_t *ctx, GError **error) { if (!teco_expressions_eval(FALSE, error)) return; - teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } /*$ H @@ -162,10 +164,11 @@ teco_state_start_range(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_eval(FALSE, error)) return; teco_expressions_push(0); - teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } -/*$ "\\" +/*$ \[rs] * n\\ -- Insert or read ASCII numbers * \\ -> n * @@ -241,6 +244,7 @@ teco_state_start_loop_open(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_eval(FALSE, error) || !teco_expressions_pop_num_calc(&lctx.counter, -1, error)) return; + lctx.brace_level = teco_brace_level; lctx.pass_through = teco_machine_main_eval_colon(ctx); if (lctx.counter) { @@ -280,6 +284,14 @@ teco_state_start_loop_close(teco_machine_main_t *ctx, GError **error) teco_loop_context_t *lctx = &g_array_index(teco_loop_stack, teco_loop_context_t, teco_loop_stack->len-1); + + /* only non-pass-through loops increase the brace level */ + if (teco_brace_level != lctx->brace_level + 
!lctx->pass_through) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Brace left open at loop end command"); + return; + } + gboolean colon_modified = teco_machine_main_eval_colon(ctx); /* @@ -348,7 +360,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error) { if (teco_loop_stack->len <= ctx->loop_stack_fp) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, - "<;> only allowed in iterations"); + "<;> only allowed in loops"); return; } @@ -373,7 +385,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_discard_args(error)) return; if (!lctx.pass_through && - !teco_expressions_brace_close(error)) + !teco_expressions_brace_return(lctx.brace_level, 0, error)) return; undo__insert_val__teco_loop_stack(teco_loop_stack->len, lctx); @@ -511,11 +523,12 @@ teco_state_start_jump(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_pop_num_calc(&v, 0, error)) return; - if (teco_validate_pos(v)) { + gssize pos = teco_interface_glyphs2bytes(v); + if (pos >= 0) { if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0); - teco_interface_ssm(SCI_GOTOPOS, v, 0); + teco_interface_ssm(SCI_GOTOPOS, pos, 0); if (teco_machine_main_eval_colon(ctx)) teco_expressions_push(TECO_SUCCESS); @@ -531,11 +544,11 @@ static teco_bool_t teco_move_chars(teco_int_t n) { sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); - - if (!teco_validate_pos(pos + n)) + gssize next_pos = teco_interface_glyphs2bytes_relative(pos, n); + if (next_pos < 0) return TECO_FAILURE; - teco_interface_ssm(SCI_GOTOPOS, pos + n, 0); + teco_interface_ssm(SCI_GOTOPOS, next_pos, 0); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, pos, 0); @@ -879,7 +892,7 @@ static gboolean teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_lines, GError **error) { teco_bool_t rc; - teco_int_t from, len; + gssize from, len; /* in bytes */ 
if (!teco_expressions_eval(FALSE, error)) return FALSE; @@ -894,20 +907,24 @@ teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_li len = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0) - from; rc = teco_bool(teco_validate_line(line)); } else { - if (!teco_expressions_pop_num_calc(&len, teco_num_sign, error)) + teco_int_t len_glyphs; + if (!teco_expressions_pop_num_calc(&len_glyphs, teco_num_sign, error)) return FALSE; - rc = teco_bool(teco_validate_pos(from + len)); + gssize to = teco_interface_glyphs2bytes_relative(from, len_glyphs); + rc = teco_bool(to >= 0); + len = to-from; } if (len < 0) { len *= -1; from -= len; } } else { - teco_int_t to = teco_expressions_pop_num(0); - from = teco_expressions_pop_num(0); + teco_int_t to_glyphs = teco_expressions_pop_num(0); + gssize to = teco_interface_glyphs2bytes(to_glyphs); + teco_int_t from_glyphs = teco_expressions_pop_num(0); + from = teco_interface_glyphs2bytes(from_glyphs); len = to - from; - rc = teco_bool(len >= 0 && teco_validate_pos(from) && - teco_validate_pos(to)); + rc = teco_bool(len >= 0 && from >= 0 && to >= 0); } if (teco_machine_main_eval_colon(ctx)) { @@ -1002,6 +1019,9 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error) * This can be an ASCII <code> or Unicode codepoint * depending on Scintilla's encoding of the current * buffer. + * Invalid Unicode byte sequences are reported as + * -1 or -2. + * * - If <n> is 0, return the <code> of the character * pointed to by dot. * - If <n> is 1, return the <code> of the character @@ -1012,28 +1032,33 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error) * * If the position of the queried character is off-page, * the command will yield an error. + * + * If the document is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. */ -/** @todo does Scintilla really return code points??? 
*/ static void teco_state_start_get(teco_machine_main_t *ctx, GError **error) { teco_int_t v; if (!teco_expressions_pop_num_calc(&v, teco_num_sign, error)) return; - v += teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); - /* - * NOTE: We cannot use teco_validate_pos() here since - * the end of the buffer is not a valid position for <A>. - */ - if (v < 0 || v >= teco_interface_ssm(SCI_GETLENGTH, 0, 0)) { + + sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + gssize get_pos = teco_interface_glyphs2bytes_relative(pos, v); + sptr_t len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + + if (get_pos < 0 || get_pos == len) { teco_error_range_set(error, "A"); return; } - teco_expressions_push(teco_interface_ssm(SCI_GETCHARAT, v, 0)); + + teco_expressions_push(teco_interface_get_character(get_pos, len)); } static teco_state_t * -teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1148,7 +1173,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) * * FIXME: Maybe, there should be a special teco_state_t * for beginnings of command-lines? - * It could also be used for a corresponding FNMACRO mask. + * It could also be used for a corresponding KEYMACRO mask. */ if (teco_cmdline.effective_len == 1 && teco_cmdline.str.data[0] == '*') return &teco_state_save_cmdline; @@ -1244,7 +1269,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start, .end_of_macro_cb = NULL, /* Allowed at the end of a macro! 
*/ .is_start = TRUE, - .fnmacro_mask = TECO_FNMACRO_MASK_START + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE ); /*$ F< @@ -1372,7 +1397,7 @@ teco_state_fcommand_cond_else(teco_machine_main_t *ctx, GError **error) } static teco_state_t * -teco_state_fcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1435,7 +1460,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE teco_qreg_t *qreg = teco_qreg_table_find(&teco_qreg_table_globals, "$HOME", 5); g_assert(qreg != NULL); teco_string_t home; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, error)) + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, error)) return NULL; /* @@ -1496,7 +1521,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE TECO_DEFINE_STATE_EXPECTDIR(teco_state_changedir); static teco_state_t * -teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_int_t value = 0; gboolean result = TRUE; @@ -1536,20 +1561,20 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error break; case 'A': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalpha((gchar)value); + result = g_unichar_isalpha(value); break; case 'C': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalnum((gchar)value) || + result = g_unichar_isalnum(value) || value == '.' 
|| value == '$' || value == '_'; break; case 'D': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isdigit((gchar)value); + result = g_unichar_isdigit(value); break; case 'I': if (ctx->mode == TECO_MODE_NORMAL) - result = G_IS_DIR_SEPARATOR((gchar)value); + result = G_IS_DIR_SEPARATOR(value); break; case 'S': case 'T': @@ -1582,15 +1607,15 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error break; case 'R': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalnum((gchar)value); + result = g_unichar_isalnum(value); break; case 'V': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_islower((gchar)value); + result = g_unichar_islower(value); break; case 'W': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isupper((gchar)value); + result = g_unichar_isupper(value); break; default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, @@ -1720,8 +1745,71 @@ teco_state_control_radix(teco_machine_main_t *ctx, GError **error) } } +/*$ ^E glyphs2bytes bytes2glyphs + * glyphs^E -> bytes -- Translate between glyph and byte indexes + * bytes:^E -> glyphs + * ^E -> bytes + * :^E -> length + * + * Translates from glyph/character to byte indexes when called + * without a colon. + * Otherwise when colon-modified, translates from byte indexes + * back to glyph indexes. + * These values can differ in documents with multi-byte + * encodings (of which only UTF-8 is supported). + * It is especially useful to translate between these indexes + * when manually invoking Scintilla messages (\fBES\fP command), as + * they almost always take byte positions. + * + * When called without arguments, \fB^E\fP returns the current + * position (dot) in bytes. + * This is equivalent, but faster than \(lq.^E\(rq. + * \fB:^E\fP without arguments returns the length of the current + * document in bytes, which is equivalent but faster than \(lqZ^E\(rq. 
+ * + * When passing in indexes outside of the document's valid area, + * -1 is returned, so the return value can also be interpreted + * as a TECO boolean, signalling truth/success for invalid indexes. + * This provides an elegant and effective way to validate + * buffer addresses. + */ +static void +teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error) +{ + teco_int_t res; + + if (!teco_expressions_eval(FALSE, error)) + return; + + gboolean colon_modified = teco_machine_main_eval_colon(ctx); + + if (!teco_expressions_args()) { + /* + * This is shorter than .^E or Z^E and avoids unnecessary glyph to + * byte index translations. + * On the other hand :^E is inconsistent, as it will return a byte + * index, instead of glyph index. + */ + res = teco_interface_ssm(colon_modified ? SCI_GETLENGTH : SCI_GETCURRENTPOS, 0, 0); + } else { + teco_int_t pos; + if (!teco_expressions_pop_num_calc(&pos, 0, error)) + return; + if (colon_modified) { + /* teco_interface_bytes2glyphs() does not check addresses */ + res = 0 <= pos && pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0) + ? teco_interface_bytes2glyphs(pos) : -1; + } else { + /* negative values for invalid indexes are passed down. 
*/ + res = teco_interface_glyphs2bytes(pos); + } + } + + teco_expressions_push(res); +} + static teco_state_t * -teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1746,7 +1834,8 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) ['C'] = {&teco_state_start, teco_state_control_exit}, ['O'] = {&teco_state_start, teco_state_control_octal}, ['D'] = {&teco_state_start, teco_state_control_decimal}, - ['R'] = {&teco_state_start, teco_state_control_radix} + ['R'] = {&teco_state_start, teco_state_control_radix}, + ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes} }; /* @@ -1761,7 +1850,7 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control); static teco_state_t * -teco_state_ascii_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { if (ctx->mode == TECO_MODE_NORMAL) teco_expressions_push(chr); @@ -1797,7 +1886,7 @@ TECO_DEFINE_STATE(teco_state_ascii); * only be seen when executing the following command. */ static teco_state_t * -teco_state_escape_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_escape_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { /*$ ^[^[ ^[$ $$ terminate return * [a1,a2,...]$$ -- Terminate command line or return from macro @@ -1891,7 +1980,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_escape, * when it comes to function key macro masking. 
*/ .is_start = TRUE, - .fnmacro_mask = TECO_FNMACRO_MASK_START + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE ); /*$ EF close @@ -1958,6 +2047,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error) * Without any argument ED returns the current flags. * * Currently, the following flags are used by \*(ST: + * - 4: If enabled, prefer raw single-byte ANSI encoding + * for all new buffers and registers. + * This does not change the encoding of any existing + * buffers and any initialized default register when set via + * \fBED\fP, so you might want to launch \*(ST with \fB--8bit\fP. * - 8: Enable/disable automatic folding of case-insensitive * command characters during interactive key translation. * The case of letter keys is inverted, so one or two @@ -1973,14 +2067,17 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error) * of files. * - 32: Enable/Disable buffer editing hooks * (via execution of macro in global Q-Register \(lqED\(rq) - * - 64: Enable/Disable function key macros * - 128: Enable/Disable enforcement of UNIX98 * \(lq/bin/sh\(rq emulation for operating system command * executions - * - 256: Enable/Disable \fBxterm\fP(1) clipboard support. - * Should only be enabled if XTerm allows the - * \fIGetSelection\fP and \fISetSelection\fP window - * operations. + * - 256: Enable/Disable OSC-52 clipboard support. + * Must only be enabled if the terminal emulator is configured + * properly. + * - 512: Enable/Disable Unicode icons in the Curses UI. + * This requires a capable font, like the ones provided + * by the \(lqNerd Fonts\(rq project. + * Changes to this flag in interactive mode may not become + * effective immediately. * * The features controlled thus are discribed in other sections * of this manual. @@ -2098,6 +2195,12 @@ teco_state_ecommand_flags(teco_machine_main_t *ctx, GError **error) * on exit the author is aware of is \fBxterm\fP(1) and * the Linux console driver. 
* You have been warned. Good luck. + * .IP 4 + * The column after the last horizontal movement. + * This is only used by \fBfnkeys.tes\fP and is similar to the Scintilla-internal + * setting \fBSCI_CHOOSECARETX\fP. + * Unless most other settings, this is on purpose not restored on rubout, + * so it "survives" command line replacements. */ static void teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) @@ -2106,9 +2209,12 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) EJ_USER_INTERFACE = 0, EJ_BUFFERS, EJ_MEMORY_LIMIT, - EJ_INIT_COLOR + EJ_INIT_COLOR, + EJ_CARETX }; + static teco_int_t caret_x = 0; + teco_int_t property; if (!teco_expressions_eval(FALSE, error) || !teco_expressions_pop_num_calc(&property, teco_num_sign, error)) @@ -2144,6 +2250,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) teco_interface_init_color((guint)value, (guint32)color); break; + case EJ_CARETX: + caret_x = value; + break; + default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, "Cannot set property %" TECO_INT_FORMAT " " @@ -2180,6 +2290,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) teco_expressions_push(teco_memory_limit); break; + case EJ_CARETX: + teco_expressions_push(caret_x); + break; + default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, "Invalid property %" TECO_INT_FORMAT " " @@ -2292,6 +2406,252 @@ teco_state_ecommand_eol(teco_machine_main_t *ctx, GError **error) } } +static const gchar * +teco_codepage2str(guint codepage) +{ + /* + * The multi-byte charsets are excluded, since we don't + * support them in SciTECO, even though Scintilla has them. + * Contrary to the Scintilla documentation, Gtk supports + * most of them. + * Those that are supported are tested, so the codepage + * mapping should be definitive (although there could be + * similar related codepages). 
+ */ + switch (codepage) { + case SC_CP_UTF8: return "UTF-8"; + case SC_CHARSET_ANSI: + case SC_CHARSET_DEFAULT: return "ISO-8859-1"; /* LATIN1 */ + case SC_CHARSET_BALTIC: return "ISO-8859-13"; /* LATIN7 */ + //case SC_CHARSET_CHINESEBIG5: return "BIG5"; + case SC_CHARSET_EASTEUROPE: return "ISO-8859-2"; /* LATIN2 */ + //case SC_CHARSET_GB2312: return "GB2312"; + case SC_CHARSET_GREEK: return "ISO-8859-7"; // CP1253??? + //case SC_CHARSET_HANGUL: return "UHC"; + /* unsure whether this is supported on Gtk */ + case SC_CHARSET_MAC: return "MAC"; + /* not supported by Gtk */ + case SC_CHARSET_OEM: return "CP437"; + /* + * Apparently, this can be CP1251 on the native Windows + * port of Scintilla. + */ + case SC_CHARSET_RUSSIAN: return "KOI8-R"; + case SC_CHARSET_OEM866: return "CP866"; + case SC_CHARSET_CYRILLIC: return "CP1251"; + //case SC_CHARSET_SHIFTJIS: return "SHIFT-JIS"; + //case SC_CHARSET_SYMBOL: + case SC_CHARSET_TURKISH: return "ISO-8859-9"; /* LATIN5 */ + //case SC_CHARSET_JOHAB: return "JOHAB"; + case SC_CHARSET_HEBREW: return "ISO-8859-8"; // CP1255? + /* + * FIXME: Some arabic codepage is supported by Gtk, + * but I am not sure which. + */ + case SC_CHARSET_ARABIC: return "ISO-8859-6"; // CP720, CP1256??? + /* apparently not supported by Gtk */ + case SC_CHARSET_VIETNAMESE: return "CP1258"; + case SC_CHARSET_THAI: return "ISO-8859-11"; + case SC_CHARSET_8859_15: return "ISO-8859-15"; /* LATIN9 */ + } + + return NULL; +} + +/*$ EE encoding codepage charset + * codepageEE -- Edit current document's encoding (codepage/charset) + * EE -> codepage + * codepage:EE + * :EE -> codepage + * + * When called with an argument, it sets the current codepage, + * otherwise returns it. 
+ * The following codepages are supported: + * - 0: ANSI (raw bytes) + * - 1: ISO-8859-1 (latin1) + * - 77: Macintosh Latin encoding + * - 161: ISO-8859-7 + * - 162: ISO-8859-9 (latin5) + * - 163: CP1258 + * - 177: ISO-8859-8 + * - 178: ISO-8859-6 + * - 186: ISO-8859-13 (latin7) + * - 204: KOI8-R + * - 222: ISO-8859-11 + * - 238: ISO-8859-2 (latin2) + * - 255: CP437 + * - 866: CP866 + * - 1000: ISO-8859-15 (latin9) + * - 1251: CP1251 + * - 65001: UTF-8 + * + * Displaying characters in the single-byte (non-UTF-8) codepages might + * be supported only with the Gtk UI. + * At least 77, 178, 163 and 255 are not displayed correctly on Gtk. + * 65001 (UTF-8) is the default for new buffers. + * 0 (ANSI) should be used when working with raw bytes, + * but is currently displayed like ISO-8859-1 (latin1). + * + * \fBEE\fP does not change the buffer contents itself by default, only + * how it is displayed and how \*(ST interacts with it. + * This allows fixing up the codepage if it is not in the default UTF-8 + * or if codepage guessing failed. + * + * When colon-modified the \fB:EE\fP command will also additionally convert + * the current buffer contents into the new code page, preserving the + * current position (dot). + * This will fail if the conversion would be lossy. + * Conversions from and to UTF-8 \fIshould\fP always be successful. 
+ */ +static void +teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error) +{ + if (!teco_expressions_eval(FALSE, error)) + return; + + gboolean colon_modified = teco_machine_main_eval_colon(ctx); + + guint old_cp = teco_interface_get_codepage(); + + if (!teco_expressions_args()) { + /* get current code page */ + teco_expressions_push(old_cp); + return; + } + + /* + * Set code page + */ + teco_int_t new_cp; + if (!teco_expressions_pop_num_calc(&new_cp, 0, error)) + return; + + if (old_cp == SC_CP_UTF8 && new_cp == SC_CP_UTF8) + return; + + if (teco_current_doc_must_undo() && teco_undo_enabled) { + if (old_cp == SC_CP_UTF8) { /* new_cp != SC_CP_UTF8 */ + undo__teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + undo__teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0); + } else { + undo__teco_interface_ssm(SCI_SETCODEPAGE, 0, 0); + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + undo__teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, old_cp); + /* + * The index is internally reference-counted and could underflow, + * so don't do it more than necessary. + */ + if (new_cp == SC_CP_UTF8) + undo__teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } + } + + teco_int_t dot_glyphs; + if (colon_modified) { + sptr_t dot_bytes = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + dot_glyphs = teco_interface_bytes2glyphs(dot_bytes); + + /* + * Convert buffer to new codepage. + * + * FIXME: Could be optimized slightly by converting first + * before the gap, inserting the converted text and then + * converting after the gap. 
+ */ + const gchar *to_codepage = teco_codepage2str(new_cp); + const gchar *from_codepage = teco_codepage2str(old_cp); + if (!to_codepage || !from_codepage) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Unknown or unsupported codepage/charset"); + return; + } + + const gchar *buf = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0); + gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + g_autofree gchar *converted; + gsize converted_len; + + /* + * This fails if there is no direct translation. + * If we'd use g_convert_with_fallback(), it would be tricky to choose + * fallback characters that will always work. + */ + converted = g_convert(buf, len, to_codepage, from_codepage, + NULL, &converted_len, error); + if (!converted) + return; + + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + teco_interface_ssm(SCI_CLEARALL, 0, 0); + teco_interface_ssm(SCI_APPENDTEXT, converted_len, (sptr_t)converted); + teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); + teco_ring_dirtify(); + + if (teco_current_doc_must_undo()) { + undo__teco_interface_ssm(SCI_GOTOPOS, dot_bytes, 0); + undo__teco_interface_ssm(SCI_UNDO, 0, 0); + } + } + + if (new_cp == SC_CP_UTF8) { + teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0); + /* + * UTF-8 documents strictly require the line character index. + * See teco_view_glyphs2bytes() and teco_view_bytes2glyphs(). + */ + g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)); + teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } else { + /* + * The index is NOT released automatically when setting the codepage. + * But it is internally reference-counted and could underflow, + * so don't do it more than necessary. 
+ */ + if (old_cp == SC_CP_UTF8) { + teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)); + } + + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + * + * FIXME: Should we avoid this if new_cp == 0? + * It will be used for raw byte handling mostly. + */ + if (teco_current_doc_must_undo()) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, new_cp); + } else { + /* we must still set it, so that <EE> retrieval works */ + teco_interface_ssm(SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, new_cp); + } + /* 0 is used for ALL single-byte encodings */ + teco_interface_ssm(SCI_SETCODEPAGE, 0, 0); + } + + if (colon_modified) + /* + * Only now, it will be safe to recalculate dot in the new encoding. + * If the new codepage is UTF-8, the line character index will be + * ready only now. 
+ */ + teco_interface_ssm(SCI_GOTOPOS, teco_interface_glyphs2bytes(dot_glyphs), 0); +} + /*$ EX exit * [bool]EX -- Exit program * -EX @@ -2352,7 +2712,7 @@ teco_state_ecommand_exit(teco_machine_main_t *ctx, GError **error) } static teco_state_t * -teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -2377,6 +2737,7 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) ['D'] = {&teco_state_start, teco_state_ecommand_flags}, ['J'] = {&teco_state_start, teco_state_ecommand_properties}, ['L'] = {&teco_state_start, teco_state_ecommand_eol}, + ['E'] = {&teco_state_start, teco_state_ecommand_encoding}, ['X'] = {&teco_state_start, teco_state_ecommand_exit} }; @@ -2395,26 +2756,61 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + /* + * Current document's encoding determines the behaviour of + * string building constructs. 
+ */ + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + if (!teco_expressions_eval(FALSE, error)) return FALSE; guint args = teco_expressions_args(); if (!args) return TRUE; - teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); - for (int i = args; i > 0; i--) { - gchar chr = (gchar)teco_expressions_peek_num(i-1); - teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr); + if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { + /* detect possible errors before introducing side effects */ + for (gint i = args; i > 0; i--) { + teco_int_t chr = teco_expressions_peek_num(i-1); + if (chr < 0 || !g_unichar_validate(chr)) { + teco_error_codepoint_set(error, "I"); + return FALSE; + } + } + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + for (gint i = args; i > 0; i--) { + /* 4 bytes should be enough, but we better follow the documentation */ + gchar buf[6]; + gsize len = g_unichar_to_utf8(teco_expressions_peek_num(i-1), buf); + teco_interface_ssm(SCI_ADDTEXT, len, (sptr_t)buf); + } + } else { + /* everything else is a single-byte encoding */ + for (gint i = args; i > 0; i--) { + teco_int_t chr = teco_expressions_peek_num(i-1); + if (chr < 0 || chr > 0xFF) { + teco_error_codepoint_set(error, "I"); + return FALSE; + } + } + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + for (gint i = args; i > 0; i--) { + gchar chr = (gchar)teco_expressions_peek_num(i-1); + teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr); + } } - for (int i = args; i > 0; i--) - if (!teco_expressions_pop_num_calc(NULL, 0, error)) - return FALSE; teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_UNDO, 0, 0); + /* This is done only now because it can _theoretically_ fail. 
*/ + for (gint i = args; i > 0; i--) + if (!teco_expressions_pop_num_calc(NULL, 0, error)) + return FALSE; + return TRUE; } @@ -2451,8 +2847,8 @@ teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str, * Secondly, the command inserts <text>. * In interactive mode, <text> is inserted interactively. * - * String building characters are \fBenabled\fP for the - * I command. + * Unlike in classic TECO dialects, string building characters are + * \fBenabled\fP for the \fBI\fP command. * When editing \*(ST macros, using the \fBEI\fP command * may be better, since it has string building characters * disabled. @@ -2491,10 +2887,9 @@ teco_state_insert_indent_initial(teco_machine_main_t *ctx, GError **error) len -= teco_interface_ssm(SCI_GETCOLUMN, teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0) % len; - gchar spaces[len]; - - memset(spaces, ' ', sizeof(spaces)); - teco_interface_ssm(SCI_ADDTEXT, sizeof(spaces), (sptr_t)spaces); + gchar space = ' '; + while (len-- > 0) + teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&space); } teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); diff --git a/src/core-commands.h b/src/core-commands.h index 6efc5a3..e30770d 100644 --- a/src/core-commands.h +++ b/src/core-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t gsize new_chars, GError **error); /* in cmdline.c */ -gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @class TECO_DEFINE_STATE_INSERT @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 
Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,17 +30,49 @@ #include "doc.h" static inline teco_doc_scintilla_t * +teco_doc_scintilla_ref(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_ADDREFDOCUMENT, 0, (sptr_t)doc); + return doc; +} + +static inline void +teco_doc_scintilla_release(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)doc); +} + +TECO_DEFINE_UNDO_OBJECT(doc_scintilla, teco_doc_scintilla_t *, + teco_doc_scintilla_ref, teco_doc_scintilla_release); + +static inline teco_doc_scintilla_t * teco_doc_get_scintilla(teco_doc_t *ctx) { + /* + * FIXME: Perhaps we should always specify SC_DOCUMENTOPTION_TEXT_LARGE? + * SC_DOCUMENTOPTION_STYLES_NONE is unfortunately also not safe to set + * always as the Q-Reg might well be used for styling even in batch mode. + */ if (G_UNLIKELY(!ctx->doc)) ctx->doc = (teco_doc_scintilla_t *)teco_view_ssm(teco_qreg_view, SCI_CREATEDOCUMENT, 0, 0); return ctx->doc; } -/** @memberof teco_doc_t */ +/** + * Edit the given document in the Q-Register view. + * + * @param ctx The document to edit. + * @param default_cp The codepage to configure if the document is new. 
+ * + * @memberof teco_doc_t + */ void -teco_doc_edit(teco_doc_t *ctx) +teco_doc_edit(teco_doc_t *ctx, guint default_cp) { + gboolean new_doc = ctx->doc == NULL; + teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, (sptr_t)teco_doc_get_scintilla(ctx)); teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); @@ -48,11 +80,39 @@ teco_doc_edit(teco_doc_t *ctx) teco_view_ssm(teco_qreg_view, SCI_SETSEL, ctx->anchor, (sptr_t)ctx->dot); /* - * NOTE: Thanks to a custom Scintilla patch, se representations + * NOTE: Thanks to a custom Scintilla patch, representations * do not get reset after SCI_SETDOCPOINTER, so they have to be * initialized only once. */ //teco_view_set_representations(teco_qreg_view); + + if (new_doc && default_cp != SC_CP_UTF8) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, + style, default_cp); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); + } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)) { + /* + * All UTF-8 documents are expected to have a character index. + * This allocates nothing if the document is not UTF-8. + * But it is reference counted, so it must not be allocated + * more than once. + * + * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER + * (although I don't know why and where). + * Recalculating it could be inefficient. + * The index is reference-counted. Perhaps we could just allocate + * one more time, so it doesn't get freed when changing documents. 
+ */ + teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } } /** @memberof teco_doc_t */ @@ -68,26 +128,26 @@ teco_doc_undo_edit(teco_doc_t *ctx) undo__teco_view_ssm(teco_qreg_view, SCI_SETXOFFSET, ctx->xoffset, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, - (sptr_t)teco_doc_get_scintilla(ctx)); + (sptr_t)teco_doc_get_scintilla(ctx)); } /** @memberof teco_doc_t */ void -teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len) +teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage) { if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + teco_doc_scintilla_release(ctx->doc); + ctx->doc = NULL; + teco_doc_reset(ctx); - teco_doc_edit(ctx); + teco_doc_edit(ctx, codepage); - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : "")); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -100,13 +160,13 @@ teco_doc_undo_set_string(teco_doc_t *ctx) */ teco_doc_update(ctx, teco_qreg_view); - if (teco_qreg_current && teco_qreg_current->must_undo) // FIXME + if (teco_qreg_current && teco_qreg_current->must_undo && // FIXME + ctx == &teco_qreg_current->string) + /* load old document into view */ teco_doc_undo_edit(&teco_qreg_current->string); teco_doc_undo_reset(ctx); - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - - teco_doc_undo_edit(ctx); + teco_undo_object_doc_scintilla_push(&ctx->doc); } /** @@ -117,33 +177,42 @@ teco_doc_undo_set_string(teco_doc_t *ctx) * It can be NULL if you are interested only in the string's length. * Strings must be freed via g_free(). 
* @param len Where to store the string's length (mandatory). + * @param codepage Where to store the document's codepage or NULL + * if that information is not necessary. * * @see teco_qreg_vtable_t::get_string() * @memberof teco_doc_t */ void -teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len) +teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage) { if (!ctx->doc) { if (str) *str = NULL; - *len = 0; + if (outlen) + *outlen = 0; + if (codepage) + *codepage = teco_default_codepage(); return; } if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(ctx); + teco_doc_edit(ctx, teco_default_codepage()); - *len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); if (str) { - *str = g_malloc(*len + 1); - teco_view_ssm(teco_qreg_view, SCI_GETTEXT, *len + 1, (sptr_t)*str); + *str = g_malloc(len + 1); + teco_view_ssm(teco_qreg_view, SCI_GETTEXT, len + 1, (sptr_t)*str); } + if (outlen) + *outlen = len; + if (codepage) + *codepage = teco_view_get_codepage(teco_qreg_view); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -185,6 +254,5 @@ teco_doc_exchange(teco_doc_t *ctx, teco_doc_t *other) void teco_doc_clear(teco_doc_t *ctx) { - if (ctx->doc) - teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)ctx->doc); + teco_doc_scintilla_release(ctx->doc); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ typedef struct teco_doc_scintilla_t teco_doc_scintilla_t; typedef struct { /** * Underlying Scintilla document. 
- * It is created on demand in teco_doc_maybe_create_document(), + * It is created on demand in teco_doc_get_scintilla(), * so that we don't waste memory on integer-only Q-Registers. */ teco_doc_scintilla_t *doc; @@ -62,13 +62,13 @@ teco_doc_init(teco_doc_t *ctx) memset(ctx, 0, sizeof(*ctx)); } -void teco_doc_edit(teco_doc_t *ctx); +void teco_doc_edit(teco_doc_t *ctx, guint default_cp); void teco_doc_undo_edit(teco_doc_t *ctx); -void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len); +void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage); void teco_doc_undo_set_string(teco_doc_t *ctx); -void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len); +void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len, guint *codepage); void teco_doc_update_from_view(teco_doc_t *ctx, teco_view_t *from); void teco_doc_update_from_doc(teco_doc_t *ctx, const teco_doc_t *from); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/error.c b/src/error.c index 7c4e151..afa2ac1 100644 --- a/src/error.c +++ b/src/error.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,13 +37,6 @@ guint teco_error_return_args = 0; */ guint teco_error_pos = 0, teco_error_line = 0, teco_error_column = 0; -void -teco_error_set_coord(const gchar *str, guint pos) -{ - teco_error_pos = 
pos; - teco_string_get_coord(str, pos, &teco_error_line, &teco_error_column); -} - typedef enum { TECO_FRAME_QREG, TECO_FRAME_FILE, @@ -161,10 +154,7 @@ teco_error_add_frame_toplevel(void) teco_error_add_frame(TECO_FRAME_TOPLEVEL, 0); } -#ifndef NDEBUG -__attribute__((destructor)) -#endif -void +void TECO_DEBUG_CLEANUP teco_error_clear_frames(void) { teco_stailq_entry_t *entry; diff --git a/src/error.h b/src/error.h index 91d2b60..469d957 100644 --- a/src/error.h +++ b/src/error.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,13 +40,16 @@ typedef enum { */ TECO_ERROR_SYNTAX, TECO_ERROR_ARGEXPECTED, + TECO_ERROR_CODEPOINT, TECO_ERROR_MOVE, TECO_ERROR_WORDS, TECO_ERROR_RANGE, TECO_ERROR_INVALIDQREG, TECO_ERROR_QREGOPUNSUPPORTED, TECO_ERROR_QREGCONTAINSNULL, + TECO_ERROR_EDITINGLOCALQREG, TECO_ERROR_MEMLIMIT, + TECO_ERROR_CLIPBOARD, /** Interrupt current operation */ TECO_ERROR_INTERRUPTED, @@ -60,10 +63,12 @@ typedef enum { } teco_error_t; static inline void -teco_error_syntax_set(GError **error, gchar chr) +teco_error_syntax_set(GError **error, gunichar chr) { + gchar buf[6]; + g_autofree gchar *chr_printable = teco_string_echo(buf, g_unichar_to_utf8(chr, buf)); g_set_error(error, TECO_ERROR, TECO_ERROR_SYNTAX, - "Syntax error \"%c\" (%d)", chr, chr); + "Syntax error \"%s\" (U+%04" G_GINT32_MODIFIER "X)", chr_printable, chr); } static inline void @@ -74,6 +79,13 @@ teco_error_argexpected_set(GError **error, const gchar *cmd) } static inline void +teco_error_codepoint_set(GError **error, const gchar *cmd) +{ + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid Unicode codepoint for <%s>", cmd); +} + +static inline void teco_error_move_set(GError **error, const gchar *cmd) { g_set_error(error, TECO_ERROR, TECO_ERROR_MOVE, @@ -119,6 +131,14 @@ 
teco_error_qregcontainsnull_set(GError **error, const gchar *name, gsize len, gb } static inline void +teco_error_editinglocalqreg_set(GError **error, const gchar *name, gsize len) +{ + g_autofree gchar *name_printable = teco_string_echo(name, len); + g_set_error(error, TECO_ERROR, TECO_ERROR_EDITINGLOCALQREG, + "Editing local Q-Register \"%s\" at end of macro call", name_printable); +} + +static inline void teco_error_interrupted_set(GError **error) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_INTERRUPTED, "Interrupted"); @@ -135,7 +155,11 @@ teco_error_return_set(GError **error, guint args) extern guint teco_error_pos, teco_error_line, teco_error_column; -void teco_error_set_coord(const gchar *str, guint pos); +static inline void +teco_error_set_coord(const gchar *str, gsize pos) +{ + teco_string_get_coord(str, pos, &teco_error_pos, &teco_error_line, &teco_error_column); +} void teco_error_display_short(const GError *error); void teco_error_display_full(const GError *error); diff --git a/src/expressions.c b/src/expressions.c index 57e2f71..ee6b4dc 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -86,7 +86,7 @@ teco_int_t teco_expressions_pop_num(guint index) { teco_int_t n = 0; - teco_operator_t op = teco_expressions_pop_op(0); + G_GNUC_UNUSED teco_operator_t op = teco_expressions_pop_op(0); g_assert(op == TECO_OP_NUMBER); @@ -114,11 +114,12 @@ teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error) } void -teco_expressions_add_digit(gchar digit) +teco_expressions_add_digit(gunichar digit) { teco_int_t n = teco_expressions_args() > 0 ? teco_expressions_pop_num(0) : 0; - teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*(digit - '0')); + /* use g_unichar_digit_value()? 
*/ + teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*((gint)digit - '0')); } void @@ -184,7 +185,28 @@ teco_expressions_calc(GError **error) switch (op) { case TECO_OP_POW: - for (result = 1; vright--; result *= vleft); + if (!vright) { + result = vleft < 0 ? -1 : 1; + break; + } + if (vright < 0) { + if (!vleft) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Negative power of 0 is not defined"); + return FALSE; + } + result = ABS(vleft) == 1 ? vleft : 0; + break; + } + result = 1; + for (;;) { + if (vright & 1) + result *= vleft; + vright >>= 1; + if (!vright) + break; + vleft *= vleft; + } break; case TECO_OP_MUL: result = vleft * vright; @@ -297,6 +319,9 @@ guint teco_brace_level = 0; void teco_expressions_brace_open(void) { + while (teco_operators->len > 0 && teco_expressions_peek_op(0) == TECO_OP_NEW) + teco_expressions_pop_op(0); + teco_expressions_push_op(TECO_OP_BRACE); teco_undo_guint(teco_brace_level)++; } @@ -374,11 +399,9 @@ teco_expressions_format(gchar *buffer, teco_int_t number) return p; } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_expressions_cleanup(void) { g_array_free(teco_numbers, TRUE); g_array_free(teco_operators, TRUE); } -#endif diff --git a/src/expressions.h b/src/expressions.h index 45e6f64..68d8ddb 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -123,7 +123,7 @@ teco_int_t teco_expressions_peek_num(guint index); teco_int_t teco_expressions_pop_num(guint index); gboolean teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error); -void teco_expressions_add_digit(gchar digit); +void teco_expressions_add_digit(gunichar digit); void teco_expressions_push_op(teco_operator_t op); gboolean 
teco_expressions_push_calc(teco_operator_t op, GError **error); diff --git a/src/file-utils.c b/src/file-utils.c index 239cc5f..3f8f721 100644 --- a/src/file-utils.c +++ b/src/file-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #ifdef HAVE_WINDOWS_H #define WIN32_LEAN_AND_MEAN +#define UNICODE #include <windows.h> #endif @@ -36,7 +37,6 @@ #include "sciteco.h" #include "qreg.h" -#include "glob.h" #include "interface.h" #include "string-utils.h" #include "file-utils.h" @@ -56,26 +56,35 @@ G_STATIC_ASSERT(INVALID_FILE_ATTRIBUTES == TECO_FILE_INVALID_ATTRIBUTES); teco_file_attributes_t teco_file_get_attributes(const gchar *filename) { - return GetFileAttributes((LPCTSTR)filename); + g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + return filename_utf16 ? GetFileAttributesW(filename_utf16) + : TECO_FILE_INVALID_ATTRIBUTES; } void teco_file_set_attributes(const gchar *filename, teco_file_attributes_t attrs) { - SetFileAttributes((LPCTSTR)filename, attrs); + g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + if (filename_utf16) + SetFileAttributesW(filename_utf16, attrs); } gchar * teco_file_get_absolute_path(const gchar *path) { + if (!path) + return NULL; + g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL); TCHAR buf[MAX_PATH]; - return path && GetFullPathName(path, sizeof(buf), buf, NULL) ? g_strdup(buf) : NULL; + return path_utf16 && GetFullPathNameW(path_utf16, G_N_ELEMENTS(buf), buf, NULL) + ? 
g_utf16_to_utf8(buf, -1, NULL, NULL, NULL) : NULL; } gboolean teco_file_is_visible(const gchar *path) { - return !(GetFileAttributes((LPCTSTR)path) & FILE_ATTRIBUTE_HIDDEN); + g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL); + return path_utf16 && !(GetFileAttributesW(path_utf16) & FILE_ATTRIBUTE_HIDDEN); } #else /* !G_OS_WIN32 */ @@ -83,7 +92,7 @@ teco_file_is_visible(const gchar *path) teco_file_attributes_t teco_file_get_attributes(const gchar *filename) { - struct stat buf; + GStatBuf buf; return g_stat(filename, &buf) ? TECO_FILE_INVALID_ATTRIBUTES : buf.st_mode; } @@ -204,7 +213,7 @@ teco_file_expand_path(const gchar *path) * but it may have been changed later on. */ g_auto(teco_string_t) home = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL) || + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, NULL) || teco_string_contains(&home, '\0')) return g_strdup(path); g_assert(home.data != NULL); @@ -227,9 +236,6 @@ teco_file_auto_complete(const gchar *filename, GFileTest file_test, teco_string_ { memset(insert, 0, sizeof(*insert)); - if (teco_globber_is_pattern(filename)) - return FALSE; - g_autofree gchar *filename_expanded = teco_file_expand_path(filename); gsize filename_len = strlen(filename_expanded); diff --git a/src/file-utils.h b/src/file-utils.h index 51b0d18..4ee59e6 100644 --- a/src/file-utils.h +++ b/src/file-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar 
*pattern) *pout++ = '['; break; } + /* fall through: escape PCRE metacharacters */ + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '+': + case '{': + *pout++ = '\\'; /* fall through */ default: - /* - * For simplicity, all non-alphanumeric - * characters are escaped since they could - * be PCRE magic characters. - * g_regex_escape_string() is inefficient. - * character anyway. - */ - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; *pout++ = *pattern; break; } @@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = ']'; break; } - /* fall through */ - default: - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; + /* fall through: escape PCRE metacharacters */ + case '\\': + case '[': + *pout++ = '\\'; /* fall through */ case '-': + default: state = STATE_CLASS; *pout++ = *pattern; break; @@ -315,7 +318,8 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || - !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error)) + !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), + teco_default_codepage(), error)) return NULL; } @@ -448,7 +452,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, * when they should be in a register, the user will * have to edit that register anyway. 
*/ -TECO_DEFINE_STATE_EXPECTFILE(teco_state_glob_pattern, +TECO_DEFINE_STATE_EXPECTGLOB(teco_state_glob_pattern, .expectstring.last = FALSE ); @@ -490,7 +494,8 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); g_auto(teco_string_t) pattern_str = {NULL, 0}; - if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, error)) + if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, + NULL, error)) return NULL; if (teco_string_contains(&pattern_str, '\0')) { teco_error_qregcontainsnull_set(error, "_", 1, FALSE); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,6 +46,21 @@ teco_globber_is_pattern(const gchar *str) gchar *teco_globber_escape_pattern(const gchar *pattern); GRegex *teco_globber_compile_pattern(const gchar *pattern); +/* in cmdline.c */ +gboolean teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); + +/** + * @interface TECO_DEFINE_STATE_EXPECTGLOB + * @implements TECO_DEFINE_STATE_EXPECTFILE + * @ingroup states + */ +#define TECO_DEFINE_STATE_EXPECTGLOB(NAME, ...) 
\ + TECO_DEFINE_STATE_EXPECTFILE(NAME, \ + .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ + teco_state_expectglob_process_edit_cmd, \ + ##__VA_ARGS__ \ + ) + /* * Command states */ diff --git a/src/goto-commands.c b/src/goto-commands.c index e4cd868..a8a9689 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -53,7 +53,7 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error) * I'm unsure whether !-signs should be allowed within comments. */ static teco_state_t * -teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { if (chr == '!') { /* @@ -61,8 +61,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) * on rubout. * Otherwise, the label will be removed (PC == -1). 
*/ - gint existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data, - ctx->goto_label.len, ctx->macro_pc); + gssize existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data, + ctx->goto_label.len, ctx->macro_pc); if (ctx->parent.must_undo) teco_goto_table_undo_set(&ctx->goto_table, ctx->goto_label.data, ctx->goto_label.len, existing_pc); @@ -85,7 +85,7 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len); - teco_string_append_c(&ctx->goto_label, chr); + teco_string_append_wc(&ctx->goto_label, chr); return &teco_state_label; } @@ -119,7 +119,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } if (value == 0) { - gint pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len); + gssize pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len); if (pc >= 0) { ctx->macro_pc = pc; @@ -138,7 +138,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } /* in cmdline.c */ -gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /*$ O * Olabel$ -- Go to label diff --git a/src/goto-commands.h b/src/goto-commands.h index ffd9527..03773c0 100644 --- a/src/goto-commands.h +++ b/src/goto-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU 
General Public License as published by @@ -35,12 +35,12 @@ /** @extends teco_rb3str_head_t */ typedef struct { teco_rb3str_head_t head; - gint pc; + gsize pc; } teco_goto_label_t; /** @private @static @memberof teco_goto_label_t */ static teco_goto_label_t * -teco_goto_label_new(const gchar *name, gsize len, gint pc) +teco_goto_label_new(const gchar *name, gsize len, gsize pc) { teco_goto_label_t *label = g_new0(teco_goto_label_t, 1); teco_string_init(&label->head.name, name, len); @@ -79,10 +79,10 @@ teco_goto_table_dump(teco_goto_table_t *ctx) #endif /** @memberof teco_goto_table_t */ -gint +gssize teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len) { - gint existing_pc = -1; + gssize existing_pc = -1; teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); if (label) { @@ -95,7 +95,7 @@ teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len) } /** @memberof teco_goto_table_t */ -gint +gssize teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len) { teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); @@ -103,13 +103,13 @@ teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len) } /** @memberof teco_goto_table_t */ -gint -teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc) +gssize +teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc) { if (pc < 0) return teco_goto_table_remove(ctx, name, len); - gint existing_pc = -1; + gssize existing_pc = -1; teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); if (label) { @@ -135,7 +135,7 @@ teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint p */ typedef struct { teco_goto_table_t *table; - gint pc; + gssize pc; gsize len; gchar name[]; } teco_goto_table_undo_set_t; @@ -153,7 +153,7 @@ 
teco_goto_table_undo_set_action(teco_goto_table_undo_set_t *ctx, gboolean run) /** @memberof teco_goto_table_t */ void -teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc) +teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc) { if (!ctx->must_undo) return; @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,12 +40,12 @@ teco_goto_table_init(teco_goto_table_t *ctx, gboolean must_undo) ctx->must_undo = must_undo; } -gint teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len); +gssize teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len); -gint teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len); +gssize teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len); -gint teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc); -void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc); +gssize teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc); +void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc); /** @memberof teco_goto_table_t */ static inline gboolean @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -94,7 +94,7 @@ teco_help_init(GError **error) teco_qreg_t *lib_reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECOPATH", 12); g_assert(lib_reg != NULL); g_auto(teco_string_t) lib_path = {NULL, 0}; - if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, 
&lib_path.len, error)) + if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, NULL, error)) return FALSE; /* * FIXME: lib_path may contain null-bytes. @@ -235,8 +235,7 @@ teco_help_auto_complete(const gchar *topic_name, teco_string_t *insert) topic_name ? strlen(topic_name) : 0, 0, insert); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_help_cleanup(void) { if (!teco_help_chunk) @@ -251,7 +250,6 @@ teco_help_cleanup(void) teco_help_topic_free((teco_help_topic_t *)cur); } } -#endif /* * Command states @@ -316,7 +314,7 @@ teco_state_help_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } /* in cmdline.c */ -gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /*$ "?" help * ?[topic]$ -- Get help for topic @@ -347,7 +345,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine * .EE * In other words it must be a \*(ST comment followed * by an asterisk sign, followed by the first topic which - * is a buffer position, followed by a colon and the topic + * is a buffer position in bytes, followed by a colon and the topic * string. * The topic string is terminated by the end of the line. * The end of the header is marked by a single \(lq*!\(rq. @@ -373,7 +371,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine * \fIgrosciteco\fP formatter and the \fIsciteco.tmac\fP * GNU troff macros. * When using womanpages generated by \fIgrosciteco\fP, - * help topics can be defined using the \fBTECO_TOPIC\fP + * help topics can be defined using the \fBSCITECO_TOPIC\fP * Troff macro. 
* This flexible system allows \*(ST to access internal * and third-party help files written in plain-text or @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-curses/Makefile.am b/src/interface-curses/Makefile.am index 14fc920..44fb658 100644 --- a/src/interface-curses/Makefile.am +++ b/src/interface-curses/Makefile.am @@ -6,4 +6,5 @@ AM_CFLAGS = -std=gnu11 -Wall -Wno-initializer-overrides -Wno-unused-value noinst_LTLIBRARIES = libsciteco-interface.la libsciteco_interface_la_SOURCES = interface.c \ curses-utils.c curses-utils.h \ - curses-info-popup.c curses-info-popup.h + curses-info-popup.c curses-info-popup.h \ + curses-icons.c curses-icons.h diff --git a/src/interface-curses/curses-icons.c b/src/interface-curses/curses-icons.c new file mode 100644 index 0000000..1a1ba3a --- /dev/null +++ b/src/interface-curses/curses-icons.c @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> + +#include <glib.h> + +#include <curses.h> + +#include "sciteco.h" +#include "curses-icons.h" + +typedef struct { + const gchar *name; + gunichar c; +} teco_curses_icon_t; + +/* + * The following icons have initially been adapted from exa, + * but icons have since been added and removed. + * + * They require fonts with additional symbols, eg. + * Nerd Fonts (https://www.nerdfonts.com/). + * + * They MUST be kept presorted, so we can perform binary searches. + */ + +/** Mapping of complete filenames to Unicode "icons" */ +static const teco_curses_icon_t teco_icons_file[] = { + {".Trash", 0xf1f8}, /* */ + {".atom", 0xe764}, /* */ + {".bash_history", 0xf489}, /* */ + {".bash_profile", 0xf489}, /* */ + {".bashrc", 0xf489}, /* */ + {".git", 0xf1d3}, /* */ + {".gitattributes", 0xf1d3}, /* */ + {".gitconfig", 0xf1d3}, /* */ + {".github", 0xf408}, /* */ + {".gitignore", 0xf1d3}, /* */ + {".gitmodules", 0xf1d3}, /* */ + {".rvm", 0xe21e}, /* */ + {".teco_ini", 0xedaa}, /* */ + {".teco_session", 0xedaa}, /* */ + {".vimrc", 0xe62b}, /* */ + {".vscode", 0xe70c}, /* */ + {".zshrc", 0xf489}, /* */ + {"COMMIT_EDITMSG", 0xf1d3}, /* */ + {"Cargo.lock", 0xe7a8}, /* */ + {"Dockerfile", 0xf308}, /* */ + {"GNUmakefile", 0xf489}, /* */ + {"MERGE_MSG", 0xf1d3}, /* */ + {"Makefile", 0xf489}, /* */ + {"PKGBUILD", 0xf303}, /* */ + {"TAG_EDITMSG", 0xf1d3}, /* */ + {"bin", 0xe5fc}, /* */ + {"config", 0xe5fc}, /* */ + {"docker-compose.yml", 0xf308}, /* */ + {"ds_store", 0xf179}, /* */ + {"git-rebase-todo", 0xf1d3}, /* */ + {"go.mod", 0xe626}, /* */ + {"go.sum", 0xe626}, /* */ + {"gradle", 0xe256}, /* */ + {"gruntfile.coffee", 0xe611}, /* */ + {"gruntfile.js", 0xe611}, /* */ + {"gruntfile.ls", 0xe611}, /* */ + {"gulpfile.coffee", 0xe610}, /* */ + {"gulpfile.js", 0xe610}, /* */ + {"gulpfile.ls", 0xe610}, /* */ + {"hidden", 0xf023}, /* */ + {"include", 0xe5fc}, /* */ + {"lib", 0xf121}, /* */ + 
{"localized", 0xf179}, /* */ + {"node_modules", 0xe718}, /* */ + {"npmignore", 0xe71e}, /* */ + {"rubydoc", 0xe73b}, /* */ + {"yarn.lock", 0xe718}, /* */ +}; + +/** Mapping of file extensions to Unicode "icons" */ +static const teco_curses_icon_t teco_icons_ext[] = { + {"DS_store", 0xf179}, /* */ + {"ai", 0xe7b4}, /* */ + {"android", 0xe70e}, /* */ + {"apk", 0xe70e}, /* */ + {"apple", 0xf179}, /* */ + {"avi", 0xf03d}, /* */ + {"avif", 0xf1c5}, /* */ + {"avro", 0xe60b}, /* */ + {"awk", 0xf489}, /* */ + {"bash", 0xf489}, /* */ + {"bat", 0xf17a}, /* */ + {"bats", 0xf489}, /* */ + {"bmp", 0xf1c5}, /* */ + {"bz", 0xf410}, /* */ + {"bz2", 0xf410}, /* */ + {"c", 0xe61e}, /* */ + {"c++", 0xe61d}, /* */ + {"cab", 0xe70f}, /* */ + {"cc", 0xe61d}, /* */ + {"cfg", 0xe615}, /* */ + {"class", 0xe256}, /* */ + {"clj", 0xe768}, /* */ + {"cljs", 0xe76a}, /* */ + {"cls", 0xf034}, /* */ + {"cmd", 0xe70f}, /* */ + {"coffee", 0xf0f4}, /* */ + {"conf", 0xe615}, /* */ + {"cp", 0xe61d}, /* */ + {"cpio", 0xf410}, /* */ + {"cpp", 0xe61d}, /* */ + {"cs", 0xf031b}, /* */ + {"csh", 0xf489}, /* */ + {"cshtml", 0xf1fa}, /* */ + {"csproj", 0xf031b}, /* */ + {"css", 0xe749}, /* */ + {"csv", 0xf1c3}, /* */ + {"csx", 0xf031b}, /* */ + {"cxx", 0xe61d}, /* */ + {"d", 0xe7af}, /* */ + {"dart", 0xe798}, /* */ + {"db", 0xf1c0}, /* */ + {"deb", 0xe77d}, /* */ + {"diff", 0xf440}, /* */ + {"djvu", 0xf02d}, /* */ + {"dll", 0xe70f}, /* */ + {"doc", 0xf1c2}, /* */ + {"docx", 0xf1c2}, /* */ + {"ds_store", 0xf179}, /* */ + {"dump", 0xf1c0}, /* */ + {"ebook", 0xe28b}, /* */ + {"ebuild", 0xf30d}, /* */ + {"editorconfig", 0xe615}, /* */ + {"ejs", 0xe618}, /* */ + {"elm", 0xe62c}, /* */ + {"env", 0xf462}, /* */ + {"eot", 0xf031}, /* */ + {"epub", 0xe28a}, /* */ + {"erb", 0xe73b}, /* */ + {"erl", 0xe7b1}, /* */ + {"ex", 0xe62d}, /* */ + {"exe", 0xf17a}, /* */ + {"exs", 0xe62d}, /* */ + {"fish", 0xf489}, /* */ + {"flac", 0xf001}, /* */ + {"flv", 0xf03d}, /* */ + {"font", 0xf031}, /* */ + {"fs", 0xe7a7}, /* */ + 
{"fsi", 0xe7a7}, /* */ + {"fsx", 0xe7a7}, /* */ + {"gdoc", 0xf1c2}, /* */ + {"gem", 0xe21e}, /* */ + {"gemfile", 0xe21e}, /* */ + {"gemspec", 0xe21e}, /* */ + {"gform", 0xf298}, /* */ + {"gif", 0xf1c5}, /* */ + {"go", 0xe626}, /* */ + {"gradle", 0xe256}, /* */ + {"groovy", 0xe775}, /* */ + {"gsheet", 0xf1c3}, /* */ + {"gslides", 0xf1c4}, /* */ + {"guardfile", 0xe21e}, /* */ + {"gz", 0xf410}, /* */ + {"h", 0xf0fd}, /* */ + {"hbs", 0xe60f}, /* */ + {"hpp", 0xf0fd}, /* */ + {"hs", 0xe777}, /* */ + {"htm", 0xf13b}, /* */ + {"html", 0xf13b}, /* */ + {"hxx", 0xf0fd}, /* */ + {"ico", 0xf1c5}, /* */ + {"image", 0xf1c5}, /* */ + {"img", 0xe271}, /* */ + {"iml", 0xe7b5}, /* */ + {"ini", 0xf17a}, /* */ + {"ipynb", 0xe678}, /* */ + {"iso", 0xe271}, /* */ + {"j2c", 0xf1c5}, /* */ + {"j2k", 0xf1c5}, /* */ + {"jad", 0xe256}, /* */ + {"jar", 0xe256}, /* */ + {"java", 0xe256}, /* */ + {"jfi", 0xf1c5}, /* */ + {"jfif", 0xf1c5}, /* */ + {"jif", 0xf1c5}, /* */ + {"jl", 0xe624}, /* */ + {"jmd", 0xf48a}, /* */ + {"jp2", 0xf1c5}, /* */ + {"jpe", 0xf1c5}, /* */ + {"jpeg", 0xf1c5}, /* */ + {"jpg", 0xf1c5}, /* */ + {"jpx", 0xf1c5}, /* */ + {"js", 0xe74e}, /* */ + {"json", 0xe60b}, /* */ + {"jsx", 0xe7ba}, /* */ + {"jxl", 0xf1c5}, /* */ + {"ksh", 0xf489}, /* */ + {"latex", 0xf034}, /* */ + {"less", 0xe758}, /* */ + {"lhs", 0xe777}, /* */ + {"license", 0xf0219}, /* */ + {"localized", 0xf179}, /* */ + {"lock", 0xf023}, /* */ + {"log", 0xf18d}, /* */ + {"lua", 0xe620}, /* */ + {"lz", 0xf410}, /* */ + {"lz4", 0xf410}, /* */ + {"lzh", 0xf410}, /* */ + {"lzma", 0xf410}, /* */ + {"lzo", 0xf410}, /* */ + {"m", 0xe61e}, /* */ + {"m4a", 0xf001}, /* */ + {"markdown", 0xf48a}, /* */ + {"md", 0xf48a}, /* */ + {"mjs", 0xe74e}, /* */ + {"mk", 0xf489}, /* */ + {"mkd", 0xf48a}, /* */ + {"mkv", 0xf03d}, /* */ + {"mm", 0xe61d}, /* */ + {"mobi", 0xe28b}, /* */ + {"mov", 0xf03d}, /* */ + {"mp3", 0xf001}, /* */ + {"mp4", 0xf03d}, /* */ + {"msi", 0xe70f}, /* */ + {"mustache", 0xe60f}, /* */ + {"nix", 0xf313}, /* 
*/ + {"node", 0xf0399}, /* */ + {"npmignore", 0xe71e}, /* */ + {"odp", 0xf1c4}, /* */ + {"ods", 0xf1c3}, /* */ + {"odt", 0xf1c2}, /* */ + {"ogg", 0xf001}, /* */ + {"ogv", 0xf03d}, /* */ + {"otf", 0xf031}, /* */ + {"part", 0xf43a}, /* */ + {"patch", 0xf440}, /* */ + {"pdf", 0xf1c1}, /* */ + {"php", 0xe73d}, /* */ + {"pl", 0xe769}, /* */ + {"plx", 0xe769}, /* */ + {"pm", 0xe769}, /* */ + {"png", 0xf1c5}, /* */ + {"pod", 0xe769}, /* */ + {"ppt", 0xf1c4}, /* */ + {"pptx", 0xf1c4}, /* */ + {"procfile", 0xe21e}, /* */ + {"properties", 0xe60b}, /* */ + {"ps1", 0xf489}, /* */ + {"psd", 0xe7b8}, /* */ + {"pxm", 0xf1c5}, /* */ + {"py", 0xe606}, /* */ + {"pyc", 0xe606}, /* */ + {"r", 0xf25d}, /* */ + {"rakefile", 0xe21e}, /* */ + {"rar", 0xf410}, /* */ + {"razor", 0xf1fa}, /* */ + {"rb", 0xe21e}, /* */ + {"rdata", 0xf25d}, /* */ + {"rdb", 0xe76d}, /* */ + {"rdoc", 0xf48a}, /* */ + {"rds", 0xf25d}, /* */ + {"readme", 0xf48a}, /* */ + {"rlib", 0xe7a8}, /* */ + {"rmd", 0xf48a}, /* */ + {"rpm", 0xe7bb}, /* */ + {"rs", 0xe7a8}, /* */ + {"rspec", 0xe21e}, /* */ + {"rspec_parallel", 0xe21e}, /* */ + {"rspec_status", 0xe21e}, /* */ + {"rss", 0xf09e}, /* */ + {"rtf", 0xf0219}, /* */ + {"ru", 0xe21e}, /* */ + {"rubydoc", 0xe73b}, /* */ + {"sass", 0xe603}, /* */ + {"scala", 0xe737}, /* */ + {"scss", 0xe749}, /* */ + {"sh", 0xf489}, /* */ + {"shell", 0xf489}, /* */ + {"slim", 0xe73b}, /* */ + {"sln", 0xe70c}, /* */ + {"so", 0xf17c}, /* */ + {"sql", 0xf1c0}, /* */ + {"sqlite3", 0xe7c4}, /* */ + {"sty", 0xf034}, /* */ + {"styl", 0xe600}, /* */ + {"stylus", 0xe600}, /* */ + {"svg", 0xf1c5}, /* */ + {"swift", 0xe755}, /* */ + {"t", 0xe769}, /* */ + {"tar", 0xf410}, /* */ + {"taz", 0xf410}, /* */ + {"tbz", 0xf410}, /* */ + {"tbz2", 0xf410}, /* */ + {"tec", 0xedaa}, /* */ + {"tes", 0xedaa}, /* */ + {"tex", 0xf034}, /* */ + {"tgz", 0xf410}, /* */ + {"tiff", 0xf1c5}, /* */ + {"tlz", 0xf410}, /* */ + {"toml", 0xe615}, /* */ + {"torrent", 0xe275}, /* */ + {"ts", 0xe628}, /* */ + {"tsv", 0xf1c3}, 
/* */ + {"tsx", 0xe7ba}, /* */ + {"ttf", 0xf031}, /* */ + {"twig", 0xe61c}, /* */ + {"txt", 0xf15c}, /* */ + {"txz", 0xf410}, /* */ + {"tz", 0xf410}, /* */ + {"tzo", 0xf410}, /* */ + {"video", 0xf03d}, /* */ + {"vim", 0xe62b}, /* */ + {"vue", 0xf0844}, /* */ + {"war", 0xe256}, /* */ + {"wav", 0xf001}, /* */ + {"webm", 0xf03d}, /* */ + {"webp", 0xf1c5}, /* */ + {"windows", 0xf17a}, /* */ + {"woff", 0xf031}, /* */ + {"woff2", 0xf031}, /* */ + {"woman", 0xeaa4}, /* */ + {"xhtml", 0xf13b}, /* */ + {"xls", 0xf1c3}, /* */ + {"xlsx", 0xf1c3}, /* */ + {"xml", 0xf05c0}, /* */ + {"xul", 0xf05c0}, /* */ + {"xz", 0xf410}, /* */ + {"yaml", 0xf481}, /* */ + {"yml", 0xf481}, /* */ + {"zip", 0xf410}, /* */ + {"zsh", 0xf489}, /* */ + {"zsh-theme", 0xf489}, /* */ + {"zst", 0xf410}, /* */ +}; + +static int +teco_curses_icon_cmp(const void *a, const void *b) +{ + const gchar *str = a; + const teco_curses_icon_t *icon = b; + + return strcmp(str, icon->name); +} + +gunichar +teco_curses_icons_lookup_file(const gchar *filename) +{ + g_autofree gchar *basename = g_path_get_basename(filename); + const teco_curses_icon_t *icon; + + /* try to find icon by complete file name */ + icon = bsearch(basename, teco_icons_file, G_N_ELEMENTS(teco_icons_file), + sizeof(teco_icons_file[0]), teco_curses_icon_cmp); + if (icon) + return icon->c; + + /* try to find icon by extension */ + const gchar *ext = strrchr(basename, '.'); + if (ext) { + icon = bsearch(ext+1, teco_icons_ext, G_N_ELEMENTS(teco_icons_ext), + sizeof(teco_icons_ext[0]), teco_curses_icon_cmp); + return icon ? 
icon->c : 0xf15b; /* */ + } + + /* default file icon for files without extension */ + return 0xf016; /* */ +} + +gunichar +teco_curses_icons_lookup_dir(const gchar *dirname) +{ + g_autofree gchar *basename = g_path_get_basename(dirname); + const teco_curses_icon_t *icon; + + icon = bsearch(basename, teco_icons_file, G_N_ELEMENTS(teco_icons_file), + sizeof(teco_icons_file[0]), teco_curses_icon_cmp); + + /* default folder icon */ + return icon ? icon->c : 0xf115; /* */ +} diff --git a/src/interface-curses/curses-icons.h b/src/interface-curses/curses-icons.h new file mode 100644 index 0000000..c1be06f --- /dev/null +++ b/src/interface-curses/curses-icons.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include <glib.h> + +/** + * Q-Register icon. + * 0xf04cf would look more similar to the current Gtk icon. 
+ */ +#define TECO_CURSES_ICONS_QREG 0xe236 /* */ + +gunichar teco_curses_icons_lookup_file(const gchar *filename); +gunichar teco_curses_icons_lookup_dir(const gchar *dirname); diff --git a/src/interface-curses/curses-info-popup.c b/src/interface-curses/curses-info-popup.c index a738f5d..e6e1549 100644 --- a/src/interface-curses/curses-info-popup.c +++ b/src/interface-curses/curses-info-popup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include "interface.h" #include "curses-utils.h" #include "curses-info-popup.h" +#include "curses-icons.h" /* * FIXME: This is redundant with gtk-info-popup.c. @@ -75,8 +76,13 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr) gint pad_cols; /**! entry columns */ gint pad_colwidth; /**! width per entry column */ - /* reserve 2 spaces between columns */ - pad_colwidth = MIN(ctx->longest + 2, cols - 2); + /* + * With Unicode icons enabled, we reserve 2 characters at the beginning and one + * after the filename/directory. + * Otherwise 2 characters after the entry. + */ + gint reserve = teco_ed & TECO_ED_ICONS ? 
2+1 : 2; + pad_colwidth = MIN(ctx->longest + reserve, cols - 2); /* pad_cols = floor((cols - 2) / pad_colwidth) */ pad_cols = (cols - 2) / pad_colwidth; @@ -111,8 +117,19 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr) switch (entry->type) { case TECO_POPUP_FILE: + g_assert(!teco_string_contains(&entry->name, '\0')); + if (teco_ed & TECO_ED_ICONS) { + teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_file(entry->name.data)); + waddch(ctx->pad, ' '); + } + teco_curses_format_filename(ctx->pad, entry->name.data, -1); + break; case TECO_POPUP_DIRECTORY: g_assert(!teco_string_contains(&entry->name, '\0')); + if (teco_ed & TECO_ED_ICONS) { + teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_dir(entry->name.data)); + waddch(ctx->pad, ' '); + } teco_curses_format_filename(ctx->pad, entry->name.data, -1); break; default: diff --git a/src/interface-curses/curses-info-popup.h b/src/interface-curses/curses-info-popup.h index bcdb3b8..a6c28a5 100644 --- a/src/interface-curses/curses-info-popup.h +++ b/src/interface-curses/curses-info-popup.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-curses/curses-utils.c b/src/interface-curses/curses-utils.c index 8dc62f1..c751afd 100644 --- a/src/interface-curses/curses-utils.c +++ b/src/interface-curses/curses-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,7 +29,21 @@ #include "string-utils.h" #include "curses-utils.h" -gsize +/** + * Render UTF-8 string with TECO character representations. + * + * Strings are cut off with `...` at the end if necessary. 
+ * The mapping is similar to teco_view_set_representations(). + * + * @param win The Curses window to write to. + * @param str The string to format. + * @param len The length of the string in bytes. + * @param max_width The maximum width to consume in + * the window in characters. If smaller than 0, take the + * entire remaining space in the window. + * @return Number of characters actually written. + */ +guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) { int old_x, old_y; @@ -42,6 +56,12 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) while (len > 0) { /* + * NOTE: It shouldn't be possible to meet any string, + * that is not valid UTF-8. + */ + gsize clen = g_utf8_next_char(str) - str; + + /* * NOTE: This mapping is similar to * teco_view_set_representations(). */ @@ -85,12 +105,18 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) chars_added++; if (chars_added > max_width) goto truncate; - waddch(win, *str); + /* + * FIXME: This works with UTF-8 on ncurses, + * since it detects multi-byte characters. + * However on other platforms wadd_wch() may be + * necessary, which requires a widechar Curses variant. + */ + waddnstr(win, str, clen); } } - str++; - len--; + str += clen; + len -= clen; } return getcurx(win) - old_x; @@ -108,23 +134,43 @@ truncate: return getcurx(win) - old_x; } -gsize -teco_curses_format_filename(WINDOW *win, const gchar *filename, - gint max_width) +/** + * Render UTF-8 filename. + * + * This cuts off overlong filenames with `...` at the beginning, + * possibly skipping any drive letter. + * Control characters are escaped, but not highlighted. + * + * @param win The Curses window to write to. + * @param filename Null-terminated filename to render. + * @param max_width The maximum width to consume in + * the window in characters. If smaller than 0, take the + * entire remaining space in the window. + * @return Number of characters actually written.
+ */ +guint +teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width) { int old_x = getcurx(win); g_autofree gchar *filename_printable = teco_string_echo(filename, strlen(filename)); - size_t filename_len = strlen(filename_printable); + glong filename_len = g_utf8_strlen(filename_printable, -1); if (max_width < 0) max_width = getmaxx(win) - old_x; - if (filename_len <= (size_t)max_width) { + if (filename_len <= max_width) { + /* + * FIXME: This works with UTF-8 on ncurses, + * since it detects multi-byte characters. + * However on other platforms wadd_wch() may be + * necessary, which requires a widechar Curses variant. + */ waddstr(win, filename_printable); - } else { - const gchar *keep_post = filename_printable + filename_len - - max_width + 3; + } else if (filename_len >= 3) { + const gchar *keep_post; + keep_post = g_utf8_offset_to_pointer(filename_printable + strlen(filename_printable), + -max_width + 3); #ifdef G_OS_WIN32 const gchar *keep_pre = g_path_skip_root(filename_printable); diff --git a/src/interface-curses/curses-utils.h b/src/interface-curses/curses-utils.h index a91ab44..2c819ee 100644 --- a/src/interface-curses/curses-utils.h +++ b/src/interface-curses/curses-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,17 @@ #include <curses.h> -gsize teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width); +guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width); -gsize teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width); +guint teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width); + +/** + * Add Unicode character to window. + * This is just like wadd_wch(), but does not require wide-char APIs. 
+ */ +static inline void +teco_curses_add_wc(WINDOW *win, gunichar chr) +{ + gchar buf[6]; + waddnstr(win, buf, g_unichar_to_utf8(chr, buf)); +} diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c index ef3f0c7..95e86c9 100644 --- a/src/interface-curses/interface.c +++ b/src/interface-curses/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,7 +24,6 @@ #include <stdlib.h> #include <stdarg.h> #include <unistd.h> -#include <locale.h> #include <errno.h> #ifdef HAVE_WINDOWS_H @@ -65,11 +64,12 @@ #include "qreg.h" #include "ring.h" #include "error.h" -#include "curses-utils.h" -#include "curses-info-popup.h" #include "view.h" #include "memory.h" #include "interface.h" +#include "curses-utils.h" +#include "curses-info-popup.h" +#include "curses-icons.h" #if defined(__PDCURSES__) && defined(G_OS_WIN32) && \ !defined(PDCURSES_GUI) @@ -340,12 +340,18 @@ static struct { TECO_INFO_TYPE_QREG } info_type; teco_string_t info_current; + gboolean info_dirty; WINDOW *msg_window; WINDOW *cmdline_window, *cmdline_pad; - gsize cmdline_len, cmdline_rubout_len; + guint cmdline_len, cmdline_rubout_len; + /** + * Pad used exclusively for wgetch() as it will not + * result in unwanted wrefresh(). + */ + WINDOW *input_pad; GQueue *input_queue; teco_curses_info_popup_t popup; @@ -554,7 +560,7 @@ teco_interface_init_screen(void) g_assert(teco_interface.screen_tty != NULL); teco_interface.screen = newterm(NULL, teco_interface.screen_tty, teco_interface.screen_tty); - if (!teco_interface.screen) { + if (G_UNLIKELY(!teco_interface.screen)) { g_fprintf(stderr, "Error initializing interactive mode. 
" "$TERM may be incorrect.\n"); exit(EXIT_FAILURE); @@ -629,28 +635,6 @@ teco_interface_init_interactive(GError **error) return FALSE; /* - * On UNIX terminals, the escape key is usually - * delivered as the escape character even though function - * keys are delivered as escape sequences as well. - * That's why there has to be a timeout for detecting - * escape presses if function key handling is enabled. - * This timeout can be controlled using $ESCDELAY on - * ncurses but its default is much too long. - * We set it to 25ms as Vim does. In the very rare cases - * this won't suffice, $ESCDELAY can still be set explicitly. - * - * NOTE: The only terminal emulator I'm aware of that lets - * us send an escape sequence for the escape key is Mintty - * (see "\e[?7727h"). - * - * FIXME: This appears to be ineffective for netbsd-curses. - */ -#ifdef CURSES_TTY - if (!g_getenv("ESCDELAY")) - set_escdelay(25); -#endif - - /* * $TERM must be unset or "#win32con" for the win32 * driver to load. * So we always ignore any $TERM changes by the user. @@ -679,12 +663,31 @@ teco_interface_init_interactive(GError **error) PDC_set_function_key(FUNCTION_KEY_SHUT_DOWN, KEY_CLOSE); #endif - /* for displaying UTF-8 characters properly */ - setlocale(LC_CTYPE, ""); - teco_interface_init_screen(); /* + * On UNIX terminals, the escape key is usually + * delivered as the escape character even though function + * keys are delivered as escape sequences as well. + * That's why there has to be a timeout for detecting + * escape presses if function key handling is enabled. + * This timeout can be controlled using $ESCDELAY on + * ncurses but its default is much too long. + * We set it to 25ms as Vim does. In the very rare cases + * this won't suffice, $ESCDELAY can still be set explicitly. + * + * NOTE: The only terminal emulator I'm aware of that lets + * us send an escape sequence for the escape key is Mintty + * (see "\e[?7727h"). 
+ * + * NOTE: The delay is overwritten by initscr() on netbsd-curses. + */ +#ifdef CURSES_TTY + if (!g_getenv("ESCDELAY")) + set_escdelay(25); +#endif + + /* * We always have a CTRL handler on Windows, but doing it * here again, ensures that we have a higher precedence * than the one installed by PDCurses. @@ -699,12 +702,22 @@ teco_interface_init_interactive(GError **error) curs_set(0); teco_interface.info_window = newwin(1, 0, 0, 0); - teco_interface.msg_window = newwin(1, 0, LINES - 2, 0); - teco_interface.cmdline_window = newwin(0, 0, LINES - 1, 0); - keypad(teco_interface.cmdline_window, TRUE); - nodelay(teco_interface.cmdline_window, TRUE); + + teco_interface.input_pad = newpad(1, 1); + /* + * Controlling function key processing is important + * on Unix Curses, as ESCAPE is handled as the beginning + * of an escape sequence when terminal emulators are + * involved. + * Still, it's now enabled always since the ESCDELAY + * workaround works nicely. + * On some Curses variants (XCurses) keypad + * must always be TRUE so we receive KEY_RESIZE. + */ + keypad(teco_interface.input_pad, TRUE); + nodelay(teco_interface.input_pad, TRUE); teco_interface.input_queue = g_queue_new(); @@ -748,8 +761,8 @@ teco_interface_restore_batch(void) * Set window title to a reasonable default, * in case it is not reset immediately by the * shell. - * FIXME: See set_window_title() why this - * is necessary. + * FIXME: See teco_interface_set_window_title() + * why this is necessary. */ #if defined(CURSES_TTY) && defined(HAVE_TIGETSTR) teco_interface_set_window_title(g_getenv("TERM") ? : ""); @@ -978,10 +991,14 @@ teco_interface_draw_info(void) const gchar *info_type_str; + waddstr(teco_interface.info_window, PACKAGE_NAME " "); + switch (teco_interface.info_type) { case TECO_INFO_TYPE_QREG: info_type_str = PACKAGE_NAME " - <QRegister> "; - waddstr(teco_interface.info_window, info_type_str); + teco_curses_add_wc(teco_interface.info_window, + teco_ed & TECO_ED_ICONS ?
TECO_CURSES_ICONS_QREG : '-'); + waddstr(teco_interface.info_window, " <QRegister> "); /* same formatting as in command lines */ teco_curses_format_str(teco_interface.info_window, teco_interface.info_current.data, @@ -990,10 +1007,15 @@ teco_interface_draw_info(void) case TECO_INFO_TYPE_BUFFER: info_type_str = PACKAGE_NAME " - <Buffer> "; - waddstr(teco_interface.info_window, info_type_str); g_assert(!teco_string_contains(&teco_interface.info_current, '\0')); + teco_curses_add_wc(teco_interface.info_window, + teco_ed & TECO_ED_ICONS ? teco_curses_icons_lookup_file(teco_interface.info_current.data) : '-'); + waddstr(teco_interface.info_window, " <Buffer> "); teco_curses_format_filename(teco_interface.info_window, - teco_interface.info_current.data, -1); + teco_interface.info_current.data, + getmaxx(teco_interface.info_window) - + getcurx(teco_interface.info_window) - 1); + waddch(teco_interface.info_window, teco_interface.info_dirty ? '*' : ' '); break; default: @@ -1003,13 +1025,13 @@ teco_interface_draw_info(void) wclrtoeol(teco_interface.info_window); /* - * Make sure the title will consist only of printable - * characters + * Make sure the title will consist only of printable characters. */ g_autofree gchar *info_current_printable; info_current_printable = teco_string_echo(teco_interface.info_current.data, teco_interface.info_current.len); - g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable, NULL); + g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable, + teco_interface.info_dirty ? 
"*" : "", NULL); teco_interface_set_window_title(title); } @@ -1019,6 +1041,7 @@ teco_interface_info_update_qreg(const teco_qreg_t *reg) teco_string_clear(&teco_interface.info_current); teco_string_init(&teco_interface.info_current, reg->head.name.data, reg->head.name.len); + teco_interface.info_dirty = FALSE; teco_interface.info_type = TECO_INFO_TYPE_QREG; /* NOTE: drawn in teco_interface_event_loop_iter() */ } @@ -1030,8 +1053,7 @@ teco_interface_info_update_buffer(const teco_buffer_t *buffer) teco_string_clear(&teco_interface.info_current); teco_string_init(&teco_interface.info_current, filename, strlen(filename)); - teco_string_append_c(&teco_interface.info_current, - buffer->dirty ? '*' : ' '); + teco_interface.info_dirty = buffer->dirty; teco_interface.info_type = TECO_INFO_TYPE_BUFFER; /* NOTE: drawn in teco_interface_event_loop_iter() */ } @@ -1044,7 +1066,8 @@ teco_interface_cmdline_update(const teco_cmdline_t *cmdline) * We don't know if it is similar to the last one, * so resizing makes no sense. * We approximate the size of the new formatted command-line, - * wasting a few bytes for control characters. + * wasting a few bytes for control characters and + * multi-byte Unicode sequences. */ if (teco_interface.cmdline_pad) delwin(teco_interface.cmdline_pad); @@ -1172,7 +1195,7 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, { int rc = str ? 
PDC_setclipboard(str, str_len) : PDC_clearclipboard(); if (rc != PDC_CLIP_SUCCESS) { - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Error %d copying to clipboard", rc); return FALSE; } @@ -1194,7 +1217,7 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError if (rc == PDC_CLIP_EMPTY) return TRUE; if (rc != PDC_CLIP_SUCCESS) { - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Error %d retrieving clipboard", rc); return FALSE; } @@ -1232,9 +1255,17 @@ teco_interface_init_clipboard(void) * must be enabled. * There is no way to find out if they are but we must * not register the clipboard registers if they aren't. - * Therefore, a special XTerm clipboard ED flag an be set by the user. + * Still, XTerm clipboards are broken with Unicode characters. + * Also, there are other terminal emulators supporting OSC-52, + * so the XTerm version is only checked if the terminal identifies as XTerm. + * Also, a special clipboard ED flag must be set by the user. + * + * NOTE: Apparently there is also a terminfo entry Ms, but it's probably + * not worth using it since it won't always be set and even if set, does not + * tell you whether the terminal will actually answer to the escape sequence or not. */ - if (!(teco_ed & TECO_ED_XTERM_CLIPBOARD) || teco_xterm_version() < 203) + if (!(teco_ed & TECO_ED_OSC52) || + (teco_xterm_version() >= 0 && teco_xterm_version() < 203)) return; teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_clipboard_new("")); @@ -1300,6 +1331,8 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, gboolean teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError **error) { + gboolean ret = TRUE; + /* * Query the clipboard -- XTerm will reply with the * OSC-52 command that would set the current selection. 
@@ -1320,18 +1353,19 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError * to be on the safe side. */ halfdelay(1); /* 100ms timeout */ - keypad(stdscr, FALSE); + /* don't interpret escape sequences */ + keypad(teco_interface.input_pad, FALSE); /* * Skip "\e]52;x;" (7 characters). */ for (gint i = 0; i < 7; i++) { - if (getch() == ERR) { + ret = wgetch(teco_interface.input_pad) != ERR; + if (!ret) { /* timeout */ - cbreak(); - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Timed out reading XTerm clipboard"); - return FALSE; + goto cleanup; } } @@ -1347,17 +1381,22 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError */ gchar buffer[MAX(3, 7)]; - gchar c = (gchar)getch(); - if (c == ERR) { + gchar c = (gchar)wgetch(teco_interface.input_pad); + ret = c != ERR; + if (!ret) { /* timeout */ - cbreak(); g_string_free(str_base64, TRUE); - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Timed out reading XTerm clipboard"); - return FALSE; + goto cleanup; } if (c == '\a') break; + if (c == '\e') { + /* OSC escape sequence can also be terminated by "\e\\" */ + c = (gchar)wgetch(teco_interface.input_pad); + break; + } /* * This could be simplified using sscanf() and @@ -1372,14 +1411,16 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError g_string_append_len(str_base64, buffer, out_len); } - cbreak(); - if (str) *str = str_base64->str; *len = str_base64->len; g_string_free(str_base64, !str); - return TRUE; + +cleanup: + keypad(teco_interface.input_pad, TRUE); + nodelay(teco_interface.input_pad, TRUE); + return ret; } #else /* !PDCURSES && !CURSES_TTY */ @@ -1489,13 +1530,17 @@ teco_interface_is_interrupted(void) gboolean teco_interface_is_interrupted(void) { - if (!teco_interface.cmdline_window) + if (!teco_interface.input_pad) /* batch 
mode */ return teco_interrupted != FALSE; - /* NOTE: getch() is configured to be nonblocking. */ + /* + * NOTE: wgetch() is configured to be nonblocking. + * We wgetch() on a dummy pad, so this does not call any + * wrefresh(). + */ gint key; - while ((key = wgetch(teco_interface.cmdline_window)) != ERR) { + while ((key = wgetch(teco_interface.input_pad)) != ERR) { if (G_UNLIKELY(key == TECO_CTL_KEY('C'))) return TRUE; g_queue_push_tail(teco_interface.input_queue, @@ -1535,35 +1580,19 @@ teco_interface_refresh(void) static gint teco_interface_blocking_getch(void) { - /* - * Setting function key processing is important - * on Unix Curses, as ESCAPE is handled as the beginning - * of a escape sequence when terminal emulators are - * involved. - * On some Curses variants (XCurses) however, keypad - * must always be TRUE so we receive KEY_RESIZE. - * - * FIXME: NetBSD's curses could be handled like ncurses, - * but gets into an undefined state when SciTECO processes - * escape sequences. - */ -#ifdef NCURSES_UNIX - keypad(teco_interface.cmdline_window, teco_ed & TECO_ED_FNKEYS); -#endif - /* no special <CTRL/C> handling */ raw(); - nodelay(teco_interface.cmdline_window, FALSE); + nodelay(teco_interface.input_pad, FALSE); /* * Memory limiting is stopped temporarily, since it might otherwise * constantly place 100% load on the CPU. 
*/ teco_memory_stop_limiting(); - gint key = wgetch(teco_interface.cmdline_window); + gint key = wgetch(teco_interface.input_pad); teco_memory_start_limiting(); /* allow asynchronous interruptions on <CTRL/C> */ teco_interrupted = FALSE; - nodelay(teco_interface.cmdline_window, TRUE); + nodelay(teco_interface.input_pad, TRUE); #if defined(CURSES_TTY) || defined(PDCURSES_WINCON) || defined(NCURSES_WIN32) noraw(); /* FIXME: necessary because of NCURSES_WIN32 bug */ cbreak(); @@ -1585,6 +1614,11 @@ teco_interface_blocking_getch(void) void teco_interface_event_loop_iter(void) { + static gchar keybuf[4]; + static gint keybuf_i = 0; + + GError **error = &teco_interface.event_loop_error; + gint key = g_queue_is_empty(teco_interface.input_queue) ? teco_interface_blocking_getch() : GPOINTER_TO_INT(g_queue_pop_head(teco_interface.input_queue)); @@ -1613,23 +1647,24 @@ teco_interface_event_loop_iter(void) * backspace. * In SciTECO backspace is normalized to ^H. */ - if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), - &teco_interface.event_loop_error)) + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error)) return; break; case KEY_ENTER: case '\r': case '\n': - if (!teco_cmdline_keypress_c('\n', &teco_interface.event_loop_error)) + if (!teco_cmdline_keymacro_c('\n', error)) return; break; /* * Function key macros + * + * FIXME: Perhaps support everything returned by keyname()? 
*/ #define FN(KEY) \ case KEY_##KEY: \ - if (!teco_cmdline_fnmacro(#KEY, &teco_interface.event_loop_error)) \ + if (!teco_cmdline_keymacro(#KEY, -1, error)) \ return; \ break #define FNS(KEY) FN(KEY); FN(S##KEY) @@ -1639,9 +1674,8 @@ teco_interface_event_loop_iter(void) gchar macro_name[3+1]; g_snprintf(macro_name, sizeof(macro_name), - "F%d", key - KEY_F0); - if (!teco_cmdline_fnmacro(macro_name, - &teco_interface.event_loop_error)) + "F%d", key - KEY_F0); + if (!teco_cmdline_keymacro(macro_name, -1, error)) return; break; } @@ -1660,9 +1694,31 @@ teco_interface_event_loop_iter(void) * Control keys and keys with printable representation */ default: - if (key < 0x80 && - !teco_cmdline_keypress_c(key, &teco_interface.event_loop_error)) + if (key > 0xFF) + /* unhandled function key */ return; + + /* + * NOTE: There's also wget_wch(), but it requires + * a widechar version of Curses. + */ + keybuf[keybuf_i++] = key; + gsize len = keybuf_i; + gunichar cp = g_utf8_get_char_validated(keybuf, len); + if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2) + keybuf_i = 0; + if ((gint32)cp < 0) + /* incomplete or invalid */ + return; + switch (teco_cmdline_keymacro(keybuf, len, error)) { + case TECO_KEYMACRO_ERROR: + return; + case TECO_KEYMACRO_SUCCESS: + break; + case TECO_KEYMACRO_UNDEFINED: + if (!teco_cmdline_keypress(keybuf, len, error)) + return; + } } teco_interface_refresh(); @@ -1733,6 +1789,8 @@ teco_interface_cleanup(void) delwin(teco_interface.cmdline_pad); if (teco_interface.msg_window) delwin(teco_interface.msg_window); + if (teco_interface.input_pad) + delwin(teco_interface.input_pad); /* * PDCurses/WinCon crashes if initscr() wasn't called. 
diff --git a/src/interface-gtk/gtk-info-popup.c b/src/interface-gtk/gtk-info-popup.c index 744900d..4e25224 100644 --- a/src/interface-gtk/gtk-info-popup.c +++ b/src/interface-gtk/gtk-info-popup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-info-popup.h b/src/interface-gtk/gtk-info-popup.h index de4b463..c3a62ec 100644 --- a/src/interface-gtk/gtk-info-popup.h +++ b/src/interface-gtk/gtk-info-popup.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-label.c b/src/interface-gtk/gtk-label.c index c1f4867..50cd345 100644 --- a/src/interface-gtk/gtk-label.c +++ b/src/interface-gtk/gtk-label.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-label.h b/src/interface-gtk/gtk-label.h index d2e2314..bed6642 100644 --- a/src/interface-gtk/gtk-label.h +++ b/src/interface-gtk/gtk-label.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c index 253600a..843ad15 100644 --- a/src/interface-gtk/interface.c +++ b/src/interface-gtk/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * 
Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -279,7 +279,8 @@ teco_interface_init(void) "type-label"); gtk_header_bar_pack_start(GTK_HEADER_BAR(teco_interface.info_bar_widget), teco_interface.info_type_widget); - if (teco_interface.xembed_id || teco_interface.no_csd) { + if (teco_interface.xembed_id || teco_interface.no_csd || + !g_strcmp0(g_getenv("GTK_CSD"), "0")) { /* fall back to adding the info bar as an ordinary widget */ gtk_box_pack_start(GTK_BOX(vbox), teco_interface.info_bar_widget, FALSE, FALSE, 0); @@ -390,12 +391,6 @@ teco_interface_init(void) GOptionGroup * teco_interface_get_options(void) { - /* - * FIXME: On platforms where you want to disable CSD, you usually - * want to disable it always, so it should be configurable in the SciTECO - * profile. - * On the other hand, you could just install gtk3-nocsd. - */ static const GOptionEntry entries[] = { {"no-csd", 0, G_OPTION_FLAG_IN_MAIN, G_OPTION_ARG_NONE, &teco_interface.no_csd, @@ -656,15 +651,46 @@ teco_interface_get_selection_by_name(const gchar *name) return gdk_atom_intern(name, FALSE); } +static void +teco_interface_clipboard_provide(GtkClipboard *clipboard, GtkSelectionData *selection, guint info, gpointer userdata) +{ + GString *str = userdata; + gtk_selection_data_set_text(selection, str->str, str->len); +} + +static void +teco_interface_clipboard_clear(GtkClipboard *clipboard, gpointer userdata) +{ + GString *str = userdata; + g_string_free(str, TRUE); +} + gboolean teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, GError **error) { + static const GtkTargetEntry target = {"UTF8_STRING", 0, 0}; GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name)); + if (!str) { + gtk_clipboard_clear(clipboard); + return TRUE; + } + /* - * NOTE: function has compatible semantics for str_len < 0. 
+ * NOTE: gtk_clipboard_set_text() would ignore embedded nulls, + * even though it takes a length. + * We could theoretically avoid one allocation, but don't yet have proper types + * to store string data with length in one heap object. */ - gtk_clipboard_set_text(clipboard, str, str_len); + GString *gstr = g_string_new_len(str, str_len); + if (!gtk_clipboard_set_with_data(clipboard, &target, 1, + teco_interface_clipboard_provide, + teco_interface_clipboard_clear, gstr)) { + g_string_free(gstr, TRUE); + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, + "Cannot set clipboard"); + return FALSE; + } return TRUE; } @@ -674,16 +700,28 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError { GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name)); /* - * Could return NULL for an empty clipboard. + * gtk_clipboard_wait_for_text() does not return the text length, + * so it doesn't work with embedded nulls. + * gtk_clipboard_wait_for_contents() could also return NULL for empty clipboards. * - * FIXME: This converts to UTF8 and we loose the ability - * to get clipboard with embedded nulls. + * NOTE: This also drives the main event loop, + * which should be safe (see teco_interface_key_pressed_cb()). */ - g_autofree gchar *contents = gtk_clipboard_wait_for_text(clipboard); + GdkAtom utf8_string = gdk_atom_intern_static_string("UTF8_STRING"); + g_autoptr(GtkSelectionData) contents = gtk_clipboard_wait_for_contents(clipboard, utf8_string); + if (!contents) { + *len = 0; + if (str) + *str = NULL; + return TRUE; + } - *len = contents ? 
strlen(contents) : 0; - if (str) - *str = g_steal_pointer(&contents); + *len = gtk_selection_data_get_length(contents); + if (str) { + /* gtk_selection_data_get_text() does not work with embedded nulls */ + *str = memcpy(g_malloc(*len+1), gtk_selection_data_get_data(contents), *len); + (*str)[*len] = '\0'; + } return TRUE; } @@ -881,19 +919,50 @@ teco_interface_cmdline_commit_cb(GtkIMContext *context, gchar *str, gpointer use { g_autoptr(GError) error = NULL; - /* - * FIXME: This is only for consistency as long as we - * do not support Unicode. - */ - for (char *p = str; *p != '\0'; p = g_utf8_next_char(p)) - if (g_utf8_get_char(p) >= 0x80) - return; - if (!teco_cmdline_keypress(str, strlen(str), &error) && g_error_matches(error, TECO_ERROR, TECO_ERROR_QUIT)) gtk_main_quit(); } +/** + * Try to find an ANSI (latin) key for a given keypress. + * + * If the given key press does not generate a key from the ANSI + * range, it tries to find one in another group. + * + * @param event Key event to look up. In case of success, + * this event structure might also be written to. + * @return The codepoint of the ANSI version or 0 if there is + * no fitting ANSI/latin key. 
+ */ +static gchar +teco_interface_get_ansi_key(GdkEventKey *event) +{ + gunichar cp = gdk_keyval_to_unicode(event->keyval); + if (cp && cp < 0x80) + return cp; + + GdkKeymap *map = gdk_keymap_get_for_display(gdk_window_get_display(event->window)); + g_autofree GdkKeymapKey *keys = NULL; + g_autofree guint *keyvals = NULL; + gint n_entries = 0; + + gdk_keymap_get_entries_for_keycode(map, event->hardware_keycode, + &keys, &keyvals, &n_entries); + for (gint i = 0; i < n_entries; i++) { + g_assert(keys[i].keycode == event->hardware_keycode); + cp = gdk_keyval_to_unicode(keyvals[i]); + if (cp && cp < 0x80 && + gdk_keyval_is_upper(keyvals[i]) == gdk_keyval_is_upper(event->keyval)) { + event->keyval = keyvals[i]; + event->group = keys[i].group; + return cp; + } + } + + return 0; +} + static gboolean teco_interface_handle_key_press(GdkEventKey *event, GError **error) { @@ -901,19 +970,19 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) switch (event->keyval) { case GDK_KEY_Escape: - if (!teco_cmdline_keypress_c('\e', error)) + if (!teco_cmdline_keymacro_c('\e', error)) return FALSE; break; case GDK_KEY_BackSpace: - if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), error)) + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error)) return FALSE; break; case GDK_KEY_Tab: - if (!teco_cmdline_keypress_c('\t', error)) + if (!teco_cmdline_keymacro_c('\t', error)) return FALSE; break; case GDK_KEY_Return: - if (!teco_cmdline_keypress_c('\n', error)) + if (!teco_cmdline_keymacro_c('\n', error)) return FALSE; break; @@ -922,12 +991,12 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) */ #define FN(KEY, MACRO) \ case GDK_KEY_##KEY: \ - if (!teco_cmdline_fnmacro(#MACRO, error)) \ + if (!teco_cmdline_keymacro(#MACRO, -1, error)) \ return FALSE; \ break #define FNS(KEY, MACRO) \ case GDK_KEY_##KEY: \ - if (!teco_cmdline_fnmacro(event->state & GDK_SHIFT_MASK ? 
"S" #MACRO : #MACRO, error)) \ + if (!teco_cmdline_keymacro(event->state & GDK_SHIFT_MASK ? "S" #MACRO : #MACRO, -1, error)) \ return FALSE; \ break FN(Down, DOWN); FN(Up, UP); @@ -939,8 +1008,8 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) gchar macro_name[3+1]; g_snprintf(macro_name, sizeof(macro_name), - "F%d", event->keyval - GDK_KEY_F1 + 1); - if (!teco_cmdline_fnmacro(macro_name, error)) + "F%d", event->keyval - GDK_KEY_F1 + 1); + if (!teco_cmdline_keymacro(macro_name, -1, error)) return FALSE; break; } @@ -960,33 +1029,72 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) /* * Control keys and keys with printable representation */ - default: { - gunichar u = gdk_keyval_to_unicode(event->keyval); + default: + /* + * NOTE: Alt-Gr key-combinations are sometimes reported as + * Ctrl+Alt, so we filter those out. + */ + if ((event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) { + gchar c = teco_interface_get_ansi_key(event); + if (c) { + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY(g_ascii_toupper(c)), error)) + return FALSE; + break; + } + } - if (u && u < 0x80 && (event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) { - /* - * NOTE: Alt-Gr key-combinations are sometimes reported as - * Ctrl+Alt, so we filter those out. - */ - if (!teco_cmdline_keypress_c(TECO_CTL_KEY(g_ascii_toupper(u)), error)) + /* + * First look up a key macro. + * Only if it's undefined, we try to automatically find an ANSI key. + * On the downside, this means we cannot define key macros for dead keys + * or keys that require some sort of input method editing. + * + * FIXME: This might be a good reason to be able to disable the + * automatic ANSIfication, as we could look up the key macro in + * teco_interface_cmdline_commit_cb(). 
+ */ + gunichar cp = gdk_keyval_to_unicode(event->keyval); + if (cp) { + char buf[6]; + gsize len = g_unichar_to_utf8(cp, buf); + teco_keymacro_status_t rc = teco_cmdline_keymacro(buf, len, error); + if (rc == TECO_KEYMACRO_ERROR) return FALSE; - } else { - /* - * This is necessary to handle dead keys and in the future - * for inputting Asian languages. - * - * FIXME: We do not yet support preediting. - * It would be easier to forward the event to the Scintilla - * widget and use its existing IM support. - * But this breaks the event freezing and results in flickering. - */ - gtk_im_context_filter_keypress(teco_interface.input_method, event); + if (rc == TECO_KEYMACRO_SUCCESS) + break; + g_assert(rc == TECO_KEYMACRO_UNDEFINED); } - } + + /* + * If the current state is case-insensitive, it is a command name - + * which consists only of ANSI letters - we try to + * accept non-ANSI letters as well. + * This means, you don't have to change keyboard layouts + * so often. + * FIXME: This could be made to work with string-building constructs + * within Q-Register specs as well. + * Unfortunately, Q-Reg specs and string building can be nested + * indefinitely. + * This would effectively require a new keymacro_mask_cb(). + */ + if ((teco_cmdline.machine.parent.current->keymacro_mask | + teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) & + TECO_KEYMACRO_MASK_CASEINSENSITIVE) + teco_interface_get_ansi_key(event); + + /* + * This is necessary to handle dead keys and in the future + * for inputting Asian languages. + * + * FIXME: We do not yet support preediting. + * It would be easier to forward the event to the Scintilla + * widget and use its existing IM support. + * But this breaks the event freezing and results in flickering. 
+ */ + gtk_im_context_filter_keypress(teco_interface.input_method, event); } teco_interface_refresh(teco_interface_current_view != last_view); - return TRUE; } @@ -997,7 +1105,7 @@ teco_interface_event_loop(GError **error) g_assert(scitecoconfig_reg != NULL); g_auto(teco_string_t) scitecoconfig = {NULL, 0}; if (!scitecoconfig_reg->vtable->get_string(scitecoconfig_reg, - &scitecoconfig.data, &scitecoconfig.len, error)) + &scitecoconfig.data, &scitecoconfig.len, NULL, error)) return FALSE; if (teco_string_contains(&scitecoconfig, '\0')) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, @@ -1006,45 +1114,6 @@ teco_interface_event_loop(GError **error) } g_assert(scitecoconfig.data != NULL); -#ifdef G_OS_WIN32 - /* - * FIXME: This is necessary so that the icon themes are found in the same - * directory as sciteco.exe. - * This fails of course when $SCITECOCONFIG is changed. - * We should perhaps always use the absolute path of sciteco.exe. - * If you want to install SciTECO differently, you can still set - * $XDG_DATA_DIRS. - * - * FIXME FIXME FIXME: This is also currently broken. - */ - //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons"); - //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path); -#else - /* - * Load icons for the GTK window. - * This is not necessary on Windows since the icon included - * as a resource will be used by default. 
- */ - static const gchar *icon_files[] = { - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png", - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png", - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png" - }; - GList *icon_list = NULL; - - for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) { - GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL); - - /* fail silently if there's a problem with one of the icons */ - if (icon_pixbuf) - icon_list = g_list_append(icon_list, icon_pixbuf); - } - - gtk_window_set_default_icon_list(icon_list); - - g_list_free_full(icon_list, g_object_unref); -#endif - /* * Initialize the CSS variable provider and the CSS provider * for the included fallback.css. @@ -1087,6 +1156,50 @@ teco_interface_event_loop(GError **error) /* don't show popup by default */ gtk_widget_hide(teco_interface.popup_widget); +#ifdef G_OS_WIN32 + /* + * FIXME: This is necessary so that the icon themes are found in the same + * directory as sciteco.exe. + * This fails of course when $SCITECOCONFIG is changed. + * We should perhaps always use the absolute path of sciteco.exe. + * If you want to install SciTECO differently, you can still set + * $XDG_DATA_DIRS. + * + * FIXME FIXME FIXME: This is also currently broken. + */ + //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons"); + //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path); +#else + /* + * Load icons for the GTK window. + * This is not necessary on Windows since the icon included + * as a resource will be used by default. 
+ */ + static const gchar *icon_files[] = { + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png", + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png", + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png" + }; + GList *icon_list = NULL; + + for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) { + GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL); + + /* fail silently if there's a problem with one of the icons */ + if (icon_pixbuf) + icon_list = g_list_append(icon_list, icon_pixbuf); + } + + /* + * The position of this call after gtk_widget_show() is important, so that + * tabbed and other Xembed hosts can pick up the icon. + * They also do not pick up the icon if set via gtk_window_set_default_icon_list(). + */ + gtk_window_set_icon_list(GTK_WINDOW(teco_interface.window), icon_list); + + g_list_free_full(icon_list, g_object_unref); +#endif + /* * SIGTERM emulates the "Close" key just like when * closing the window if supported by this version of glib. diff --git a/src/interface.c b/src/interface.c index a2042db..2e2d64e 100644 --- a/src/interface.c +++ b/src/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface.h b/src/interface.h index 3170849..32db6b5 100644 --- a/src/interface.h +++ b/src/interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -154,6 +154,36 @@ void teco_interface_process_notify(SCNotification *notify); /** @pure */ void teco_interface_cleanup(void); +static inline guint +teco_interface_get_codepage(void) +{ + return teco_view_get_codepage(teco_interface_current_view); +} + +static inline 
gssize +teco_interface_glyphs2bytes(teco_int_t pos) +{ + return teco_view_glyphs2bytes(teco_interface_current_view, pos); +} + +static inline teco_int_t +teco_interface_bytes2glyphs(gsize pos) +{ + return teco_view_bytes2glyphs(teco_interface_current_view, pos); +} + +static inline gssize +teco_interface_glyphs2bytes_relative(gsize pos, teco_int_t n) +{ + return teco_view_glyphs2bytes_relative(teco_interface_current_view, pos, n); +} + +static inline teco_int_t +teco_interface_get_character(gsize pos, gsize len) +{ + return teco_view_get_character(teco_interface_current_view, pos, len); +} + /* * The following functions are here for lack of a better place. * They could also be in sciteco.h, but only if declared as non-inline @@ -161,12 +191,6 @@ void teco_interface_cleanup(void); */ static inline gboolean -teco_validate_pos(teco_int_t n) -{ - return 0 <= n && n <= teco_interface_ssm(SCI_GETLENGTH, 0, 0); -} - -static inline gboolean teco_validate_line(teco_int_t n) { return 0 <= n && n < teco_interface_ssm(SCI_GETLINECOUNT, 0, 0); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include <string.h> #include <stdlib.h> #include <signal.h> +#include <locale.h> #include <glib.h> #include <glib/gprintf.h> @@ -104,9 +105,10 @@ teco_get_default_config_path(const gchar *program) static gchar *teco_eval_macro = NULL; static gboolean teco_mung_file = FALSE; static gboolean teco_mung_profile = TRUE; +static gboolean teco_8bit_clean = FALSE; static gchar * -teco_process_options(gint *argc, gchar ***argv) 
+teco_process_options(gchar ***argv) { static const GOptionEntry option_entries[] = { {"eval", 'e', 0, G_OPTION_ARG_STRING, &teco_eval_macro, @@ -119,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv) "Do not mung " "$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " " "even if it exists"}, + {"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean, + "Use ANSI encoding by default and disable automatic EOL conversion"}, {NULL} }; @@ -133,7 +137,7 @@ teco_process_options(gint *argc, gchar ***argv) g_option_context_set_description( options, "Bug reports should go to <" PACKAGE_BUGREPORT "> or " - "<" PACKAGE_URL_DEV ">." + "<" PACKAGE_URL ">." ); g_option_context_add_main_entries(options, option_entries, NULL); @@ -155,7 +159,7 @@ teco_process_options(gint *argc, gchar ***argv) */ g_option_context_set_strict_posix(options, TRUE); - if (!g_option_context_parse(options, argc, argv, &error)) { + if (!g_option_context_parse_strv(options, argv, &error)) { g_fprintf(stderr, "Option parsing failed: %s\n", error->message); exit(EXIT_FAILURE); @@ -170,16 +174,13 @@ teco_process_options(gint *argc, gchar ***argv) * and "--" is not the first non-option argument as in * sciteco foo -- -C bar. 
*/ - if (*argc >= 2 && !strcmp((*argv)[1], "--")) { - (*argv)[1] = (*argv)[0]; - (*argv)++; - (*argc)--; - } + if ((*argv)[0] && !g_strcmp0((*argv)[1], "--")) + g_free(teco_strv_remove(*argv, 1)); gchar *mung_filename = NULL; if (teco_mung_file) { - if (*argc < 2) { + if (!(*argv)[0] || !(*argv)[1]) { g_fprintf(stderr, "Script to mung expected!\n"); exit(EXIT_FAILURE); } @@ -190,11 +191,7 @@ teco_process_options(gint *argc, gchar ***argv) exit(EXIT_FAILURE); } - mung_filename = g_strdup((*argv)[1]); - - (*argv)[1] = (*argv)[0]; - (*argv)++; - (*argc)--; + mung_filename = teco_strv_remove(*argv, 1); } return mung_filename; @@ -306,12 +303,39 @@ main(int argc, char **argv) signal(SIGINT, teco_sigint_handler); signal(SIGTERM, teco_sigint_handler); - g_autofree gchar *mung_filename = teco_process_options(&argc, &argv); + /* + * Important for Unicode handling in curses and glib. + * In particular, in order to accept Unicode characters + * in option strings. + * + * NOTE: Windows 10 accepts ".UTF8" here, so the "ANSI" + * versions of win32 API functions accept UTF-8. + * We want to support older versions, though and + * glib happily converts to Windows' native UTF-16. + */ + setlocale(LC_ALL, ""); + +#ifdef G_OS_WIN32 + /* + * main()'s argv is in the system locale, so we might loose + * information when passing it to g_option_context_parse(). + * The remaining strings are also not guaranteed to be in + * UTF-8. + */ + g_auto(GStrv) argv_utf8 = g_win32_get_command_line(); +#else + g_auto(GStrv) argv_utf8 = g_strdupv(argv); +#endif + g_autofree gchar *mung_filename = teco_process_options(&argv_utf8); /* * All remaining arguments in argv are arguments * to the macro or munged file. */ + if (teco_8bit_clean) + /* equivalent to 16,4ED but executed earlier */ + teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI; + /* * Theoretically, QReg tables should only be initialized * after the interface, since they contain Scintilla documents. 
@@ -343,7 +367,7 @@ main(int argc, char **argv) /* current working directory ("$") */ teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_workingdir_new()); /* environment defaults and registers */ - teco_initialize_environment(argv[0]); + teco_initialize_environment(argv_utf8[0]); teco_qreg_table_t local_qregs; teco_qreg_table_init(&local_qregs, TRUE); @@ -361,8 +385,8 @@ main(int argc, char **argv) * Also, the Unnamed Buffer should be kept empty for piping. * Therefore, it would be best to store the arguments in Q-Regs, e.g. $0,$1,$2... */ - for (gint i = 1; i < argc; i++) { - teco_interface_ssm(SCI_APPENDTEXT, strlen(argv[i]), (sptr_t)argv[i]); + for (gint i = 1; argv_utf8[i]; i++) { + teco_interface_ssm(SCI_APPENDTEXT, strlen(argv_utf8[i]), (sptr_t)argv_utf8[i]); teco_interface_ssm(SCI_APPENDTEXT, 1, (sptr_t)"\n"); } diff --git a/src/memory.c b/src/memory.c index 6d7645c..26cde55 100644 --- a/src/memory.c +++ b/src/memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -288,7 +288,7 @@ * Current memory usage. * Access must be synchronized using atomic operations. */ -static gint teco_memory_usage = 0; +static guint teco_memory_usage = 0; /* * NOTE: This implementation based on malloc_usable_size() might @@ -495,6 +495,16 @@ teco_memory_get_usage(void) return procstk.ki_rssize * page_size; } +/** + * Options passed to jemalloc. + * + * It's crucial to disable opt.retain, so that freeing memory after + * recovering from memory limit hits actually decreases the RSS. + * The reasons for activating the option, mentioned in jemalloc(3), + * shouldn't be relevant on FreeBSD. 
+ */ +const gchar *malloc_conf = "retain:false"; + #define NEED_POLL_THREAD #elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_PROCFS) @@ -628,8 +638,7 @@ teco_memory_stop_limiting(void) g_mutex_unlock(&teco_memory_mutex); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_memory_cleanup(void) { if (!teco_memory_thread) @@ -642,7 +651,6 @@ teco_memory_cleanup(void) g_thread_join(teco_memory_thread); } -#endif #else /* !NEED_POLL_THREAD */ @@ -660,7 +668,7 @@ gsize teco_memory_limit = 500*1000*1000; gboolean teco_memory_set_limit(gsize new_limit, GError **error) { - gsize memory_usage = g_atomic_int_get(&teco_memory_usage); + gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage); if (G_UNLIKELY(new_limit && memory_usage > new_limit)) { g_autofree gchar *usage_str = g_format_size(memory_usage); @@ -693,18 +701,19 @@ teco_memory_set_limit(gsize new_limit, GError **error) gboolean teco_memory_check(gsize request, GError **error) { - gsize memory_usage = g_atomic_int_get(&teco_memory_usage) + request; + gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage); + gsize requested_memory_usage = memory_usage+request; /* * Check for overflows. * NOTE: Glib 2.48 has g_size_checked_add(). */ - if (G_UNLIKELY(memory_usage < request)) + if (G_UNLIKELY(requested_memory_usage < memory_usage)) /* guaranteed to fail if memory limiting is enabled */ - memory_usage = G_MAXSIZE; + requested_memory_usage = G_MAXSIZE; - if (G_UNLIKELY(teco_memory_limit && memory_usage >= teco_memory_limit)) { - g_autofree gchar *limit_str = g_format_size(memory_usage); + if (G_UNLIKELY(teco_memory_limit && requested_memory_usage >= teco_memory_limit)) { + g_autofree gchar *limit_str = g_format_size(requested_memory_usage); g_set_error(error, TECO_ERROR, TECO_ERROR_MEMLIMIT, "Memory limit (%s) exceeded. 
See <EJ> command.", diff --git a/src/memory.h b/src/memory.h index f31a451..39f8319 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/parser.c b/src/parser.c index 910fc7f..b1aa06e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,16 +52,14 @@ teco_loop_stack_init(void) TECO_DEFINE_ARRAY_UNDO_INSERT_VAL(teco_loop_stack, teco_loop_context_t); TECO_DEFINE_ARRAY_UNDO_REMOVE_INDEX(teco_loop_stack); -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_loop_stack_cleanup(void) { g_array_free(teco_loop_stack, TRUE); } -#endif gboolean -teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error) +teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error) { teco_state_t *next = ctx->current->input_cb(ctx, chr, error); if (!next) @@ -88,18 +86,22 @@ teco_state_end_of_macro(teco_machine_t *ctx, GError **error) } /** + * Execute macro from current PC to stop position. + * * Handles all expected exceptions and preparing them for stack frame insertion. + * + * @param ctx State machine. + * @param macro The macro to execute. + * It does not have to be complete. + * It must consist only of validated UTF-8 sequences, though. + * @param stop_pos Where to stop execution in bytes. + * @param error Location to store error. + * @return FALSE if an error occurred. 
*/ gboolean -teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_pos, GError **error) +teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gsize stop_pos, GError **error) { while (ctx->macro_pc < stop_pos) { -#ifdef DEBUG - g_printf("EXEC(%d): input='%c'/%x, state=%p, mode=%d\n", - ctx->macro_pc, macro[ctx->macro_pc], macro[ctx->macro_pc], - ctx->parent.current, ctx->mode); -#endif - if (G_UNLIKELY(teco_interface_is_interrupted())) { teco_error_interrupted_set(error); goto error_attach; @@ -112,9 +114,18 @@ teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_p if (!teco_memory_check(0, error)) goto error_attach; - if (!teco_machine_input(&ctx->parent, macro[ctx->macro_pc], error)) + /* UTF-8 sequences are already validated */ + gunichar chr = g_utf8_get_char(macro+ctx->macro_pc); + +#ifdef DEBUG + g_printf("EXEC(%d): input='%C' (U+%04" G_GINT32_MODIFIER "X), state=%p, mode=%d\n", + ctx->macro_pc, chr, chr, ctx->parent.current, ctx->mode); +#endif + + if (!teco_machine_input(&ctx->parent, chr, error)) goto error_attach; - ctx->macro_pc++; + + ctx->macro_pc = g_utf8_next_char(macro+ctx->macro_pc) - macro; } /* @@ -146,6 +157,14 @@ gboolean teco_execute_macro(const gchar *macro, gsize macro_len, teco_qreg_table_t *qreg_table_locals, GError **error) { + const teco_string_t str = {(gchar *)macro, macro_len}; + + if (!teco_string_validate_utf8(&str)) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 byte sequence in macro"); + return FALSE; + } + /* * This is not auto-cleaned up, so it can be initialized * on demand. 
@@ -311,26 +330,26 @@ teco_machine_main_eval_colon(teco_machine_main_t *ctx) teco_state_t * teco_machine_main_transition_input(teco_machine_main_t *ctx, teco_machine_main_transition_t *transitions, - guint len, gchar chr, GError **error) + guint len, gunichar chr, GError **error) { - if (chr < 0 || chr >= len || !transitions[(guint)chr].next) { + if (chr >= len || !transitions[chr].next) { teco_error_syntax_set(error, chr); return NULL; } - if (ctx->mode == TECO_MODE_NORMAL && transitions[(guint)chr].transition_cb) { + if (ctx->mode == TECO_MODE_NORMAL && transitions[chr].transition_cb) { /* * NOTE: We could also just let transition_cb return a boolean... */ GError *tmp_error = NULL; - transitions[(guint)chr].transition_cb(ctx, &tmp_error); + transitions[chr].transition_cb(ctx, &tmp_error); if (tmp_error) { g_propagate_error(error, tmp_error); return NULL; } } - return transitions[(guint)chr].next; + return transitions[chr].next; } void @@ -340,15 +359,40 @@ teco_machine_main_clear(teco_machine_main_t *ctx) teco_machine_stringbuilding_clear(&ctx->expectstring.machine); } +/** Append string to result with case folding. */ +static void +teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len) +{ + g_assert(ctx->result != NULL); + + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + teco_string_append(ctx->result, str, len); + break; + case TECO_STRINGBUILDING_MODE_UPPER: { + g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8 + ? g_utf8_strup(str, len) : g_ascii_strup(str, len); + teco_string_append(ctx->result, folded, strlen(folded)); + break; + } + case TECO_STRINGBUILDING_MODE_LOWER: { + g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8 + ? g_utf8_strdown(str, len) : g_ascii_strdown(str, len); + teco_string_append(ctx->result, folded, strlen(folded)); + break; + } + } +} + /* * FIXME: All teco_state_stringbuilding_* states could be static? 
*/ static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); TECO_DECLARE_STATE(teco_state_stringbuilding_ctl); static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); TECO_DECLARE_STATE(teco_state_stringbuilding_escaped); TECO_DECLARE_STATE(teco_state_stringbuilding_lower); @@ -362,19 +406,29 @@ TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote); TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n); static teco_state_t * -teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { - if (chr == '^') + switch (chr) { + case '^': return &teco_state_stringbuilding_ctl; - if (TECO_IS_CTL(chr)) - return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + case TECO_CTL_KEY('^'): + /* + * Ctrl+^ is inserted verbatim as code 30. + * Otherwise it would expand to a single caret + * just like caret+caret (^^). 
+ */ + break; + default: + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + } return teco_state_stringbuilding_escaped_input(ctx, chr, error); } /* in cmdline.c */ gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_stringbuilding_start, .is_start = TRUE, @@ -383,12 +437,19 @@ TECO_DEFINE_STATE(teco_state_stringbuilding_start, ); static teco_state_t * -teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { chr = teco_ascii_toupper(chr); switch (chr) { - case '^': break; + case '^': + /* + * Double-caret expands to a single caret. + * Ctrl+^ (30) is handled separately and inserts code 30. + * The special handling of the double-caret should perhaps + * be abolished altogether. + */ + break; case 'Q': case 'R': return &teco_state_stringbuilding_escaped; case 'V': return &teco_state_stringbuilding_lower; @@ -398,85 +459,139 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar ch chr = TECO_CTL_KEY(chr); } + /* + * Source code is always in UTF-8, so it does not + * make sense to handle ctx->codepage != SC_CP_UTF8 + * separately. 
+ */ if (ctx->result) - teco_string_append_c(ctx->result, chr); + teco_string_append_wc(ctx->result, chr); return &teco_state_stringbuilding_start; } TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl); static teco_state_t * -teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; + /* + * The subtle difference between UTF-8 and single-byte targets + * is that we don't try to casefold non-ANSI characters in single-byte mode. + */ switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; case TECO_STRINGBUILDING_MODE_UPPER: - chr = g_ascii_toupper(chr); + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? g_unichar_toupper(chr) : chr; break; case TECO_STRINGBUILDING_MODE_LOWER: - chr = g_ascii_tolower(chr); - break; - default: + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_tolower(chr) : chr; break; } - teco_string_append_c(ctx->result, chr); + teco_string_append_wc(ctx->result, chr); return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE(teco_state_stringbuilding_escaped); +/* in cmdline.c */ +gboolean teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, + gunichar key, GError **error); + +TECO_DEFINE_STATE(teco_state_stringbuilding_escaped, + .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) + teco_state_stringbuilding_escaped_process_edit_cmd +); static teco_state_t * -teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_lower_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: This does not handle ^V^V typed with up-carets. - */ - if (chr == TECO_CTL_KEY('V')) { + chr = teco_ascii_toupper(chr); + + if (chr == 'V') { if (ctx->parent.must_undo) teco_undo_guint(ctx->mode); ctx->mode = TECO_STRINGBUILDING_MODE_LOWER; } else { - teco_string_append_c(ctx->result, g_ascii_tolower(chr)); + /* control keys cannot be case folded */ + teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr)); } return &teco_state_stringbuilding_start; } +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl); + +static teco_state_t * +teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) +{ + if (chr == '^') + return &teco_state_stringbuilding_lower_ctl; + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_lower_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + + if (ctx->result) { + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_tolower(chr) : chr; + teco_string_append_wc(ctx->result, chr); + } + return &teco_state_stringbuilding_start; +} + TECO_DEFINE_STATE(teco_state_stringbuilding_lower); static teco_state_t * -teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: This does not handle ^W^W typed with up-carets. - */ - if (chr == TECO_CTL_KEY('W')) { + chr = teco_ascii_toupper(chr); + + if (chr == 'W') { if (ctx->parent.must_undo) teco_undo_guint(ctx->mode); ctx->mode = TECO_STRINGBUILDING_MODE_UPPER; } else { - teco_string_append_c(ctx->result, g_ascii_toupper(chr)); + /* control keys cannot be case folded */ + teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr)); } return &teco_state_stringbuilding_start; } +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl); + +static teco_state_t * +teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) +{ + if (chr == '^') + return &teco_state_stringbuilding_upper_ctl; + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_upper_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + + if (ctx->result) { + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_toupper(chr) : chr; + teco_string_append_wc(ctx->result, chr); + } + return &teco_state_stringbuilding_start; +} + TECO_DEFINE_STATE(teco_state_stringbuilding_upper); static teco_state_t * -teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_state_t *next; @@ -488,8 +603,10 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar c case 'N': next = &teco_state_stringbuilding_ctle_n; break; default: if (ctx->result) { - gchar buf[] = {TECO_CTL_KEY('E'), chr}; - teco_string_append(ctx->result, buf, sizeof(buf)); + /* also makes sure that search patterns can start with ^E */ + gchar buf[1+6] = {TECO_CTL_KEY('E')}; + gsize len = g_unichar_to_utf8(chr, buf+1); + teco_machine_stringbuilding_append(ctx, buf, 1+len); } return &teco_state_stringbuilding_start; } @@ -507,7 +624,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle); /* in cmdline.c */ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar chr, GError **error); + gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE_STRINGBUILDING_QREG @@ -522,7 +639,7 @@ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuil ) static teco_state_t * -teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -549,7 +666,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch */ gchar buffer[TECO_EXPRESSIONS_FORMAT_LEN]; const gchar *num = teco_expressions_format(buffer, value); - teco_string_append(ctx->result, num, strlen(num)); + teco_machine_stringbuilding_append(ctx, num, strlen(num)); return 
&teco_state_stringbuilding_start; } @@ -557,7 +674,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num); static teco_state_t * -teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -578,21 +695,51 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar teco_int_t value; if (!qreg->vtable->get_integer(qreg, &value, error)) return NULL; - if (value < 0 || value > 0xFF) { - g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Q-Register \"%s\" does not contain a valid character", name_printable); - return NULL; + + if (ctx->codepage == SC_CP_UTF8) { + if (value < 0 || !g_unichar_validate(value)) + goto error_codepoint; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_unichar_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_unichar_tolower(value); + break; + } + teco_string_append_wc(ctx->result, value); + } else { + if (value < 0 || value > 0xFF) + goto error_codepoint; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_ascii_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_ascii_tolower(value); + break; + } + teco_string_append_c(ctx->result, value); } - teco_string_append_c(ctx->result, (gchar)value); return &teco_state_stringbuilding_start; + +error_codepoint: { + g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Q-Register \"%s\" does not contain a valid 
codepoint", name_printable); + return NULL; +} } TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u); static teco_state_t * -teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -610,20 +757,17 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: Should we have a special teco_qreg_get_string_append() function? - */ g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; - teco_string_append(ctx->result, str.data, str.len); + teco_machine_stringbuilding_append(ctx, str.data, str.len); return &teco_state_stringbuilding_start; } TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q); static teco_state_t * -teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; teco_qreg_table_t *table; @@ -643,7 +787,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; /* * NOTE: g_shell_quote() expects a null-terminated string, so it is @@ -658,7 +802,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return NULL; } g_autofree gchar *str_quoted = g_shell_quote(str.data ? 
: ""); - teco_string_append(ctx->result, str_quoted, strlen(str_quoted)); + teco_machine_stringbuilding_append(ctx, str_quoted, strlen(str_quoted)); return &teco_state_stringbuilding_start; } @@ -666,7 +810,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote); static teco_state_t * -teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; teco_qreg_table_t *table; @@ -686,7 +830,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (teco_string_contains(&str, '\0')) { teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len, @@ -695,7 +839,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar } g_autofree gchar *str_escaped = teco_globber_escape_pattern(str.data); - teco_string_append(ctx->result, str_escaped, strlen(str_escaped)); + teco_machine_stringbuilding_append(ctx, str_escaped, strlen(str_escaped)); return &teco_state_stringbuilding_start; } @@ -703,13 +847,14 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n); void -teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char, +teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char, teco_qreg_table_t *locals, gboolean must_undo) { memset(ctx, 0, sizeof(*ctx)); teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo); 
ctx->escape_char = escape_char; ctx->qreg_table_locals = locals; + ctx->codepage = teco_default_codepage(); } void @@ -723,6 +868,10 @@ teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx) ctx->mode = TECO_STRINGBUILDING_MODE_NORMAL; } +/* + * If we case folded only ANSI characters as in teco_ascii_toupper(), + * this could be simplified. + */ void teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len, teco_string_t *target) @@ -730,12 +879,18 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch target->data = g_malloc(len*2+1); target->len = 0; - for (guint i = 0; i < len; i++) { - if (teco_ascii_toupper(str[i]) == ctx->escape_char || - (ctx->escape_char == '[' && str[i] == ']') || - (ctx->escape_char == '{' && str[i] == '}')) + for (guint i = 0; i < len; ) { + gunichar chr = g_utf8_get_char(str+i); + + if (g_unichar_toupper(chr) == ctx->escape_char || + (ctx->escape_char == '[' && chr == ']') || + (ctx->escape_char == '{' && chr == '}')) target->data[target->len++] = TECO_CTL_KEY('Q'); - target->data[target->len++] = str[i]; + + gsize lenc = g_utf8_next_char(str+i) - (str+i); + memcpy(target->data+target->len, str+i, lenc); + target->len += lenc; + i += lenc; } target->data[target->len] = '\0'; @@ -748,8 +903,17 @@ teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx) teco_machine_qregspec_free(ctx->machine_qregspec); } +gboolean +teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode == TECO_MODE_NORMAL) + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_default_codepage()); + return TRUE; +} + teco_state_t * -teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_state_t *current = ctx->parent.current; @@ -766,13 +930,18 @@ 
teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro /* * FIXME: Exclude setting at least whitespace characters as the * new string escape character to avoid accidental errors? + * + * FIXME: Should we perhaps restrict case folding escape characters + * to the ANSI range (teco_ascii_toupper())? + * This would be faster than case folding each and every character + * of a string argument to check against the escape char. */ switch (ctx->expectstring.machine.escape_char) { case '\e': case '{': if (ctx->parent.must_undo) - teco_undo_gchar(ctx->expectstring.machine.escape_char); - ctx->expectstring.machine.escape_char = teco_ascii_toupper(chr); + teco_undo_gunichar(ctx->expectstring.machine.escape_char); + ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); return current; } } @@ -796,7 +965,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro ctx->expectstring.nesting--; break; } - } else if (teco_ascii_toupper(chr) == ctx->expectstring.machine.escape_char) { + } else if (g_unichar_toupper(chr) == ctx->expectstring.machine.escape_char) { if (ctx->parent.must_undo) teco_undo_gint(ctx->expectstring.nesting); ctx->expectstring.nesting--; @@ -826,7 +995,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro if (current->expectstring.last) { if (ctx->parent.must_undo) - teco_undo_gchar(ctx->expectstring.machine.escape_char); + teco_undo_gunichar(ctx->expectstring.machine.escape_char); ctx->expectstring.machine.escape_char = '\e'; } ctx->expectstring.nesting = 1; @@ -857,7 +1026,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro if (!teco_machine_stringbuilding_input(&ctx->expectstring.machine, chr, str, error)) return NULL; } else if (ctx->mode == TECO_MODE_NORMAL) { - teco_string_append_c(&ctx->expectstring.string, chr); + teco_string_append_wc(&ctx->expectstring.string, chr); } /* @@ -901,7 +1070,7 @@ 
teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str g_assert(str->data != NULL); /* - * Null-chars must not ocur in filename/path strings and at some point + * Null-chars must not occur in filename/path strings and at some point * teco_string_t has to be converted to a null-terminated C string * as all the glib filename functions rely on null-terminated strings. * Doing it here ensures that teco_file_expand_path() can be safely called diff --git a/src/parser.h b/src/parser.h index 05a9715..066896f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ */ #pragma once +#include <stdbool.h> + #include <glib.h> #include <Scintilla.h> @@ -23,6 +25,7 @@ #include "sciteco.h" #include "string-utils.h" #include "goto.h" +#include "undo.h" #include "qreg.h" /* @@ -36,7 +39,9 @@ typedef struct { /** how many iterations are left */ teco_int_t counter; /** Program counter of loop start command */ - guint pc : sizeof(guint)*8 - 1; + gsize pc; + /** Brace level at loop start */ + guint brace_level : sizeof(guint)*8 - 1; /** * Whether the loop represents an argument * barrier or not (it "passes through" @@ -46,7 +51,7 @@ typedef struct { * a signed integer, it's ok steal one * bit for the pass_through flag. */ - gboolean pass_through : 1; + bool pass_through : 1; } teco_loop_context_t; extern GArray *teco_loop_stack; @@ -71,8 +76,8 @@ void undo__remove_index__teco_loop_stack(guint); * FIXME: Maybe use TECO_DECLARE_VTABLE_METHOD()? */ typedef const struct { - gboolean string_building : 1; - gboolean last : 1; + bool string_building : 1; + bool last : 1; /** * Called repeatedly to process chunks of input and give interactive feedback. 
@@ -99,17 +104,18 @@ typedef const struct { } teco_state_expectqreg_t; typedef gboolean (*teco_state_initial_cb_t)(teco_machine_t *ctx, GError **error); -typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gchar chr, GError **error); +typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gunichar chr, GError **error); typedef gboolean (*teco_state_refresh_cb_t)(teco_machine_t *ctx, GError **error); typedef gboolean (*teco_state_end_of_macro_cb_t)(teco_machine_t *ctx, GError **error); typedef gboolean (*teco_state_process_edit_cmd_cb_t)(teco_machine_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); typedef enum { - TECO_FNMACRO_MASK_START = (1 << 0), - TECO_FNMACRO_MASK_STRING = (1 << 1), - TECO_FNMACRO_MASK_DEFAULT = ~((1 << 2)-1) -} teco_fnmacro_mask_t; + TECO_KEYMACRO_MASK_START = (1 << 0), + TECO_KEYMACRO_MASK_STRING = (1 << 1), + TECO_KEYMACRO_MASK_CASEINSENSITIVE = (1 << 2), + TECO_KEYMACRO_MASK_DEFAULT = ~((1 << 3)-1) +} teco_keymacro_mask_t; /** * A teco_machine_t state. @@ -182,19 +188,19 @@ struct teco_state_t { /** * Whether this state is a start state (ie. not within any * escape sequence etc.). - * This is separate of TECO_FNMACRO_MASK_START which is set + * This is separate of TECO_KEYMACRO_MASK_START which is set * only in the main machine's start states. */ - gboolean is_start : 1; + bool is_start : 1; /** - * Function key macro mask. + * Key macro mask. * This is not a bitmask since it is compared with values set * from TECO, so the bitorder needs to be defined. * * @fixme If we intend to "forward" masks from other state machines like * teco_machine_stringbuilding_t, this should probably be a callback. */ - teco_fnmacro_mask_t fnmacro_mask : 8; + teco_keymacro_mask_t keymacro_mask : 8; /** * Additional state-dependent callbacks and settings. 
@@ -214,7 +220,7 @@ struct teco_state_t { gboolean teco_state_end_of_macro(teco_machine_t *ctx, GError **error); /* in cmdline.c */ -gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE @@ -234,7 +240,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent .end_of_macro_cb = teco_state_end_of_macro, \ .process_edit_cmd_cb = teco_state_process_edit_cmd, \ .is_start = FALSE, \ - .fnmacro_mask = TECO_FNMACRO_MASK_DEFAULT, \ + .keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \ ##__VA_ARGS__ \ } @@ -243,20 +249,21 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent extern teco_state_t NAME /* in cmdline.c */ -gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE_CASEINSENSITIVE * @implements TECO_DEFINE_STATE * @ingroup states * - * Base class of states with case-insenstive input. + * Base class of states with case-insensitive input. * * This is meant for states accepting command characters * that can possibly be case-folded. */ #define TECO_DEFINE_STATE_CASEINSENSITIVE(NAME, ...) \ TECO_DEFINE_STATE(NAME, \ + .keymacro_mask = TECO_KEYMACRO_MASK_CASEINSENSITIVE, \ .process_edit_cmd_cb = teco_state_caseinsensitive_process_edit_cmd, \ ##__VA_ARGS__ \ ) @@ -278,6 +285,8 @@ struct teco_machine_t { * Whether side effects must be reverted on rubout. * State machines created within macro calls don't have to * even in interactive mode. + * In fact you MUST not revert side effects if this is FALSE + * as the data no longer exists on the call stack at undo-time. 
*/ gboolean must_undo; }; @@ -296,7 +305,7 @@ teco_machine_reset(teco_machine_t *ctx, teco_state_t *initial) teco_undo_ptr(ctx->current) = initial; } -gboolean teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error); +gboolean teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error); typedef enum { TECO_STRINGBUILDING_MODE_NORMAL = 0, @@ -307,9 +316,6 @@ typedef enum { /** * A stringbuilding state machine. * - * @fixme Should contain the escape char (currently in teco_machine_expectstring_t), - * so that we can escape it via ^Q. - * * @extends teco_machine_t */ typedef struct teco_machine_stringbuilding_t { @@ -327,7 +333,7 @@ typedef struct teco_machine_stringbuilding_t { * If this is `[` or `{`, it is assumed that `]` and `}` must * be escaped as well by teco_machine_stringbuilding_escape(). */ - gchar escape_char; + gunichar escape_char; /** * Q-Register table for local registers. @@ -348,11 +354,28 @@ typedef struct teco_machine_stringbuilding_t { * (see teco_state_stringbuilding_start_process_edit_cmd()). */ teco_string_t *result; + + /** + * Encoding of string in `result`. + * This is inherited from the embedding command and may depend on + * the buffer's or Q-Register's encoding. + */ + guint codepage; } teco_machine_stringbuilding_t; -void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char, +void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char, teco_qreg_table_t *locals, gboolean must_undo); +static inline void +teco_machine_stringbuilding_set_codepage(teco_machine_stringbuilding_t *ctx, + guint codepage) +{ + /* NOTE: This is not safe to undo in macro calls. 
*/ + if (ctx->parent.must_undo) + teco_undo_guint(ctx->codepage); + ctx->codepage = codepage; +} + void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx); /** @@ -365,7 +388,7 @@ void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx); * @return FALSE in case of error. */ static inline gboolean -teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gchar chr, +teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gunichar chr, teco_string_t *result, GError **error) { ctx->result = result; @@ -424,7 +447,8 @@ typedef enum { struct teco_machine_main_t { teco_machine_t parent; - gint macro_pc; + /* signed because it is sometimes set to -1 for flow control */ + gssize macro_pc; /** * Aliases bitfield with an integer. @@ -435,8 +459,8 @@ struct teco_machine_main_t { struct { teco_mode_t mode : 8; - gboolean modifier_colon : 1; - gboolean modifier_at : 1; + bool modifier_colon : 1; + bool modifier_at : 1; }; guint __flags; }; @@ -481,7 +505,7 @@ void teco_machine_main_init(teco_machine_main_t *ctx, gboolean teco_machine_main_eval_colon(teco_machine_main_t *ctx); gboolean teco_machine_main_step(teco_machine_main_t *ctx, - const gchar *macro, gint stop_pos, GError **error); + const gchar *macro, gsize stop_pos, GError **error); gboolean teco_execute_macro(const gchar *macro, gsize macro_len, teco_qreg_table_t *qreg_table_locals, GError **error); @@ -500,17 +524,18 @@ typedef const struct { */ teco_state_t *teco_machine_main_transition_input(teco_machine_main_t *ctx, teco_machine_main_transition_t *transitions, - guint len, gchar chr, GError **error); + guint len, gunichar chr, GError **error); void teco_machine_main_clear(teco_machine_main_t *ctx); G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear); -teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error); +gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError 
**error); +teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error); gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error); /* in cmdline.c */ -gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTSTRING @@ -526,15 +551,16 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco */ #define TECO_DEFINE_STATE_EXPECTSTRING(NAME, ...) \ static teco_state_t * \ - NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \ + NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \ { \ return teco_state_expectstring_input(ctx, chr, error); \ } \ TECO_DEFINE_STATE(NAME, \ + .initial_cb = (teco_state_initial_cb_t)teco_state_expectstring_initial, \ .refresh_cb = (teco_state_refresh_cb_t)teco_state_expectstring_refresh, \ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectstring_process_edit_cmd, \ - .fnmacro_mask = TECO_FNMACRO_MASK_STRING, \ + .keymacro_mask = TECO_KEYMACRO_MASK_STRING, \ .expectstring.string_building = TRUE, \ .expectstring.last = TRUE, \ .expectstring.process_cb = NULL, /* do nothing */ \ @@ -546,7 +572,7 @@ gboolean teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_stri gsize new_chars, GError **error); /* in cmdline.c */ -gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTFILE @@ -562,7 +588,7 @@ gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_m ) /* in 
cmdline.c */ -gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTDIR diff --git a/src/qreg-commands.c b/src/qreg-commands.c index be0aada..cff4c84 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error) } teco_state_t * -teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_state_t *current = ctx->parent.current; @@ -149,7 +149,7 @@ teco_state_loadqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr if (str->len > 0) { /* Load file into Q-Register */ g_autofree gchar *filename = teco_file_expand_path(str->data); - if (!teco_qreg_load(qreg, filename, error)) + if (!qreg->vtable->load(qreg, filename, error)) return NULL; } else { /* Edit Q-Register */ @@ -202,7 +202,7 @@ teco_state_saveqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr return &teco_state_start; g_autofree gchar *filename = teco_file_expand_path(str->data); - return teco_qreg_save(qreg, filename, error) ? &teco_state_start : NULL; + return qreg->vtable->save(qreg, filename, error) ? 
&teco_state_start : NULL; } /*$ E% E%q @@ -259,9 +259,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (teco_machine_main_eval_colon(ctx)) { /* Query Q-Register's existence or string size */ if (qreg) { - gsize len; - - if (!qreg->vtable->get_string(qreg, NULL, &len, error)) + /* get_string() would return the size in bytes */ + teco_int_t len = qreg->vtable->get_length(qreg, error); + if (len < 0) return NULL; teco_expressions_push(len); } else { @@ -281,10 +281,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, return NULL; } - gint c = qreg->vtable->get_character(qreg, pos, error); - if (c < 0) + teco_int_t c; + if (!qreg->vtable->get_character(qreg, pos, &c, error)) return NULL; - teco_expressions_push(c); } else { /* Query integer */ @@ -311,6 +310,10 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, * Positions are handled like buffer positions \(em they * begin at 0 up to the length of the string minus 1. * An error is thrown for invalid positions. + * If <q> is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. * Both non-colon-modified forms of Q require register <q> * to be defined and fail otherwise. 
* @@ -369,24 +372,50 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gint args = teco_expressions_args(); if (args > 0) { - g_autofree gchar *buffer = g_malloc(args); + guint codepage = teco_default_codepage(); + if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return NULL; - for (gint i = args; i > 0; i--) { - teco_int_t v; - if (!teco_expressions_pop_num_calc(&v, 0, error)) - return NULL; - buffer[i-1] = (gchar)v; + g_autofree gchar *buffer = NULL; + gsize len = 0; + + if (codepage == SC_CP_UTF8) { + /* the glib docs wrongly claim that one character can take 6 bytes */ + buffer = g_malloc(4*args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || !g_unichar_validate(v)) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + len += g_unichar_to_utf8(v, buffer+len); + } + } else { + buffer = g_malloc(args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || v > 0xFF) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + buffer[len++] = v; + } } if (colon_modified) { /* append to register */ if (!qreg->vtable->undo_append_string(qreg, error) || - !qreg->vtable->append_string(qreg, buffer, args, error)) + !qreg->vtable->append_string(qreg, buffer, len, error)) return NULL; } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, buffer, args, error)) + !qreg->vtable->set_string(qreg, buffer, len, + codepage, error)) return NULL; } } @@ -399,7 +428,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str->data, str->len, error)) + !qreg->vtable->set_string(qreg, str->data, str->len, + teco_default_codepage(), error)) return NULL; } @@ -450,6 
+480,26 @@ TECO_DEFINE_STATE_EXPECTQREG(teco_state_eucommand, .expectqreg.type = TECO_QREG_OPTIONAL_INIT ); +static gboolean +teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode > TECO_MODE_NORMAL) + return TRUE; + + teco_qreg_t *qreg; + teco_machine_qregspec_get_results(ctx->expectqreg, &qreg, NULL); + + /* + * The expected codepage of string building constructs is determined + * by the Q-Register. + */ + guint codepage; + if (!qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return FALSE; + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, codepage); + return TRUE; +} + static teco_state_t * teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) { @@ -467,6 +517,7 @@ teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_stri * characters \fBenabled\fP. */ TECO_DEFINE_STATE_EXPECTSTRING(teco_state_setqregstring_building, + .initial_cb = (teco_state_initial_cb_t)teco_state_setqregstring_building_initial, .expectstring.string_building = TRUE ); @@ -481,7 +532,7 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (str.len > 0) { @@ -604,8 +655,15 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, } else { g_auto(teco_qreg_table_t) table; teco_qreg_table_init(&table, FALSE); + if (!teco_qreg_execute(qreg, &table, error)) return NULL; + if (teco_qreg_current && !teco_qreg_current->must_undo) { + /* currently editing local Q-Register */ + teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data, + teco_qreg_current->head.name.len); + return NULL; + } } return &teco_state_start; @@ -632,6 +690,10 @@ teco_state_macro_got_register(teco_machine_main_t 
*ctx, teco_qreg_t *qreg, * Note that the string of <q> will be copied upon macro execution, * so subsequent changes to Q-Register <q> from inside the macro do * not modify the executed code. + * + * While \fBM\fP does not check the register's configured encoding + * (as reported by \fBEE\fP), its contents must be and are checked to be in + * valid UTF-8. */ TECO_DEFINE_STATE_EXPECTQREG(teco_state_macro); @@ -666,6 +728,9 @@ teco_state_macrofile_done(teco_machine_main_t *ctx, const teco_string_t *str, GE * It is otherwise similar to the \(lqM\(rq command. * * If <file> could not be read, the command yields an error. + * + * As all \*(ST code, the contents of <file> must be in valid UTF-8 + * even if operating in the \(lqdefault ANSI\(rq mode as configured by \fBED\fP. */ TECO_DEFINE_STATE_EXPECTFILE(teco_state_macrofile); @@ -678,7 +743,7 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (ctx->mode > TECO_MODE_NORMAL) return &teco_state_start; - teco_int_t from, len; + gssize from, len; /* in bytes */ if (!teco_expressions_eval(FALSE, error)) return NULL; @@ -702,32 +767,37 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, len *= -1; } } else { - teco_int_t to = teco_expressions_pop_num(0); - from = teco_expressions_pop_num(0); - + gssize to = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); + from = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); len = to - from; - if (len < 0 || !teco_validate_pos(from) || !teco_validate_pos(to)) { + if (len < 0 || from < 0 || to < 0) { teco_error_range_set(error, "X"); return NULL; } } + /* + * NOTE: This does not use SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * since it may not be safe when copying from register to register. 
+ */ g_autofree gchar *str = g_malloc(len + 1); - struct Sci_TextRange text_range = { - .chrg = {.cpMin = from, .cpMax = from + len}, + struct Sci_TextRangeFull range = { + .chrg = {from, from + len}, .lpstrText = str }; - teco_interface_ssm(SCI_GETTEXTRANGE, 0, (sptr_t)&text_range); + teco_interface_ssm(SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); if (teco_machine_main_eval_colon(ctx)) { if (!qreg->vtable->undo_append_string(qreg, error) || !qreg->vtable->append_string(qreg, str, len, error)) return NULL; } else { + guint cp = teco_interface_get_codepage(); + if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str, len, error)) + !qreg->vtable->set_string(qreg, str, len, cp, error)) return NULL; } diff --git a/src/qreg-commands.h b/src/qreg-commands.h index 6a41fc5..27a6a5c 100644 --- a/src/qreg-commands.h +++ b/src/qreg-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,10 +33,10 @@ teco_state_expectqreg_reset(teco_machine_main_t *ctx) gboolean teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error); -teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error); +teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error); /* in cmdline.c */ -gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTQREG @@ -47,7 +47,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m */ #define TECO_DEFINE_STATE_EXPECTQREG(NAME, ...) 
\ static teco_state_t * \ - NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \ + NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \ { \ return teco_state_expectqreg_input(ctx, chr, error); \ } \ @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ #include "config.h" #endif +#include <stdbool.h> #include <string.h> #include <glib.h> @@ -82,7 +83,12 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro { g_auto(teco_string_t) macro = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, error) || + /* + * SciTECO macros must be in UTF-8, but we don't check the encoding, + * so as not to complicate TECO_ED_DEFAULT_ANSI mode. + * The UTF-8 byte sequences are checked anyway. + */ + if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, NULL, error) || !teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) { teco_error_add_frame_qreg(qreg->head.name.data, qreg->head.name.len); return FALSE; @@ -120,65 +126,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); -} - -/** @memberof teco_qreg_t */ -gboolean -teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error) -{ - if (!qreg->vtable->undo_set_string(qreg, error)) - return FALSE; - - if (teco_qreg_current) - teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - - teco_doc_edit(&qreg->string); - teco_doc_reset(&qreg->string); - - /* - * teco_view_load() might change the EOL style. 
- */ - teco_qreg_undo_set_eol_mode(qreg); - - /* - * undo_set_string() pushes undo tokens that restore - * the previous document in the view. - * So if loading fails, teco_qreg_current will be - * made the current document again. - */ - if (!teco_view_load(teco_qreg_view, filename, error)) - return FALSE; - - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - - return TRUE; -} - -/** @memberof teco_qreg_t */ -gboolean -teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error) -{ - if (teco_qreg_current) - teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - - teco_doc_edit(&qreg->string); - - if (!teco_view_save(teco_qreg_view, filename, error)) { - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - return FALSE; - } - - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - - return TRUE; + teco_doc_edit(&teco_qreg_current->string, 0); } static gboolean @@ -204,9 +156,10 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error) } static gboolean -teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { - teco_doc_set_string(&qreg->string, str, len); + teco_doc_set_string(&qreg->string, str, len, codepage); return TRUE; } @@ -231,43 +184,64 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str); teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return TRUE; } static gboolean 
-teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { - teco_doc_get_string(&qreg->string, str, len); + teco_doc_get_string(&qreg->string, str, len, codepage); return TRUE; } -static gint -teco_qreg_plain_get_character(teco_qreg_t *qreg, guint position, GError **error) +static gboolean +teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error) { - gint ret = -1; - if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); - if (position < teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0)) - ret = teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, position, 0); - else + sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + gssize off = teco_view_glyphs2bytes(teco_qreg_view, position); + + gboolean ret = off >= 0 && off != len; + if (!ret) g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); + "Position %" TECO_INT_FORMAT " out of range", position); /* make sure we still restore the current Q-Register */ + else + *chr = teco_view_get_character(teco_qreg_view, off, len); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +static teco_int_t +teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return ret; } @@ -294,7 +268,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error) 
if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_interface_show_view(teco_qreg_view); teco_interface_info_update(qreg); @@ -319,6 +293,58 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error) return TRUE; } +static gboolean +teco_qreg_plain_load(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (!qreg->vtable->undo_set_string(qreg, error)) + return FALSE; + + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + teco_doc_reset(&qreg->string); + + /* + * teco_view_load() might change the EOL style. + */ + teco_qreg_undo_set_eol_mode(qreg); + + /* + * undo_set_string() pushes undo tokens that restore + * the previous document in the view. + * So if loading fails, teco_qreg_current will be + * made the current document again. + */ + if (!teco_view_load(teco_qreg_view, filename, error)) + return FALSE; + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return TRUE; +} + +static gboolean +teco_qreg_plain_save(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + gboolean ret = teco_view_save(teco_qreg_view, filename, error); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +/** + * Initializer for vtables of Q-Registers with "plain" storage of strings. + * These store their string part as teco_docs. + */ #define TECO_INIT_QREG(...) 
{ \ .set_integer = teco_qreg_plain_set_integer, \ .undo_set_integer = teco_qreg_plain_undo_set_integer, \ @@ -329,10 +355,13 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error) .undo_append_string = teco_qreg_plain_undo_set_string, \ .get_string = teco_qreg_plain_get_string, \ .get_character = teco_qreg_plain_get_character, \ + .get_length = teco_qreg_plain_get_length, \ .exchange_string = teco_qreg_plain_exchange_string, \ .undo_exchange_string = teco_qreg_plain_undo_exchange_string, \ .edit = teco_qreg_plain_edit, \ .undo_edit = teco_qreg_plain_undo_edit, \ + .load = teco_qreg_plain_load, \ + .save = teco_qreg_plain_save, \ ##__VA_ARGS__ \ } @@ -345,6 +374,150 @@ teco_qreg_plain_new(const gchar *name, gsize len) return teco_qreg_new(&vtable, name, len); } +static gboolean +teco_qreg_external_edit(teco_qreg_t *qreg, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!teco_qreg_plain_edit(qreg, error) || + !qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); + teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); + + undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + return TRUE; +} + +static gboolean +teco_qreg_external_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) +{ + g_auto(teco_string_t) other_str, own_str = {NULL, 0}; + guint other_cp, own_cp; + + teco_doc_get_string(src, &other_str.data, &other_str.len, &other_cp); + + if (!qreg->vtable->get_string(qreg, &own_str.data, &own_str.len, &own_cp, error) || + !qreg->vtable->set_string(qreg, other_str.data, other_str.len, other_cp, error)) + return FALSE; + + teco_doc_set_string(src, own_str.data, own_str.len, own_cp); + return TRUE; +} + +static gboolean +teco_qreg_external_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError 
**error) +{ + if (!qreg->vtable->undo_set_string(qreg, error)) + return FALSE; + if (qreg->must_undo) // FIXME + teco_doc_undo_set_string(src); + return TRUE; +} + +static gboolean +teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) { + g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, + "Position %" TECO_INT_FORMAT " out of range", position); + return FALSE; + } + const gchar *p = g_utf8_offset_to_pointer(str.data, position); + + /* + * Make sure that the -1/-2 error values are preserved. + * The sign bit in UCS-4/UTF-32 is unused, so this will even + * suffice if TECO_INTEGER == 32. + */ + *chr = (gint32)g_utf8_get_char_validated(p, -1); + return TRUE; +} + +static teco_int_t +teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return -1; + + return g_utf8_strlen(str.data, str.len); +} + +/* + * NOTE: This does not perform EOL normalization unlike teco_view_load(). + * It shouldn't be critical since "external" registers are mainly used for filenames. + * Otherwise we could of course load into the view() and call set_string() afterwards. + */ +static gboolean +teco_qreg_external_load(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + return g_file_get_contents(filename, &str.data, &str.len, error) && + qreg->vtable->undo_set_string(qreg, error) && + qreg->vtable->set_string(qreg, str.data, str.len, teco_default_codepage(), error); +} + +/* + * NOTE: This does not simply use g_file_set_contents(), as we have to create + * save point files as well. 
+ * FIXME: On the other hand, this does not set the correct EOL style on the document, + * so teco_view_save() will save only with the default EOL style. + * It might therefore still be a good idea to avoid any conversion. + */ +static gboolean +teco_qreg_external_save(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + g_auto(teco_string_t) str = {NULL, 0}; + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); + teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); + + undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + + gboolean ret = teco_view_save(teco_qreg_view, filename, error); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +/** + * Initializer for vtables of Q-Registers with "external" storage of strings. + * These rely on custom implementations of get_string() and set_string(). + */ +#define TECO_INIT_QREG_EXTERNAL(...) TECO_INIT_QREG( \ + .exchange_string = teco_qreg_external_exchange_string, \ + .undo_exchange_string = teco_qreg_external_undo_exchange_string, \ + .edit = teco_qreg_external_edit, \ + .get_character = teco_qreg_external_get_character, \ + .get_length = teco_qreg_external_get_length, \ + .load = teco_qreg_external_load, \ + .save = teco_qreg_external_save, \ + ##__VA_ARGS__ \ +) + /* * NOTE: The integer-component is currently unused on the "*" special register. */ @@ -368,11 +541,12 @@ teco_qreg_bufferinfo_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **er } /* - * FIXME: These operations can and should be implemented. - * Setting the "*" register could for instance rename the file. 
+ * FIXME: Something could be implemented here. There are 2 possibilities: + * Either it renames the current buffer, or opens a file (alternative to EB). */ static gboolean -teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { teco_error_qregopunsupported_set(error, qreg->head.name.data, qreg->head.name.len, FALSE); return FALSE; @@ -401,7 +575,8 @@ teco_qreg_bufferinfo_undo_append_string(teco_qreg_t *qreg, GError **error) * NOTE: The `string` component is currently unused on the "*" register. */ static gboolean -teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -416,43 +591,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr * NOTE: teco_file_normalize_path() does not change the size of the string. */ *len = teco_ring_current->filename ? 
strlen(teco_ring_current->filename) : 0; - return TRUE; -} - -static gint -teco_qreg_bufferinfo_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - gsize max_len; - - if (!teco_qreg_bufferinfo_get_string(qreg, NULL, &max_len, error)) - return -1; - - if (position >= max_len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return teco_ring_current->filename[position]; -} - -static gboolean -teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error) -{ - if (!teco_qreg_plain_edit(qreg, error)) - return FALSE; - - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_bufferinfo_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } @@ -460,7 +600,7 @@ teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error) teco_qreg_t * teco_qreg_bufferinfo_new(void) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_integer = teco_qreg_bufferinfo_set_integer, .undo_set_integer = teco_qreg_bufferinfo_undo_set_integer, .get_integer = teco_qreg_bufferinfo_get_integer, @@ -469,15 +609,22 @@ teco_qreg_bufferinfo_new(void) .append_string = teco_qreg_bufferinfo_append_string, .undo_append_string = teco_qreg_bufferinfo_undo_append_string, .get_string = teco_qreg_bufferinfo_get_string, - .get_character = teco_qreg_bufferinfo_get_character, - .edit = teco_qreg_bufferinfo_edit + /* + * As teco_qreg_bufferinfo_set_string() is not implemented, + * it's important to not inherit teco_qreg_external_exchange_string(). + * `[*` and `]*` will still work though. 
+ * The inherited teco_qreg_external_load() will simply fail. + */ + .exchange_string = teco_qreg_plain_exchange_string, + .undo_exchange_string = teco_qreg_plain_undo_exchange_string ); return teco_qreg_new(&vtable, "*", 1); } static gboolean -teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { /* * NOTE: Makes sure that `dir` will be null-terminated as str[len] may not be '\0'. @@ -528,7 +675,8 @@ teco_qreg_workingdir_undo_append_string(teco_qreg_t *qreg, GError **error) } static gboolean -teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -545,84 +693,22 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr *str = teco_file_normalize_path(dir); else g_free(dir); + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } -static gint -teco_qreg_workingdir_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error)) - return -1; - - if (position >= str.len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return str.data[position]; -} - -static gboolean -teco_qreg_workingdir_edit(teco_qreg_t *qreg, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_plain_edit(qreg, error) || - !teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, 
(sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - return TRUE; -} - -static gboolean -teco_qreg_workingdir_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - g_auto(teco_string_t) other_str, own_str = {NULL, 0}; - - teco_doc_get_string(src, &other_str.data, &other_str.len); - - if (!teco_qreg_workingdir_get_string(qreg, &own_str.data, &own_str.len, error) || - /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */ - !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error)) - return FALSE; - - teco_doc_set_string(src, own_str.data, own_str.len); - return TRUE; -} - -static gboolean -teco_qreg_workingdir_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - teco_undo_change_dir_to_current(); - if (qreg->must_undo) // FIXME - teco_doc_undo_set_string(src); - return TRUE; -} - /** @static @memberof teco_qreg_t */ teco_qreg_t * teco_qreg_workingdir_new(void) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_string = teco_qreg_workingdir_set_string, .undo_set_string = teco_qreg_workingdir_undo_set_string, .append_string = teco_qreg_workingdir_append_string, .undo_append_string = teco_qreg_workingdir_undo_append_string, - .get_string = teco_qreg_workingdir_get_string, - .get_character = teco_qreg_workingdir_get_character, - .edit = teco_qreg_workingdir_edit, - .exchange_string = teco_qreg_workingdir_exchange_string, - .undo_exchange_string = teco_qreg_workingdir_undo_exchange_string + .get_string = teco_qreg_workingdir_get_string ); /* @@ -639,7 +725,8 @@ teco_qreg_workingdir_new(void) } static gboolean -teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { 
g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -724,7 +811,8 @@ teco_qreg_clipboard_undo_set_string(teco_qreg_t *qreg, GError **error) } static gboolean -teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -756,93 +844,41 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErro else teco_string_clear(&str_converted); *len = str_converted.len; + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } -static gint -teco_qreg_clipboard_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error)) - return -1; - - if (position >= str.len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return str.data[position]; -} - -static gboolean -teco_qreg_clipboard_edit(teco_qreg_t *qreg, GError **error) -{ - if (!teco_qreg_plain_edit(qreg, error)) - return FALSE; - - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, str.len, (sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - return TRUE; -} - /* - * FIXME: Very similar to teco_qreg_workingdir_exchange_string(). + * Regardless of whether EOL normalization is enabled, + * this will never perform it. 
+ * Other than that, it's very similar to teco_qreg_external_load(). */ static gboolean -teco_qreg_clipboard_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) +teco_qreg_clipboard_load(teco_qreg_t *qreg, const gchar *filename, GError **error) { - g_auto(teco_string_t) other_str, own_str = {NULL, 0}; - - teco_doc_get_string(src, &other_str.data, &other_str.len); - - if (!teco_qreg_clipboard_get_string(qreg, &own_str.data, &own_str.len, error) || - /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */ - !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error)) - return FALSE; + g_assert(!teco_string_contains(&qreg->head.name, '\0')); + const gchar *clipboard_name = qreg->head.name.data + 1; - teco_doc_set_string(src, own_str.data, own_str.len); - return TRUE; -} + g_auto(teco_string_t) str = {NULL, 0}; -/* - * FIXME: Very similar to teco_qreg_workingdir_undo_exchange_string(). - */ -static gboolean -teco_qreg_clipboard_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - if (!teco_qreg_clipboard_undo_set_string(qreg, error)) - return FALSE; - if (qreg->must_undo) // FIXME - teco_doc_undo_set_string(src); - return TRUE; + return g_file_get_contents(filename, &str.data, &str.len, error) && + teco_qreg_clipboard_undo_set_string(qreg, error) && + teco_interface_set_clipboard(clipboard_name, str.data, str.len, error); } /** @static @memberof teco_qreg_t */ teco_qreg_t * teco_qreg_clipboard_new(const gchar *name) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_string = teco_qreg_clipboard_set_string, .undo_set_string = teco_qreg_clipboard_undo_set_string, .append_string = teco_qreg_clipboard_append_string, .undo_append_string = teco_qreg_clipboard_undo_append_string, .get_string = teco_qreg_clipboard_get_string, - .get_character = teco_qreg_clipboard_get_character, - .edit = teco_qreg_clipboard_edit, - .exchange_string = 
teco_qreg_clipboard_exchange_string, - .undo_exchange_string = teco_qreg_clipboard_undo_exchange_string + .load = teco_qreg_clipboard_load ); teco_qreg_t *qreg = teco_qreg_new(&vtable, "~", 1); @@ -939,7 +975,8 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error) qreg = found; } - if (!qreg->vtable->set_string(qreg, value, strlen(value), error)) + if (!qreg->vtable->set_string(qreg, value, strlen(value), + teco_default_codepage(), error)) return FALSE; } @@ -994,7 +1031,7 @@ teco_qreg_table_get_environ(teco_qreg_table_t *table, GError **error) continue; g_auto(teco_string_t) value = {NULL, 0}; - if (!cur->vtable->get_string(cur, &value.data, &value.len, error)) { + if (!cur->vtable->get_string(cur, &value.data, &value.len, NULL, error)) { g_strfreev(envp); return NULL; } @@ -1088,12 +1125,13 @@ teco_qreg_stack_push(teco_qreg_t *qreg, GError **error) { teco_qreg_stack_entry_t entry; g_auto(teco_string_t) string = {NULL, 0}; + guint codepage; if (!qreg->vtable->get_integer(qreg, &entry.integer, error) || - !qreg->vtable->get_string(qreg, &string.data, &string.len, error)) + !qreg->vtable->get_string(qreg, &string.data, &string.len, &codepage, error)) return FALSE; teco_doc_init(&entry.string); - teco_doc_set_string(&entry.string, string.data, string.len); + teco_doc_set_string(&entry.string, string.data, string.len, codepage); teco_doc_update(&entry.string, &qreg->string); /* pass ownership of entry to teco_qreg_stack */ @@ -1196,6 +1234,12 @@ teco_ed_hook(teco_ed_hook_t type, GError **error) if (!teco_qreg_execute(qreg, &locals, error)) goto error_add_frame; + if (teco_qreg_current && !teco_qreg_current->must_undo) { + /* currently editing local Q-Register */ + teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data, + teco_qreg_current->head.name.len); + goto error_add_frame; + } return teco_expressions_discard_args(error) && teco_expressions_brace_close(error); @@ -1225,7 +1269,7 @@ struct teco_machine_qregspec_t { union { 
struct { teco_qreg_type_t type : 8; - gboolean parse_only : 1; + bool parse_only : 1; }; guint __flags; }; @@ -1255,7 +1299,7 @@ TECO_DECLARE_STATE(teco_state_qregspec_secondchar); TECO_DECLARE_STATE(teco_state_qregspec_string); static teco_state_t *teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); static teco_state_t * teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error) @@ -1290,7 +1334,7 @@ teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error) } static teco_state_t * -teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: We're using teco_state_qregspec_start as a success condition, @@ -1307,7 +1351,7 @@ teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError } /* in cmdline.c */ -gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_qregspec_start, .is_start = TRUE, @@ -1315,7 +1359,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start, ); static teco_state_t * -teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? 
@@ -1334,7 +1378,7 @@ teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return teco_state_qregspec_done(ctx, error); } @@ -1350,7 +1394,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start_global, ); static teco_state_t * -teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? @@ -1358,7 +1402,7 @@ teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GEr if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return &teco_state_qregspec_secondchar; } @@ -1368,7 +1412,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_firstchar, ); static teco_state_t * -teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? 
@@ -1376,7 +1420,7 @@ teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GE if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return teco_state_qregspec_done(ctx, error); } @@ -1386,7 +1430,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_secondchar, ); static teco_state_t * -teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * Makes sure that braces within string building constructs do not have to be @@ -1427,7 +1471,7 @@ teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError /* in cmdline.c */ gboolean teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_qregspec_string, .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_qregspec_string_process_edit_cmd @@ -1488,7 +1532,7 @@ teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx) * @memberof teco_machine_qregspec_t */ teco_machine_qregspec_status_t -teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr, +teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr, teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error) { ctx->parse_only = result == NULL; @@ -1516,7 +1560,7 @@ teco_machine_qregspec_get_results(teco_machine_qregspec_t *ctx, gboolean teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t *insert) { - gsize restrict_len = 0; + guint restrict_len = 0; /* * NOTE: We could have separate process_edit_cmd_cb() for @@ -1531,6 +1575,10 @@ teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t 
/* two-letter Q-Reg */ restrict_len = 2; + /* + * FIXME: This is not quite right as it will propose even + * lower case single or two-letter Q-Register names. + */ return teco_rb3str_auto_complete(&ctx->result_table->tree, !restrict_len, ctx->name.data, ctx->name.len, restrict_len, insert) && ctx->nesting == 1; @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,19 +41,26 @@ extern teco_view_t *teco_qreg_view; * FIXME: Use TECO_DECLARE_VTABLE_METHOD(gboolean, teco_qreg, set_integer, teco_qreg_t *, teco_int_t, GError **); * ... * teco_qreg_set_integer_t set_integer; + * ... + * teco_qreg_set_integer(qreg, 23, error); */ typedef const struct { gboolean (*set_integer)(teco_qreg_t *qreg, teco_int_t value, GError **error); gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error); gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error); - gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); + gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error); gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error); gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); gboolean (*undo_append_string)(teco_qreg_t *qreg, GError **error); - gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error); - gint (*get_character)(teco_qreg_t *qreg, guint position, GError **error); + gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error); + gboolean (*get_character)(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error); + /* always returns length in glyphs in contrast to get_string() */ + teco_int_t (*get_length)(teco_qreg_t *qreg, GError **error); 
/* * These callbacks exist only to optimize teco_qreg_stack_push|pop() @@ -65,6 +72,13 @@ typedef const struct { gboolean (*edit)(teco_qreg_t *qreg, GError **error); gboolean (*undo_edit)(teco_qreg_t *qreg, GError **error); + + /* + * Load and save already care about undo token + * creation. + */ + gboolean (*load)(teco_qreg_t *qreg, const gchar *filename, GError **error); + gboolean (*save)(teco_qreg_t *qreg, const gchar *filename, GError **error); } teco_qreg_vtable_t; /** @extends teco_rb3str_head_t */ @@ -106,13 +120,6 @@ gboolean teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_loca void teco_qreg_undo_set_eol_mode(teco_qreg_t *qreg); void teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode); -/* - * Load and save already care about undo token - * creation. - */ -gboolean teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error); -gboolean teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error); - /** @memberof teco_qreg_t */ static inline void teco_qreg_free(teco_qreg_t *qreg) @@ -220,7 +227,7 @@ void teco_machine_qregspec_reset(teco_machine_qregspec_t *ctx); */ struct teco_machine_stringbuilding_t *teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx); -teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr, +teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr, teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error); diff --git a/src/rb3str.c b/src/rb3str.c index 889c52e..d51ac5d 100644 --- a/src/rb3str.c +++ b/src/rb3str.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -95,7 +95,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar * @param 
case_sensitive Whether to match case-sensitive. * @param str String to complete (not necessarily null-terminated). * @param str_len Length of characters in `str`. - * @param restrict_len Limit completions to this size. + * @param restrict_len Limit completions to this size (in characters). * @param insert String to set with characters that can be autocompleted. * @return TRUE if the completion was unambiguous, else FALSE. * @@ -103,7 +103,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar */ gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, - const gchar *str, gsize str_len, gsize restrict_len, teco_string_t *insert) + const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert) { memset(insert, 0, sizeof(*insert)); @@ -115,7 +115,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, for (teco_rb3str_head_t *cur = teco_rb3str_nfind(tree, case_sensitive, str, str_len); cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len; cur = teco_rb3str_get_next(cur)) { - if (restrict_len && cur->key.len != restrict_len) + if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len) continue; if (G_UNLIKELY(!first)) { @@ -136,7 +136,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, for (teco_rb3str_head_t *cur = first; cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len; cur = teco_rb3str_get_next(cur)) { - if (restrict_len && cur->key.len != restrict_len) + if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len) continue; teco_interface_popup_add(TECO_POPUP_PLAIN, diff --git a/src/rb3str.h b/src/rb3str.h index ddbf6bb..adf5f89 100644 --- a/src/rb3str.h +++ b/src/rb3str.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * 
it under the terms of the GNU General Public License as published by @@ -65,5 +65,5 @@ teco_rb3str_head_t *teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_se const gchar *str, gsize len); gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, - const gchar *str, gsize str_len, gsize restrict_len, + const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -512,7 +512,7 @@ teco_state_edit_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE * A value of 1 denotes the first buffer, 2 the second, * ecetera. */ -TECO_DEFINE_STATE_EXPECTFILE(teco_state_edit_file, +TECO_DEFINE_STATE_EXPECTGLOB(teco_state_edit_file, .initial_cb = (teco_state_initial_cb_t)teco_state_edit_file_initial ); @@ -524,7 +524,7 @@ teco_state_save_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE g_autofree gchar *filename = teco_file_expand_path(str->data); if (teco_qreg_current) { - if (!teco_qreg_save(teco_qreg_current, filename, error)) + if (!teco_qreg_current->vtable->save(teco_qreg_current, filename, error)) return NULL; } else { if (!teco_buffer_save(teco_ring_current, *filename ? 
filename : NULL, error)) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/sciteco.h b/src/sciteco.h index 87bd973..7fe09d4 100644 --- a/src/sciteco.h +++ b/src/sciteco.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,6 +21,8 @@ #include <glib.h> +#include <Scintilla.h> + #if TECO_INTEGER == 32 typedef gint32 teco_int_t; #define TECO_INT_FORMAT G_GINT32_FORMAT @@ -58,8 +60,18 @@ teco_is_failure(teco_bool_t x) return x >= 0; } +/** + * Call function as destructor on debug builds. + * This should be used only if the cleanup is optional. + */ +#ifdef NDEBUG +#define TECO_DEBUG_CLEANUP __attribute__((unused)) +#else +#define TECO_DEBUG_CLEANUP __attribute__((destructor)) +#endif + /** TRUE if C is a control character */ -#define TECO_IS_CTL(C) ((C) < ' ') +#define TECO_IS_CTL(C) ((gunichar)(C) < ' ') /** ASCII character to echo control character C */ #define TECO_CTL_ECHO(C) ((C) | 0x40) /** @@ -73,17 +85,25 @@ teco_is_failure(teco_bool_t x) * This is not a bitfield, since it is set from SciTECO. */ enum { + TECO_ED_DEFAULT_ANSI = (1 << 2), TECO_ED_AUTOCASEFOLD = (1 << 3), TECO_ED_AUTOEOL = (1 << 4), TECO_ED_HOOKS = (1 << 5), - TECO_ED_FNKEYS = (1 << 6), + //TECO_ED_MOUSEKEY = (1 << 6), TECO_ED_SHELLEMU = (1 << 7), - TECO_ED_XTERM_CLIPBOARD = (1 << 8) + TECO_ED_OSC52 = (1 << 8), + TECO_ED_ICONS = (1 << 9) }; /* in main.c */ extern teco_int_t teco_ed; +static inline guint +teco_default_codepage(void) +{ + return teco_ed & TECO_ED_DEFAULT_ANSI ? 
SC_CHARSET_ANSI : SC_CP_UTF8; +} + /* in main.c */ extern volatile sig_atomic_t teco_interrupted; diff --git a/src/search.c b/src/search.c index 733eab9..0d04895 100644 --- a/src/search.c +++ b/src/search.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,11 +38,8 @@ #include "search.h" typedef struct { - /* - * FIXME: Should perhaps all be teco_int_t? - */ - gint dot; - gint from, to; + gssize dot; + gssize from, to; gint count; teco_buffer_t *from_buffer, *to_buffer; @@ -63,6 +60,9 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + if (G_UNLIKELY(!teco_search_qreg_machine)) teco_search_qreg_machine = teco_machine_qregspec_new(TECO_QREG_REQUIRED, ctx->qreg_table_locals, ctx->parent.must_undo); @@ -79,16 +79,16 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) return FALSE; if (v1 <= v2) { teco_search_parameters.count = 1; - teco_search_parameters.from = (gint)v1; - teco_search_parameters.to = (gint)v2; + teco_search_parameters.from = teco_interface_glyphs2bytes(v1); + teco_search_parameters.to = teco_interface_glyphs2bytes(v2); } else { teco_search_parameters.count = -1; - teco_search_parameters.from = (gint)v2; - teco_search_parameters.to = (gint)v1; + teco_search_parameters.from = teco_interface_glyphs2bytes(v2); + teco_search_parameters.to = teco_interface_glyphs2bytes(v1); } - if (!teco_validate_pos(teco_search_parameters.from) || - !teco_validate_pos(teco_search_parameters.to)) { + if (teco_search_parameters.from < 0 || + teco_search_parameters.to < 0) { /* * FIXME: In derived classes, the command name will * no longer be correct. 
@@ -114,24 +114,10 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) return TRUE; } -static const gchar * -teco_regexp_escape_chr(gchar chr) -{ - static gchar escaped[] = {'\\', '\0', '\0', '\0'}; - - if (!chr) { - escaped[1] = 'c'; - escaped[2] = '@'; - return escaped; - } - - escaped[1] = chr; - escaped[2] = '\0'; - return g_ascii_isalnum(chr) ? escaped + 1 : escaped; -} - typedef enum { TECO_SEARCH_STATE_START, + TECO_SEARCH_STATE_CTL, + TECO_SEARCH_STATE_ESCAPE, TECO_SEARCH_STATE_NOT, TECO_SEARCH_STATE_CTL_E, TECO_SEARCH_STATE_ANYQ, @@ -153,6 +139,7 @@ typedef enum { * The pointer is modified and always left after * the last character used, so it may point to the * terminating null byte after the call. + * @param codepage The codepage of pattern. * @param escape_default Whether to treat single characters * as classes or not. * @param error A GError. @@ -161,10 +148,13 @@ typedef enum { * When a non-empty string is returned, the state has always * been reset to TECO_STATE_STATE_START. * Must be freed with g_free(). + * + * @fixme The allocations could be avoided by letting it append + * to the target regexp teco_string_t directly. */ static gchar * teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, - gboolean escape_default, GError **error) + guint codepage, gboolean escape_default, GError **error) { while (pattern->len > 0) { switch (*state) { @@ -184,8 +174,12 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, */ if (!escape_default) return g_strdup(""); - pattern->len--; - return g_strdup(teco_regexp_escape_chr(*pattern->data++)); + gsize len = codepage == SC_CP_UTF8 + ? 
g_utf8_next_char(pattern->data) - pattern->data : 1; + gchar *escaped = g_regex_escape_string(pattern->data, len); + pattern->data += len; + pattern->len -= len; + return escaped; } break; @@ -246,25 +240,36 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, case TECO_SEARCH_STATE_ANYQ: { teco_qreg_t *reg; - + gsize len; + gunichar chr; + + if (codepage == SC_CP_UTF8) { + len = g_utf8_next_char(pattern->data) - pattern->data; + chr = g_utf8_get_char(pattern->data); + } else { + len = 1; + chr = *pattern->data; + } switch (teco_machine_qregspec_input(teco_search_qreg_machine, - *pattern->data, ®, NULL, error)) { + chr, ®, NULL, error)) { case TECO_MACHINE_QREGSPEC_ERROR: return NULL; case TECO_MACHINE_QREGSPEC_MORE: /* incomplete, but consume byte */ - break; + pattern->data += len; + pattern->len -= len; + continue; case TECO_MACHINE_QREGSPEC_DONE: teco_machine_qregspec_reset(teco_search_qreg_machine); g_auto(teco_string_t) str = {NULL, 0}; - if (!reg->vtable->get_string(reg, &str.data, &str.len, error)) + if (!reg->vtable->get_string(reg, &str.data, &str.len, NULL, error)) return NULL; - pattern->data++; - pattern->len--; + pattern->data += len; + pattern->len -= len; *state = TECO_SEARCH_STATE_START; return g_regex_escape_string(str.data, str.len); } @@ -303,6 +308,7 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, * successfully scanned character, so it can be * called recursively. It may also point to the * terminating null byte after the call. + * @param codepage The codepage of pattern. * @param single_expr Whether to scan a single pattern * expression or an arbitrary sequence. * @param error A GError. @@ -310,19 +316,31 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, * Must be freed with g_free(). 
*/ static gchar * -teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error) +teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr, GError **error) { teco_search_state_t state = TECO_SEARCH_STATE_START; g_auto(teco_string_t) re = {NULL, 0}; do { /* + * Previous character was caret. + * Make sure it is handled like a control character. + * This is necessary even though we have string building activated, + * to support constructs like ^Q^Q (typed with carets) in order to + * quote pattern matching characters. + */ + if (state == TECO_SEARCH_STATE_CTL) { + *pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data)); + state = TECO_SEARCH_STATE_START; + } + + /* * First check whether it is a class. * This will not treat individual characters * as classes, so we do not convert them to regexp * classes unnecessarily. */ - g_autofree gchar *temp = teco_class2regexp(&state, pattern, FALSE, error); + g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, FALSE, error); if (!temp) return NULL; @@ -344,18 +362,40 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error switch (state) { case TECO_SEARCH_STATE_START: switch (*pattern->data) { - case TECO_CTL_KEY('X'): teco_string_append_c(&re, '.'); break; - case TECO_CTL_KEY('N'): state = TECO_SEARCH_STATE_NOT; break; - default: { - const gchar *escaped = teco_regexp_escape_chr(*pattern->data); - teco_string_append(&re, escaped, strlen(escaped)); - } + case '^': + state = TECO_SEARCH_STATE_CTL; + break; + case TECO_CTL_KEY('Q'): + case TECO_CTL_KEY('R'): + state = TECO_SEARCH_STATE_ESCAPE; + break; + case TECO_CTL_KEY('X'): + teco_string_append_c(&re, '.'); + break; + case TECO_CTL_KEY('N'): + state = TECO_SEARCH_STATE_NOT; + break; + default: + state = TECO_SEARCH_STATE_ESCAPE; + continue; } break; + case TECO_SEARCH_STATE_ESCAPE: { + state = TECO_SEARCH_STATE_START; + gsize len = codepage == SC_CP_UTF8 + ? 
g_utf8_next_char(pattern->data) - pattern->data : 1; + /* the allocation could theoretically be avoided by escaping char-wise */ + g_autofree gchar *escaped = g_regex_escape_string(pattern->data, len); + teco_string_append(&re, escaped, strlen(escaped)); + pattern->data += len; + pattern->len -= len; + continue; + } + case TECO_SEARCH_STATE_NOT: { state = TECO_SEARCH_STATE_START; - g_autofree gchar *temp = teco_class2regexp(&state, pattern, TRUE, error); + g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -391,7 +431,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error case TECO_SEARCH_STATE_MANY: { /* consume exactly one pattern element */ - g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error); + g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -417,7 +457,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error state = TECO_SEARCH_STATE_START; break; default: { - g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error); + g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -454,16 +494,17 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error } static gboolean -teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) +teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) { g_autoptr(GMatchInfo) info = NULL; - const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0); + /* NOTE: can return NULL pointer for completely new and empty documents */ + const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, from, to-from) ? : ""; GError *tmp_error = NULL; /* * NOTE: The return boolean does NOT signal whether an error was generated. 
*/ - g_regex_match_full(re, buffer, (gssize)to, from, 0, &info, &tmp_error); + g_regex_match_full(re, buffer, to-from, 0, 0, &info, &tmp_error); if (tmp_error) { g_propagate_error(error, tmp_error); return FALSE; @@ -543,7 +584,7 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) if (matched_from >= 0 && matched_to >= 0) /* match success */ - teco_interface_ssm(SCI_SETSEL, matched_from, matched_to); + teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to); return TRUE; } @@ -551,8 +592,22 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) static gboolean teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error) { - static const GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | - G_REGEX_DOTALL | G_REGEX_RAW; + /* FIXME: Should G_REGEX_OPTIMIZE be added under certain circumstances? */ + GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_DOTALL; + + /* this is set in teco_state_search_initial() */ + if (ctx->expectstring.machine.codepage != SC_CP_UTF8) { + /* single byte encoding */ + flags |= G_REGEX_RAW; + } else if (!teco_string_validate_utf8(str)) { + /* + * While SciTECO code is always guaranteed to be in valid UTF-8, + * the result of string building may not (eg. if ^EQq inserts garbage). 
+ */ + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 byte sequence in search pattern"); + return FALSE; + } if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_SETSEL, @@ -567,8 +622,9 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs g_autoptr(GRegex) re = NULL; teco_string_t pattern = *str; + g_autofree gchar *re_pattern; /* NOTE: teco_pattern2regexp() modifies str pointer */ - g_autofree gchar *re_pattern = teco_pattern2regexp(&pattern, FALSE, error); + re_pattern = teco_pattern2regexp(&pattern, ctx->expectstring.machine.codepage, FALSE, error); if (!re_pattern) return FALSE; teco_machine_qregspec_reset(teco_search_qreg_machine); @@ -668,13 +724,15 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro undo__teco_interface_ssm(SCI_SETANCHOR, anchor, 0); if (!search_reg->vtable->undo_set_string(search_reg, error) || - !search_reg->vtable->set_string(search_reg, str->data, str->len, error)) + !search_reg->vtable->set_string(search_reg, str->data, str->len, + teco_default_codepage(), error)) return NULL; teco_interface_ssm(SCI_SETANCHOR, anchor, 0); } else { g_auto(teco_string_t) search_str = {NULL, 0}; - if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, error) || + if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, + NULL, error) || !teco_state_search_process(ctx, &search_str, search_str.len, error)) return NULL; } @@ -890,12 +948,12 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, if (teco_is_failure(search_state)) return &teco_state_start; - gint dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + sptr_t dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); if (teco_search_parameters.dot < dot) { /* kill forwards */ - gint anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0); + sptr_t anchor = 
teco_interface_ssm(SCI_GETANCHOR, 0, 0); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, dot, 0); @@ -903,18 +961,23 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot, anchor - teco_search_parameters.dot); + + /* NOTE: An undo action is not always created. */ + if (teco_current_doc_must_undo() && + teco_search_parameters.dot != anchor) + undo__teco_interface_ssm(SCI_UNDO, 0, 0); } else { /* kill backwards */ teco_interface_ssm(SCI_DELETERANGE, dot, teco_search_parameters.dot - dot); + + /* NOTE: An undo action is not always created. */ + if (teco_current_doc_must_undo() && + teco_search_parameters.dot != dot) + undo__teco_interface_ssm(SCI_UNDO, 0, 0); } teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); - /* NOTE: An undo action is not always created. */ - if (teco_current_doc_must_undo() && - teco_search_parameters.dot != dot) - undo__teco_interface_ssm(SCI_UNDO, 0, 0); - return &teco_state_start; } @@ -981,11 +1044,20 @@ teco_state_search_delete_done(teco_machine_main_t *ctx, const teco_string_t *str */ TECO_DEFINE_STATE_SEARCH(teco_state_search_delete); +static gboolean +teco_state_replace_insert_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode == TECO_MODE_NORMAL) + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + return TRUE; +} + /* * FIXME: Could be static */ TECO_DEFINE_STATE_INSERT(teco_state_replace_insert, - .initial_cb = NULL + .initial_cb = (teco_state_initial_cb_t)teco_state_replace_insert_initial ); static teco_state_t * @@ -1058,11 +1130,13 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const if (str->len > 0) { if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, 
str->len, + teco_default_codepage(), error)) return NULL; } else { g_auto(teco_string_t) replace_str = {NULL, 0}; - if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, error) || + if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, + NULL, error) || (replace_str.len > 0 && !teco_state_insert_process(ctx, &replace_str, replace_str.len, error))) return NULL; } @@ -1089,7 +1163,8 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri g_assert(replace_reg != NULL); if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, str->len, + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/search.h b/src/search.h index 3e4a2ef..3eacb6d 100644 --- a/src/search.h +++ b/src/search.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/spawn.c b/src/spawn.c index a30e6b2..e6d620c 100644 --- a/src/spawn.c +++ b/src/spawn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -76,8 +76,8 @@ static struct { GSource *stdin_src, *stdout_src; gboolean interrupted; - teco_int_t from, to; - teco_int_t start; + gssize from, to; + gsize start; gboolean text_added; teco_eol_writer_t stdin_writer; @@ -121,7 +121,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$COMSPEC", 8); g_assert(reg != NULL); teco_string_t 
comspec; - if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, error)) + if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, NULL, error)) return NULL; argv = g_new(gchar *, 5); @@ -140,7 +140,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SHELL", 6); g_assert(reg != NULL); teco_string_t shell; - if (!reg->vtable->get_string(reg, &shell.data, &shell.len, error)) + if (!reg->vtable->get_string(reg, &shell.data, &shell.len, NULL, error)) return NULL; argv = g_new(gchar *, 4); @@ -164,6 +164,13 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + /* + * Command-lines and file names are always assumed to be UTF-8, + * unless we set TECO_ED_DEFAULT_ANSI. + */ + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_default_codepage()); + if (!teco_expressions_eval(FALSE, error)) return FALSE; @@ -202,15 +209,17 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error) break; } - default: + default: { /* pipe and replace character range */ - if (!teco_expressions_pop_num_calc(&teco_spawn_ctx.to, 0, error) || - !teco_expressions_pop_num_calc(&teco_spawn_ctx.from, 0, error)) + teco_int_t from, to; + if (!teco_expressions_pop_num_calc(&to, 0, error) || + !teco_expressions_pop_num_calc(&from, 0, error)) return FALSE; + teco_spawn_ctx.from = teco_interface_glyphs2bytes(from); + teco_spawn_ctx.to = teco_interface_glyphs2bytes(to); rc = teco_bool(teco_spawn_ctx.from <= teco_spawn_ctx.to && - teco_validate_pos(teco_spawn_ctx.from) && - teco_validate_pos(teco_spawn_ctx.to)); - break; + teco_spawn_ctx.from >= 0 && teco_spawn_ctx.to >= 0); + } } if (teco_is_failure(rc)) { @@ -257,12 +266,11 @@ teco_state_execute_done(teco_machine_main_t *ctx, const teco_string_t *str, GErr g_autoptr(GIOChannel) stdin_chan = NULL, stdout_chan = NULL; g_auto(GStrv) argv = NULL, envp = 
NULL; - if (teco_string_contains(str, '\0')) { + if (!str->len || teco_string_contains(str, '\0')) { g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Command line must not contain null-bytes"); + "Command line must not be empty or contain null-bytes"); goto gerror; } - g_assert(str->data != NULL); argv = teco_parse_shell_command_line(str->data, error); if (!argv) @@ -410,17 +418,17 @@ cleanup: } /* in cmdline.c */ -gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /*$ EC pipe filter - * EC[command]$ -- Execute operating system command and filter buffer contents - * linesEC[command]$ - * -EC[command]$ - * from,toEC[command]$ - * :EC[command]$ -> Success|Failure - * lines:EC[command]$ -> Success|Failure - * -:EC[command]$ -> Success|Failure - * from,to:EC[command]$ -> Success|Failure + * ECcommand$ -- Execute operating system command and filter buffer contents + * linesECcommand$ + * -ECcommand$ + * from,toECcommand$ + * :ECcommand$ -> Success|Failure + * lines:ECcommand$ -> Success|Failure + * -:ECcommand$ -> Success|Failure + * from,to:ECcommand$ -> Success|Failure * * The EC command allows you to interface with the operating * system shell and external programs. 
@@ -546,14 +554,14 @@ teco_state_egcommand_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, } /*$ EG EGq - * EGq[command]$ -- Set Q-Register to output of operating system command - * linesEGq[command]$ - * -EGq[command]$ - * from,toEGq[command]$ - * :EGq[command]$ -> Success|Failure - * lines:EGq[command]$ -> Success|Failure - * -:EGq[command]$ -> Success|Failure - * from,to:EGq[command]$ -> Success|Failure + * EGq command$ -- Set Q-Register to output of operating system command + * linesEGq command$ + * -EGq command$ + * from,toEGq command$ + * :EGq command$ -> Success|Failure + * lines:EGq command$ -> Success|Failure + * -:EGq command$ -> Success|Failure + * from,to:EGq command$ -> Success|Failure * * Runs an operating system <command> and set Q-Register * <q> to the data read from its standard output stream. @@ -635,7 +643,7 @@ teco_spawn_stdin_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer dat gssize bytes_written = teco_eol_writer_convert(&teco_spawn_ctx.stdin_writer, buffer, convert_len, &teco_spawn_ctx.error); if (bytes_written < 0) { - /* GError ocurred */ + /* GError occurred */ g_main_loop_quit(teco_spawn_ctx.mainloop); return G_SOURCE_REMOVE; } @@ -667,6 +675,8 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da /* source has already been dispatched */ return G_SOURCE_REMOVE; + teco_qreg_t *qreg = teco_spawn_ctx.register_argument; + for (;;) { teco_string_t buffer; @@ -685,20 +695,16 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da if (!buffer.len) return G_SOURCE_CONTINUE; - if (teco_spawn_ctx.register_argument) { + if (qreg) { if (teco_spawn_ctx.text_added) { - if (!teco_spawn_ctx.register_argument->vtable->undo_append_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->append_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if 
(!qreg->vtable->undo_append_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->append_string(qreg, buffer.data, buffer.len, + &teco_spawn_ctx.error)) goto error; } else { - if (!teco_spawn_ctx.register_argument->vtable->undo_set_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->set_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->set_string(qreg, buffer.data, buffer.len, + teco_default_codepage(), &teco_spawn_ctx.error)) goto error; } } else { @@ -789,8 +795,7 @@ teco_spawn_idle_cb(gpointer user_data) return G_SOURCE_CONTINUE; } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_spawn_cleanup(void) { g_source_unref(teco_spawn_ctx.idle_src); @@ -801,4 +806,3 @@ teco_spawn_cleanup(void) if (teco_spawn_ctx.error) g_error_free(teco_spawn_ctx.error); } -#endif diff --git a/src/spawn.h b/src/spawn.h index 0e5ca96..312de6e 100644 --- a/src/spawn.h +++ b/src/spawn.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/string-utils.c b/src/string-utils.c index f2cd45e..b284760 100644 --- a/src/string-utils.c +++ b/src/string-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -55,13 +55,20 @@ teco_string_echo(const gchar *str, gsize len) return ret; } -/** @memberof teco_string_t */ +/** + * Get character coordinates for a given byte index. + * + * The given string must be valid UTF-8. 
+ * + * @memberof teco_string_t + */ void -teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) +teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column) { + *pos = 0; *line = *column = 1; - for (guint i = 0; i < pos; i++) { + for (guint i = 0; i < off; i = g_utf8_next_char(str+i) - str) { switch (str[i]) { case '\r': if (str[i+1] == '\n') @@ -75,10 +82,21 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) (*column)++; break; } + (*pos)++; } } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two strings. + * Works with UTF-8 and single-byte encodings. + * + * @param a Left string. + * @param b Right string. + * @param b_len Length of right string. + * @return Length of the common prefix in bytes. + * + * @memberof teco_string_t + */ gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) { @@ -91,15 +109,32 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) return len; } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two UTF-8 strings + * without considering case. + * + * The UTF-8 strings must be validated, which should be the case + * for help labels and short Q-Register names. + * + * @param a Left UTF-8 string. + * @param b Right UTF-8 string. + * @param b_len Length of right UTF-8 string. + * @return Length of the common prefix in bytes. 
+ * + * @memberof teco_string_t + */ gsize teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len) { gsize len = 0; - while (len < a->len && len < b_len && - g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len])) - len++; + while (len < a->len && len < b_len) { + gunichar a_chr = g_utf8_get_char(a->data+len); + gunichar b_chr = g_utf8_get_char(b+len); + if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr)) + break; + len = g_utf8_next_char(b+len) - b; + } return len; } diff --git a/src/string-utils.h b/src/string-utils.h index 26b660b..ebe25d5 100644 --- a/src/string-utils.h +++ b/src/string-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,15 +26,25 @@ /** * Upper-case SciTECO command character. * - * There are implementations in glib (g_ascii_toupper) and libc, + * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc, * but this implementation is sufficient for all letters used by SciTECO commands. */ -static inline gchar -teco_ascii_toupper(gchar chr) +static inline gunichar +teco_ascii_toupper(gunichar chr) { return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr; } +static inline gchar * +teco_strv_remove(gchar **strv, guint i) +{ + gchar *ret = strv[i]; + do + strv[i] = strv[i+1]; + while (strv[++i]); + return ret; +} + /** * An 8-bit clean null-terminated string. * @@ -42,6 +52,7 @@ teco_ascii_toupper(gchar chr) * and the allocation length is not stored. * Just like GString, teco_string_t are always null-terminated but at the * same time 8-bit clean (can contain null-characters). + * It may or may not contain UTF-8 byte sequences. 
* * The API is designed such that teco_string_t operations operate on plain * (null-terminated) C strings, a single character or character array as well as @@ -51,6 +62,12 @@ teco_ascii_toupper(gchar chr) * A target teco_string_t::data is always null-terminated and thus safe to pass * to functions expecting traditional null-terminated C strings if you can * guarantee that it contains no null-character other than the trailing one. + * + * @warning For consistency with C idioms the underlying character type is + * `char`, which might be signed! + * Accessing individual characters may yield signed integers and that sign + * might be preserved when upcasting to a larger signed integer. + * In this case you should always cast to `guchar` first. */ typedef struct { /** @@ -58,7 +75,7 @@ typedef struct { * The pointer is guaranteed to be non-NULL after initialization. */ gchar *data; - /** Length of `data` without the trailing null-byte. */ + /** Length of `data` without the trailing null-byte in bytes. */ gsize len; } teco_string_t; @@ -112,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr) teco_string_append(str, &chr, sizeof(chr)); } +/** @memberof teco_string_t */ +static inline void +teco_string_append_wc(teco_string_t *target, gunichar chr) +{ + /* 4 bytes should be enough, but we better follow the documentation */ + target->data = g_realloc(target->data, target->len + 6 + 1); + target->len += g_unichar_to_utf8(chr, target->data+target->len); + target->data[target->len] = '\0'; +} + /** * @fixme Should this also realloc str->data? 
* @@ -135,7 +162,7 @@ void undo__teco_string_truncate(teco_string_t *, gsize); gchar *teco_string_echo(const gchar *str, gsize len); -void teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column); +void teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column); typedef gsize (*teco_string_diff_t)(const teco_string_t *a, const gchar *b, gsize b_len); gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len); @@ -170,6 +197,19 @@ teco_string_rindex(const teco_string_t *str, gchar chr) const gchar *teco_string_last_occurrence(const teco_string_t *str, const gchar *chars); +/** + * Validate whether string consists exclusively of valid UTF-8, but accept null bytes. + * @note there is g_utf8_validate_len() in Glib 2.60 + */ +static inline gboolean +teco_string_validate_utf8(const teco_string_t *str) +{ + const gchar *p = str->data; + while (!g_utf8_validate(p, str->len - (p - str->data), &p) && !*p) + p++; + return p - str->data == str->len; +} + /** @memberof teco_string_t */ static inline void teco_string_clear(teco_string_t *str) diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes index 9f43fa6..1ab6667 100755 --- a/src/symbols-extract.tes +++ b/src/symbols-extract.tes @@ -1,4 +1,4 @@ -#!/usr/local/bin/sciteco -m +#!/usr/local/bin/sciteco -8m !* * ./symbols-extract.tes [-p <prefix pattern list>] -n <SymbolList object> [--] \ * <output file> <input header> @@ -48,13 +48,12 @@ teco_symbols_init(void) teco_symbol_list_init(&Q[getopt.n], entries, G_N_ELEMENTS(entries), FALSE); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_cmdline_cleanup(void) { teco_symbol_list_clear(&Q[getopt.n]); } -#endif^J + !* write output file *! 
2EL EWQ#ou diff --git a/src/symbols.c b/src/symbols.c index ce7a7f6..feead76 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -251,7 +251,7 @@ teco_state_scintilla_symbols_done(teco_machine_main_t *ctx, const teco_string_t } /* in cmdline.c */ -gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /*$ ES scintilla message * -- Send Scintilla message diff --git a/src/symbols.h b/src/symbols.h index 9cdfd74..0325d9d 100644 --- a/src/symbols.h +++ b/src/symbols.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ //#define DEBUG -TECO_DEFINE_UNDO_SCALAR(gchar); +TECO_DEFINE_UNDO_SCALAR(gunichar); TECO_DEFINE_UNDO_SCALAR(gint); TECO_DEFINE_UNDO_SCALAR(guint); TECO_DEFINE_UNDO_SCALAR(gsize); @@ -112,7 +112,7 @@ teco_undo_push_size(teco_undo_action_t action_cb, gsize size) } void -teco_undo_pop(gint pc) +teco_undo_pop(gsize pc) { while ((gint)teco_undo_heads->len > pc) { teco_undo_token_t *top = @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can 
redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,8 +164,8 @@ gpointer teco_undo_push_size(teco_undo_action_t action_cb, gsize size) * significantly improves batch-mode performance. */ -TECO_DECLARE_UNDO_SCALAR(gchar); -#define teco_undo_gchar(VAR) (*teco_undo_object_gchar_push(&(VAR))) +TECO_DECLARE_UNDO_SCALAR(gunichar); +#define teco_undo_gunichar(VAR) (*teco_undo_object_gunichar_push(&(VAR))) TECO_DECLARE_UNDO_SCALAR(gint); #define teco_undo_gint(VAR) (*teco_undo_object_gint_push(&(VAR))) @@ -243,5 +243,5 @@ TECO_DECLARE_UNDO_SCALAR(gconstpointer); /** @} */ -void teco_undo_pop(gint pc); +void teco_undo_pop(gsize pc); void teco_undo_clear(void); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ #include "error.h" #include "qreg.h" #include "eol.h" +#include "memory.h" #include "view.h" /** @memberof teco_view_t */ @@ -72,6 +73,27 @@ teco_view_setup(teco_view_t *ctx) */ teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0); + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Fortunately, we can do it before SCI_STYLECLEARALL. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + */ + teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0); + } else { + /* + * Documents are UTF-8 by default and all UTF-8 documents + * are expected to have a character index. + * This is a property of the document, instead of the view. 
+ */ + teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } + /* * Set some basic styles in order to provide * a consistent look across UIs if no profile @@ -137,6 +159,28 @@ teco_view_set_representations(teco_view_t *ctx) gchar buf[] = {(gchar)cc, '\0'}; teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]); } + + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Non-ANSI chars should be visible somehow. + * This would best be done always when changing the + * encoding to 0, but it would be kind of expensive. + * + * FIXME: On the other hand, this could cause problems + * when setting SC_CP_UTF8 later on. + */ + for (guint cc = 0x80; cc <= 0xFF; cc++) { + gchar buf[] = {(gchar)cc, '\0'}; + gchar rep[2+1]; + /* + * Hexadecimal is poorly supported in SciTECO, but + * multiple decimal numbers one after another look + * confusing, esp. in Curses. + */ + g_snprintf(rep, sizeof(rep), "%02X", cc); + teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep); + } + } } /** @@ -161,6 +205,9 @@ teco_view_set_representations(teco_view_t *ctx) gboolean teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error) { + g_auto(teco_eol_reader_t) reader; + teco_eol_reader_init_gio(&reader, channel); + teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(ctx, SCI_CLEARALL, 0, 0); @@ -173,11 +220,11 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro */ struct stat stat_buf = {.st_size = 0}; if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) && - stat_buf.st_size > 0) + stat_buf.st_size > 0) { + if (!teco_memory_check(stat_buf.st_size, error)) + goto error; teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0); - - g_auto(teco_eol_reader_t) reader; - teco_eol_reader_init_gio(&reader, channel); + } for (;;) { /* @@ -187,14 +234,24 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_string_t str; GIOStatus rc = 
teco_eol_reader_convert(&reader, &str.data, &str.len, error); - if (rc == G_IO_STATUS_ERROR) { - teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); - return FALSE; - } + if (rc == G_IO_STATUS_ERROR) + goto error; if (rc == G_IO_STATUS_EOF) break; teco_view_ssm(ctx, SCI_APPENDTEXT, str.len, (sptr_t)str.data); + + /* + * Even if we checked initially, knowing the file size, + * Scintilla could allocate much more bytes. + */ + if (!teco_memory_check(0, error)) + goto error; + + if (G_UNLIKELY(teco_interface_is_interrupted())) { + teco_error_interrupted_set(error); + goto error; + } } /* @@ -216,6 +273,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); return TRUE; + +error: + teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); + return FALSE; } /** @@ -449,3 +510,129 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error) return TRUE; } + +/** + * Convert a glyph index to a byte offset as used by Scintilla. + * + * This is optimized with the "line character index", + * which must always be enabled in UTF-8 documents. + * + * It is also used to validate glyph indexes. + * + * @param ctx The view to operate on. + * @param pos Position in glyphs/characters. + * @return Position in bytes or -1 if pos is out of bounds. + */ +gssize +teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos) +{ + if (pos < 0) + return -1; /* invalid position */ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos <= teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0) ? 
pos : -1; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos, + SC_LINECHARACTERINDEX_UTF32); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32); + return teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1; +} + +/** + * Convert byte offset to glyph/character index without bounds checking. + */ +teco_int_t +teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos) +{ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + return teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32) + + teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_bytes, pos); +} + +#define TECO_RELATIVE_LIMIT 1024 + +/** + * Convert a glyph index relative to a byte position to + * a byte position. + * + * Can be used to implement commands with relative character + * ranges. + * As an optimization, this always counts characters for deltas + * smaller than TECO_RELATIVE_LIMIT, so it will be fast + * even where the character-index based lookup is too slow + * (as on exceedingly long lines). + * + * @param ctx The view to operate on. + * @param pos Byte position to start. + * @param n Number of glyphs/characters to the left (negative) or + * right (positive) of pos. + * @return Position in bytes or -1 if the resulting position is out of bounds. 
+ */ +gssize +teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n) +{ + if (!n) + return pos; + if (ABS(n) > TECO_RELATIVE_LIMIT) + return teco_view_glyphs2bytes(ctx, teco_view_bytes2glyphs(ctx, pos) + n); + + sptr_t res = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n); + /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */ + return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1; +} + +/** + * Get codepoint at given byte offset. + * + * @param ctx The view to operate on. + * @param pos The glyph's byte position + * @param len The length of the document in bytes + * @return The requested codepoint. + * In UTF-8 encoded documents, this might be -1 (incomplete sequence) + * or -2 (invalid byte sequence). + */ +teco_int_t +teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) +{ + if (teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) != SC_CP_UTF8) + /* + * We don't support the Asian multi-byte encodings, + * so everything else is single-byte codepages. + * NOTE: Internally, the character is cast to signed char + * and may therefore become negative. + */ + return (guchar)teco_view_ssm(ctx, SCI_GETCHARAT, pos, 0); + + gchar buf[4+1]; + struct Sci_TextRangeFull range = { + .chrg = {pos, MIN(len, pos+sizeof(buf)-1)}, + .lpstrText = buf + }; + /* + * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * or repeatedly calling SCI_GETCHARAT. + */ + teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); + /* + * Make sure that the -1/-2 error values are preserved. + * The sign bit in UCS-4/UTF-32 is unused, so this will even + * suffice if TECO_INTEGER == 32. 
+ */ + return (gint32)g_utf8_get_char_validated(buf, -1); +} @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -70,3 +70,16 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError /** @pure @memberof teco_view_t */ void teco_view_free(teco_view_t *ctx); + +static inline guint +teco_view_get_codepage(teco_view_t *ctx) +{ + return teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) + ? : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0); +} + +gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos); +teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); +gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); + +teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len); diff --git a/tests/Makefile.am b/tests/Makefile.am index 997a473..eea6b67 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -32,6 +32,7 @@ installcheck-local: atconfig atlocal $(TESTSUITE) clean-local: test ! -f '$(TESTSUITE)' || \ $(SHELL) '$(TESTSUITE)' --clean + $(RM) -f atconfig AUTOM4TE = $(SHELL) $(top_srcdir)/config/missing --run autom4te AUTOTEST = $(AUTOM4TE) --language=autotest diff --git a/tests/atlocal.in b/tests/atlocal.in index 090a604..061937a 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -13,3 +13,19 @@ SCITECOPATH=@abs_top_srcdir@/lib # Glib debug options G_SLICE=debug-blocks G_ENABLE_DIAGNOSTIC=1 + +# For the Unicode tests - makes sure that UTF-8 characters +# are accepted on command lines. +case $host in +*-*-darwin*) + LC_ALL=`defaults read -g AppleLocale | @SED@ 's/@.*$//g'`.UTF-8 + ;; +*) + LC_ALL=C.UTF-8 + ;; +esac + +# Default stack size on Linux (8M). +# Some platforms allow very large stack sizes, making it hard to test +# against potential stack overflows. 
+ulimit -s 8192 diff --git a/tests/testsuite.at b/tests/testsuite.at index 20a72ca..676ca59 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -13,6 +13,8 @@ AT_COLOR_TESTS # (translated to [ ... ]) in simple cases where balanced # brackets are required in TECO code as well and # quadrigraphs (@<:@ and @:>@) in all other cases. +# Single round brackets also have to be replaced with the +# quadrigraphs @{:@ and @:}@. AT_BANNER([Features]) @@ -25,6 +27,16 @@ AT_CHECK([$SCITECO -e "2%a,%a - 3\"N(0/0)'"], 0, ignore, ignore) # c) The empty "list" element is equivalent to 0, so # "1,,2" is equivalent to "1,0,2" and (1,) to (1,0). AT_CHECK([$SCITECO -e "(1,) \"~|(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "1,(2)=="], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Exponentiation]) +AT_CHECK([$SCITECO -e "-1^*0 - (-1)\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "-1^*-5 - (-1)\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "0^*-5="], 1, ignore, ignore) +AT_CHECK([$SCITECO -e "0^*0 - 1\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "1^*-5 - 1\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "2^*-5 - 0\"N(0/0)'"], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Missing left operand]) @@ -35,6 +47,11 @@ AT_SETUP([Closing loops at the correct macro level]) AT_CHECK([$SCITECO -e '@^Ua{>} <Ma'], 1, ignore, ignore) AT_CLEANUP +AT_SETUP([Braces in loops]) +AT_CHECK([$SCITECO -e "1<23@{:@42>"], 1, ignore, ignore) +AT_CHECK([$SCITECO -e "1<23(1;)> \"~|(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + AT_SETUP([Pass-through loops]) # NOTE: This requires the <=>, so that values get consumed from the stack. # More elegant would be a command for popping exactly one argument like <:$>. 
@@ -63,8 +80,53 @@ AT_CHECK([$SCITECO -e '0U[[AB^Q@:>@(0/0)]]'], 0, ignore, ignore) # TODO: String building in Q-Register definitions AT_CLEANUP -AT_SETUP([8-bit cleanlyness]) -AT_CHECK([$SCITECO -e "0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z= Z-4\"N(0/0)'"], 0, ignore, ignore) +AT_SETUP([Q-Register stack]) +AT_CHECK([$SCITECO -e "[[a 23Ub ]]b Qb\"N(0/0)'"], 0, ignore, ignore) +# FG will temporarily change the working directory to tests/testsuite.dir. +AT_CHECK([$SCITECO -e "[[\$ @FG'..' ]]\$ :Q\$-1Q\$-^^r\"=(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Searches]) +# FIXME: We cannot currently easily insert a single ASCII 5 (^E), as it must be followed +# by a 2nd character. It can be quoted, but cannot be written as Caret+E. +# You also cannot search for a single ASCII 5 using Caret+E. +# 2 additional ^Q are translated to a single ^Q and interpreted at the search-pattern layer. +AT_CHECK([$SCITECO -e "@I/^Q\05/ J @:S/^Q^Q^Q\05/\"F(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Editing local registers in macro calls]) +AT_CHECK([$SCITECO -e '@^Ua{@EQ.x//} :Ma @^U.x/FOO/'], 0, ignore, ignore) +AT_CHECK([$SCITECO -e '@^Ua{@EQ.x//} Ma @^U.x/FOO/'], 1, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Loading files into Q-Registers]) +AT_CHECK([$SCITECO -e "@I/../ @EW/loadqreg.txt/ @EQa/loadqreg.txt/ :Qa-2\"N(0/0)'"], 0, ignore, ignore) +# Does the same as FG..$. Afterwards, the parent directory should be shorter. 
+AT_CHECK([$SCITECO -e ":Q\$Ul @EQ\$/loadqreg.txt/ :Q\$-Ql+1\">(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Saving Q-Registers contents to files]) +AT_CHECK([$SCITECO -e "@^Ua/test/ @E%a/saveqreg.txt/ @EB/saveqreg.txt/ Z-4\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@E%\$/saveqreg.txt/ @EB/saveqreg.txt/ Z-:Q\$\"N(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([8-bit cleanliness]) +AT_CHECK([$SCITECO -e "0@I//J 0A\"N(0/0)' :@S/^@/\"F(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@EQa//0EE 1U*0EE 0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z-4\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "0EE 129@I// -A-129\"N(0/0)' HXa @EQa// EE\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -8e "129@:^Ua// 0Qa-129\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "1EE 167Ua @I/^EUa/ .-1\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -8e "194Ua Qa@I//J :@S/^EUa/\"F(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Unicode]) +AT_CHECK([$SCITECO -e "8594@I/Здравствуй, мир!/ Z-17\"N(0/0)' J0A-8594\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "8594@^Ua/Здравствуй, мир!/ :Qa-17\"N(0/0)' 0Qa-8594\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-10\"N(0/0)' ^E-20\"N(0/0)' 204:EE .-10\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@I/TEST/ @EW/юникод.txt/"], 0, ignore, ignore) +AT_CHECK([test -f юникод.txt], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "^^ß-223\"N(0/0) 23Uъ Q[Ъ]-23\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@O/метка/ !метка!"], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Automatic EOL normalization]) @@ -104,11 +166,20 @@ AT_CHECK([$SCITECO -e "2147483647@S/foo/"], 0, ignore, ignore) AT_CHECK([$SCITECO -e "-2147483648@S/foo/"], 1, ignore, ignore) AT_CLEANUP +AT_SETUP([Search on new empty document]) +AT_CHECK([$SCITECO -e ":@S/foo/\"S(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + AT_SETUP([Memory limiting during spawning]) # This might result in an OOM if memory limiting is not 
working AT_CHECK([$SCITECO -e "50*1000*1000,2EJ 0,128ED @EC'dd if=/dev/zero'"], 1, ignore, ignore) AT_CLEANUP +AT_SETUP([Memory limiting during file reading]) +AT_CHECK([dd if=/dev/zero of=big-file.txt bs=1000 count=50000], 0, ignore, ignore) +AT_CHECK([$SCITECO -8e "50*1000*1000,2EJ @EB'big-file.txt'"], 1, ignore, ignore) +AT_CLEANUP + AT_SETUP([Q-Register stack cleanup]) AT_CHECK([$SCITECO -e '@<:@a'], 0, ignore, ignore) AT_CLEANUP @@ -131,6 +202,15 @@ AT_SETUP([Empty lexer name]) AT_CHECK([$SCITECO -e '@ES/SETILEXER//'], 1, ignore, ignore) AT_CLEANUP +AT_SETUP([Empty command string]) +AT_CHECK([$SCITECO -e '@EC//'], 1, ignore, ignore) +AT_CHECK([$SCITECO -e '@EGa//'], 1, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Jump to beginning of macro]) +AT_CHECK([$SCITECO -e "%a-2\"< F< ' Qa-2\"N(0/0)'"], 0, ignore, ignore) +AT_CLEANUP + AT_BANNER([Known Bugs]) AT_SETUP([Number stack]) @@ -150,6 +230,7 @@ AT_CLEANUP # NOTE: This bug depends on specific build options of Glib's # PCRE which is not predictable. +# It segfaults at least on Ubuntu 20.04 (libpcre3 v2:8.39). #AT_SETUP([Pattern matching overflow]) ## Should no longer dump core. ## It could fail because the memory limit is exceeed, @@ -159,9 +240,6 @@ AT_CLEANUP #AT_CLEANUP AT_SETUP([Recursion overflow]) -# On Mac OS and FreeBSD we cannot always reliably provoke a stack overflow. -# Let's suppose this is the case on all BSDs. -AT_SKIP_IF([case $host in *-*-*bsd* | *-*-darwin*) true;; *) false;; esac]) # Should no longer dump core. # It could fail because the memory limit is exceeed, # but not in this case since we limit the recursion. diff --git a/www/build.tes b/www/build.tes new file mode 100755 index 0000000..9ef1ec4 --- /dev/null +++ b/www/build.tes @@ -0,0 +1,118 @@ +#!/usr/local/bin/sciteco -m +!* + * Generate the website at https://rhaberkorn.github.io/sciteco + * This reuses content from Markdown documents and the manpages. + * Everything else is cross-linked to Sourceforge. 
+ * It must currently be run from the www/ subdirectory of an in-tree-build. + * The HTML manuals must be in ../doc. + * Required tools: lowdown + *! + +!* + * Perhaps everything should be white on black, like in a terminal? + * The first line could be inverted (black on white). + *! +@[header]{I +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html> +<head> + <title>SciTECO - <Website> Q[title]</title> + <link rel="icon" type="image/x-icon" href="https://raw.githubusercontent.com/rhaberkorn/sciteco/master/ico/sciteco.ico"> + <meta name="description" content="Advanced TECO dialect and interactive screen editor based on Scintilla"> + <style> + @import "https://www.nerdfonts.com/assets/css/webfont.css"; + + .reverse { + background-color: black; + color: white; + } + + /* For grohtml-generated content */ + .grohtml p { margin-top: 0; margin-bottom: 0; vertical-align: top } + .grohtml pre { margin-top: 0; margin-bottom: 0; vertical-align: top } + .grohtml table { margin-top: 0; margin-bottom: 0; vertical-align: top } + .grohtml h1 { text-align: center } + </style> +</head> +<body> +<tt> + SciTECO - <Website> + <span class="nf nf-md-home"></span> <a href="index.html">Home</a> / + <span class="nf nf-md-image"></span> <a href="screenshots.html">Screenshots</a> / + <span class="nf nf-md-floppy_variant"></span> <a href="https://github.com/rhaberkorn/sciteco/releases" target=_blank>Downloads</a> / + <span class="nf nf-fa-book_atlas"></span> <a href="sciteco.1.html"><b>sciteco</b>(1)</a> / + <span class="nf nf-fa-book_bible"></span> <a href="sciteco.7.html"><b>sciteco</b>(7)</a> / + <span class="nf nf-md-alpha_w_box"></span> <a href="https://github.com/rhaberkorn/sciteco/wiki" target=_blank>Wiki</a> / + <span class="nf nf-fa-github_square"></span> <a href="https://github.com/rhaberkorn/sciteco" target=_blank>Github</a> +</tt> +<hr> +} + +!* This is a macro, so we can potentially customize the content per page *! 
+@[footer]{I +<hr> +<tt> +<table width="100%"><tr> + <td width="1ch" valign=top><b>*</b></td> + <td valign=top><marquee>IThis page was made with SciTECO.<span class=reverse>$</span>-EX<span class=reverse>$$</span></marquee></td> + <td width=56><a href="https://github.com/rhaberkorn/sciteco/issues" target=_blank> + <img src="https://sciteco.sf.net/graphics/notbug.gif" title="There are no bugs. Go away."> + </a></td> +</tr></table> +</tt> +</body> +</html> +} + +EBindex.html HK + [title]Home M[header] + EClowdown -thtml --html-no-skiphtml --html-no-escapehtml ../NEWS.md + I<hr> + EClowdown -thtml --html-no-skiphtml --html-no-escapehtml ../README.md + M[footer] +EW + +EBscreenshots.html HK + [title]Screenshots M[header] + EClowdown -thtml --html-no-skiphtml --html-no-escapehtml screenshots.md + M[footer] +EW + +[manpage-header] + <p class="nf nf-fa-warning"> This documents the project's HEAD revision.</p> + <div class="grohtml"> + +EB../doc/sciteco.1.html + S<body>S<h1 L 0,.K + [title]sciteco(1) M[header] G[manpage-header] + FD<hr>S</body> .,ZK + M[footer] +EWsciteco.1.html + +EB../doc/sciteco.7.html + S<body>S<h1 L 0,.K + [title]sciteco(7) M[header] G[manpage-header] + FD<hr>S</body> .,ZK + M[footer] +EWsciteco.7.html + +!* + * These manpages are not in the header bar, but still postprocessed + * for consistency. + *! 
+ +EB../doc/grosciteco.tes.1.html + S<body>S<h1 L 0,.K + [title]grosciteco.tes(1) M[header] G[manpage-header] + FD<hr>S</body> .,ZK + M[footer] +EWgrosciteco.tes.1.html + +EB../doc/tedoc.tes.1.html + S<body>S<h1 L 0,.K + [title]tedoc.tes(1) M[header] G[manpage-header] + FD<hr>S</body> .,ZK + M[footer] +EWtedoc.tes.1.html + +EX diff --git a/www/screenshots.md b/www/screenshots.md new file mode 100644 index 0000000..0e4462b --- /dev/null +++ b/www/screenshots.md @@ -0,0 +1,23 @@ +# Screenshots + +## v2.1.0 + + + +<img src="https://sciteco.sf.net/screenshots/v2.1.0-freebsd-ncurses.png" width="921" alt="FreeBSD/ncurses, Unicode icons" title="FreeBSD/ncurses, Unicode icons"/> + +## v2.1 (dev) + + + +## v0.7 (dev) + + + + + + + +## v0.5 + + |