aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2019-03-15 22:25:16 +0100
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2019-03-15 22:25:16 +0100
commit92f8b625b5bdde6f87a99c4415ee218496fec343 (patch)
tree04b575d12a69d83b1ae680678df9a4984e96e24f
parentdf055fec1b75837aae23c38c237e84f79786ee04 (diff)
downloadopenrussian-cli-92f8b625b5bdde6f87a99c4415ee218496fec343.tar.gz
imported mysql2sqlite script from https://github.com/dumblob/mysql2sqlite
* this will practically be required by everyone since I won't add any pre-generated databases. * this could be added as a submodule as well, but it's probably not worth it for a single Awk script
-rwxr-xr-xmysql2sqlite278
1 files changed, 278 insertions, 0 deletions
diff --git a/mysql2sqlite b/mysql2sqlite
new file mode 100755
index 0000000..deaa469
--- /dev/null
+++ b/mysql2sqlite
@@ -0,0 +1,278 @@
+#!/usr/bin/awk -f
+
+# Authors: @esperlu, @artemyk, @gkuenning, @dumblob
+
+# FIXME detect empty input file and issue a warning
+
+function printerr( s ){ print s | "cat >&2" }
+
+BEGIN {
+ if( ARGC != 2 ){
+ printerr( \
+ "USAGE: mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
+ " file name - (dash) is not supported, because - means stdin")
+ no_END = 1
+ exit 1
+ }
+
+ # Find INT_MAX supported by both this AWK (usually an ISO C signed int)
+ # and SQlite.
+ # On non-8bit-based architectures, the additional bits are safely ignored.
+
+ # 8bit (lower precision should not exist)
+ s="127"
+ # "63" + 0 avoids potential parser misbehavior
+ if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
+ # 16bit
+ s="32767"
+ if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
+ # 32bit
+ s="2147483647"
+ if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
+ # 64bit (as INTEGER in SQlite3)
+ s="9223372036854775807"
+ if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
+# # 128bit
+# s="170141183460469231731687303715884105728"
+# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
+# # 256bit
+# s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
+# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
+# # 512bit
+# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
+# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
+# # 1024bit
+# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
+# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
+# # higher precision probably not needed
+
+ FS=",$"
+ print "PRAGMA synchronous = OFF;"
+ print "PRAGMA journal_mode = MEMORY;"
+ print "BEGIN TRANSACTION;"
+}
+
+# historically 3 spaces separate non-argument local variables
+function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){
+ powtwo = 1
+ overflow = 0
+ # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
+ for( i = length( str_bit ); i > 0; --i ){
+ bit = substr( str_bit, i, 1 )
+ if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){
+ printerr( \
+ NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
+ break
+ }
+ res = res + bit * powtwo
+ # no warning here as it might be the last iteration
+ if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
+ powtwo = powtwo * 2
+ }
+ return res
+}
+
+# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
+/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
+ gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
+ print
+ inTrigger = 1
+ next
+}
+# The end of CREATE TRIGGER has a stray comment terminator
+/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next }
+# The rest of triggers just get passed through
+inTrigger != 0 { print; next }
+
+# CREATE VIEW looks like a TABLE in comments
+/^\/\*.*(CREATE.*TABLE|create.*table)/ {
+ inView = 1
+ next
+}
+# end of CREATE VIEW
+/^(\).*(ENGINE|engine).*\*\/;)/ {
+ inView = 0
+ next
+}
+# content of CREATE VIEW
+inView != 0 { next }
+
+# skip comments
+/^\/\*/ { next }
+
+# skip PARTITION statements
+/^ *[(]?(PARTITION|partition) +[^ ]+/ { next }
+
+# print all INSERT lines
+( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ {
+ prev = ""
+
+ # first replace \\ by \_ that mysqldump never generates to deal with
+ # sequnces like \\n that should be translated into \n, not \<LF>.
+ # After we convert all escapes we replace \_ by backslashes.
+ gsub( /\\\\/, "\\_" )
+
+ # single quotes are escaped by another single quote
+ gsub( /\\'/, "''" )
+ gsub( /\\n/, "\n" )
+ gsub( /\\r/, "\r" )
+ gsub( /\\"/, "\"" )
+ gsub( /\\\032/, "\032" ) # substitute char
+
+ gsub( /\\_/, "\\" )
+
+ # sqlite3 is limited to 16 significant digits of precision
+ while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
+ hexIssue = 1
+ sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
+ }
+ if( hexIssue ){
+ printerr( \
+ NR ": WARN Hex number trimmed (length longer than 16 chars)." )
+ hexIssue = 0
+ }
+ print
+ next
+}
+
+# CREATE DATABASE is not supported
+/^(CREATE.*DATABASE|create.*database)/ { next }
+
+# print the CREATE line as is and capture the table name
+/^(CREATE|create)/ {
+ if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
+ caseIssue = 1
+ printerr( \
+ NR ": WARN Potential case sensitivity issues with table/column naming\n" \
+ " (see INFO at the end)." )
+ }
+ if( match( $0, /`[^`]+/ ) ){
+ tableName = substr( $0, RSTART+1, RLENGTH-1 )
+ }
+ aInc = 0
+ prev = ""
+ firstInTable = 1
+ print
+ next
+}
+
+# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
+/^ (FULLTEXT KEY|fulltext key)/ { gsub( /.+(KEY|key)/, " KEY" ) }
+
+# Get rid of field lengths in KEY lines
+/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) }
+
+aInc == 1 && /PRIMARY KEY|primary key/ { next }
+
+# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
+/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) }
+
+# Print all fields definition lines except the `KEY` lines.
+/^ / && !/^( (KEY|key)|\);)/ {
+ if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
+ aInc = 1
+ gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
+ }
+ gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
+ gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
+ # FIXME
+ # CREATE TRIGGER [UpdateLastTime]
+ # AFTER UPDATE
+ # ON Package
+ # FOR EACH ROW
+ # BEGIN
+ # UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
+ # END
+ gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" )
+ gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
+ gsub( /(COLLATE|collate) [^ ]+ /, "" )
+ gsub( /(ENUM|enum)[^)]+\)/, "text " )
+ gsub( /(SET|set)\([^)]+\)/, "text " )
+ gsub( /UNSIGNED|unsigned/, "" )
+ gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
+ gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
+ ere_bit_field = "[bB]'[10]+'"
+ if( match($0, ere_bit_field) ){
+ sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
+ }
+ # field comments are not supported
+ gsub( / (COMMENT|comment).+$/, "" )
+ # Get commas off end of line
+ gsub( /,.?$/, "" )
+ if( prev ){
+ if( firstInTable ){
+ print prev
+ firstInTable = 0
+ }
+ else {
+ print "," prev
+ }
+ }
+ else {
+ # FIXME check if this is correct in all cases
+ if( match( $1,
+ /(CONSTRAINT|constraint) \".*\" (FOREIGN KEY|foreign key)/ ) ){
+ print ","
+ }
+ }
+ prev = $1
+}
+
+/ ENGINE| engine/ {
+ if( prev ){
+ if( firstInTable ){
+ print prev
+ firstInTable = 0
+ }
+ else {
+ print "," prev
+ }
+ }
+ prev=""
+ print ");"
+ next
+}
+# `KEY` lines are extracted from the `CREATE` block and stored in array for later print
+# in a separate `CREATE KEY` command. The index name is prefixed by the table name to
+# avoid a sqlite error for duplicate index name.
+/^( (KEY|key)|\);)/ {
+ if( prev ){
+ if( firstInTable ){
+ print prev
+ firstInTable = 0
+ }
+ else {
+ print "," prev
+ }
+ }
+ prev = ""
+ if( $0 == ");" ){
+ print
+ }
+ else {
+ if( match( $0, /`[^`]+/ ) ){
+ indexName = substr( $0, RSTART+1, RLENGTH-1 )
+ }
+ if( match( $0, /\([^()]+/ ) ){
+ indexKey = substr( $0, RSTART+1, RLENGTH-1 )
+ }
+ # idx_ prefix to avoid name clashes (they really happen!)
+ key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
+ tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
+ }
+}
+
+END {
+ if( no_END ){ exit 1}
+ # print all KEY creation lines.
+ for( table in key ){ printf key[table] }
+
+ print "END TRANSACTION;"
+
+ if( caseIssue ){
+ printerr( \
+ "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
+ " or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
+ " identifiers. Thus expect errors like \"table T has no column named F\".")
+ }
+}