From 0c03b434fd6a5e218160a527a0a91dc8f820d3e8 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 19 Sep 2023 17:11:27 +0300 Subject: [PATCH 1/2] [mono][aot] Fix support for files with non-ascii characters Add g_fopen, g_unlink and g_rename which on windows do a utf8 to utf16 conversion and then call the corresponding wide char api. --- src/mono/mono/eglib/gfile.c | 48 +++++++++++++++++++++++++++---- src/mono/mono/eglib/glib.h | 9 ++---- src/mono/mono/mini/aot-compiler.c | 28 +++++++++--------- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/src/mono/mono/eglib/gfile.c b/src/mono/mono/eglib/gfile.c index 73597ad5b208d..19331a98aba65 100644 --- a/src/mono/mono/eglib/gfile.c +++ b/src/mono/mono/eglib/gfile.c @@ -105,17 +105,15 @@ g_file_error_from_errno (gint err_no) } } -FILE * -g_fopen (const char *path, const char *mode) +FILE* +g_fopen (const gchar *path, const gchar *mode) { FILE *fp; if (!path) return NULL; -#ifndef HOST_WIN32 - fp = fopen (path, mode); -#else +#ifdef HOST_WIN32 gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0); gunichar2 *wMode = g_utf8_to_utf16 (mode, -1, 0, 0, 0); @@ -125,7 +123,47 @@ g_fopen (const char *path, const char *mode) fp = _wfopen ((wchar_t *) wPath, (wchar_t *) wMode); g_free (wPath); g_free (wMode); +#else + fp = fopen (path, mode); #endif return fp; } + +int +g_rename (const gchar *src_path, const gchar *dst_path) +{ +#ifdef HOST_WIN32 + gunichar2 *wSrcPath = g_utf8_to_utf16 (src_path, -1, 0, 0, 0); + gunichar2 *wDstPath = g_utf8_to_utf16 (dst_path, -1, 0, 0, 0); + + if (!wSrcPath || !wDstPath) + return -1; + + int ret = _wrename ((wchar_t *) wSrcPath, (wchar_t *) wDstPath); + g_free (wSrcPath); + g_free (wDstPath); + + return ret; +#else + return rename (src_path, dst_path); +#endif +} + +int +g_unlink (const gchar *path) +{ +#ifdef HOST_WIN32 + gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0); + + if (!wPath) + return -1; + + int ret = _wunlink ((wchar_t *) wPath); + g_free (wPath); + + return 
ret; +#else + return unlink (path); +#endif +} diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h index 68192b2e6baeb..9a8474310bede 100644 --- a/src/mono/mono/eglib/glib.h +++ b/src/mono/mono/eglib/glib.h @@ -962,7 +962,9 @@ typedef enum { G_ENUM_FUNCTIONS (GFileTest) -FILE * g_fopen (const char *path, const char *mode); +FILE* g_fopen (const gchar *path, const gchar *mode); +int g_rename (const gchar *src_path, const gchar *dst_path); +int g_unlink (const gchar *path); gboolean g_file_get_contents (const gchar *filename, gchar **contents, gsize *length, GError **gerror); GFileError g_file_error_from_errno (gint err_no); gint g_file_open_tmp (const gchar *tmpl, gchar **name_used, GError **gerror); @@ -974,11 +976,6 @@ gboolean g_file_test (const gchar *filename, GFileTest test); #define g_open open #endif #ifdef G_OS_WIN32 -#define g_unlink _unlink -#else -#define g_unlink unlink -#endif -#ifdef G_OS_WIN32 #define g_write _write #else #define g_write write diff --git a/src/mono/mono/mini/aot-compiler.c b/src/mono/mono/mini/aot-compiler.c index 42c122d98371b..3b38e7a1efa94 100644 --- a/src/mono/mono/mini/aot-compiler.c +++ b/src/mono/mono/mini/aot-compiler.c @@ -5529,7 +5529,7 @@ MONO_RESTORE_WARNING if (acfg->aot_opts.export_symbols_outfile) { char *export_symbols_out = g_string_free (export_symbols, FALSE); - FILE* export_symbols_outfile = fopen (acfg->aot_opts.export_symbols_outfile, "w"); + FILE* export_symbols_outfile = g_fopen (acfg->aot_opts.export_symbols_outfile, "w"); if (!export_symbols_outfile) { fprintf (stderr, "Unable to open specified export_symbols_outfile '%s' to append symbols '%s': %s\n", acfg->aot_opts.export_symbols_outfile, export_symbols_out, strerror (errno)); g_free (export_symbols_out); @@ -13443,11 +13443,11 @@ compile_asm (MonoAotCompile *acfg) } #endif - if (0 != rename (tmp_outfile_name, outfile_name)) { + if (0 != g_rename (tmp_outfile_name, outfile_name)) { if (G_FILE_ERROR_EXIST == g_file_error_from_errno (errno)) 
{ /* Since we are rebuilding the module we need to be able to replace any old copies. Remove old file and retry rename operation. */ - unlink (outfile_name); - rename (tmp_outfile_name, outfile_name); + g_unlink (outfile_name); + g_rename (tmp_outfile_name, outfile_name); } } @@ -13460,7 +13460,7 @@ compile_asm (MonoAotCompile *acfg) #endif if (!acfg->aot_opts.save_temps) - unlink (objfile); + g_unlink (objfile); g_free (tmp_outfile_name); g_free (outfile_name); @@ -13469,7 +13469,7 @@ compile_asm (MonoAotCompile *acfg) if (acfg->aot_opts.save_temps) aot_printf (acfg, "Retained input file.\n"); else - unlink (acfg->tmpfname); + g_unlink (acfg->tmpfname); return 0; } @@ -13517,7 +13517,7 @@ load_profile_file (MonoAotCompile *acfg, char *filename) int version; char magic [32]; - infile = fopen (filename, "rb"); + infile = g_fopen (filename, "rb"); if (!infile) { fprintf (stderr, "Unable to open file '%s': %s.\n", filename, strerror (errno)); exit (1); @@ -14535,7 +14535,7 @@ static void aot_dump (MonoAotCompile *acfg) mono_json_writer_object_end (&writer); dumpname = g_strdup_printf ("%s.json", g_path_get_basename (acfg->image->name)); - dumpfile = fopen (dumpname, "w+"); + dumpfile = g_fopen (dumpname, "w+"); g_free (dumpname); fprintf (dumpfile, "%s", writer.text->str); @@ -14944,7 +14944,7 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options) } if (acfg->aot_opts.logfile) { - acfg->logfile = fopen (acfg->aot_opts.logfile, "a+"); + acfg->logfile = g_fopen (acfg->aot_opts.logfile, "a+"); } if (acfg->aot_opts.trimming_eligible_methods_outfile && acfg->dedup_phase != DEDUP_COLLECT) { @@ -14958,7 +14958,7 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options) } if (acfg->aot_opts.data_outfile) { - acfg->data_outfile = fopen (acfg->aot_opts.data_outfile, "w+"); + acfg->data_outfile = g_fopen (acfg->aot_opts.data_outfile, "w+"); if (!acfg->data_outfile) { aot_printerrf (acfg, "Unable to create file '%s': %s\n", 
acfg->aot_opts.data_outfile, strerror (errno)); return 1; @@ -15121,7 +15121,7 @@ aot_assembly (MonoAssembly *ass, guint32 jit_opts, MonoAotOptions *aot_options) acfg->flags = (MonoAotFileFlags)(acfg->flags | MONO_AOT_FILE_FLAG_EAGER_LOAD); if (acfg->aot_opts.instances_logfile_path) { - acfg->instances_logfile = fopen (acfg->aot_opts.instances_logfile_path, "w"); + acfg->instances_logfile = g_fopen (acfg->aot_opts.instances_logfile_path, "w"); if (!acfg->instances_logfile) { aot_printerrf (acfg, "Unable to create logfile: '%s'.\n", acfg->aot_opts.instances_logfile_path); return 1; @@ -15384,7 +15384,7 @@ create_depfile (MonoAotCompile *acfg) // FIXME: Support other configurations g_assert (acfg->aot_opts.llvm_only && acfg->aot_opts.asm_only && acfg->aot_opts.llvm_outfile); - depfile = fopen (acfg->aot_opts.depfile, "w"); + depfile = g_fopen (acfg->aot_opts.depfile, "w"); g_assert (depfile); int ntargets = 1; @@ -15456,14 +15456,14 @@ emit_aot_image (MonoAotCompile *acfg) acfg->tmpfname = g_strdup_printf ("%s", acfg->aot_opts.outfile); else acfg->tmpfname = g_strdup_printf ("%s.s", acfg->image->name); - acfg->fp = fopen (acfg->tmpfname, "w+"); + acfg->fp = g_fopen (acfg->tmpfname, "w+"); } else { if (strcmp (acfg->aot_opts.temp_path, "") == 0) { acfg->fp = fdopen (g_file_open_tmp ("mono_aot_XXXXXX", &acfg->tmpfname, NULL), "w+"); } else { acfg->tmpbasename = g_build_filename (acfg->aot_opts.temp_path, "temp", (const char*)NULL); acfg->tmpfname = g_strdup_printf ("%s.s", acfg->tmpbasename); - acfg->fp = fopen (acfg->tmpfname, "w+"); + acfg->fp = g_fopen (acfg->tmpfname, "w+"); } } if (acfg->fp == 0 && !acfg->aot_opts.llvm_only) { From 6192682829a06cdb6a278df6ecd4095f7500085e Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 30 Jan 2024 14:52:17 +0200 Subject: [PATCH 2/2] [mono][eglib] Avoid utf16 conversion if all characters are ASCII --- src/mono/mono/eglib/gfile.c | 70 +++++++++++++++++++++++++------------ src/mono/mono/eglib/glib.h | 1 + 2 files changed, 48 
insertions(+), 23 deletions(-)

diff --git a/src/mono/mono/eglib/gfile.c b/src/mono/mono/eglib/gfile.c
index 19331a98aba65..4d5390c193294 100644
--- a/src/mono/mono/eglib/gfile.c
+++ b/src/mono/mono/eglib/gfile.c
@@ -105,6 +105,24 @@ g_file_error_from_errno (gint err_no)
 	}
 }
 
+#ifdef HOST_WIN32
+/*
+ * Returns TRUE when every character of the NUL-terminated string STR is
+ * ASCII, FALSE as soon as a non-ASCII character is found. The wrappers
+ * below use it to skip the UTF-8 to UTF-16 conversion for plain ASCII input.
+ */
+static gboolean
+is_ascii_string (const gchar *str)
+{
+	while (*str) {
+		if (!g_isascii (*str))
+			return FALSE;
+		str++;
+	}
+	return TRUE;
+}
+#endif
+
 FILE*
 g_fopen (const gchar *path, const gchar *mode)
 {
@@ -114,15 +132,19 @@ g_fopen (const gchar *path, const gchar *mode)
 		return NULL;
 
 #ifdef HOST_WIN32
-	gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0);
-	gunichar2 *wMode = g_utf8_to_utf16 (mode, -1, 0, 0, 0);
-
-	if (!wPath || !wMode)
-		return NULL;
-
-	fp = _wfopen ((wchar_t *) wPath, (wchar_t *) wMode);
-	g_free (wPath);
-	g_free (wMode);
+	if (is_ascii_string (path) && is_ascii_string (mode)) {
+		fp = fopen (path, mode);
+	} else {
+		gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0);
+		gunichar2 *wMode = g_utf8_to_utf16 (mode, -1, 0, 0, 0);
+
+		if (!wPath || !wMode)
+			return NULL;
+
+		fp = _wfopen ((wchar_t *) wPath, (wchar_t *) wMode);
+		g_free (wPath);
+		g_free (wMode);
+	}
 #else
 	fp = fopen (path, mode);
 #endif
@@ -134,17 +156,21 @@ int
 g_rename (const gchar *src_path, const gchar *dst_path)
 {
 #ifdef HOST_WIN32
-	gunichar2 *wSrcPath = g_utf8_to_utf16 (src_path, -1, 0, 0, 0);
-	gunichar2 *wDstPath = g_utf8_to_utf16 (dst_path, -1, 0, 0, 0);
+	if (is_ascii_string (src_path) && is_ascii_string (dst_path)) {
+		return rename (src_path, dst_path);
+	} else {
+		gunichar2 *wSrcPath = g_utf8_to_utf16 (src_path, -1, 0, 0, 0);
+		gunichar2 *wDstPath = g_utf8_to_utf16 (dst_path, -1, 0, 0, 0);
 
-	if (!wSrcPath || !wDstPath)
-		return -1;
+		if (!wSrcPath || !wDstPath)
+			return -1;
 
-	int ret = _wrename ((wchar_t *) wSrcPath, (wchar_t *) wDstPath);
-	g_free (wSrcPath);
-	g_free (wDstPath);
+		int ret = _wrename ((wchar_t *) wSrcPath, (wchar_t *) wDstPath);
+		g_free (wSrcPath);
+		g_free (wDstPath);
 
-	return ret;
+		return ret;
+	}
 #else
 	return rename (src_path, dst_path);
 #endif
@@ -154,15 +180,19 @@ int
 g_unlink (const gchar *path)
 {
 #ifdef HOST_WIN32
-	gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0);
+	if (is_ascii_string (path)) {
+		return unlink (path);
+	} else {
+		gunichar2 *wPath = g_utf8_to_utf16 (path, -1, 0, 0, 0);
 
-	if (!wPath)
-		return -1;
+		if (!wPath)
+			return -1;
 
-	int ret = _wunlink ((wchar_t *) wPath);
-	g_free (wPath);
+		int ret = _wunlink ((wchar_t *) wPath);
+		g_free (wPath);
 
-	return ret;
+		return ret;
+	}
 #else
 	return unlink (path);
 #endif
diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h
index 9a8474310bede..7ec9dcde9c8e6 100644
--- a/src/mono/mono/eglib/glib.h
+++ b/src/mono/mono/eglib/glib.h
@@ -391,6 +391,7 @@ gint g_ascii_xdigit_value (gchar c);
 #define g_ascii_isalpha(c) (isalpha (c) != 0)
 #define g_ascii_isprint(c) (isprint (c) != 0)
 #define g_ascii_isxdigit(c) (isxdigit (c) != 0)
+#define g_isascii(c) (isascii (c) != 0)
 
 /* FIXME: g_strcasecmp supports utf8 unicode stuff */
 #ifdef _MSC_VER