Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
Please login to access the resource
openSUSE:Step:15
tracker-miners
tracker-miners-ps-libz.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File tracker-miners-ps-libz.patch of Package tracker-miners
From 3752c094f108ad7bca0fd429505126c52bc60a4b Mon Sep 17 00:00:00 2001 From: Andrea Azzarone <andrea.azzarone@canonical.com> Date: Tue, 23 Apr 2019 12:01:54 +0100 Subject: [PATCH] tracker-extract: Use libz to process ps.gz files Process ps.gz files using GZlibDecompressor instead of spawning gunzip. Because tracker-extract runs the file parsers inside a seccomp sandbox, spawning an external process during parsing is not a good idea because it leaves us with little control on which syscalls are used. Closes: https://gitlab.gnome.org/GNOME/tracker-miners/issues/61 --- src/tracker-extract/tracker-extract-ps.c | 203 ++++++----------------- 1 file changed, 51 insertions(+), 152 deletions(-) diff --git a/src/tracker-extract/tracker-extract-ps.c b/src/tracker-extract/tracker-extract-ps.c index 6d59e3d2f..207a32a89 100644 --- a/src/tracker-extract/tracker-extract-ps.c +++ b/src/tracker-extract/tracker-extract-ps.c @@ -18,13 +18,7 @@ * Boston, MA 02110-1301, USA. */ -#include "config.h" - -#include <fcntl.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> +#include <config.h> #include <glib.h> #include <glib/gstdio.h> @@ -101,43 +95,30 @@ date_to_iso8601 (const gchar *date) } static TrackerResource * -extract_ps_from_filestream (FILE *f) +extract_ps_from_inputstream (GInputStream *stream) { TrackerResource *metadata; + g_autoptr(GDataInputStream) data_stream = NULL; gchar *line; - gsize length; - gssize read_char; - gsize accum; - gsize max_bytes; - - line = NULL; - length = 0; + gsize length, accum, max_bytes; + g_autoptr(GError) error = NULL; metadata = tracker_resource_new (NULL); tracker_resource_add_uri (metadata, "rdf:type", "nfo:PaginatedTextDocument"); + data_stream = g_data_input_stream_new (stream); + /* 20 MiB should be enough! (original safe limit) */ accum = 0; max_bytes = 20u << 20; - /* Reuse the same buffer for all lines. 
Must be dynamically allocated with - * malloc family methods as getline() may re-size it with realloc() */ - length = 1024; - line = g_malloc (length); - - /* Halt the whole when one of these conditions is met: - * a) Reached max bytes to read - * b) No more lines to read - */ while ((accum < max_bytes) && - (read_char = tracker_getline (&line, &length, f)) != -1) { + (line = g_data_input_stream_read_line (data_stream, &length, NULL, &error)) != NULL) { gboolean pageno_atend = FALSE; gboolean header_finished = FALSE; /* Update accumulated bytes read */ - accum += read_char; - - line[read_char - 1] = '\0'; /* overwrite '\n' char */ + accum += length; if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) { tracker_resource_set_string (metadata, "nie:copyright", line + 13); @@ -148,13 +129,11 @@ extract_ps_from_filestream (FILE *f) tracker_resource_set_relation (metadata, "nco:creator", creator); g_object_unref (creator); } else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) { - gchar *date; + g_autofree gchar *date = NULL; date = date_to_iso8601 (line + 16); - if (date) { + if (date) tracker_resource_set_string (metadata, "nie:contentCreated", date); - g_free (date); - } } else if (strncmp (line, "%%Pages:", 8) == 0) { if (strcmp (line + 9, "(atend)") == 0) { pageno_atend = TRUE; @@ -168,141 +147,66 @@ extract_ps_from_filestream (FILE *f) header_finished = TRUE; if (!pageno_atend) { + g_free (line); break; } } - } - /* Deallocate the buffer */ - if (line) { g_free (line); } + if (error != NULL) + g_warning ("Unexpected lack of content trying to read a line: %s", error->message); + return metadata; } - - static TrackerResource * -extract_ps (const gchar *uri) +extract_ps (const gchar *uri) { - TrackerResource *metadata; - FILE *f; - gchar *filename; + g_autoptr(GFile) file = NULL; + g_autoptr(GInputStream) stream = NULL; + g_autoptr(GError) error = NULL; - filename = g_filename_from_uri (uri, NULL, NULL); - f = tracker_file_open 
(filename); - g_free (filename); + g_debug ("Extracting PS '%s'...", uri); - if (!f) { + file = g_file_new_for_uri (uri); + + stream = G_INPUT_STREAM (g_file_read (file, NULL, &error)); + if (stream == NULL) { + g_warning ("Could't not read file %s: %s", uri, error->message); return NULL; } - /* Extract from filestream! */ - g_debug ("Extracting PS '%s'...", uri); - metadata = extract_ps_from_filestream (f); - - tracker_file_close (f, FALSE); - - return metadata; + return extract_ps_from_inputstream (stream); } #ifdef USING_UNZIPPSFILES -#include <errno.h> -#include <sys/time.h> -#include <sys/resource.h> +#include <zlib.h> -static void -spawn_child_func (gpointer user_data) +static TrackerResource * +extract_ps_gz (const gchar *uri) { - struct rlimit cpu_limit; - gint timeout = GPOINTER_TO_INT (user_data); + g_autoptr(GFile) file = NULL; + g_autoptr(GInputStream) stream, cstream = NULL; + g_autoptr(GConverter) converter = NULL; + g_autoptr(GError) error = NULL; - if (timeout > 0) { - /* set cpu limit */ - getrlimit (RLIMIT_CPU, &cpu_limit); - cpu_limit.rlim_cur = timeout; - cpu_limit.rlim_max = timeout + 1; - - if (setrlimit (RLIMIT_CPU, &cpu_limit) != 0) { - g_critical ("Failed to set resource limit for CPU"); - } - - /* Have this as a precaution in cases where cpu limit has not - * been reached due to spawned app sleeping. - */ - alarm (timeout + 2); - } - - /* Set child's niceness to 19 */ - errno = 0; + g_debug ("Extracting PS '%s'...", uri); - /* nice() uses attribute "warn_unused_result" and so complains - * if we do not check its returned value. But it seems that - * since glibc 2.2.4, nice() can return -1 on a successful call - * so we have to check value of errno too. Stupid... 
- */ - if (nice (19) == -1 && errno) { - g_warning ("Failed to set nice value"); - } -} + file = g_file_new_for_uri (uri); -static TrackerResource * -extract_ps_gz (const gchar *uri) -{ - TrackerResource *metadata = NULL; - FILE *fz; - gint fdz; - const gchar *argv[4]; - gchar *filename; - GError *error = NULL; - - filename = g_filename_from_uri (uri, NULL, NULL); - - /* TODO: we should be using libz for this instead */ - - argv[0] = "gunzip"; - argv[1] = "-c"; - argv[2] = filename; - argv[3] = NULL; - - /* Fork & spawn to gunzip the file */ - if (!g_spawn_async_with_pipes (g_get_tmp_dir (), - (gchar **) argv, - NULL, - G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, - spawn_child_func, - GINT_TO_POINTER (10), - NULL, - NULL, - &fdz, - NULL, - &error)) { - g_warning ("Couldn't fork & spawn to gunzip '%s': %s", - uri, error ? error->message : NULL); - g_clear_error (&error); - } - /* Get FILE from FD */ - else if ((fz = fdopen (fdz, "r")) == NULL) { - g_warning ("Couldn't open FILE from FD (%s)...", uri); - close (fdz); - } - /* Extract from filestream! 
*/ - else - { - g_debug ("Extracting compressed PS '%s'...", uri); - metadata = extract_ps_from_filestream (fz); -#ifdef HAVE_POSIX_FADVISE - if (posix_fadvise (fdz, 0, 0, POSIX_FADV_DONTNEED) != 0) - g_warning ("posix_fadvise() call failed: %m"); -#endif /* HAVE_POSIX_FADVISE */ - fclose (fz); + stream = G_INPUT_STREAM (g_file_read (file, NULL, &error)); + if (stream == NULL) { + g_warning ("Could't not read file %s: %s", uri, error->message); + return NULL; } - g_free (filename); + converter = G_CONVERTER (g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP)); + cstream = g_converter_input_stream_new (stream, converter); - return metadata; + return extract_ps_from_inputstream (cstream); } #endif /* USING_UNZIPPSFILES */ @@ -312,28 +216,23 @@ tracker_extract_get_metadata (TrackerExtractInfo *info) { TrackerResource *metadata; GFile *file; - gchar *uri; + g_autofree gchar *uri = NULL; + const char *mimetype; file = tracker_extract_info_get_file (info); uri = g_file_get_uri (file); + mimetype = tracker_extract_info_get_mimetype (info); - { + if (strcmp (mimetype, "application/x-gzpostscript") == 0) { #ifdef USING_UNZIPPSFILES - const char *mimetype; - - mimetype = tracker_extract_info_get_mimetype (info); - - if (strcmp (mimetype, "application/x-gzpostscript") == 0) { - metadata = extract_ps_gz (uri); - } else + metadata = extract_ps_gz (uri); +#else + metadata = NULL; #endif /* USING_UNZIPPSFILES */ - { - metadata = extract_ps (uri); - } + } else { + metadata = extract_ps (uri); } - g_free (uri); - if (metadata) { tracker_extract_info_set_resource (info, metadata); g_object_unref (metadata); -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor