From d734163e8eaf731e72f9460ba57e3439e593a3c2 Mon Sep 17 00:00:00 2001 From: Nicolas MASSE Date: Fri, 30 Dec 2022 15:55:53 +0100 Subject: [PATCH] fix 0 byte uploads --- .gitignore | 1 - .podman-compose/nginx/nginx.conf | 167 ++++++++++++++++++++++ .podman-compose/vsftpd/config/vsftpd.conf | 22 +++ .podman-compose/vsftpd/users/test | 1 + README.md | 3 +- config.yaml.sample => config.yaml | 6 +- main.go | 59 ++++++-- podman-compose.yaml | 63 ++++++++ 8 files changed, 308 insertions(+), 14 deletions(-) create mode 100644 .podman-compose/nginx/nginx.conf create mode 100644 .podman-compose/vsftpd/config/vsftpd.conf create mode 100644 .podman-compose/vsftpd/users/test rename config.yaml.sample => config.yaml (50%) create mode 100644 podman-compose.yaml diff --git a/.gitignore b/.gitignore index 5b6b072..e69de29 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +0,0 @@ -config.yaml diff --git a/.podman-compose/nginx/nginx.conf b/.podman-compose/nginx/nginx.conf new file mode 100644 index 0000000..1b9af82 --- /dev/null +++ b/.podman-compose/nginx/nginx.conf @@ -0,0 +1,167 @@ +worker_processes auto; +error_log stderr warn; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 65; + + # Do not leak server version in HTTP headers + server_tokens off; + + set_real_ip_from 10.0.0.0/8; + set_real_ip_from 172.16.0.0/12; + set_real_ip_from 192.168.0.0/16; + real_ip_header X-Real-IP; + + upstream php-handler { + server nextcloud:9000; + } + + server { + listen 8080; + + # HSTS settings + # WARNING: Only add the preload option once you read about + # the consequences in https://hstspreload.org/. 
This option + # will add the domain to a hardcoded list that is shipped + # in all major browsers and getting removed from this list + # could take several months. + #add_header Strict-Transport-Security "max-age=15768000; includeSubDomains; preload;" always; + + # set max upload size + client_max_body_size 10G; + fastcgi_buffers 64 4K; + + # Enable gzip but do not remove ETag headers + gzip on; + gzip_vary on; + gzip_comp_level 4; + gzip_min_length 256; + gzip_proxied expired no-cache no-store private no_last_modified no_etag auth; + gzip_types application/atom+xml application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy; + + # Pagespeed is not supported by Nextcloud, so if your server is built + # with the `ngx_pagespeed` module, uncomment this line to disable it. 
+ #pagespeed off; + + # HTTP response headers borrowed from Nextcloud `.htaccess` + add_header Referrer-Policy "no-referrer" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-Download-Options "noopen" always; + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Permitted-Cross-Domain-Policies "none" always; + add_header X-Robots-Tag "none" always; + add_header X-XSS-Protection "1; mode=block" always; + + # Remove X-Powered-By, which is an information leak + fastcgi_hide_header X-Powered-By; + + # Path to the root of your installation + root /var/www/html; + + # Specify how to handle directories -- specifying `/index.php$request_uri` + # here as the fallback means that Nginx always exhibits the desired behaviour + # when a client requests a path that corresponds to a directory that exists + # on the server. In particular, if that directory contains an index.php file, + # that file is correctly served; if it doesn't, then the request is passed to + # the front-end controller. This consistent behaviour means that we don't need + # to specify custom rules for certain paths (e.g. images and other assets, + # `/updater`, `/ocm-provider`, `/ocs-provider`), and thus + # `try_files $uri $uri/ /index.php$request_uri` + # always provides the desired behaviour. + index index.php index.html /index.php$request_uri; + + # Do not include the hostname and scheme in the redirect URL since it is + # always wrong in a Kubernetes environment (request received on HTTPS by Traefik + # and transmitted on HTTP internally). 
+ absolute_redirect off; + + # Rule borrowed from `.htaccess` to handle Microsoft DAV clients + location = / { + if ( $http_user_agent ~ ^DavClnt ) { + return 302 /remote.php/webdav/$is_args$args; + } + } + + location = /robots.txt { + allow all; + log_not_found off; + access_log off; + } + + # Make a regex exception for `/.well-known` so that clients can still + # access it despite the existence of the regex rule + # `location ~ /(\.|autotest|...)` which would otherwise handle requests + # for `/.well-known`. + location ^~ /.well-known { + # The following 6 rules are borrowed from `.htaccess` + + location = /.well-known/carddav { return 301 /remote.php/dav/; } + location = /.well-known/caldav { return 301 /remote.php/dav/; } + # Anything else is dynamically handled by Nextcloud + location ^~ /.well-known { return 301 /index.php$uri; } + + try_files $uri $uri/ =404; + } + + # Rules borrowed from `.htaccess` to hide certain paths from clients + location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)(?:$|/) { return 404; } + location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console) { return 404; } + + # Ensure this block, which passes PHP files to the PHP process, is above the blocks + # which handle static assets (as seen below). If this block is not declared first, + # then Nginx will encounter an infinite rewriting loop when it prepends `/index.php` + # to the URI, resulting in a HTTP 500 error response. 
+ location ~ \.php(?:$|/) { + fastcgi_split_path_info ^(.+?\.php)(/.*)$; + set $path_info $fastcgi_path_info; + + try_files $fastcgi_script_name =404; + + include fastcgi_params; + fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; + fastcgi_param PATH_INFO $path_info; + fastcgi_param HTTPS on; + + fastcgi_param modHeadersAvailable true; # Avoid sending the security headers twice + fastcgi_param front_controller_active true; # Enable pretty urls + fastcgi_pass php-handler; + + fastcgi_intercept_errors on; + fastcgi_request_buffering off; + } + + location ~ \.(?:css|js|svg|gif)$ { + try_files $uri /index.php$request_uri; + expires 6M; # Cache-Control policy borrowed from `.htaccess` + access_log off; # Optional: Don't log access to assets + } + + location ~ \.woff2?$ { + try_files $uri /index.php$request_uri; + expires 7d; # Cache-Control policy borrowed from `.htaccess` + access_log off; # Optional: Don't log access to assets + } + + location / { + try_files $uri $uri/ /index.php$request_uri; + # Optional: Don't log access to other assets + access_log off; + } + } +} \ No newline at end of file diff --git a/.podman-compose/vsftpd/config/vsftpd.conf b/.podman-compose/vsftpd/config/vsftpd.conf new file mode 100644 index 0000000..9a321c6 --- /dev/null +++ b/.podman-compose/vsftpd/config/vsftpd.conf @@ -0,0 +1,22 @@ +background=NO +delete_failed_uploads=YES +listen=YES +listen_port=2121 +pasv_min_port=30100 +pasv_max_port=30119 +pasv_address=127.0.0.1 +pasv_enable=YES +pasv_promiscuous=YES +anonymous_enable=NO +local_enable=YES +virtual_use_local_privs=YES +pam_service_name=vsftpd-virtual +guest_enable=YES +user_sub_token=$USER +local_root=/srv/vsftpd/$USER +chroot_local_user=YES +vsftpd_log_file=/var/log/vsftpd.log +log_ftp_protocol=YES +write_enable=YES +guest_username=vsftpd +allow_writeable_chroot=YES diff --git a/.podman-compose/vsftpd/users/test b/.podman-compose/vsftpd/users/test new file mode 100644 index 0000000..536aca3 --- /dev/null +++ 
b/.podman-compose/vsftpd/users/test @@ -0,0 +1 @@ +secret \ No newline at end of file diff --git a/README.md b/README.md index 48aefdd..fca4048 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,6 @@ go build -o recettes-hellofresh ## Usage ```sh -cp config.yaml.sample config.yaml -vim config.yaml +podman-compose up -d ./recettes-hellofresh config.yaml ``` diff --git a/config.yaml.sample b/config.yaml similarity index 50% rename from config.yaml.sample rename to config.yaml index 98938be..0a4eb7d 100644 --- a/config.yaml.sample +++ b/config.yaml @@ -1,8 +1,8 @@ Scrapper: URL: https://www.hellofresh.be/about/nieuws?locale=fr-BE WebDAV: - URL: https://nextcloud-server/remote.php/dav/files/john.doe/ - Username: john.doe + URL: http://localhost:8080/remote.php/dav/files/admin/ + Username: admin Password: secret - Folder: /Documents/hellofresh + Folder: /Documents/à trier FolderFormat: "2006" diff --git a/main.go b/main.go index d5ecbe0..2da6fbb 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,9 @@ package main import ( + "bytes" "fmt" + "io/ioutil" "log" "net/http" "net/url" @@ -24,16 +26,26 @@ type Scrapper struct { davFolderFormat string } -func NewScrapper(url, davUrl, davUsername, davPassword, davFolder, davFolderFormat string, httpTimeout time.Duration) (*Scrapper, error) { - dav := gowebdav.NewClient(davUrl, davUsername, davPassword) +type ScrapperConfig struct { + ScrapperUrl string + DavUrl string + DavUsername string + DavPassword string + DavFolder string + DavFolderFormat string + HttpTimeout time.Duration +} + +func NewScrapper(config ScrapperConfig) (*Scrapper, error) { + dav := gowebdav.NewClient(config.DavUrl, config.DavUsername, config.DavPassword) scrapper := Scrapper{ - url: url, + url: config.ScrapperUrl, c: colly.NewCollector(), dav: dav, - davFolder: davFolder, - davFolderFormat: davFolderFormat, + davFolder: config.DavFolder, + davFolderFormat: config.DavFolderFormat, client: &http.Client{ - Timeout: httpTimeout, + Timeout: config.HttpTimeout, }, } 
@@ -90,7 +102,34 @@ func (s *Scrapper) Download(u, filename string) error {
 		return fmt.Errorf("Wrong status code: %d", resp.StatusCode)
 	}
 
-	err = s.dav.WriteStream(davFilePath, body, 0644)
+	// HEADS UP !
+	//
+	// Because of a potential bug with the default Nextcloud configuration,
+	// the whole file is loaded in memory before being sent over the network.
+	//
+	// Long explanation:
+	//
+	// The golang net/http library behaves differently depending on the
+	// implementation behind the io.Reader interface.
+	//
+	// * bytes.Reader, strings.Reader and bytes.Buffer: Content-Length is set
+	//   to the size of the content.
+	//
+	// * others: no content-length is set and therefore chunked encoding is used.
+	//
+	// It looks like the default Nginx configuration for Nextcloud does not like
+	// chunked encoding...
+	//
+	// See https://github.com/photoprism/photoprism/issues/443#issuecomment-685608490
+	// and https://github.com/studio-b12/gowebdav/issues/35
+	content, err := ioutil.ReadAll(body)
+	// Abort on a failed or interrupted download instead of uploading a truncated file.
+	if err != nil {
+		return fmt.Errorf("reading response body of %s: %w", u, err)
+	}
+	reader := bytes.NewReader(content)
+
+	err = s.dav.WriteStream(davFilePath, reader, 0644)
 	if err != nil {
 		return err
 	}
@@ -132,7 +171,15 @@ func initConfig() {
 func main() {
 	initConfig()
 
-	scrapper, err := NewScrapper(viper.GetString("Scrapper.URL"), viper.GetString("WebDAV.URL"), viper.GetString("WebDAV.Username"), viper.GetString("WebDAV.Password"), viper.GetString("WebDAV.Folder"), viper.GetString("WebDAV.FolderFormat"), viper.GetDuration("Scrapper.Timeout"))
+	scrapper, err := NewScrapper(ScrapperConfig{
+		ScrapperUrl:     viper.GetString("Scrapper.URL"),
+		DavUrl:          viper.GetString("WebDAV.URL"),
+		DavUsername:     viper.GetString("WebDAV.Username"),
+		DavPassword:     viper.GetString("WebDAV.Password"),
+		DavFolder:       viper.GetString("WebDAV.Folder"),
+		DavFolderFormat: viper.GetString("WebDAV.FolderFormat"),
+		HttpTimeout:     viper.GetDuration("Scrapper.Timeout"),
+	})
 	if err != nil {
 		log.Fatal(err)
 	}
diff --git a/podman-compose.yaml b/podman-compose.yaml
new file mode 100644
index
0000000..dbb0201
--- /dev/null
+++ b/podman-compose.yaml
@@ -0,0 +1,62 @@
+volumes:
+  db:
+  nextcloud:
+  vsftpd:
+
+services:
+  db:
+    image: docker.io/library/mariadb:10.5
+    expose:
+      - "3306"
+    restart: always
+    command: --transaction-isolation=READ-COMMITTED --binlog-format=ROW
+    volumes:
+      - db:/var/lib/mysql
+    environment:
+      - MARIADB_ROOT_PASSWORD=
+      - MARIADB_DATABASE=nextcloud
+      - MARIADB_USER=nextcloud
+      - MARIADB_PASSWORD=nextcloud
+      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=true
+
+  # URL: http://localhost:8080/apps/dashboard/
+  nextcloud:
+    image: docker.io/library/nextcloud:23-fpm-alpine
+    restart: always
+    expose:
+      - "9000"
+    links:
+      - db
+    volumes:
+      - nextcloud:/var/www/html
+    environment:
+      - MYSQL_PASSWORD=nextcloud
+      - MYSQL_DATABASE=nextcloud
+      - MYSQL_USER=nextcloud
+      - MYSQL_HOST=db
+      - NEXTCLOUD_ADMIN_USER=admin
+      - NEXTCLOUD_ADMIN_PASSWORD=secret
+      - NEXTCLOUD_DATA_DIR=/var/www/html/data
+      - NEXTCLOUD_TRUSTED_DOMAINS=localhost
+
+  nginx:
+    image: docker.io/library/nginx:1.23-alpine
+    restart: always
+    ports:
+      - "8080:8080"
+    links:
+      - nextcloud
+    volumes:
+      - .podman-compose/nginx/nginx.conf:/etc/nginx/nginx.conf:z
+      - nextcloud:/var/www/html
+
+  vsftpd:
+    image: quay.io/itix/vsftpd:v0.0.22
+    restart: always
+    ports:
+      - "2121:2121"
+      - "30100-30119:30100-30119"
+    volumes:
+      - .podman-compose/vsftpd/config:/etc/vsftpd:z
+      - .podman-compose/vsftpd/users:/etc/vsftpd-users:z
+      - vsftpd:/srv/vsftpd