Browse Source

fix 0 byte uploads

main v0.0.2
Nicolas Massé 3 years ago
parent
commit
d734163e8e
  1. 1
      .gitignore
  2. 167
      .podman-compose/nginx/nginx.conf
  3. 22
      .podman-compose/vsftpd/config/vsftpd.conf
  4. 1
      .podman-compose/vsftpd/users/test
  5. 3
      README.md
  6. 6
      config.yaml
  7. 59
      main.go
  8. 63
      podman-compose.yaml

1
.gitignore

@ -1 +0,0 @@
config.yaml

167
.podman-compose/nginx/nginx.conf

@ -0,0 +1,167 @@
# Nginx front end for Nextcloud running as PHP-FPM (`nextcloud:9000`, see the
# `php-handler` upstream below). Files that exist under /var/www/html are served
# directly; everything else is handed to PHP through FastCGI.
worker_processes auto;
error_log stderr warn;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
# Do not leak server version in HTTP headers
server_tokens off;
# Take the client address from the X-Real-IP header when the request arrives
# from one of the private (RFC 1918) address ranges listed here.
set_real_ip_from 10.0.0.0/8;
set_real_ip_from 172.16.0.0/12;
set_real_ip_from 192.168.0.0/16;
real_ip_header X-Real-IP;
# FastCGI backend: host `nextcloud`, port 9000.
upstream php-handler {
server nextcloud:9000;
}
server {
# Plain HTTP listener; no TLS is configured in this server block.
listen 8080;
# HSTS settings
# WARNING: Only add the preload option once you read about
# the consequences in https://hstspreload.org/. This option
# will add the domain to a hardcoded list that is shipped
# in all major browsers and getting removed from this list
# could take several months.
#add_header Strict-Transport-Security "max-age=15768000; includeSubDomains; preload;" always;
# set max upload size
client_max_body_size 10G;
fastcgi_buffers 64 4K;
# Enable gzip but do not remove ETag headers
gzip on;
gzip_vary on;
gzip_comp_level 4;
gzip_min_length 256;
gzip_proxied expired no-cache no-store private no_last_modified no_etag auth;
gzip_types application/atom+xml application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy;
# Pagespeed is not supported by Nextcloud, so if your server is built
# with the `ngx_pagespeed` module, uncomment this line to disable it.
#pagespeed off;
# HTTP response headers borrowed from Nextcloud `.htaccess`
add_header Referrer-Policy "no-referrer" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-Download-Options "noopen" always;
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Permitted-Cross-Domain-Policies "none" always;
add_header X-Robots-Tag "none" always;
add_header X-XSS-Protection "1; mode=block" always;
# Remove X-Powered-By, which is an information leak
fastcgi_hide_header X-Powered-By;
# Path to the root of your installation
root /var/www/html;
# Specify how to handle directories -- specifying `/index.php$request_uri`
# here as the fallback means that Nginx always exhibits the desired behaviour
# when a client requests a path that corresponds to a directory that exists
# on the server. In particular, if that directory contains an index.php file,
# that file is correctly served; if it doesn't, then the request is passed to
# the front-end controller. This consistent behaviour means that we don't need
# to specify custom rules for certain paths (e.g. images and other assets,
# `/updater`, `/ocm-provider`, `/ocs-provider`), and thus
# `try_files $uri $uri/ /index.php$request_uri`
# always provides the desired behaviour.
index index.php index.html /index.php$request_uri;
# Do not include the hostname and scheme in the redirect URL since it is
# always wrong in a Kubernetes environment (request received on HTTPS by Traefik
# and transmitted on HTTP internally).
absolute_redirect off;
# Rule borrowed from `.htaccess` to handle Microsoft DAV clients
location = / {
if ( $http_user_agent ~ ^DavClnt ) {
return 302 /remote.php/webdav/$is_args$args;
}
}
location = /robots.txt {
allow all;
log_not_found off;
access_log off;
}
# Make a regex exception for `/.well-known` so that clients can still
# access it despite the existence of the regex rule
# `location ~ /(\.|autotest|...)` which would otherwise handle requests
# for `/.well-known`.
location ^~ /.well-known {
# The following rules are borrowed from `.htaccess`
location = /.well-known/carddav { return 301 /remote.php/dav/; }
location = /.well-known/caldav { return 301 /remote.php/dav/; }
# Anything else is dynamically handled by Nextcloud
location ^~ /.well-known { return 301 /index.php$uri; }
try_files $uri $uri/ =404;
}
# Rules borrowed from `.htaccess` to hide certain paths from clients
location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)(?:$|/) { return 404; }
location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console) { return 404; }
# Ensure this block, which passes PHP files to the PHP process, is above the blocks
# which handle static assets (as seen below). If this block is not declared first,
# then Nginx will encounter an infinite rewriting loop when it prepends `/index.php`
# to the URI, resulting in a HTTP 500 error response.
location ~ \.php(?:$|/) {
# Split `/script.php/extra/path` into the script name and PATH_INFO.
fastcgi_split_path_info ^(.+?\.php)(/.*)$;
# Save PATH_INFO before `try_files` runs, then pass the saved copy to PHP below.
set $path_info $fastcgi_path_info;
# Answer 404 here when the script file does not exist, instead of asking PHP.
try_files $fastcgi_script_name =404;
include fastcgi_params;
fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
fastcgi_param PATH_INFO $path_info;
# NOTE(review): this tells PHP the request arrived over HTTPS although the
# listener above is plain HTTP. Presumably meant for use behind a
# TLS-terminating proxy; confirm it is wanted for local http://localhost:8080 use.
fastcgi_param HTTPS on;
fastcgi_param modHeadersAvailable true; # Avoid sending the security headers twice
fastcgi_param front_controller_active true; # Enable pretty urls
fastcgi_pass php-handler;
fastcgi_intercept_errors on;
# Send the request body to PHP-FPM as it is received instead of buffering it first.
# NOTE(review): possibly related to chunked uploads ending up as 0-byte files
# (the client works around this by sending a Content-Length); confirm.
fastcgi_request_buffering off;
}
location ~ \.(?:css|js|svg|gif)$ {
try_files $uri /index.php$request_uri;
expires 6M; # Cache-Control policy borrowed from `.htaccess`
access_log off; # Optional: Don't log access to assets
}
location ~ \.woff2?$ {
try_files $uri /index.php$request_uri;
expires 7d; # Cache-Control policy borrowed from `.htaccess`
access_log off; # Optional: Don't log access to assets
}
# Catch-all: serve the file or directory if it exists, otherwise fall back to
# the Nextcloud front controller.
location / {
try_files $uri $uri/ /index.php$request_uri;
# Optional: Don't log access to other assets
access_log off;
}
}
}

22
.podman-compose/vsftpd/config/vsftpd.conf

@ -0,0 +1,22 @@
# vsftpd configuration for the local test FTP server.
# Options are grouped by topic; vsftpd reads each line independently.

# --- Daemon / listener ---
# Run standalone in the foreground, on a non-privileged control port.
listen=YES
background=NO
listen_port=2121

# --- Passive mode ---
# Data connections use the fixed range 30100-30119, and 127.0.0.1 is the
# address advertised in PASV replies.
pasv_enable=YES
pasv_address=127.0.0.1
pasv_min_port=30100
pasv_max_port=30119
# Turns off the check that the data connection comes from the same IP
# address as the control connection.
pasv_promiscuous=YES

# --- Authentication: virtual users only ---
anonymous_enable=NO
local_enable=YES
pam_service_name=vsftpd-virtual
# Every non-anonymous login is mapped onto the `vsftpd` account, with the
# same privileges as a local user.
guest_enable=YES
guest_username=vsftpd
virtual_use_local_privs=YES

# --- Per-user home directory and chroot ---
# $USER in local_root is replaced by the login name.
user_sub_token=$USER
local_root=/srv/vsftpd/$USER
chroot_local_user=YES
allow_writeable_chroot=YES

# --- Uploads ---
write_enable=YES
delete_failed_uploads=YES

# --- Logging ---
# Log every FTP request and response to the file below.
vsftpd_log_file=/var/log/vsftpd.log
log_ftp_protocol=YES

1
.podman-compose/vsftpd/users/test

@ -0,0 +1 @@
secret

3
README.md

@ -18,7 +18,6 @@ go build -o recettes-hellofresh
## Usage ## Usage
```sh ```sh
cp config.yaml.sample config.yaml podman-compose up -d
vim config.yaml
./recettes-hellofresh config.yaml ./recettes-hellofresh config.yaml
``` ```

6
config.yaml.sample → config.yaml

@ -1,8 +1,8 @@
Scrapper: Scrapper:
URL: https://www.hellofresh.be/about/nieuws?locale=fr-BE URL: https://www.hellofresh.be/about/nieuws?locale=fr-BE
WebDAV: WebDAV:
URL: https://nextcloud-server/remote.php/dav/files/john.doe/ URL: http://localhost:8080/remote.php/dav/files/admin/
Username: john.doe Username: admin
Password: secret Password: secret
Folder: /Documents/hellofresh Folder: /Documents/à trier
FolderFormat: "2006" FolderFormat: "2006"

59
main.go

@ -1,7 +1,9 @@
package main package main
import ( import (
"bytes"
"fmt" "fmt"
"io/ioutil"
"log" "log"
"net/http" "net/http"
"net/url" "net/url"
@ -24,16 +26,26 @@ type Scrapper struct {
davFolderFormat string davFolderFormat string
} }
func NewScrapper(url, davUrl, davUsername, davPassword, davFolder, davFolderFormat string, httpTimeout time.Duration) (*Scrapper, error) { type ScrapperConfig struct {
dav := gowebdav.NewClient(davUrl, davUsername, davPassword) ScrapperUrl string
DavUrl string
DavUsername string
DavPassword string
DavFolder string
DavFolderFormat string
HttpTimeout time.Duration
}
func NewScrapper(config ScrapperConfig) (*Scrapper, error) {
dav := gowebdav.NewClient(config.DavUrl, config.DavUsername, config.DavPassword)
scrapper := Scrapper{ scrapper := Scrapper{
url: url, url: config.ScrapperUrl,
c: colly.NewCollector(), c: colly.NewCollector(),
dav: dav, dav: dav,
davFolder: davFolder, davFolder: config.DavFolder,
davFolderFormat: davFolderFormat, davFolderFormat: config.DavFolderFormat,
client: &http.Client{ client: &http.Client{
Timeout: httpTimeout, Timeout: config.HttpTimeout,
}, },
} }
@ -90,7 +102,30 @@ func (s *Scrapper) Download(u, filename string) error {
return fmt.Errorf("Wrong status code: %d", resp.StatusCode) return fmt.Errorf("Wrong status code: %d", resp.StatusCode)
} }
err = s.dav.WriteStream(davFilePath, body, 0644) // HEADS UP !
//
// Because of a potential bug with the default Nextcloud configuration,
// the whole file is loaded in memory before being sent over the network.
//
// Long explanation:
//
// The golang net/http library behaves differently depending on the
// implementation behind the io.Reader interface.
//
// * bytes.Reader, strings.Reader and bytes.Buffer: Content-Length is set
// to the size of the content.
//
// * others: no content-length is set and therefore chunked encoding is used.
//
// It looks like the default Nginx configuration for Nextcloud does not like
// chunked encoding...
//
// See https://github.com/photoprism/photoprism/issues/443#issuecomment-685608490
// and https://github.com/studio-b12/gowebdav/issues/35
content, err := ioutil.ReadAll(body)
reader := bytes.NewReader(content)
err = s.dav.WriteStream(davFilePath, reader, 0644)
if err != nil { if err != nil {
return err return err
} }
@ -132,7 +167,15 @@ func initConfig() {
func main() { func main() {
initConfig() initConfig()
scrapper, err := NewScrapper(viper.GetString("Scrapper.URL"), viper.GetString("WebDAV.URL"), viper.GetString("WebDAV.Username"), viper.GetString("WebDAV.Password"), viper.GetString("WebDAV.Folder"), viper.GetString("WebDAV.FolderFormat"), viper.GetDuration("Scrapper.Timeout")) scrapper, err := NewScrapper(ScrapperConfig{
ScrapperUrl: viper.GetString("Scrapper.URL"),
DavUrl: viper.GetString("WebDAV.URL"),
DavUsername: viper.GetString("WebDAV.Username"),
DavPassword: viper.GetString("WebDAV.Password"),
DavFolder: viper.GetString("WebDAV.Folder"),
DavFolderFormat: viper.GetString("WebDAV.FolderFormat"),
HttpTimeout: viper.GetDuration("Scrapper.Timeout"),
})
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

63
podman-compose.yaml

@ -0,0 +1,63 @@
# Local test stack: MariaDB + Nextcloud (PHP-FPM) served by Nginx, plus a
# vsftpd server. Start it with `podman-compose up -d`.
volumes:
  db:
  nextcloud:
  vsftpd:

services:
  # Database backing Nextcloud. The root password is deliberately left empty
  # (allowed by MARIADB_ALLOW_EMPTY_ROOT_PASSWORD).
  db:
    image: docker.io/library/mariadb:10.5
    expose:
      - "3306"
    restart: always
    command: --transaction-isolation=READ-COMMITTED --binlog-format=ROW
    volumes:
      - db:/var/lib/mysql
    environment:
      - MARIADB_ROOT_PASSWORD=
      - MARIADB_DATABASE=nextcloud
      - MARIADB_USER=nextcloud
      # Declared exactly once; must match MYSQL_PASSWORD of the nextcloud
      # service below.
      - MARIADB_PASSWORD=nextcloud
      - MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=true

  # Nextcloud PHP-FPM backend, reached through the nginx service.
  # URL: http://localhost:8080/apps/dashboard/
  nextcloud:
    image: docker.io/library/nextcloud:23-fpm-alpine
    restart: always
    expose:
      - "9000"
    links:
      - db
    volumes:
      - nextcloud:/var/www/html
    environment:
      - MYSQL_PASSWORD=nextcloud
      - MYSQL_DATABASE=nextcloud
      - MYSQL_USER=nextcloud
      - MYSQL_HOST=db
      - NEXTCLOUD_ADMIN_USER=admin
      - NEXTCLOUD_ADMIN_PASSWORD=secret
      - NEXTCLOUD_DATA_DIR=/var/www/html/data
      - NEXTCLOUD_TRUSTED_DOMAINS=localhost

  # HTTP front end. It mounts the same `nextcloud` volume as the PHP
  # container at /var/www/html; PHP requests go to nextcloud:9000
  # (see .podman-compose/nginx/nginx.conf).
  nginx:
    image: docker.io/library/nginx:1.23-alpine
    restart: always
    ports:
      - "8080:8080"
    links:
      - nextcloud
    volumes:
      - .podman-compose/nginx/nginx.conf:/etc/nginx/nginx.conf:z
      - nextcloud:/var/www/html

  # FTP server. The published ports must match listen_port and
  # pasv_min_port/pasv_max_port in .podman-compose/vsftpd/config/vsftpd.conf.
  vsftpd:
    image: quay.io/itix/vsftpd:v0.0.22
    restart: always
    ports:
      - "2121:2121"
      - "30100-30119:30100-30119"
    volumes:
      - .podman-compose/vsftpd/config:/etc/vsftpd:z
      - .podman-compose/vsftpd/users:/etc/vsftpd-users:z
      - vsftpd:/srv/vsftpd
Loading…
Cancel
Save