From f083f0cc76efc43222cb308c0ad7a34661eb991e Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Thu, 19 Oct 2023 15:16:36 +0200 Subject: [PATCH] fix(quickwit): updated modules and solve s3 connection (#1524) --- ee/quickwit/Dockerfile | 22 ++++++++++++---------- ee/quickwit/consumer.py | 8 ++++---- ee/quickwit/entrypoint.sh | 19 ++----------------- ee/quickwit/env.default | 6 ------ ee/quickwit/index-config-fetch.yaml | 7 ++++--- ee/quickwit/index-config-graphql.yaml | 7 ++++--- ee/quickwit/index-config-pageevent.yaml | 5 +++-- ee/quickwit/quickwit_start_task.sh | 1 + ee/quickwit/requirements.txt | 8 ++++---- ee/quickwit/s3-config-listen.yaml | 7 ++++--- ee/quickwit/s3-config.yaml | 7 ++++--- ee/quickwit/setup_indexes_and_worker.sh | 8 ++++++++ 12 files changed, 50 insertions(+), 55 deletions(-) delete mode 100644 ee/quickwit/env.default create mode 100755 ee/quickwit/quickwit_start_task.sh create mode 100755 ee/quickwit/setup_indexes_and_worker.sh diff --git a/ee/quickwit/Dockerfile b/ee/quickwit/Dockerfile index 00180501b..fcc502a0a 100644 --- a/ee/quickwit/Dockerfile +++ b/ee/quickwit/Dockerfile @@ -1,23 +1,25 @@ -FROM quickwit/quickwit +FROM quickwit/quickwit:0.6.4 -COPY *.yaml /quickwit/ -COPY entrypoint.sh /quickwit/ -COPY consumer.py /quickwit/ -COPY requirements.txt /quickwit/ -COPY msgcodec /quickwit/msgcodec WORKDIR /quickwit RUN apt-get update RUN apt-get install python3 python3-pip -y RUN apt-get clean +COPY requirements.txt /quickwit/ RUN pip install -r requirements.txt -COPY env.default .env -RUN source .env +COPY *.yaml /quickwit/ +COPY *.sh /quickwit/ +COPY consumer.py /quickwit/ +COPY msgcodec /quickwit/msgcodec ENV filter="true" \ - encrypted="false" + encrypted="false" \ + fetch_maxsize=800 \ + graphql_maxsize=800 \ + pageevent_maxsize=800 \ + QUICKWIT_PORT=7280 -EXPOSE 7280 +EXPOSE 7281 ENTRYPOINT ./entrypoint.sh diff --git a/ee/quickwit/consumer.py b/ee/quickwit/consumer.py index af14f0a9b..5cf3c7818 100644 --- a/ee/quickwit/consumer.py +++ b/ee/quickwit/consumer.py @@ -8,7 +8,7 @@ import json from time import time, sleep - +QUICKWIT_PORT = config('QUICKWIT_PORT', default=7280, cast=int) #decryption = config('encrypted', cast=bool) decryption = False @@ -22,12 +22,12 @@ if decryption: def _quickwit_ingest(index, data_list, retry=0): try: - res = requests.post(f'http://localhost:7280/api/v1/{index}/ingest', data=__jsonify_data(data_list, index)) + res = requests.post(f'http://localhost:{QUICKWIT_PORT}/api/v1/{index}/ingest', data=__jsonify_data(data_list, index)) except requests.exceptions.ConnectionError as e: retry += 1 - assert retry <= max_retry, f'[ENDPOINT CONNECTION FAIL] Failed to connect to endpoint http://localhost:7280/api/v1/{index}/ingest\n{e}\n' + assert retry <= max_retry, f'[ENDPOINT CONNECTION FAIL] Failed to connect to endpoint http://localhost:{QUICKWIT_PORT}/api/v1/{index}/ingest\n{e}\n' sleep(5*retry) - print(f"[ENDPOINT ERROR] Failed to connect to endpoint http://localhost:7280/api/v1/{index}/ingest, retrying in {5*retry} seconds..\n") + print(f"[ENDPOINT ERROR] Failed to connect to endpoint http://localhost:{QUICKWIT_PORT}/api/v1/{index}/ingest, retrying in {5*retry} seconds..\n") return _quickwit_ingest(index, data_list, retry=retry) return res diff --git a/ee/quickwit/entrypoint.sh b/ee/quickwit/entrypoint.sh index 549cdbd12..284faf3c7 100755 --- a/ee/quickwit/entrypoint.sh +++ b/ee/quickwit/entrypoint.sh @@ -6,22 +6,7 @@ ls config/ find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{KAFKA_SERVER}}#${KAFKA_SERVER}#g" {} \; find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{AWS_BUCKET}}#${AWS_BUCKET}#g" {} \; find /quickwit/ -type f -name "*.yaml" -exec sed -i "s/{{QUICKWIT_TOPIC}}/${QUICKWIT_TOPIC}/g" {} \; +find /quickwit/ -type f -name "*.yaml" -exec sed -i "s/{{QUICKWIT_PORT}}/${QUICKWIT_PORT}/g" {} \; find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{data_dir_path}}#${data_dir_path}#g" {} \; -quickwit index create --index-config index-config-fetch.yaml --config s3-config.yaml -quickwit index create --index-config index-config-graphql.yaml --config s3-config.yaml -quickwit index create --index-config index-config-pageevent.yaml --config s3-config.yaml - -quickwit source delete --index fetchevent --source fetch-kafka --config s3-config.yaml -quickwit source delete --index graphql --source graphql-kafka --config s3-config.yaml -quickwit source delete --index pageevent --source pageevent-kafka --config s3-config.yaml - - -if [${filter} == "false"]; then - quickwit source create --index fetchevent --source-config source-fetch.yaml --config s3-config.yaml - quickwit source create --index graphql --source-config source-graphql.yaml --config s3-config.yaml - quickwit source create --index pageevent --source-config source-pageevent.yaml --config s3-config.yaml - quickwit run --config s3-config-listen.yaml -else - quickwit run --config s3-config-listen.yaml & python3 consumer.py && fg -fi +./quickwit_start_task.sh & ./setup_indexes_and_worker.sh && fg diff --git a/ee/quickwit/env.default b/ee/quickwit/env.default deleted file mode 100644 index 24cf79186..000000000 --- a/ee/quickwit/env.default +++ /dev/null @@ -1,6 +0,0 @@ -KAFKA_SERVER= -QUICKWIT_TOPIC=ee-quickwit -fetch_maxsize=800 -graphql_maxsize=800 -pageevent_maxsize=800 -group_id=ee-quickwit diff --git a/ee/quickwit/index-config-fetch.yaml b/ee/quickwit/index-config-fetch.yaml index 55cced160..c4a3dfebb 100644 --- a/ee/quickwit/index-config-fetch.yaml +++ b/ee/quickwit/index-config-fetch.yaml @@ -2,9 +2,10 @@ # Index config file for gh-archive dataset. # -version: 0.4 +version: 0.6 -index_id: fetchevent +index_id: "fetchevent" +index_uri: "s3://openreplay-quickwit/quickwit-indexes/fetchevent" doc_mapping: mode: dynamic @@ -57,7 +58,7 @@ doc_mapping: timestamp_field: insertion_timestamp search_settings: - default_search_fields: [project_id, session_id, url, request] + default_search_fields: [project_id, session_id, url] retention: period: 30 days diff --git a/ee/quickwit/index-config-graphql.yaml b/ee/quickwit/index-config-graphql.yaml index b94c5d4a6..c71ce2dbf 100644 --- a/ee/quickwit/index-config-graphql.yaml +++ b/ee/quickwit/index-config-graphql.yaml @@ -2,9 +2,10 @@ # Index config file for gh-archive dataset. # -version: 0.4 +version: 0.6 -index_id: graphql +index_id: "graphql" +index_uri: "s3://openreplay-quickwit/quickwit-indexes/graphql" doc_mapping: mode: dynamic @@ -44,7 +45,7 @@ doc_mapping: timestamp_field: insertion_timestamp search_settings: - default_search_fields: [project_id, session_id, operation_kind, operation_name, variables] + default_search_fields: [project_id, session_id, operation_kind, operation_name] retention: period: 30 days diff --git a/ee/quickwit/index-config-pageevent.yaml b/ee/quickwit/index-config-pageevent.yaml index 1ffdae9f0..870efff29 100644 --- a/ee/quickwit/index-config-pageevent.yaml +++ b/ee/quickwit/index-config-pageevent.yaml @@ -2,9 +2,10 @@ # Index config file for gh-archive dataset. # -version: 0.4 +version: 0.6 -index_id: pageevent +index_id: "pageevent" +index_uri: "s3://openreplay-quickwit/quickwit-indexes/pageevent" doc_mapping: mode: strict diff --git a/ee/quickwit/quickwit_start_task.sh b/ee/quickwit/quickwit_start_task.sh new file mode 100755 index 000000000..f074d604c --- /dev/null +++ b/ee/quickwit/quickwit_start_task.sh @@ -0,0 +1 @@ +quickwit run --config=./s3-config-listen.yaml diff --git a/ee/quickwit/requirements.txt b/ee/quickwit/requirements.txt index 78fb272f5..06e954503 100644 --- a/ee/quickwit/requirements.txt +++ b/ee/quickwit/requirements.txt @@ -1,4 +1,4 @@ -confluent-kafka -python-decouple -requests -zstd +confluent-kafka==2.2.0 +python-decouple==3.8 +requests==2.31.0 +zstd==1.5.5.1 diff --git a/ee/quickwit/s3-config-listen.yaml b/ee/quickwit/s3-config-listen.yaml index f6065e927..bd27ec951 100644 --- a/ee/quickwit/s3-config-listen.yaml +++ b/ee/quickwit/s3-config-listen.yaml @@ -1,6 +1,7 @@ ## In order to save data into S3 # metastore also accepts s3://{bucket/path}#pooling_interval={seconds}s -version: 0 -metastore_uri: s3://quickwit/quickwit-indexes -default_index_root_uri: s3://quickwit/quickwit-indexes +version: 0.6 +metastore_uri: s3://openreplay-quickwit/quickwit-indexes +default_index_root_uri: s3://openreplay-quickwit/quickwit-indexes listen_address: 0.0.0.0 +rest_listen_port: {{QUICKWIT_PORT}} diff --git a/ee/quickwit/s3-config.yaml b/ee/quickwit/s3-config.yaml index 2fa1e20d7..466ec56b3 100644 --- a/ee/quickwit/s3-config.yaml +++ b/ee/quickwit/s3-config.yaml @@ -1,5 +1,6 @@ ## In order to save data into S3 # metastore also accepts s3://{bucket/path}#pooling_interval={seconds}s -version: 0 -metastore_uri: s3://quickwit/quickwit-indexes -default_index_root_uri: s3://quickwit/quickwit-indexes +version: 0.6 +metastore_uri: s3://openreplay-quickwit/quickwit-indexes +default_index_root_uri: s3://openreplay-quickwit/quickwit-indexes +rest_listen_port: {{QUICKWIT_PORT}} diff --git a/ee/quickwit/setup_indexes_and_worker.sh b/ee/quickwit/setup_indexes_and_worker.sh new file mode 100755 index 000000000..16b9ffff3 --- /dev/null +++ b/ee/quickwit/setup_indexes_and_worker.sh @@ -0,0 +1,8 @@ +sleep 120 +echo "Creating indexes.." +quickwit index create --index-config index-config-fetch.yaml +quickwit index create --index-config index-config-graphql.yaml +quickwit index create --index-config index-config-pageevent.yaml +echo "Running kafka reader.." +python3 -u consumer.py +