{ "cells": [ { "cell_type": "markdown", "id": "fce70006-809b-4c98-b89c-00910b8bbea1", "metadata": {}, "source": [ "**Hugging Face API**" ] }, { "cell_type": "code", "execution_count": 2, "id": "1c3b8678-d1fd-46a6-8633-fe0b34393d51", "metadata": {}, "outputs": [], "source": [ "from dotenv import load_dotenv\n", "import os\n", "import requests\n", "import json\n", "\n", "load_dotenv() \n", "headers = {\"Authorization\": f\"Bearer {os.getenv('HF_API_TOKEN')}\"}\n", "\n", "candidate_labels = [\"technology\", \"sports\", \"politics\", \"health\"]\n", "\n", "def query(model, input_text):\n", " API_URL = f\"https://router.huggingface.co/hf-inference/models/{model}\"\n", " payload = {\n", " \"inputs\": input_text,\n", " \"parameters\": {\"candidate_labels\": candidate_labels}\n", " }\n", " response = requests.post(API_URL, headers=headers, json=payload)\n", " return response.json()" ] }, { "cell_type": "code", "execution_count": 3, "id": "9a5fbe4a-c092-4d9b-b797-9104823b71cc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[\n", " {\n", " \"label\": \"sports\",\n", " \"score\": 0.9877110719680786\n", " },\n", " {\n", " \"label\": \"health\",\n", " \"score\": 0.006601463537663221\n", " },\n", " {\n", " \"label\": \"technology\",\n", " \"score\": 0.004392746835947037\n", " },\n", " {\n", " \"label\": \"politics\",\n", " \"score\": 0.0012947289505973458\n", " }\n", "]\n" ] } ], "source": [ "input_text = \"I just started to play tennis, and it's so much fun!\"\n", "output = query(\"facebook/bart-large-mnli\", input_text)\n", "print(json.dumps(output, indent=4))" ] }, { "cell_type": "markdown", "id": "01dc84d7-6d0f-4409-8966-c1adac5fd0c8", "metadata": {}, "source": [ "**Model implementation**" ] }, { "cell_type": "code", "execution_count": 4, "id": "49355cad-771a-4080-8b67-f995c3de0110", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " activity_type :\n", "\t hut trek (summer)\n", "\t hut trek (winter)\n", "\t camping trip (wild camping)\n", "\t camping trip (campground)\n", "\t ski tour / skitour\n", "\t snowboard / splitboard trip\n", "\t long-distance hike / thru-hike\n", "\t digital nomad trip\n", "\t city trip\n", "\t road trip (car/camper)\n", "\t festival trip\n", "\t yoga / wellness retreat\n", "\t micro-adventure / weekend trip\n", "\t beach vacation\n", "\t cultural exploration\n", "\t nature escape\n", "\n", " activities :\n", "\t swimming\n", "\t going to the beach\n", "\t relaxing\n", "\t sightseeing\n", "\t biking\n", "\t running\n", "\t skiing\n", "\t cross-country skiing\n", "\t ski touring\n", "\t hiking\n", "\t hut-to-hut hiking\n", "\t rock climbing\n", "\t ice climbing\n", "\t snowshoe hiking\n", "\t kayaking / canoeing\n", "\t stand-up paddleboarding (SUP)\n", "\t snorkeling\n", "\t scuba diving\n", "\t surfing\n", "\t paragliding\n", "\t horseback riding\n", "\t photography\n", "\t fishing\n", "\t rafting\n", "\t yoga\n", "\n", " climate_or_season :\n", "\t cold destination / winter\n", "\t warm destination / summer\n", "\t variable weather / spring / autumn\n", "\t tropical / humid\n", "\t dry / desert-like\n", "\t rainy climate\n", "\n", " style_or_comfort :\n", "\t ultralight\n", "\t lightweight (but comfortable)\n", "\t luxury (including evening wear)\n", "\t minimalist\n", "\n", " dress_code :\n", "\t casual\n", "\t formal (business trip)\n", "\t conservative\n", "\n", " accommodation :\n", "\t indoor\n", "\t huts with half board\n", "\t sleeping in a tent\n", "\t sleeping in a car\n", "\n", " transportation :\n", "\t own vehicle\n", "\t no own vehicle\n", "\n", " special_conditions :\n", "\t off-grid / no electricity\n", "\t self-supported (bring your own cooking gear)\n", "\t travel with children\n", "\t pet-friendly\n", "\t snow and ice\n", "\t high alpine terrain\n", "\t snow, ice and avalanche-prone terrain\n", "\t no special conditions to consider\n", "\n", " trip_length_days :\n", "\t 1 day\n", "\t 2 days\n", "\t 3 days\n", "\t 4 days\n", "\t 5 days\n", "\t 6 days\n", "\t 7 days\n", "\t 7+ days\n" ] } ], "source": [ "import json\n", "import pandas as pd\n", "from tabulate import tabulate\n", "from transformers import pipeline\n", "\n", "with open(\"packing_label_structure.json\", \"r\") as file:\n", " candidate_labels = json.load(file)\n", "keys_list = list(candidate_labels.keys())\n", "\n", "for key in candidate_labels:\n", " print(\"\\n\", key, \":\")\n", " for item in candidate_labels[key]:\n", " print(\"\\t\", item)" ] }, { "cell_type": "code", "execution_count": 5, "id": "4b3a1bcb-3450-4128-b941-952f145baf99", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Label Score\n", "0 beach vacation 0.376311\n", "1 micro-adventure / weekend trip 0.350168\n", "2 nature escape 0.133974\n", "3 digital nomad trip 0.031636\n", "4 cultural exploration 0.031271\n", "5 yoga / wellness retreat 0.012846\n", "6 festival trip 0.012700\n", "7 long-distance hike / thru-hike 0.009527\n", "8 hut trek (summer) 0.008148\n", "9 city trip 0.007793\n", "10 road trip (car/camper) 0.006512\n", "11 ski tour / skitour 0.005670\n", "12 camping trip (campground) 0.004448\n", "13 snowboard / splitboard trip 0.004113\n", "14 camping trip (wild camping) 0.002714\n", "15 hut trek (winter) 0.002170\n" ] } ], "source": [ "model_name = \"facebook/bart-large-mnli\"\n", "trip_descr = \"I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands.\"\n", "classifier = pipeline(\"zero-shot-classification\", model = model_name)\n", "result = classifier(trip_descr, candidate_labels[\"activity_type\"])\n", "\n", "df = pd.DataFrame({\n", " \"Label\": result[\"labels\"],\n", " \"Score\": result[\"scores\"]\n", "})\n", "print(df)" ] }, { "cell_type": "code", "execution_count": 6, "id": "9f5f1c45-b411-4de1-a0a6-a7ecde5d8eae", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Label Score\n", "0 going to the beach 0.991486\n", "1 relaxing 0.977136\n", "2 hiking 0.942628\n", "3 swimming 0.219020\n", "4 sightseeing 0.175862\n", "5 running 0.098545\n", "6 hut-to-hut hiking 0.083704\n", "7 biking 0.036792\n", "8 photography 0.036690\n", "9 surfing 0.030993\n", "10 stand-up paddleboarding (SUP) 0.025300\n", "11 snorkeling 0.021451\n", "12 yoga 0.011070\n", "13 kayaking / canoeing 0.007511\n", "14 rock climbing 0.006307\n", "15 fishing 0.003497\n", "16 paragliding 0.002656\n", "17 rafting 0.001970\n", "18 horseback riding 0.001560\n", "19 snowshoe hiking 0.001528\n", "20 cross-country skiing 0.001502\n", "21 ice climbing 0.001434\n", "22 skiing 0.001169\n", "23 scuba diving 0.000789\n", "24 ski touring 0.000491\n", "['going to the beach', 'relaxing', 'hiking']\n" ] } ], "source": [ "# we do things differently for \"activities\"\n", "cut_off = 0.5\n", "result_activ = classifier(trip_descr, candidate_labels[\"activities\"], multi_label=True)\n", "\n", "df = pd.DataFrame({\n", " \"Label\": result_activ[\"labels\"],\n", " \"Score\": result_activ[\"scores\"]\n", "})\n", "classes = df.loc[df[\"Score\"] > 0.5, \"Label\"].tolist()\n", "\n", "print(df)\n", "print(classes)" ] }, { "cell_type": "code", "execution_count": 7, "id": "3a7287c2-78f0-4a53-af72-1bc0f62da36f", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Processing 9/9 superclass pred_class\n", "0 activity_type beach vacation\n", "1 activities [going to the beach, relaxing, hiking]\n", "2 climate_or_season warm destination / summer\n", "3 style_or_comfort minimalist\n", "4 dress_code casual\n", "5 accommodation huts with half board\n", "6 transportation no own vehicle\n", "7 special_conditions off-grid / no electricity\n", "8 trip_length_days 7+ days\n" ] } ], "source": [ "# doing this for all superclasses, depending on local machine this might take a while\n", "def pred_trip(model_name, trip_descr, cut_off = 0.5):\n", " \"\"\"\n", " Classifies trip\n", " \n", " Parameters:\n", " model_name: name of hugging-face model\n", " trip_descr: text describing the trip\n", " cut_off: cut_off for choosing activities\n", "\n", " Returns:\n", " pd Dataframe: with class predictions\n", " \"\"\"\n", " \n", " classifier = pipeline(\"zero-shot-classification\", model=model_name)\n", " df = pd.DataFrame(columns=['superclass', 'pred_class'])\n", " for i, key in enumerate(keys_list):\n", " print(f\"\\rProcessing {i + 1}/{len(keys_list)}\", end=\"\", flush=True)\n", " if key == 'activities':\n", " result = classifier(trip_descr, candidate_labels[key], multi_label=True)\n", " indices = [i for i, score in enumerate(result['scores']) if score > cut_off]\n", " classes = [result['labels'][i] for i in indices]\n", " else:\n", " result = classifier(trip_descr, candidate_labels[key])\n", " classes = result[\"labels\"][0]\n", " df.loc[i] = [key, classes]\n", " return df\n", "\n", "result = pred_trip(model_name, trip_descr, cut_off = 0.5)\n", "print(result)" ] }, { "cell_type": "code", "execution_count": 8, "id": "f8b547d4-1858-49d5-b26c-fd30e210cd58", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Processing 9/9" ] } ], "source": [ "# Load packing item data\n", "with open(\"packing_templates_self_supported_offgrid_expanded.json\", \"r\") as file:\n", " packing_items = json.load(file)\n", "\n", "\n", "def pred_trip(model_name, trip_descr, cut_off = 0.5):\n", " \"\"\"\n", " Classifies trip\n", " \n", " Parameters:\n", " model_name: name of hugging-face model\n", " trip_descr: text describing the trip\n", " cut_off: cut_off for choosing activities\n", "\n", " Returns:\n", " pd Dataframe: with class predictions\n", " \"\"\"\n", " \n", " classifier = pipeline(\"zero-shot-classification\", model=model_name)\n", " df = pd.DataFrame(columns=['superclass', 'pred_class'])\n", " for i, key in enumerate(keys_list):\n", " print(f\"\\rProcessing {i + 1}/{len(keys_list)}\", end=\"\", flush=True)\n", " if key == 'activities':\n", " result = classifier(trip_descr, candidate_labels[key], multi_label=True)\n", " indices = [i for i, score in enumerate(result['scores']) if score > cut_off]\n", " classes = [result['labels'][i] for i in indices]\n", " else:\n", " result = classifier(trip_descr, candidate_labels[key])\n", " classes = result[\"labels\"][0]\n", " df.loc[i] = [key, classes]\n", " \n", " ## Look up and return list of items to pack based on class predictions\n", " # make list from dataframe column\n", " all_classes = [elem for x in df[\"pred_class\"] for elem in (x if isinstance(x, list) else [x])]\n", " # look up packing items for each class/key\n", " list_of_list_of_items = [packing_items.get(k, []) for k in all_classes]\n", " # combine lists and remove doubble entries\n", " flat_unique = []\n", " for sublist in list_of_list_of_items:\n", " for item in sublist:\n", " if item not in flat_unique:\n", " flat_unique.append(item)\n", " # sort alphabetically and add newlines\n", " sorted_list = \"\\n\".join(sorted(flat_unique)) \n", " return df, sorted_list\n", "\n", "result = pred_trip(model_name, trip_descr, cut_off = 0.5)" ] }, { "cell_type": "code", "execution_count": 11, "id": "162482dd-d2ce-44b7-9a6d-c34a96c1a6ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " superclass pred_class\n", "0 activity_type beach vacation\n", "1 activities [going to the beach, relaxing, hiking]\n", "2 climate_or_season warm destination / summer\n", "3 style_or_comfort minimalist\n", "4 dress_code casual\n", "5 accommodation huts with half board\n", "6 transportation no own vehicle\n", "7 special_conditions off-grid / no electricity\n", "8 trip_length_days 7+ days\n", "1 set of clothing for every situation\n", "USB hub (for multiple devices)\n", "all‑in‑one soap\n", "backpack\n", "backup lighting (e.g. small flashlight)\n", "beach bag\n", "beach chair\n", "beach towel\n", "blister plasters or tape\n", "book or e‑reader\n", "cap or hat\n", "cash for payments\n", "comfortable clothing\n", "compact toothbrush\n", "cooler\n", "daypack\n", "earplugs\n", "emergency communication (e.g. GPS beacon or satellite messenger)\n", "extra charger cables\n", "extra clothing layer\n", "first aid kit\n", "flashlight or headlamp\n", "flip flops\n", "foldable solar panel (if on longer trips)\n", "hat or cap\n", "headlamp + extra batteries\n", "hiking boots or trail runners\n", "hiking poles\n", "hiking socks (anti-blister)\n", "jeans or comfortable pants\n", "light pajamas or sleepwear\n", "light shoes\n", "light towel\n", "lightweight clothing\n", "lightweight towel\n", "music / headphones\n", "navigation (map/compass/GPS)\n", "navigation device with offline maps\n", "notebook + pen\n", "number of meals/snacks matched to duration\n", "packaging to keep electronics dry\n", "paper map and compass\n", "power bank (at least 10,000 mAh)\n", "public transport app or ticket\n", "rain jacket or poncho\n", "rechargeable batteries and charger\n", "reservation confirmation\n", "seat cushion or beach mat\n", "sheet liner (often required)\n", "slippers or indoor shoes for inside\n", "small backpack\n", "small toiletry bag\n", "snacks / energy bars\n", "snacks for along the way\n", "sneakers\n", "socks per day\n", "solar panel or portable charging system\n", "sun hat\n", "sunglasses\n", "sunscreen\n", "sunscreen and sunglasses\n", "sweater or hoodie\n", "swimwear\n", "t-shirts\n", "toiletry bag\n", "underwear per day\n", "water bottle\n", "water bottle(s) or hydration bladder\n" ] } ], "source": [ "print(result[0])\n", "print(result[1])" ] }, { "cell_type": "markdown", "id": "c4799d6b-6ab5-42da-a992-afe3666d0015", "metadata": {}, "source": [ "Now use gradio app" ] }, { "cell_type": "code", "execution_count": 9, "id": "8eefd4cc-c375-4cc0-956b-472b36bafdb7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "