diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 17ec9443..232ea364 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -25,6 +25,7 @@ jobs:
python -m pip install --upgrade pip
pip install .
pip install .[postgres]
+ pip install .[bigquery]
pip install pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
diff --git a/notebooks/vn-connect-to-bigquery.ipynb b/notebooks/vn-connect-to-bigquery.ipynb
new file mode 100644
index 00000000..5ca96996
--- /dev/null
+++ b/notebooks/vn-connect-to-bigquery.ipynb
@@ -0,0 +1,532 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "![Vanna AI](https://img.vanna.ai/vanna-ask.svg)\n",
+ "\n",
+ "The following notebook walks through connecting to your Google Cloud project using the BigQuery connector and running SQL queries using Vanna AI. For demo purposes, we are using one of Google's example queries.\n",
+ "\n",
+ "# Install Vanna\n",
+ "First we install Vanna from [PyPI](https://pypi.org/project/vanna/) and import it.\n",
+ "Here, we'll install vanna with the `bigquery` extra. If you're using a different database, you'll need to install the appropriate extras."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install vanna[bigquery]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import vanna as vn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Set Database Connection\n",
+ "These details are only referenced within your notebook. These database credentials are never sent to Vanna's servers.\n",
+ "You need to set\n",
+ "`PROJECT_ID`\n",
+ "in your environment. By default, Vanna looks for pre-set Google Application Default Credentials (ADC); if none are set, you also need to provide the path to a service account credentials JSON file. You can also pass `cred_file_path` and `project_id` to the method as parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Not using Google Colab.\n"
+ ]
+ }
+ ],
+ "source": [
+ "vn.connect_to_bigquery()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Or, if you are using a service account, you can provide the path to its credentials JSON file:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cred_file_path = \"provide/creds/path.json\"\n",
+ "vn.connect_to_bigquery(cred_file_path=cred_file_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Note: \n",
+ "You need to add `PROJECT_ID` to your environment, or pass it as the `project_id` parameter to the method above."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Get Results\n",
+ "This runs the SQL and displays its result as a dataframe. Note that we use your provided credentials to execute the SQL on your warehouse from your local instance. Neither your connection nor your data gets sent to Vanna's servers. For more info on how Vanna works, [see this post](https://medium.com/vanna-ai/how-vanna-works-how-to-train-it-data-security-8d8f2008042). We will be using Google's demo SQL below. Note that the SQL below only works for the Google demo data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " submitter_id | \n",
+ " case_id | \n",
+ " diag__treat__count | \n",
+ " primary_site | \n",
+ " disease_type | \n",
+ " proj__name | \n",
+ " proj__project_id | \n",
+ " demo__demographic_id | \n",
+ " demo__gender | \n",
+ " demo__race | \n",
+ " ... | \n",
+ " exp__bmi | \n",
+ " exp__years_smoked | \n",
+ " exp__pack_years_smoked | \n",
+ " exp__cigarettes_per_day | \n",
+ " exp__alcohol_history | \n",
+ " exp__state | \n",
+ " exp__created_datetime | \n",
+ " exp__updated_datetime | \n",
+ " state | \n",
+ " updated_datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " TCGA-CN-5363 | \n",
+ " 291b069c-9dde-4e1e-8430-85146bc94338 | \n",
+ " 2 | \n",
+ " Larynx | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 2611cb61-6d05-5286-b94a-ce6cac2ba37b | \n",
+ " male | \n",
+ " black or african american | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 15.0 | \n",
+ " 0.821918 | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T18:43:25.167078-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:25:25.511101-05:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TCGA-CN-5365 | \n",
+ " 4cffea0b-90a7-4c86-a73f-bb8feca3ada7 | \n",
+ " 2 | \n",
+ " Tonsil | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 97a7f69b-0f40-5450-bbeb-92084a100a9d | \n",
+ " male | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 26.0 | \n",
+ " 1.424658 | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:39:51.442671-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:25:25.511101-05:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " TCGA-CN-A642 | \n",
+ " a1ded1e8-eb28-49dd-8f3d-1ce8f40eed8f | \n",
+ " 2 | \n",
+ " Other and unspecified parts of tongue | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 4bc58619-47fc-5c2d-aaec-9d9e562e049b | \n",
+ " male | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 5.0 | \n",
+ " 0.273973 | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:30:27.901248-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:25:39.854271-05:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " TCGA-CR-7380 | \n",
+ " 53b254b7-021f-43df-af9b-3fc01b87479e | \n",
+ " 2 | \n",
+ " Other and ill-defined sites in lip, oral cavit... | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " be41a712-ebee-52e1-907c-80b1917daa45 | \n",
+ " male | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:40:20.032260-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:26:05.315718-05:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " TCGA-CV-5978 | \n",
+ " e16e9535-b20f-4c9a-8b5b-82df80c99448 | \n",
+ " 2 | \n",
+ " Larynx | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 92d1d967-c8a0-52cb-a62d-1d11bdf85068 | \n",
+ " female | \n",
+ " black or african american | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:52:06.976359-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:26:05.315718-05:00 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " TCGA-CV-6948 | \n",
+ " fcf0dc48-b889-4593-a15b-aa715aae7bf5 | \n",
+ " 2 | \n",
+ " Floor of mouth | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 2fd1a926-7584-50d5-b6b7-9b9d02710f47 | \n",
+ " female | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " No | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:55:16.152855-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:26:16.536997-05:00 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " TCGA-CV-7409 | \n",
+ " acd98e20-d2da-4256-99a5-13e261bc88e6 | \n",
+ " 2 | \n",
+ " Other and ill-defined sites in lip, oral cavit... | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 2a3f5bb4-3606-5549-8d85-ec413eadd7ab | \n",
+ " male | \n",
+ " black or african american | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " No | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:48:25.311492-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:26:28.608672-05:00 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " TCGA-CV-A6JU | \n",
+ " b1b3983d-37d2-4bef-bd17-708e3e600146 | \n",
+ " 2 | \n",
+ " Other and unspecified parts of tongue | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 604e3dac-30be-589d-b622-df0b41cd9a7f | \n",
+ " female | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 81.0 | \n",
+ " 4.438356 | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:48:40.594893-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:26:39.780396-05:00 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " TCGA-QK-A6IH | \n",
+ " c1f286f6-d4a1-494a-88c8-ff8e2a3df2ce | \n",
+ " 2 | \n",
+ " Gum | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " 83e5c705-bd2e-5516-9700-ed3803dde268 | \n",
+ " female | \n",
+ " white | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:49:42.057478-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:27:02.392779-05:00 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " TCGA-QK-A8Z8 | \n",
+ " ac511727-185b-4ac0-b6c0-dc3a79657be6 | \n",
+ " 2 | \n",
+ " Larynx | \n",
+ " Squamous Cell Neoplasms | \n",
+ " Head and Neck Squamous Cell Carcinoma | \n",
+ " TCGA-HNSC | \n",
+ " fd1e46fb-43bb-54ae-b713-a579ba857ed4 | \n",
+ " female | \n",
+ " black or african american | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 80.0 | \n",
+ " 4.383562 | \n",
+ " Yes | \n",
+ " released | \n",
+ " None | \n",
+ " 2019-07-31T19:48:22.125112-05:00 | \n",
+ " released | \n",
+ " 2019-08-06T14:27:02.392779-05:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10 rows × 70 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " submitter_id case_id diag__treat__count \\\n",
+ "0 TCGA-CN-5363 291b069c-9dde-4e1e-8430-85146bc94338 2 \n",
+ "1 TCGA-CN-5365 4cffea0b-90a7-4c86-a73f-bb8feca3ada7 2 \n",
+ "2 TCGA-CN-A642 a1ded1e8-eb28-49dd-8f3d-1ce8f40eed8f 2 \n",
+ "3 TCGA-CR-7380 53b254b7-021f-43df-af9b-3fc01b87479e 2 \n",
+ "4 TCGA-CV-5978 e16e9535-b20f-4c9a-8b5b-82df80c99448 2 \n",
+ "5 TCGA-CV-6948 fcf0dc48-b889-4593-a15b-aa715aae7bf5 2 \n",
+ "6 TCGA-CV-7409 acd98e20-d2da-4256-99a5-13e261bc88e6 2 \n",
+ "7 TCGA-CV-A6JU b1b3983d-37d2-4bef-bd17-708e3e600146 2 \n",
+ "8 TCGA-QK-A6IH c1f286f6-d4a1-494a-88c8-ff8e2a3df2ce 2 \n",
+ "9 TCGA-QK-A8Z8 ac511727-185b-4ac0-b6c0-dc3a79657be6 2 \n",
+ "\n",
+ " primary_site disease_type \\\n",
+ "0 Larynx Squamous Cell Neoplasms \n",
+ "1 Tonsil Squamous Cell Neoplasms \n",
+ "2 Other and unspecified parts of tongue Squamous Cell Neoplasms \n",
+ "3 Other and ill-defined sites in lip, oral cavit... Squamous Cell Neoplasms \n",
+ "4 Larynx Squamous Cell Neoplasms \n",
+ "5 Floor of mouth Squamous Cell Neoplasms \n",
+ "6 Other and ill-defined sites in lip, oral cavit... Squamous Cell Neoplasms \n",
+ "7 Other and unspecified parts of tongue Squamous Cell Neoplasms \n",
+ "8 Gum Squamous Cell Neoplasms \n",
+ "9 Larynx Squamous Cell Neoplasms \n",
+ "\n",
+ " proj__name proj__project_id \\\n",
+ "0 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "1 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "2 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "3 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "4 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "5 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "6 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "7 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "8 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "9 Head and Neck Squamous Cell Carcinoma TCGA-HNSC \n",
+ "\n",
+ " demo__demographic_id demo__gender \\\n",
+ "0 2611cb61-6d05-5286-b94a-ce6cac2ba37b male \n",
+ "1 97a7f69b-0f40-5450-bbeb-92084a100a9d male \n",
+ "2 4bc58619-47fc-5c2d-aaec-9d9e562e049b male \n",
+ "3 be41a712-ebee-52e1-907c-80b1917daa45 male \n",
+ "4 92d1d967-c8a0-52cb-a62d-1d11bdf85068 female \n",
+ "5 2fd1a926-7584-50d5-b6b7-9b9d02710f47 female \n",
+ "6 2a3f5bb4-3606-5549-8d85-ec413eadd7ab male \n",
+ "7 604e3dac-30be-589d-b622-df0b41cd9a7f female \n",
+ "8 83e5c705-bd2e-5516-9700-ed3803dde268 female \n",
+ "9 fd1e46fb-43bb-54ae-b713-a579ba857ed4 female \n",
+ "\n",
+ " demo__race ... exp__bmi exp__years_smoked \\\n",
+ "0 black or african american ... NaN NaN \n",
+ "1 white ... NaN NaN \n",
+ "2 white ... NaN NaN \n",
+ "3 white ... NaN NaN \n",
+ "4 black or african american ... NaN NaN \n",
+ "5 white ... NaN NaN \n",
+ "6 black or african american ... NaN NaN \n",
+ "7 white ... NaN NaN \n",
+ "8 white ... NaN NaN \n",
+ "9 black or african american ... NaN NaN \n",
+ "\n",
+ " exp__pack_years_smoked exp__cigarettes_per_day exp__alcohol_history \\\n",
+ "0 15.0 0.821918 Yes \n",
+ "1 26.0 1.424658 Yes \n",
+ "2 5.0 0.273973 Yes \n",
+ "3 NaN NaN Yes \n",
+ "4 NaN NaN Yes \n",
+ "5 NaN NaN No \n",
+ "6 NaN NaN No \n",
+ "7 81.0 4.438356 Yes \n",
+ "8 NaN NaN Yes \n",
+ "9 80.0 4.383562 Yes \n",
+ "\n",
+ " exp__state exp__created_datetime exp__updated_datetime \\\n",
+ "0 released None 2019-07-31T18:43:25.167078-05:00 \n",
+ "1 released None 2019-07-31T19:39:51.442671-05:00 \n",
+ "2 released None 2019-07-31T19:30:27.901248-05:00 \n",
+ "3 released None 2019-07-31T19:40:20.032260-05:00 \n",
+ "4 released None 2019-07-31T19:52:06.976359-05:00 \n",
+ "5 released None 2019-07-31T19:55:16.152855-05:00 \n",
+ "6 released None 2019-07-31T19:48:25.311492-05:00 \n",
+ "7 released None 2019-07-31T19:48:40.594893-05:00 \n",
+ "8 released None 2019-07-31T19:49:42.057478-05:00 \n",
+ "9 released None 2019-07-31T19:48:22.125112-05:00 \n",
+ "\n",
+ " state updated_datetime \n",
+ "0 released 2019-08-06T14:25:25.511101-05:00 \n",
+ "1 released 2019-08-06T14:25:25.511101-05:00 \n",
+ "2 released 2019-08-06T14:25:39.854271-05:00 \n",
+ "3 released 2019-08-06T14:26:05.315718-05:00 \n",
+ "4 released 2019-08-06T14:26:05.315718-05:00 \n",
+ "5 released 2019-08-06T14:26:16.536997-05:00 \n",
+ "6 released 2019-08-06T14:26:28.608672-05:00 \n",
+ "7 released 2019-08-06T14:26:39.780396-05:00 \n",
+ "8 released 2019-08-06T14:27:02.392779-05:00 \n",
+ "9 released 2019-08-06T14:27:02.392779-05:00 \n",
+ "\n",
+ "[10 rows x 70 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql=\"\"\"SELECT *\n",
+ "FROM `isb-cgc-bq.TCGA_versioned.clinical_gdc_r24`\n",
+ "LIMIT 10\"\"\"\n",
+ "vn.run_sql(sql=sql)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Run as a Web App\n",
+ "If you would like to use this functionality in a web app, you can deploy the Vanna Streamlit app and use your own secrets. See [this repo](https://github.com/vanna-ai/vanna-streamlit)."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/pyproject.toml b/pyproject.toml
index da7797db..c0b59d45 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,6 @@ dependencies = [
[project.optional-dependencies]
postgres = ["psycopg2", "db-dtypes"]
-bigqury = ["google-cloud-bigquery"]
+bigquery = ["google-auth", "google-cloud-bigquery", "db-dtypes"]  # db-dtypes: needed by RowIterator.to_dataframe(), which run_sql_bigquery calls
snowflake = ["snowflake-connector-python"]
test = ["tox"]
diff --git a/src/vanna/__init__.py b/src/vanna/__init__.py
index 7013c750..ae3c06ab 100644
--- a/src/vanna/__init__.py
+++ b/src/vanna/__init__.py
@@ -408,7 +408,7 @@ def set_model(model: str):
model = env_model
else:
raise ValidationError("Please replace 'my-model' with the name of your model")
- dataset = sanitize_model_name(model)
+
_set_org(org=model)
@@ -1636,3 +1636,86 @@ def run_sql_postgres(sql: str) -> Union[pd.DataFrame, None]:
global run_sql
run_sql = run_sql_postgres
+
+
+def connect_to_bigquery(cred_file_path: str = None, project_id: str = None):
+    """
+    Connect to Google BigQuery. This is just a helper function to set [`vn.run_sql`][vanna.run_sql]
+
+    Implicit Google credentials are tried first. The service account file is
+    always validated when given, but it is only loaded when no client could be
+    created from implicit credentials.
+
+    **Example:**
+    ```python
+    import vanna as vn
+
+    vn.connect_to_bigquery(
+        project_id="myprojectid",
+        cred_file_path="path/to/credentials.json",
+    )
+    ```
+
+    Args:
+        cred_file_path (str): Path to a service account credentials JSON file.
+            Required only when no implicit credentials are available.
+        project_id (str): The Google Cloud project id. Falls back to the
+            `PROJECT_ID` environment variable when not given.
+
+    Raises:
+        DependencyError: If the BigQuery client libraries are not installed.
+        ImproperlyConfigured: If no project id is available, Colab
+            authentication fails, `validate_config_path` rejects
+            `cred_file_path`, or the client cannot be built from the file.
+        ValidationError: If there are neither implicit credentials nor a
+            credentials file. Also raised by the installed `run_sql` when
+            BigQuery reports an error for a query.
+    """
+
+    try:
+        from google.api_core.exceptions import GoogleAPIError
+        from google.cloud import bigquery
+        from google.oauth2 import service_account
+    except ImportError:
+        raise DependencyError("You need to install required dependencies to execute this method, run command:"
+                              " \npip install vanna[bigquery]")
+
+    if not project_id:
+        project_id = os.getenv('PROJECT_ID')
+
+    if not project_id:
+        raise ImproperlyConfigured("Please set your Google Cloud Project ID.")
+
+    import sys
+    if "google.colab" in sys.modules:
+        try:
+            from google.colab import auth
+            auth.authenticate_user()
+        except Exception as e:
+            raise ImproperlyConfigured(e) from e
+    else:
+        print("Not using Google Colab.")
+
+    conn = None
+
+    try:
+        # Pass the project explicitly: it is mandatory above, so the implicit
+        # client must not silently fall back to a different default project.
+        conn = bigquery.Client(project=project_id)
+    except Exception:
+        # Best effort only; a credentials file may still be supplied below.
+        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
+        print("Could not found any google cloud implicit credentials")
+
+    if cred_file_path:
+        # Validate file path and permissions, even if the implicit client
+        # succeeded, so a bad path is reported rather than ignored.
+        validate_config_path(cred_file_path)
+    elif not conn:
+        raise ValidationError("Pleae provide a service account credentials json file")
+
+    if not conn:
+        with open(cred_file_path, 'r') as f:
+            credentials = service_account.Credentials.from_service_account_info(
+                json.load(f),
+                scopes=["https://www.googleapis.com/auth/cloud-platform"]
+            )
+
+        try:
+            conn = bigquery.Client(project=project_id, credentials=credentials)
+        except Exception as e:
+            raise ImproperlyConfigured("Could not connect to bigquery please correct credentials") from e
+
+    def run_sql_bigquery(sql: str) -> Union[pd.DataFrame, None]:
+        """Run `sql` on the connected BigQuery client and return the rows as a DataFrame."""
+        if conn:
+            try:
+                job = conn.query(sql)
+                return job.result().to_dataframe()
+            except GoogleAPIError as error:
+                # Not every GoogleAPIError carries `.errors`, and its entries
+                # are not guaranteed to be dicts, so collect messages defensively.
+                details = getattr(error, "errors", None) or []
+                messages = [
+                    str(detail["message"])
+                    for detail in details
+                    if isinstance(detail, dict) and "message" in detail
+                ]
+                # Raise a real exception; raising the bare list was a TypeError.
+                raise ValidationError("; ".join(messages) or str(error)) from error
+        return None
+
+    global run_sql
+    run_sql = run_sql_bigquery
diff --git a/tests/test_vanna.py b/tests/test_vanna.py
index 5c3f95bf..37bb92ac 100644
--- a/tests/test_vanna.py
+++ b/tests/test_vanna.py
@@ -3,6 +3,8 @@
import sys
import io
import pandas as pd
+import contextlib
+import stat
import os
import pytest
from vanna.exceptions import ValidationError, ImproperlyConfigured
@@ -431,3 +433,67 @@ def test_connect_to_postgres_validations(monkeypatch, params, none_param):
with pytest.raises(ImproperlyConfigured) as exc:
vn.connect_to_postgres(**params)
assert f"Please set your postgres {none_param}" in exc.args[0]
+
+
+class Client:
+    """Minimal stand-in for `google.cloud.bigquery.Client` used by the BigQuery tests."""
+
+    def __init__(self, *args, **kwargs):
+        # Accept and ignore constructor arguments (e.g. project=..., credentials=...)
+        # so the stub can be built however the code under test builds its client.
+        pass
+
+    def query(self, query):
+        # No-op: these tests only exercise connection-time validation.
+        pass
+
+
+@pytest.mark.parametrize("params", [
+    dict(project_id=None),
+])
+def test_connect_to_bigquery_validations(monkeypatch, params):
+    """connect_to_bigquery must reject a missing project id."""
+    monkeypatch.setattr("google.cloud.bigquery.Client", Client)
+    # An ambient PROJECT_ID would satisfy the environment fallback and no
+    # error would be raised, so clear it for the duration of the test.
+    monkeypatch.delenv("PROJECT_ID", raising=False)
+    with pytest.raises(ImproperlyConfigured) as exc:
+        vn.connect_to_bigquery(**params)
+    # Checked after the `with` block (so it actually runs) and on `exc.value`,
+    # the raised exception; the ExceptionInfo wrapper has no `.args`.
+    assert "Please set your Google Cloud Project ID." in exc.value.args[0]
+
+
+@pytest.mark.parametrize("params, expected_err", [
+    (
+        dict(
+            project_id="test-project",
+            cred_file_path="wrong/file/path.json"
+        ),
+        "No such configuration file: wrong/file/path.json"
+    ),
+    (
+        dict(
+            project_id="test-project",
+            cred_file_path="tests"
+        ),
+        "Config should be a file: tests"
+    )
+])
+def test_connect_to_bigquery_creds_path_validations(monkeypatch, params, expected_err):
+    """A missing path or a directory given as `cred_file_path` must be rejected."""
+    monkeypatch.setattr("google.cloud.bigquery.Client", Client)
+    with pytest.raises(ImproperlyConfigured) as exc:
+        vn.connect_to_bigquery(**params)
+    # Checked after the `with` block (so it actually runs) and on `exc.value`,
+    # the raised exception; the ExceptionInfo wrapper has no `.args`.
+    assert expected_err in exc.value.args[0]
+
+
+@pytest.mark.parametrize("params", [
+    dict(
+        project_id="test-project",
+        cred_file_path="tests/test-creds.json"
+    ),
+])
+def test_connect_to_bigquery_creds_file_permissions(monkeypatch, params):
+    """A credentials file that exists but cannot be read must be rejected."""
+    monkeypatch.setattr("google.cloud.bigquery.Client", Client)
+    with create_file(params["cred_file_path"]) as creds_path:
+        with pytest.raises(ImproperlyConfigured) as exc:
+            vn.connect_to_bigquery(**params)
+        # Checked after the inner `with` (so it actually runs) and on
+        # `exc.value`; the ExceptionInfo wrapper has no `.args`.
+        assert f"Cannot read the config file. Please grant read privileges: {creds_path}" in exc.value.args[0]
+
+
+@contextlib.contextmanager
+def create_file(file_path):
+    """Create an empty file with owner-write-only permissions and remove it on exit.
+
+    Yields:
+        The same `file_path` that was passed in.
+    """
+    # Mode "w" creates (or truncates) the file; nothing is written to it.
+    with open(file_path, "w"):
+        pass
+    try:
+        # Owner write-only, i.e. no read bit, so the permissions test sees an
+        # unreadable file. NOTE(review): presumably has no effect when the
+        # tests run as root or on Windows - confirm if CI ever does that.
+        # chmod sits inside the try so the file is removed even if it fails.
+        os.chmod(file_path, stat.S_IWUSR)
+        yield file_path
+    finally:
+        os.remove(file_path)