Google Analytics 4 (GA4) has an API that provides access to data such as pageviews, traffic sources, and other data points. With this API, you can build custom dashboards, automate reporting, and integrate GA4 data with other applications.
This guide focuses on how to access and export data using Python. I’ll cover a basic method to access data that we’re used to seeing in GA4.
You can download the complete code for this guide from GitHub: https://github.com/Impesud/machine-learning-with-python/tree/main/google-analytics-data
1
2
3
4
5
6
7
8
9
|
# Google Analytics 4 API with Python
# Visit my profile: https://github.com/impesud
# We will use the following libraries:
# - google-analytics-data: for accessing Google Analytics data
# - pandas: for data manipulation
# - matplotlib: for data visualization
# NOTE: `%pip` is an IPython/Jupyter magic command, so the line below only
# works inside a notebook cell; from a plain terminal run the same
# `pip install ...` command without the leading `%`.
%pip install google-analytics-data pandas matplotlib
|
1
2
3
4
5
6
|
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta import Metric, RunReportRequest, OrderBy
from google.oauth2.service_account import Credentials
import pandas as pd
import itertools as it
import matplotlib.pyplot as plt
|
1
2
3
4
5
6
|
# Authenticate with a service account. Prerequisites:
#   1. Set up a Google Cloud project
#   2. Add and enable the Google Analytics Data API
#   3. Create a GCP service account and download its private-key JSON file
#   4. Grant that service account user permissions in GA4
# The path below is a placeholder for the downloaded JSON key file.
credentials = Credentials.from_service_account_file(
    filename='xxxxxxxxxxxxxxx.json',
)
|
1
2
|
# Initializing the Google Analytics client
# `credentials` is the service-account credentials object created earlier in
# this file; it is handed to the client explicitly via the keyword argument.
client = BetaAnalyticsDataClient(credentials=credentials)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
# Example query to get data
# ID of your GA4 property. Replace this placeholder with your own value
# before running the report.
# NOTE(review): `property_id` was referenced below but not defined anywhere in
# the visible code, which raises NameError; if it is already defined in an
# earlier cell, drop this assignment.
property_id = "YOUR-GA4-PROPERTY-ID"

request = RunReportRequest(
    # The API expects the resource name in the form "properties/<id>"
    property=f"properties/{property_id}",
    # Relative date strings: from 21 days ago through today
    date_ranges=[{"start_date": "21daysAgo", "end_date": "today"}],
    dimensions=[
        {"name": "date"},
        # {"name": "country"},  # uncomment to also break down by country
    ],
    metrics=[
        Metric(name="activeUsers"),
        # Metric(name="newUsers"),  # uncomment to also report new users
    ],
    # Sort the rows by the "date" dimension in ascending order
    order_bys=[
        OrderBy(
            dimension=OrderBy.DimensionOrderBy(dimension_name="date"),
            desc=False,
        ),
    ],
)
# You can add other dimensions and metrics, following Google's documentation on GA4:
# https://developers.google.com/analytics/devguides/reporting/data/v1/api-schema?hl=en
|
1
2
3
4
|
# Send the report request through the client and print the raw response
response = client.run_report(request=request)
print(response)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
# Convert data to a Pandas DataFrame for analysis
def query_data(api_response):
    """Flatten a report response into a pandas DataFrame.

    Args:
        api_response: Object exposing ``dimension_headers`` and
            ``metric_headers`` (items with a ``name`` attribute) and ``rows``
            (items with ``dimension_values`` and ``metric_values``, whose
            items have a ``value`` attribute). Each row is expected to carry
            one value per header.

    Returns:
        A DataFrame with one row per report row. Columns are the dimension
        names followed by the metric names, in header order. Cell values are
        kept exactly as the ``value`` attributes found in the response (no
        type conversion is applied).
    """
    # Column names: every dimension header first, then every metric header.
    headers = [
        header.name
        for header in it.chain(
            api_response.dimension_headers,
            api_response.metric_headers,
        )
    ]
    # Build the table row by row, in the same dimension-then-metric order,
    # instead of assembling per-column lists and transposing afterwards.
    records = [
        [
            cell.value
            for cell in it.chain(row.dimension_values, row.metric_values)
        ]
        for row in api_response.rows
    ]
    return pd.DataFrame(records, columns=headers)
# Preview the converted report; the return value is not stored here
# (presumably this relies on notebook cell output to display it).
query_data(response)
|
1
2
3
|
# Convert the response to a DataFrame and save it as file.csv,
# leaving the DataFrame index out of the file
final_data = query_data(response)
final_data.to_csv(path_or_buf='file.csv', index=False)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
# Data Visualization and Exploratory Analysis
# Example: plot the number of active users over time from the exported CSV
df = pd.read_csv('file.csv')
x = range(len(df['date']))

plt.figure(figsize=(20, 8))
plt.plot(x, df['activeUsers'], marker="o")

# One x tick per row, labelled with that row's date and rotated to stay legible
plt.xticks(x, df['date'], rotation=90)
# One y tick at each observed activeUsers value
plt.yticks(df['activeUsers'])

plt.xlabel('')
plt.ylabel('Active Users')

plt.tight_layout()
plt.show()
|