Analásis de coches financiados en Coches.net¶
by Albert Lanza: https://www.linkedin.com/in/albert-lanza-rio/¶
Se han extraído aquellos anuncios con los siguientes atributos:
- Aquellos que son financiables
- Extraídos de más nuevo a más antiguo
- Alrededor del 45% del total del stock disponible a 19/11/2025
0. Carga de librerias, datos y cuántos anuncios hay en el dataset¶
In [29]:
import json
import numpy as np
import pandas as pd
import geopandas as gpd
import plotly.express as px
import matplotlib.pyplot as plt
In [30]:
# Cargar los datos previamente extraídos de su web
# https://www.coches.net/segunda-mano/?MinInstalmentPrice=1&hasInstalment=true?pg=1
df = pd.read_csv('scraping_stock_financ_cochesNet_19112025.csv', low_memory=False)
df = df.dropna(subset=['financing_lender'])
print('TOTAL DE VEHÍCULOS en el dataset extraído:', f"{len(df):,.0f}".replace(',', '.'))
print('Columnas del dataset:', list(df.columns))
TOTAL DE VEHÍCULOS en el dataset extraído: 29.615 Columnas del dataset: ['id', 'title', 'manufacturer', 'model', 'version', 'year', 'kms', 'cubic_capacity', 'environmental_label', 'doors', 'power', 'seating_capacity', 'consumption_urban', 'consumption_extra_urban', 'consumption_mixed', 'max_speed', 'acceleration', 'tank_capacity_in_liters', 'trunk_capacity_in_liters', 'weight', 'width', 'height', 'length', 'warranty_months', 'warranty_is_official', 'color', 'cash_price', 'financing_price', 'financing_capital', 'financing_computed_price', 'financing_terms', 'financing_entry', 'financing_min_entry', 'financing_max_entry', 'financing_first_fee', 'financing_fee', 'financing_lender', 'financing_tae', 'financing_tin', 'financing_opening_percentage', 'financing_opening_expenses', 'financing_total_interest', 'financing_amount_to_finance', 'financing_total_loan_amount', 'financing_total_price_terms', 'financing_total_terms_amount', 'financing_total_amount_due', 'financing_coefficient', 'has_taxes', 'creation_date', 'publication_date', 'url', 'status', 'contact_email', 'contact_phone1', 'contact_phone2', 'rating_score_avg', 'rating_comments_number', 'has_reservation', 'has_financing_product', 'client_name', 'client_pageslug', 'client_url', 'client_location_address', 'client_location_province', 'client_location_zipcode', 'client_location_geolocation_latitude', 'client_location_geolocation_longitude', 'statistics_views', 'statistics_shares', 'statistics_favorites', 'statistics_total_calls', 'statistics_total_contacts', 'statistics_total_scheduled_calls', 'statistics_total_attended_calls', 'statistics_messages', 'statistics_total_missed_calls']
1. ¿Qué financieras están en Coches.net y qué cuota de mercado tienen en Coches.net?¶
In [31]:
financieras = np.unique(df['financing_lender'])
counts = [len(df[df['financing_lender'] == f]) for f in financieras]
percentages = [c / sum(counts) * 100 for c in counts]
MIN_PERCENTAGE = 3
financieras_filtradas = []
counts_filtrados = []
otras_count = 0
for f, c, p in zip(financieras, counts, percentages):
if p >= MIN_PERCENTAGE:
financieras_filtradas.append(f.upper())
counts_filtrados.append(c)
else:
otras_count += c
if otras_count > 0:
financieras_filtradas.append("OTRAS")
counts_filtrados.append(otras_count)
orden = np.argsort(counts_filtrados)[::-1]
financieras_filtradas = [financieras_filtradas[i] for i in orden]
counts_filtrados = [counts_filtrados[i] for i in orden]
color_map = {'SANTANDER': '#ED0808', 'BBVA': '#398CC9', 'CAIXA': '#FFCE08'}
plt.figure(figsize=(5, 5))
wedges, texts, autotexts = plt.pie(
counts_filtrados,
labels=financieras_filtradas,
autopct='%1.1f%%',
counterclock=False,
startangle=90,
pctdistance=0.8,
colors=[color_map.get(f, '#7f7f7f') for f in financieras_filtradas]
)
plt.setp(autotexts, size=8, color="black")
plt.setp(texts, size=10, fontweight='bold')
plt.legend(
wedges,
[f"{f} ({format(c, ',').replace(',', '.')})" for f, c in zip(financieras_filtradas, counts_filtrados)],
title="Financieras",
loc="upper right",
bbox_to_anchor=(1.3, 1)
)
plt.title('Distribución anuncios de financieras en Coches.net')
plt.show()
2. ¿Cómo se distribuyen los anuncios por financiera y cuál es su ticket medio al contado?¶
In [32]:
financieras = np.unique(df['financing_lender'])
ads_counts = []
avg_prices = []
for financiera in financieras:
ads = len(df[df['financing_lender'] == financiera])
avg_price = df[df['financing_lender'] == financiera]['cash_price'].mean()
ads_counts.append(ads)
avg_prices.append(avg_price)
fig, ax = plt.subplots(figsize=(10, 6))
color_map = {'SANTANDER': '#ED0808', 'BBVA': '#398CC9', 'CAIXA': '#FFCE08', 'STELLANTIS': '#001137', 'MOBILIZE-NISSAN': '#CED0CD', 'MOBILIZE': '#FE672E', 'LENDROCK': '#EE6B3F', 'BMW-BANK': '#117C87'}
colors = [color_map.get(f.upper(), '#7f7f7f') for f in financieras]
bars = ax.barh(financieras, ads_counts, color=colors)
for bar, count, price in zip(bars, ads_counts, avg_prices):
ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2, f"{count:,}".replace(',', '.') + " anuncios\n" + f"{price:,.0f} €".replace(',', '.'), fontsize=8)
ax.set_title("Número de anuncios y promedio del precio al contado del ticket medio")
ax.set_xlabel("Número de anuncios")
plt.xticks(list(range(0, 24000, 3000)), fontsize=8)
plt.tight_layout()
plt.show()
3. ¿Cuántos concesionarios hay en el dataset y, cada uno, con qué financieras trabaja?¶
In [33]:
print(f"Hay un total de {df['client_name'].nunique():,.0f} concesionarios en el dataset.\n".replace(',', '.'))
print(f'⬇️ 10 concesionarios aleatorios en número de vehículos anunciados y con qué financieras trabajan:')
counts = df.groupby(['client_name', 'financing_lender']).size().unstack(fill_value=0)
counts['TOTAL'] = counts.sum(axis=1)
counts = counts.sort_values('TOTAL', ascending=False)
counts.sample(10)
Hay un total de 1.032 concesionarios en el dataset. ⬇️ 10 concesionarios aleatorios en número de vehículos anunciados y con qué financieras trabajan:
Out[33]:
| financing_lender | bbva | bmw-bank | caixa | lendrock | mobilize | mobilize-nissan | santander | stellantis | TOTAL |
|---|---|---|---|---|---|---|---|---|---|
| client_name | |||||||||
| Cars la Sagra | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| Flexicar Sabadell Aeropuerto | 0 | 0 | 0 | 0 | 0 | 0 | 40 | 0 | 40 |
| Total Ocasión: Leganés, Alcalá de Henares, Rivas. | 0 | 0 | 0 | 118 | 0 | 0 | 0 | 0 | 118 |
| AUTOSAE CENTER | 0 | 0 | 0 | 0 | 0 | 12 | 0 | 0 | 12 |
| Ocasionplus GETAFE | 0 | 0 | 0 | 0 | 0 | 0 | 173 | 0 | 173 |
| Seminuevos y Ocasión Lexus Madrid | 0 | 0 | 0 | 0 | 0 | 0 | 41 | 0 | 41 |
| Autos Juanjo | 0 | 0 | 0 | 0 | 0 | 0 | 109 | 0 | 109 |
| Autos Auringis | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | 13 |
| Vázquez de Prada | 7 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 10 |
| Rivero Motor | 0 | 0 | 0 | 0 | 0 | 0 | 11 | 0 | 11 |
4. Gráfico de la distribución de los anuncios por financiera y marca de vehículo¶
In [34]:
brands = np.unique(df['manufacturer'])
financieras = np.unique(df['financing_lender'])
color_map = {'santander': '#ED0808', 'bbva': '#398CC9', 'caixa': '#FFCE08'}
default_color = '#7f7f7f'
data_matrix = []
for brand in brands:
row = []
for f in financieras:
count = len(df[(df['manufacturer'] == brand) & (df['financing_lender'] == f)])
row.append(count)
data_matrix.append(row)
data_matrix = np.array(data_matrix)
avg_prices = [df[df['manufacturer'] == b]['cash_price'].mean() for b in brands]
orden = np.argsort(avg_prices)[::-1]
brands_sorted = [brands[i] for i in orden]
data_matrix_sorted = data_matrix[orden]
avg_prices_sorted = [avg_prices[i] for i in orden]
colors = [color_map.get(f, default_color) for f in financieras]
plt.figure(figsize=(10, 14))
bottom = np.zeros(len(brands_sorted))
for i, f in enumerate(financieras):
counts = data_matrix_sorted[:, i]
plt.barh(brands_sorted, counts, left=bottom, color=colors[i], height=0.6, label=f)
bottom += counts
for i, brand in enumerate(brands_sorted):
total = np.sum(data_matrix_sorted[i, :])
avg = avg_prices_sorted[i]
plt.text(total + 0.5, i, f"{format(total, ',').replace(',', '.')} {'anuncios' if total>1 else 'anuncio'}, Promedio cash: €{format(avg, ',.0f').replace(',', '.')}", va='center', fontsize=7)
plt.xlabel('Número de anuncios')
plt.xticks(list(range(0, 4200, 200)), fontsize=8)
plt.ylabel('Fabricante')
plt.yticks(fontsize=8)
plt.title('Número de anuncios por fabricante y financiera ordenado por promedio del precio en efectivo (desc)')
plt.legend(title='Financieras', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
5. Gráfico de España de cuántos anuncios hay por província¶
In [35]:
gdf = gpd.read_file("provincias_spain.geojson")
count_df = (df.groupby("client_location_province").size().reset_index(name="num_coches"))
merged = gdf.merge(count_df, left_on="Texto", right_on="client_location_province", how="left")
merged["num_coches"] = merged["num_coches"].fillna(0)
geojson_dict = json.loads(merged.to_json())
fig = px.choropleth(
merged,
geojson=geojson_dict,
locations=merged.index,
color="num_coches",
hover_name="Texto",
hover_data={"num_coches": True},
color_continuous_scale="Viridis"
)
total_coches = f"{len(df):,}".replace(',', '.')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(title=f"Número de coches por provincia. Número de coches en total: {total_coches}")
fig.write_html("mapa_provincias.html", include_plotlyjs='cdn', full_html=True)
fig.show()