Documentation

How to use ScrapingBee

Overview

Here is the list of the different parameters you can use with ScrapingBee API.

name [type](default) Description
api_key [string] required Your API key: Get your API key learn more
url [string] required The URL of the page you want to scrape learn more
block_ads [boolean] (false) Block ads on the page you want to scrape learn more
block_resources [boolean] (true) Block images and CSS on the page you want to scrape learn more
cookies [string] ("") Pass custom cookies to the webpage you want to scrape learn more
country_code [string] ("") Premium proxy geolocation. learn more
forward_headers [boolean] (false) Forward particular headers to the webpage learn more
js_snippet [base64 encoded string] JavaScript snippet to execute (clicking on a button, scrolling ...) learn more
premium_proxy [boolean] (false) Use premium proxies to scrape difficult-to-scrape websites (10-25 credits/request) learn more
render_js [boolean] (true) Render the JavaScript on the page with a headless browser (5 credits/request) learn more
return_page_source [boolean] (false) Return the original HTML before the Javascript rendering learn more
wait [integer] (0) Additional time in ms for Javascript to render learn more
wait_for [string] ("") CSS selector to wait for in the DOM. learn more

Getting Started

ScrapingBee is meant to be the easiest scraping API available on the web.

To scrape a web page, you only need two things:

Then, simply do this:


curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    """Call the ScrapingBee API for a single page and print the result."""
    # Query-string parameters for the API call.
    query = {
        "api_key": "YOUR-API-KEY",
        "url": "YOUR-URL",
    }
    response = requests.get(url="https://app.scrapingbee.com/api/v1/", params=query)
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)

send_request()

// request Classic
const https = require('https')

// Target the ScrapingBee API endpoint over HTTPS.
const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=YOUR-URL',
    method: 'GET',
}

// Stream the response body straight to stdout as it arrives.
const req = https.request(options, (res) => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', (chunk) => process.stdout.write(chunk))
})

req.on('error', (err) => console.error(err))

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  /** Fires a single GET at the ScrapingBee API and prints the body. */
  private static void sendRequest() {

    // Classic (GET )

    try {
      // Build, execute and unwrap the request in one fluent chain.
      Content content =
          Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL")
              .execute()
              .returnContent();

      // Print content
      System.out.println(content);
    } catch (IOException e) {
      System.out.println(e);
    }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
# Fetch the target page through the ScrapingBee API and print the result.
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL')

    # HTTPS client with certificate verification enabled.
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Build and fire the GET request in one go.
    res = http.request(Net::HTTP::Get.new(uri))
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// Initialise a cURL session for the ScrapingBee API call.
$ch = curl_init();

// Configure URL, HTTP method and string return mode in one call.
curl_setopt_array($ch, [
    CURLOPT_URL => 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL',
    CURLOPT_CUSTOMREQUEST => 'GET',
    CURLOPT_RETURNTRANSFER => 1,
]);

// send the request and save response to $response
$response = curl_exec($ch);

// stop if fails
if (!$response) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic performs a single GET request against the ScrapingBee API
// and prints the response status, headers and body.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request
	resp, err := client.Do(req)
	if err != nil {
		// Must return here: resp is nil on error and dereferencing it panics.
		fmt.Println("Failure : ", err)
		return
	}
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

func main() {
	sendClassic()
}

The API will then respond with the raw HTML content of the target URL:

<html>
  <head>
     ...
  </head>
  <body>
     ...
  </body>
</html>          

Every request that fails will be retried as many times as possible for up to 30 seconds.

So please be aware of this maximum timeout when writing your own code.

Headers and cookies returned by the target website are prefixed with Spb- (for ScraPingBee).

API Key

All requests are authenticated by using your private API key.

To get access to your API key, just create an account here and confirm your email address.

URL

This parameter is the full URL (with http/https) of the page you want to extract data from.

Never forget to correctly encode your URL before calling the API because there is a good chance that special characters such as + or ? are in it.

If you need help encoding your URL you can find more information below:


sudo apt-get install gridsite-clients
urlencode "YOUR URL"
         

import urllib.parse

# safe="" makes quote() percent-encode "/" as well (its default is safe='/'),
# which is required when the whole URL is passed as a single query parameter.
encoded_url = urllib.parse.quote("YOUR URL", safe="")

encoded_url = encodeURIComponent("YOUR URL")

String encoded_url = URLEncoder.encode("YOUR URL", "UTF-8");

require 'uri'
# URI::encode (alias of URI.escape) was deprecated and removed in Ruby 3.0.
# encode_www_form_component escapes every reserved character for use in a query string.
encoded_url = URI.encode_www_form_component("YOUR URL")

<?php

$url_encoded = urlencode("YOUR URL");

?>

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// QueryEscape percent-encodes the string for use inside a query
	// parameter (spaces become "+").
	encoded_url := url.QueryEscape("YOUR URL")

	// Use the result: Go refuses to compile an unused local variable.
	fmt.Println(encoded_url)
}

Javascript Rendering

By default, ScrapingBee fetches the URL you want to scrape through a headless browser that will execute the JavaScript code on the page. This is the default behavior and costs 5 credits per request.

This can be very useful if you are scraping a Single Page Application built with frameworks like React.js / Angular.js / JQuery or Vue.

To fetch the URL without going through a headless browser, just use the render_js=false parameter in your GET request.

Example with a dummy Single Page Application (SPA):

If you use render_js=true (default behavior)


curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    response = requests.get(
        url="https://app.scrapingbee.com/api/v1/",
        params={
            "api_key": "YOUR-API-KEY",
            "url": "YOUR-URL",
        },

    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)
send_request()

// request Classic
const https = require('https')

const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=YOUR-URL',
    method: 'GET',

}

const req = https.request(options, res => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', d => {
        process.stdout.write(d)
    })
})

req.on('error', error => {
    console.error(error)
})

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  private static void sendRequest() {

    // Classic (GET )

    try {

      // Create request
      Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL")



      // Fetch request and return content
      .execute().returnContent();

      // Print content
      System.out.println(content);
    }
    catch (IOException e) { System.out.println(e); }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL')

    # Create client
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Create Request
    req =  Net::HTTP::Get.new(uri)

    # Fetch Request
    res = http.request(req)
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);



// send the request and save response to $response
$response = curl_exec($ch);

// stop if fails
if (!$response) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic performs a single GET request against the ScrapingBee API
// and prints the response status, headers and body.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request
	resp, err := client.Do(req)
	if err != nil {
		// Must return here: resp is nil on error and dereferencing it panics.
		fmt.Println("Failure : ", err)
		return
	}
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

func main() {
	sendClassic()
}

You will get this result.

<html>
  <head>
     ...
  </head>
  <body>
     <content>
     </content>
     <content>
     </content>
     <content>
     </content>
      <content>
     </content>
     <content>
     </content>
  </body>
</html>             

But if you use render_js=false


curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    response = requests.get(
        url="https://app.scrapingbee.com/api/v1/",
        params={
            "api_key": "YOUR-API-KEY",
            "url": "YOUR_URL",
            "render_js": "false",
        },

    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)
send_request()

// request Classic
const https = require('https')

const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false',
    method: 'GET',

}

const req = https.request(options, res => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', d => {
        process.stdout.write(d)
    })
})

req.on('error', error => {
    console.error(error)
})

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  private static void sendRequest() {

    // Classic (GET )

    try {

      // Create request
      Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false")



      // Fetch request and return content
      .execute().returnContent();

      // Print content
      System.out.println(content);
    }
    catch (IOException e) { System.out.println(e); }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false')

    # Create client
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Create Request
    req =  Net::HTTP::Get.new(uri)

    # Fetch Request
    res = http.request(req)
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);



// send the request and save response to $response
$response = curl_exec($ch);

// stop if fails
if (!$response) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic performs a single GET request against the ScrapingBee API
// (JavaScript rendering disabled) and prints the response.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR_URL&render_js=false", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request
	resp, err := client.Do(req)
	if err != nil {
		// Must return here: resp is nil on error and dereferencing it panics.
		fmt.Println("Failure : ", err)
		return
	}
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

func main() {
	sendClassic()
}

You will get this result instead.

<html>
  <head>
  ..
  </head>
  <body>
  </body>
</html>          

Javascript Execution

You can ask ScrapingBee API to execute arbitrary Javascript code inside our Headless Chrome instance.

This can be useful for example if you need to perform a scroll in case of an infinite scroll triggering AJAX requests to load more elements.

Or if you need to click some button before a specific information is being displayed.

To do so, you need to add the parameter js_snippet with your snippet encoded in base64.

If you need help encoding your JavaScript snippet in base64 you can find more information below:


echo  'YOUR JS SNIPPET' | base64
         

import base64
base64_snippet = base64.b64encode("YOUR JS SNIPPET".encode()).decode()

'use strict';

let js_snippet = 'YOUR JS SNIPPET';
// Buffer.from() replaces the deprecated and unsafe `new Buffer()` constructor.
let buff = Buffer.from(js_snippet);
let base64_snippet = buff.toString('base64');

import org.apache.commons.codec.binary.Base64;

byte[] encodedBytes = Base64.encodeBase64("YOUR JS SNIPPET".getBytes());
String base64_snippet = new String(encodedBytes);

require "base64"

# strict_encode64 avoids the trailing newline and 60-character line breaks
# that encode64 inserts, which would corrupt the js_snippet URL parameter.
base64_snippet = Base64.strict_encode64('YOUR JS SNIPPET')


<?php

$str = 'YOUR JS SNIPPET';
$base64_snippet = base64_encode($str);

?>

package main

import (
	b64 "encoding/base64"
	"fmt"
)

func main() {
	// The original snippet referenced b64 without importing it and left the
	// result unused — both are compile errors in Go.
	base64_snippet := b64.StdEncoding.EncodeToString([]byte("YOUR JS SNIPPET"))
	fmt.Println(base64_snippet)
}

Snippet examples

You can find below some of the most useful snippets.


// Scrolling to the end page
window.scrollTo(0,document.body.scrollHeight);

// Scrolling to the end page after 1 second (to wait for the page to load for example)
setTimeout(function(){window.scrollTo(0,document.body.scrollHeight);}, 1000);

// Clicking on a button
document.getElementById('my-button-selector').click();

// Clicking on a button after 1 second (to wait for the page to load for example)
setTimeout(function(){document.getElementById('my-button-selector').click();}, 1000);
                    

And below you'll find a sandbox to help you correctly encoding any JavaScript snippet you need to run on the webpage you want to scrape.

js_snippet=

Wait

Some heavy websites need a bit of time to "render" fully. If you need ScrapingBee to wait before it returns the fully rendered HTML, you can use the wait parameter with a value in milliseconds between 0 and 35000.

Our headless browsers will then wait for this amount of time before returning the page's HTML.

If you need some help setting all this up, do not hesitate to contact us.

Wait For

Sometimes, you will want to wait for a particular element to appear in the DOM before ScrapingBee returns the HTML content.

Our headless browsers will wait for the CSS selector passed in the parameter before returning the HTML.

For example, if you need to wait for the element <div class="loading-done"></div> use wait_for=.loading-done in your request.

Premium Proxy

For some hard-to-scrape websites, you may need to use premium proxies (also called residential proxies). These proxies almost never get blocked, and you should definitely try them in case of error codes or difficult-to-scrape websites, like search engines, social networks, or hard-to-scrape e-commerce websites.

To do so, you need to add the parameter premium_proxy=true.

Each request with this parameter will count as 25 API credits if used with JavaScript rendering enabled, and 10 credits otherwise.

Advanced Usage

Geolocation

In addition to premium proxies, you can also choose the proxy country from the following countries with the parameter country_code=COUNTRY_CODE.

So if you want to use premium proxies from Germany for example you need to set both premium_proxy=true and country_code=de parameters on your API call.


curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    response = requests.get(
        url="https://app.scrapingbee.com/api/v1/",
        params={
            "api_key": "YOUR-API-KEY",
            "url": "YOUR-URL",
            "cookies": "cookie_name_1=cookie_value1;cookie_name_2=cookie_value_2",
            "premium_proxy": "true",
            "country_code":"de"
        },

    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)
send_request()

// request Classic
const https = require('https')

const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de',
    method: 'GET',

}

const req = https.request(options, res => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', d => {
        process.stdout.write(d)
    })
})

req.on('error', error => {
    console.error(error)
})

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  private static void sendRequest() {

    // Classic (GET )

    try {

      // Create request
      Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de")



      // Fetch request and return content
      .execute().returnContent();

      // Print content
      System.out.println(content);
    }
    catch (IOException e) { System.out.println(e); }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de')

    # Create client
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Create Request
    req =  Net::HTTP::Get.new(uri)

    # Fetch Request
    res = http.request(req)
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);



// send the request and save response to $response
$response = curl_exec($ch);

// stop if fails
if (!$response) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic performs a single GET request against the ScrapingBee API
// (custom cookies + German premium proxy) and prints the response.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=YOUR-URL&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2&premium_proxy=true&country_code=de", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request
	resp, err := client.Do(req)
	if err != nil {
		// Must return here: resp is nil on error and dereferencing it panics.
		fmt.Println("Failure : ", err)
		return
	}
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

func main() {
	sendClassic()
}

Here is the list of supported country codes (With ISO 3166-1 format ).

country_code Country Name
br Brazil
ca Canada
fr France
de Germany
gr Greece
il Israel
it Italy
mx Mexico
nl Netherlands
ru Russia
es Spain
se Sweden
ua Ukraine
us United States
gb United Kingdom

Header Forwarding

You might need to forward specific headers to the website you want to scrape.

In order to do so, you first have to set forward_headers to true and then pass your custom headers.

Then you will have to prefix the headers you want to forward to the website with "Spb-" (for ScraPingBee).

This prefix will be trimmed by ScrapingBee before the request actually hits the target webpage.

Prefixing the headers is required so that we can distinguish the headers you want to forward to the target website from the headers created by your HTTP client.

Example :

If you want to send the header Accept-Language: En-US, add the header: Spb-Accept-Language: En-US and the parameter forward_headers=true to the request sent to our API.


curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true" \
  -H "Spb-Accept-Language:En-US"  
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    response = requests.get(
        url="https://app.scrapingbee.com/api/v1/",
        params={
            "api_key": "YOUR-API-KEY",
            "url": "http://httpbin.org/headers?json",
            "forward_headers": "true",
        },
        headers={
            "Spb-Accept-Language": "En-US",
        }
    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)
send_request()

// request Classic
const https = require('https')

const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true',
    method: 'GET',
    headers:{ "Spb-Accept-Language": "En-US",}
}

const req = https.request(options, res => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', d => {
        process.stdout.write(d)
    })
})

req.on('error', error => {
    console.error(error)
})

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  private static void sendRequest() {

    // Classic (GET )

    try {

      // Create request
      Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true")

      // Add headers
        .addHeader("Spb-Accept-Language", "En-US")

      // Fetch request and return content
      .execute().returnContent();

      // Print content
      System.out.println(content);
    }
    catch (IOException e) { System.out.println(e); }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true')

    # Create client
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Create Request
    req =  Net::HTTP::Get.new(uri)
    # Add headers
    req.add_field "Spb-Accept-Language", "En-US"

    # Fetch Request
    res = http.request(req)
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// set headers
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    'Spb-Accept-Language: En-US',

]);

// send the request and save response to $response
$response = curl_exec($ch);

// stop if fails
if (!$response) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fheaders%3Fjson&forward_headers=true", nil)

// Headers
        req.Header.Add("Spb-Accept-Language", "En-US")

	parseFormErr := req.ParseForm()
	if parseFormErr != nil {
		fmt.Println(parseFormErr)
	}

	// Fetch Request
	resp, err := client.Do(req)

	if err != nil {
		fmt.Println("Failure : ", err)
	}

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

func main() {
    sendClassic()
}

In the above example, we are scraping httpbin.org/headers?json, a page that will return the headers it received.

Here is the response from the above code, note the Accept-Language header.

{
  "headers": {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7",
    "Host": "httpbin.org",
    "Accept-Language": "En-US" # <-- Your header
  }
}                   

Custom Cookies

You can pass custom cookies to the target webpages

To do this just pass a cookie string in the cookies parameter.

We currently only handle the name and value of custom cookies. If you want to set multiple cookies separate them with ;.

Example with cookie_name_1=cookie_value1;cookie_name_2=cookie_value_2:


 curl "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    response = requests.get(
        url="https://app.scrapingbee.com/api/v1/",
        params={
            "api_key": "YOUR-API-KEY",
            "url": "http://httpbin.org/cookies?json",
            "cookies": "cookie_name_1=cookie_value1;cookie_name_2=cookie_value_2",
        },

    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)
send_request()

// request Classic
const https = require('https')

const options = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2',
    method: 'GET',

}

const req = https.request(options, res => {
    console.log(`statusCode: ${ res.statusCode }`)
    res.on('data', d => {
        process.stdout.write(d)
    })
})

req.on('error', error => {
    console.error(error)
})

req.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  private static void sendRequest() {

    // Classic (GET )

    try {

      // Create request
      Content content = Request.Get("https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2")



      // Fetch request and return content
      .execute().returnContent();

      // Print content
      System.out.println(content);
    }
    catch (IOException e) { System.out.println(e); }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET )
def send_request
    uri = URI('https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2')

    # Create client
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Create Request
    req =  Net::HTTP::Get.new(uri)

    # Fetch Request
    res = http.request(req)
    puts "Response HTTP Status Code: #{ res.code }"
    puts "Response HTTP Response Body: #{ res.body }"
rescue StandardError => e
    puts "HTTP Request failed (#{ e.message })"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// send the request and save response to $response
$response = curl_exec($ch);

// stop only on a transport error: curl_exec() returns false on failure,
// but a legitimate empty (or "0") body is also falsy, so compare strictly
if ($response === false) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic performs a classic GET through the ScrapingBee API, forwarding
// two custom cookies to httpbin.org/cookies?json, and prints the status,
// headers and body of the response.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request; bail out on a malformed URL instead of ignoring err.
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/?api_key=YOUR-API-KEY&url=http%3A%2F%2Fhttpbin.org%2Fcookies%3Fjson&cookies=cookie_name_1%3Dcookie_value1%3Bcookie_name_2%3Dcookie_value_2", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request; return early on error so resp is never dereferenced nil.
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}
	// Always close the body to release the underlying connection.
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

// Entry point for the sample: runs the classic GET request.
func main() {
    sendClassic()
}

In the above example, we're scraping httpbin.org/cookies?json, a page that will return the cookies it received.

Here is what will be returned by the above code.

{
  "cookies": {
    "cookie_name_1": "cookie_value1",
    "cookie_name_2": "cookie_value_2"
  }
}                   

Page Source

If you want to have the HTML returned by the server and unaltered by the browser (before the Javascript execution), you can use return_page_source=true

This parameter is useless if you don't use JavaScript rendering.

Blocking Ads

By default, we have chosen not to block ads. If you don't want to scrape them (to speed up your request, for example), you can use block_ads=true

This parameter is useless if you don't use JavaScript rendering.

Blocking Images and CSS

By default, and to speed up your request, we block all images and CSS on the page you want to scrape, but if you want to scrape them you can use block_resources=false

This parameter is useless if you don't use JavaScript rendering.

Post

To use this, just send a POST request to the main endpoint with your api_key and url parameters.

POST data will be forwarded transparently to the target web page.

Headers and cookies will be returned.

Below is an example of how to do it on httpbin.org, a service mirroring your HTTP requests.

curl -X "POST" "https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY" \
     -H 'Content-Type: application/x-www-form-urlencoded; charset=utf-8' \
     --data-urlencode "KEY_1=VALUE_1"
         
# Install the Python Requests library:
# `pip install requests`

import requests


def send_request():
    """POST through the ScrapingBee API and print the status and body.

    The form field KEY_1=VALUE_1 is forwarded transparently to the target
    page (httpbin.org/anything), which mirrors the request back.
    """
    # Post
    # POST https://app.scrapingbee.com/api/v1
    try:
        response = requests.post(
            "https://app.scrapingbee.com/api/v1",
            params={
                "url": "https://httpbin.org/anything",
                "api_key": "YOUR-API-KEY",
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            },
            data={"KEY_1": "VALUE_1"},
        )
        print(f'Response HTTP Status Code: {response.status_code}')
        print(f'Response HTTP Response Body: {response.content}')
    except requests.exceptions.RequestException:
        print('HTTP Request failed')
// request Post
(function(callback) {
    'use strict';

    const https = require('https');
    const responseEncoding = 'utf8';

    // POST /api/v1: api_key and target url travel in the query string.
    const requestOptions = {
        hostname: 'app.scrapingbee.com',
        port: '443',
        path: '/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY',
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            'User-Agent': 'node ' + process.version,
        },
    };

    const request = https.request(requestOptions, (res) => {
        const bufferChunks = [];
        let bodyText = '';

        res.on('data', (chunk) => {
            // Collect Buffer chunks separately so they can be decoded once.
            if (Buffer.isBuffer(chunk)) {
                bufferChunks.push(chunk);
            } else {
                bodyText += chunk;
            }
        });
        res.on('end', () => {
            if (bufferChunks.length > 0) {
                bodyText = Buffer.concat(bufferChunks).toString(responseEncoding);
            }
            callback(null, res.statusCode, res.headers, bodyText);
        });
    });

    request.setTimeout(0);
    request.on('error', (error) => {
        callback(error);
    });

    // Form body forwarded transparently to the target page.
    request.write('KEY_1=VALUE_1');
    request.end();
})((error, statusCode, headers, body) => {
    console.log('ERROR:', error);
    console.log('STATUS:', statusCode);
    console.log('HEADERS:', JSON.stringify(headers));
    console.log('BODY:', body);
});

import java.io.IOException;
import org.apache.http.client.fluent.*;
import org.apache.http.entity.ContentType;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  // Send a form-encoded POST through the ScrapingBee API; the form data is
  // forwarded transparently to httpbin.org/anything, and the mirrored
  // response body is printed.
  private static void sendRequest() {
    try {
      // Form payload forwarded to the target page.
      Form form = Form.form().add("KEY_1", "VALUE_1");

      // Execute the POST and read the response content.
      Content content = Request
          .Post("https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY")
          .addHeader("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
          .bodyForm(form.build())
          .execute()
          .returnContent();

      System.out.println(content);
    } catch (IOException e) {
      System.out.println(e);
    }
  }
}
require 'net/http'
require 'net/https'

# Post (POST): send a form-encoded body through the ScrapingBee API to
# httpbin.org/anything and print the HTTP status and mirrored body.
def send_request
  endpoint = URI('https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY')

  # HTTPS client with certificate verification enabled
  client = Net::HTTP.new(endpoint.host, endpoint.port)
  client.use_ssl = true
  client.verify_mode = OpenSSL::SSL::VERIFY_PEER

  # URL-encode the form fields forwarded to the target page
  form_fields = {
    "KEY_1" => "VALUE_1",
  }

  request = Net::HTTP::Post.new(endpoint)
  request.add_field "Content-Type", "application/x-www-form-urlencoded; charset=utf-8"
  request.body = URI.encode_www_form(form_fields)

  response = client.request(request)
  puts "Response HTTP Status Code: #{response.code}"
  puts "Response HTTP Response Body: #{response.body}"
rescue StandardError => e
  puts "HTTP Request failed (#{e.message})"
end
<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// set headers
curl_setopt($ch, CURLOPT_HTTPHEADER, [
  'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
]);

// form body, forwarded transparently to the target page
$body = [
  'KEY_1' => 'VALUE_1',
];
$body = http_build_query($body);

// set body
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $body);

// send the request and save response to $response
$response = curl_exec($ch);

// stop only on a transport error: curl_exec() returns false on failure,
// but a legitimate empty (or "0") body is also falsy, so compare strictly
if ($response === false) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"bytes"
)

// sendPost sends a form-encoded POST through the ScrapingBee API; the form
// data is forwarded transparently to httpbin.org/anything and the mirrored
// response is printed.
func sendPost() {
	// Post (POST https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY)

	// URL-encode the form fields forwarded to the target page.
	params := url.Values{}
	params.Set("KEY_1", "VALUE_1")
	body := bytes.NewBufferString(params.Encode())

	// Create client
	client := &http.Client{}

	// Create request; bail out on a malformed URL instead of ignoring err.
	req, err := http.NewRequest("POST", "https://app.scrapingbee.com/api/v1?url=https:%2F%2Fhttpbin.org%2Fanything&api_key=YOUR-API-KEY", body)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Headers
	req.Header.Add("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")

	// Fetch Request; return early on error so resp is never dereferenced nil.
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}
	// Always close the body to release the underlying connection.
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

And here is the response you'll get:


{
  "args": {},
  "data": "",
  "files": {},
  "form": {
    "KEY_1": "VALUE_1"
  },
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Content-Length": "13",
    "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
    "Host": "httpbin.org",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0",
    "X-Amzn-Trace-Id": "Root=1-5eb416d8-65c76a78a809bb07614d7cfe"
  },
  "json": null,
  "method": "POST",
  "origin": "23.250.56.153",
  "url": "https://httpbin.org/anything"
}
                    

Session (coming soon)

Credit cost for your requests

Each plan gives a certain amount of API credits per month.

Depending on the parameters you use with your API calls it will cost you from one to one hundred API credits.

Here is a breakdown of those costs:

Feature used API credit cost
Rotating Proxy without JavaScript rendering 1
Rotating Proxy with JavaScript rendering (default) 5
Premium Proxy without JavaScript rendering 10
Premium Proxy with JavaScript rendering 25

Usage endpoint

If you want to programmatically monitor your credit consumption and concurrency usage, you may use our /usage endpoint.

Calls to this endpoint do not increase your concurrency, but you can only call it 6 times per minute.

Please note that the results are available in real time.


curl "https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY&url=YOUR-URL"
         

#  Install the Python Requests library:
# `pip install requests`
import requests

def send_request():
    """GET the /usage endpoint and print the HTTP status and body."""
    response = requests.get(
        "https://app.scrapingbee.com/api/v1/usage",
        params={"api_key": "YOUR-API-KEY"},
    )
    print('Response HTTP Status Code: ', response.status_code)
    print('Response HTTP Response Body: ', response.content)


send_request()

// request Classic
const https = require('https')

// GET /usage: returns credit consumption and concurrency counters.
const requestOptions = {
    hostname: 'app.scrapingbee.com',
    port: '443',
    path: '/api/v1/usage?api_key=YOUR-API-KEY',
    method: 'GET',
}

const usageRequest = https.request(requestOptions, response => {
    console.log(`statusCode: ${response.statusCode}`)
    // Stream the body straight to stdout as it arrives.
    response.on('data', chunk => {
        process.stdout.write(chunk)
    })
})

usageRequest.on('error', err => {
    console.error(err)
})

usageRequest.end()

import java.io.IOException;
import org.apache.http.client.fluent.*;

public class SendRequest
{
  public static void main(String[] args) {
    sendRequest();
  }

  // GET the /usage endpoint and print the returned JSON (credit consumption
  // and concurrency counters).
  private static void sendRequest() {
    try {
      String endpoint = "https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY";

      // Execute the request and read the body as text.
      Content content = Request.Get(endpoint)
          .execute()
          .returnContent();

      System.out.println(content);
    } catch (IOException e) {
      System.out.println(e);
    }
  }
}

require 'net/http'
require 'net/https'

# Classic (GET): query the /usage endpoint and print the HTTP status and body.
def send_request
    endpoint = URI('https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY')

    # HTTPS client with certificate verification enabled
    client = Net::HTTP.new(endpoint.host, endpoint.port)
    client.use_ssl = true
    client.verify_mode = OpenSSL::SSL::VERIFY_PEER

    # Build and fire the GET request
    response = client.request(Net::HTTP::Get.new(endpoint))

    puts "Response HTTP Status Code: #{response.code}"
    puts "Response HTTP Response Body: #{response.body}"
rescue StandardError => e
    puts "HTTP Request failed (#{e.message})"
end

send_request()

<?php

// get cURL resource
$ch = curl_init();

// set url
curl_setopt($ch, CURLOPT_URL, 'https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY');

// set method
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

// return the transfer as a string
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// send the request and save response to $response
$response = curl_exec($ch);

// stop only on a transport error: curl_exec() returns false on failure,
// but a legitimate empty (or "0") body is also falsy, so compare strictly
if ($response === false) {
  die('Error: "' . curl_error($ch) . '" - Code: ' . curl_errno($ch));
}

echo 'HTTP Status Code: ' . curl_getinfo($ch, CURLINFO_HTTP_CODE) . PHP_EOL;
echo 'Response Body: ' . $response . PHP_EOL;

// close curl resource to free up system resources
curl_close($ch);

?>

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

// sendClassic queries the /usage endpoint (credit consumption and
// concurrency counters) and prints the status, headers and body.
func sendClassic() {
	// Create client
	client := &http.Client{}

	// Create request; bail out on a malformed URL instead of ignoring err.
	req, err := http.NewRequest("GET", "https://app.scrapingbee.com/api/v1/usage?api_key=YOUR-API-KEY", nil)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}

	// Fetch Request; return early on error so resp is never dereferenced nil.
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("Failure : ", err)
		return
	}
	// Always close the body to release the underlying connection.
	defer resp.Body.Close()

	// Read Response Body
	respBody, _ := ioutil.ReadAll(resp.Body)

	// Display Results
	fmt.Println("response Status : ", resp.Status)
	fmt.Println("response Headers : ", resp.Header)
	fmt.Println("response Body : ", string(respBody))
}

// Entry point for the sample: runs the /usage GET request.
func main() {
    sendClassic()
}

Results.

{
    "max_api_credit": 20000000,
    "used_api_credit": 3704332,
    "max_concurrency": 200,
    "current_concurrency": 1
}                   

Response Status Code

Please find here the list of HTTP codes returned by ScrapingBee.

Code Billed? Meaning Solution
200 Yes Successful Call
400 No Bad request Incorrect parameters or parameters type. See the message in the response body.
401 No No more credit available Please upgrade your plan or contact sales.
404 Yes Requested URL not found Pass a valid URL.
429 No Too many concurrent requests. Please upgrade your plan or contact sales.
500 No Misc error Please retry, see the message in the response body.

Response Headers

Please find here the list of additional HTTP headers returned by ScrapingBee.

Name Meaning
Spb-cost Request cost in credits.
Spb-initial-status-code The initial status code returned by the page you want to scrape.
Useful when there are redirects.
Spb-resolved-url The resolved URL of the page you want to scrape.
Useful when there are redirects.