MS2PIP Server can be accessed through the webpage or through a RESTful API. This page provides an example Python script for contacting the API; the complete API reference is available as Swagger-generated documentation.

The following Python function contacts the MS2PIP Server RESTful API:

def run_ms2pip_server(peptides, frag_method, ptm_list, url=''):
    """Request peak intensity predictions from the MS2PIP Server RESTful API.

    The peptides are sent in batches of at most 100 000 rows. For every batch
    a task is posted to ``{url}/task``, its status is polled on
    ``{url}/task/{task_id}/status`` every 5 seconds, and the predictions are
    fetched from ``{url}/task/{task_id}/result`` once the task succeeded.

    Args:
        peptides: pandas DataFrame with at least the columns ``spec_id``,
            ``peptide``, ``charge`` and ``modifications``.
        frag_method: Name of the fragmentation model, sent to the server as
            ``params.frag_method``.
        ptm_list: List of PTM definitions, sent to the server as
            ``params.ptm``.
        url: Base URL of the MS2PIP Server API (without trailing slash).

    Returns:
        A pandas DataFrame with the columns ``spec_id``, ``charge``, ``ion``,
        ``ionnumber``, ``mz`` and ``prediction`` holding the results of all
        batches, or ``None`` if the input is invalid, no URL is given, or the
        server reports a problem (a message is printed in those cases).
    """
    # Imported locally so the function is self-contained; only needed for the
    # delay between two status polls.
    import time

    # Check if all columns are present in dataframe
    for col in ['spec_id', 'peptide', 'charge', 'modifications']:
        if col not in peptides.columns:
            print("{} is missing from peptides DataFrame".format(col))
            return None

    # Without a base URL every request below would fail inside `requests`
    # with a far less helpful error.
    if not url:
        print("No MS2PIP Server URL provided")
        return None

    # Split-up into batches of 100 000 peptides (maximum MS2PIP Server accepts per request)
    batch_size = 100000
    # Ceiling division, so an exact multiple of batch_size is not reported as
    # one batch too many.
    n_batches = -(-len(peptides) // batch_size)
    batch_results = []

    for batch_number, start in enumerate(range(0, len(peptides), batch_size), start=1):
        print("Working on batch {}/{}".format(batch_number, n_batches))
        peptides_batch = peptides.iloc[start:start + batch_size, :]

        # Combine data into dictionary for json post request
        input_data = {
            "params": {
                "frag_method": frag_method,
                "ptm": ptm_list,
            },
            "peptides": peptides_batch.to_dict(orient='list'),
        }

        # Post data to server and get task id
        response = requests.post('{}/task'.format(url), json=input_data)
        reply = response.json()
        if 'task_id' not in reply:
            if 'error' in reply:
                print("Server error: {}".format(reply['error']))
            else:
                print("Something went wrong")
            return None
        task_id = reply['task_id']
        print("Received task id: {}".format(task_id))

        # Check server task status and get result when ready
        status_url = '{}/task/{}/status'.format(url, task_id)
        response = requests.get(status_url)
        state = response.json()['state']

        if state != 'SUCCESS':
            print("Check MS2PIP Server status every 5 seconds", end='')
            # Force at least one more poll, whatever the first state was.
            state = 'PENDING'
            pending_count = 0

            while state == 'PENDING' or state == 'PROGRESS':
                time.sleep(5)
                print('.', end='')
                response = requests.get(status_url)
                state = response.json()['state']

                # Do not keep looping if task state is stuck on PENDING, it might have failed silently
                if state == 'PENDING':
                    pending_count += 1
                if pending_count > 24:
                    print("\nSomething went wrong")
                    return None

        if state == 'SUCCESS':
            response = requests.get('{}/task/{}/result'.format(url, task_id))
            # NOTE(review): the predictions are assumed to sit under the
            # 'ms2pip_out' key of the result payload; the column selection
            # below requires a DataFrame, so the key is taken from the JSON
            # before the frame is built. Confirm against the API reference.
            result_batch = pd.DataFrame.from_dict(response.json()['ms2pip_out'])
            result_batch = result_batch[['spec_id', 'charge', 'ion', 'ionnumber', 'mz', 'prediction']]
            batch_results.append(result_batch)
            print("Result received", end='\n\n')
        else:
            # `response` still holds the last status reply here.
            error_message = response.json()['status']
            print("Something went wrong: {}".format(error_message))
            return None

    print("Finished with all batches!")

    # DataFrame.append was removed in pandas 2.0; concatenate once at the end.
    # pd.concat raises on an empty list, hence the fallback.
    result = pd.concat(batch_results) if batch_results else pd.DataFrame()
    return result

The function takes three required arguments: peptides, frag_method and ptm_list. peptides is an MS2PIP PEPREC-formatted Pandas DataFrame. ptm_list is a list of MS2PIP-formatted PTM definitions. A detailed explanation of both data structures can be found on the MS2PIP Server webpage. frag_method is the specific model with which you want to predict peak intensities (e.g. HCD, CID, iTRAQ…). Check the MS2PIP documentation for a list of all available models.

The function also takes an optional argument url, with which you can provide a custom URL to the server. If it is omitted, the default value from the function signature is used.

An example for running the function:

# Define arguments
frag_method = 'HCD'
# NOTE(review): the PTM definition below is reconstructed to match the
# '3|Carbamidomethyl' modification used in `peptides`; it assumes the MS2PIP
# format 'name,mass_shift,opt,amino_acid' — confirm against the MS2PIP Server
# documentation before use.
ptm_list = [
    'Carbamidomethyl,57.021464,opt,C',
]
# One row per peptide; all four columns are required by run_ms2pip_server.
peptides = pd.DataFrame(data={
    'spec_id': ['peptide1', 'peptide2'],
    'peptide': ['STCINTFWLIVK', 'GRLNTFILK'],
    'modifications': ['3|Carbamidomethyl', '-'],
    'charge': [2, 3],
})

# Run function
result = run_ms2pip_server(peptides, frag_method, ptm_list)