Commit 7ae3ba16 authored by Deepak Nadig's avatar Deepak Nadig

Added MISP preprocessor.

parent 98982a0a
Pipeline #71 failed with stages
# Created by .ignore support plugin (hsz.mobi)
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/dictionaries
.idea/**/shelf
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
......@@ -298,7 +298,7 @@ filter {
"comments" => "Not current? Updated multiple times a day?"
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......@@ -316,7 +316,7 @@ filter {
"comments" => ""
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......@@ -334,7 +334,7 @@ filter {
"comments" => ""
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......@@ -352,7 +352,7 @@ filter {
"comments" => ""
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......@@ -490,7 +490,7 @@ filter {
"comments" => "These hosts have tried and failed to log on to the pop3 service at bsdly.net."
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......@@ -537,7 +537,7 @@ filter {
"intel-analysis" => "2"
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
remove_field => [ "DstIP", "DstPort", "Reason" ]
......@@ -580,7 +580,7 @@ filter {
"comments" => ""
"intel-category" => "Network Activity"
"exportable_to_ids" => "true"
"intel_type" => "dst-ip"
"intel_type" => "ip-dst"
"threat_level_id" => "2"
}
}
......
# Ingest the per-event JSON files produced by the MISP Python preprocessor.
input {
  file {
    type => "json"
    codec => "json"
    # One JSON document (a single {"Event": [...]} object) per file.
    path => "/home/ubuntu/intel/json2/*"
    # Persist read offsets so a pipeline restart does not re-ingest files.
    sincedb_path => "/var/tmp/.sincedb_threats"
    # sincedb_path => "/dev/null"
    # exclude => "*.py"
    start_position => "beginning"
  }
}
filter {
  split {
    field => "[Event]"
  }
  # New fields, then flatten the MISP [Event] structure to top-level fields.
  mutate {
    add_field => {"intel-has-subtypes" => ""}
    add_field => {"intel-subtype-1" => ""}
    add_field => {"intel-subtype-2" => ""}
    add_field => {"intel-subtype-3" => ""}
    add_field => {"intel-type-metadata" => ""}
    rename => {"[Event][analysis]" => "intel-analysis"}
    rename => {"[Event][threat_level_id]" => "threat-level-id"}
    rename => {"[Event][info]" => "intel-source"}
    rename => {"[Event][org]" => "intel-organization"}
    rename => {"[Event][published]" => "intel-published"}
    rename => {"[Event][timestamp]" => "intel-timestamp"}
    rename => {"[Event][category]" => "intel-category"}
    rename => {"[Event][comment]" => "comments"}
    rename => {"[Event][uuid]" => "intel-uuid"}
    rename => {"[Event][to_ids]" => "exportable-to-ids"}
    rename => {"[Event][value]" => "intel-value"}
    rename => {"[Event][type]" => "intel-type"}
    convert => {
      "intel-analysis" => "integer"
      "threat-level-id" => "integer"
    }
    add_tag => [ "misp_daily_updates" ]
    remove_field => [ "[Event]" ]
  }
  ############################################################################
  # Field Processing Begins
  ############################################################################
  # IP Fields
  # FIX: the original conditionals compared the literal string "intel-type"
  # with the literal type name (always false), so no branch ever ran. Field
  # values must be referenced as [intel-type].
  if [intel-type] == "ip-src" {
    mutate {
      replace => [ "intel-type", "IP" ]
      update => {"intel-type-metadata" => "ip-src"}
    }
  } else if [intel-type] == "ip-dst" {
    mutate {
      replace => [ "intel-type", "IP" ]
      update => {"intel-type-metadata" => "ip-dst"}
    }
  } else if [intel-type] == "dst-ip" {
    mutate {
      replace => [ "intel-type", "IP" ]
      update => {"intel-type-metadata" => "dst-ip"}
    }
  }
  # File Hash Fields
  if [intel-type] == "sha256" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "sha256"}
    }
  } else if [intel-type] == "md5" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "md5"}
    }
  } else if [intel-type] == "sha1" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "sha1"}
    }
  } else if [intel-type] == "sha224" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "sha224"}
    }
  } else if [intel-type] == "sha384" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "sha384"}
    }
  } else if [intel-type] == "sha512" {
    mutate {
      replace => [ "intel-type", "FILEHASH" ]
      update => {"intel-type-metadata" => "sha512"}
    }
  } else if [intel-type] == "filename|sha256" {
    # FIX: mutate applies its operations in a fixed internal order in which
    # `split` runs AFTER `update`, so the split and the updates reading its
    # result must live in separate mutate blocks. Also fixed the malformed
    # sprintf ({%intel-type[0]} -> %{[intel-type][0]}).
    mutate {
      split => { "intel-type" => "|" }
    }
    mutate {
      # Capture the subtype before intel-type is collapsed below.
      update => {"intel-subtype-1" => "%{[intel-type][1]}"}
      update => {"intel-has-subtypes" => "true"}
    }
    mutate {
      update => {"intel-type" => "%{[intel-type][0]}"}
    }
  }
  ############################################################################
  # Field Processing Ends
  ############################################################################
  # Extract an IP address (if any) from the indicator value for enrichment.
  grok {
    match => { "intel-value" => "%{IP:intel-ip}"}
    tag_on_failure => [ ]
  }
  # add geoip attributes
  geoip {
    source => "intel-ip"
    tag_on_failure => [ ]
  }
  # MISP timestamps are epoch seconds.
  date {
    match => [ "intel-timestamp", "UNIX" ]
    target => "intel-timestamp"
  }
  translate {
    field => "intel-analysis"
    destination => "intel-analysis-code"
    dictionary => [
      "0", "Initial",
      "1", "Ongoing",
      "2", "Complete"
    ]
  }
  translate {
    field => "threat-level-id"
    destination => "threat-level-code"
    dictionary => [
      "1", "High",
      "2", "Medium",
      "3", "Low",
      "4", "Undefined"
    ]
  }
  # Fingerprint (type + value) used downstream as a deduplicating doc id.
  fingerprint {
    concatenate_sources => true
    source => ["intel-type", "intel-value"]
    target => "[@metadata][fingerprint]"
    method => "MURMUR3"
  }
  if "_jsonparsefailure" in [tags] {
    drop { }
  }
}
output {
  # Debug copy of every event on stdout.
  stdout { codec => rubydebug }
  elasticsearch {
    hosts => localhost
    # The fingerprint-based id makes re-ingestion idempotent (duplicate
    # type+value indicators overwrite instead of accumulating).
    document_id => "%{[@metadata][fingerprint]}"
    index => "intel"
  }
}
# Map raw MISP attribute types onto the coarse intel classes used downstream.
# The sha1/sha224/sha384/sha512 entries mirror the Logstash filter's hash
# handling so the Python preprocessor and the pipeline stay consistent.
type_dict = {
    "ip": "IP",
    "ip-src": "IP",
    "ip-dst": "IP",
    "src-ip": "IP",
    "dst-ip": "IP",
    "md5": "FILEHASH",
    "sha1": "FILEHASH",
    "sha224": "FILEHASH",
    "sha256": "FILEHASH",
    "sha384": "FILEHASH",
    "sha512": "FILEHASH",
}
import json
import pprint
import os
import sys
from inteltypes import type_dict
# Progress Bar
def progress(count, total, status=''):
    """Render an in-place text progress bar on stdout.

    Args:
        count: Number of items completed so far.
        total: Total number of items. Guarded so 0 does not raise
            ZeroDivisionError (the original crashed on an empty directory).
        status: Optional trailing status text.
    """
    bar_len = 60
    # Clamp the denominator so an empty work list is a no-op, not a crash.
    denom = max(float(total), 1.0)
    filled_len = int(round(bar_len * count / denom))
    percents = round(100.0 * count / denom, 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    # '\r' (no newline) so successive calls redraw the same terminal line.
    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
# For Python 3.6 and above
# directory = os.fsencode('/home/ubuntu/intel/json')
# Source directory containing raw MISP JSON exports (str path).
directory = '/home/dna/intel/json'
# Output directory for per-event JSON files; kept as bytes so it joins
# cleanly with the os.fsencode()d filenames built in the driver loop.
target = os.fsencode('/home/dna/intel/json2')
# Import-time side effect: ensure the output directory exists.
if not os.path.exists(target):
    os.makedirs(target)
# Counters shared with the driver loop below.
total_num_events = 0
local_event_data = dict()
progress_counter = 0
# Per-source-file sequence number used to prefix output filenames.
i = 1
def json_file_reader(source, base_dir=None):
    """Load and return the parsed JSON document ``base_dir``/``source``.

    Only filenames ending in '.json' are read; anything else is reported
    and skipped, returning None.

    Args:
        source: Filename (not a full path) of the export to read.
        base_dir: Directory containing ``source``. Defaults to the
            module-level ``directory`` so existing callers are unaffected.

    Returns:
        The parsed JSON object, or None for non-JSON files.
    """
    # print('Starting {}.\n'.format(filename))
    # Uncomment for Python >= 3.6
    # source = os.fsdecode(source)
    if base_dir is None:
        base_dir = directory
    if source.endswith(".json"):
        with open(os.path.join(base_dir, source), 'r') as read_file:
            return json.load(read_file)
    print('SKIPPING {}...\n'.format(source))
    return None
def json_file_writer(data, file, location):
    """Serialize ``data`` as JSON to ``location``/``file``, ending with a
    newline so each output file is a complete, line-terminated document."""
    destination = os.path.join(location, file)
    with open(destination, 'w') as write_file:
        json.dump(data, write_file)
        write_file.write("\n")
def process_common_fields(data, source_name=''):
    """Flatten one MISP event export into a list of per-attribute records.

    Event-level metadata (analysis, threat level, org, timestamps) is copied
    onto every attribute; MISP field names are remapped to the pipeline's
    'intel-*' naming; compound 'a|b' types and values are split into
    base + subtype fields; raw types are normalized via ``type_dict``.

    Args:
        data: Parsed MISP export, a dict with an 'Event' key.
        source_name: Name of the source file, used only in the skip message.
            (The original referenced the driver loop's global ``filename``
            here, a latent NameError when called standalone.)

    Returns:
        (records, count): the list of enriched attribute dicts and its length.
    """
    local_event_list = list()
    event = data['Event']
    # Event-level header data, copied onto every attribute below.
    intel_info = event['info']
    intel_analysis = event['analysis']
    threat_level_id = event['threat_level_id']
    # Older exports may lack 'timestamp'; fall back to the event date.
    intel_timestamp = event['timestamp'] if ('timestamp' in event) else event['date']
    intel_org = event['Orgc']['name']
    if 'published' not in event:
        intel_published = ""
        print('No published timestamp. Skipping')
    else:
        intel_published = event['published']
    # Attribute data.
    if 'Attribute' not in event:
        print('Field "Attribute" not available. Skipping {}...'.format(source_name))
    else:
        for item in event['Attribute']:
            item['intel-analysis'] = intel_analysis
            item['threat-level-id'] = threat_level_id
            item['intel-source'] = intel_info
            item['intel-organization'] = intel_org
            item['intel-published'] = intel_published
            item['intel-timestamp'] = intel_timestamp
            item['intel-category'] = item.pop('category')
            item['comments'] = item.pop('comment')
            item['intel-uuid'] = item.pop('uuid')
            item['exportable-to-ids'] = item.pop('to_ids')
            item['intel-value'] = item.pop('value')
            item['intel-type'] = item.pop('type')
            # Split compound types such as 'filename|sha256'.
            if '|' in item['intel-type']:
                type_parts = item['intel-type'].split('|')
                item['intel-has-subtype'] = "true"
                item['intel-subtype-1'] = type_parts[1]
                item['intel-type'] = type_parts[0]
            else:
                item['intel-has-subtype'] = 'false'
                item['intel-subtype-1'] = ''
            # Split compound values such as 'evil.exe|<hash>'.
            if '|' in item['intel-value']:
                value_parts = item['intel-value'].split('|')
                item['intel-has-values'] = "true"
                item['intel-value-2'] = value_parts[1]
                item['intel-value'] = value_parts[0]
            else:
                item['intel-has-values'] = 'false'
                item['intel-value-2'] = ''
            # Normalize known raw types (ip-src, md5, ...) to coarse classes,
            # preserving the raw type as the subtype.
            if item['intel-type'] in type_dict:
                item['intel-subtype-1'] = item['intel-type']
                item['intel-type'] = type_dict[item['intel-type']]
            local_event_list.append(item)
    return local_event_list, len(local_event_list)
# Driver loop: preprocess every MISP export in `directory`, writing one
# output file per attribute into `target`.
entries = os.listdir(directory)  # hoisted: was re-listed every iteration
running_total = 0
for filename in entries:
    progress(progress_counter, len(entries), status='Processing...')
    progress_counter += 1
    data = json_file_reader(filename)
    # Non-.json files are skipped by the reader and yield None; the
    # original passed None straight into process_common_fields and crashed.
    if data is None:
        continue
    events, total_num_events = process_common_fields(data)
    running_total += total_num_events
    for event in events:
        # Zero-padded sequence prefix keeps per-event files in order when
        # the output directory is listed lexically.
        prefix = '{:d}'.format(i).zfill(8) + '-'
        filename2 = os.fsencode(prefix + filename)
        local_event_data['Event'] = [event]
        json_file_writer(local_event_data, filename2, target)
        # Clear temporary local variables
        local_event_data.clear()
        i += 1
    # Restart numbering for the next source file.
    i = 1
print('\nCompleted preprocessing successfully.\n')
# The original printed only the LAST file's event count here; report the
# accumulated total instead.
print('Total processed events: {:,}\n'.format(running_total))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment