

pip install git+https://github.com/TencentCloud/tencentcloud-cls-sdk-python.git
log_group {
    source    // Log source, which is usually the machine's IP address.
    filename  // Log file name.
    logs {
        time                  // Log time, which is a Unix timestamp in microseconds.
        user_defined_log_kvs  // User log fields.
    }
}
class SampleConsumer(ConsumerProcessorBase):
    """Sample processor: prints every consumed log as JSON, then saves the offset."""

    # time.time() value recorded at the most recent save_offset(True) attempt.
    last_check_time = 0

    def initialize(self, topic_id):
        """Remember the ID of the topic handed to this processor."""
        self.topic_id = topic_id

    def process(self, log_groups, offset_tracker):
        """Print each log in ``log_groups`` as one JSON object and save the offset.

        Args:
            log_groups: Iterable of log groups; each exposes ``filename``,
                ``source`` and ``logs``. Every log exposes ``time`` and
                ``contents`` (items with ``key`` and ``value``).
            offset_tracker: Object whose ``save_offset(flag)`` records progress.

        Returns:
            None.
        """
        for group in log_groups:
            for entry in group.logs:
                # Flatten one log row: group-level metadata first, then the
                # user-defined key/value fields of the log itself.
                record = {
                    'filename': group.filename,
                    'source': group.source,
                    'time': entry.time,
                }
                record.update((field.key, field.value) for field in entry.contents)

                # Subsequent data processing
                # put your business logic here
                print(json.dumps(record))

        # offset commit: pass True when more than 3 seconds have elapsed since
        # the last True save, otherwise pass False.
        # NOTE(review): presumably True forces the offset to be persisted
        # immediately -- confirm against the SDK.
        now = time.time()
        force = now - self.last_check_time > 3
        if force:
            # Recorded before the save is attempted, so a failed save still
            # restarts the 3-second window.
            self.last_check_time = now
        try:
            offset_tracker.save_offset(force)
        except Exception:
            # Best effort: report the failure and keep consuming.
            import traceback
            traceback.print_exc()

        return None
Parameter | Description | Default Value | Value Range |
endpoint | Access endpoint. | - | Supported regions: ALL |
access_key_id | Secret ID. | - | - |
access_key | Secret key. | - | - |
region | Topic's region. For example, ap-beijing, ap-guangzhou, ap-shanghai. For more details, see Regions and Access Domains. | - | Supported regions: ALL |
logset_id | Logset ID. Only one logset is supported. | - | - |
topic_ids | Log topic IDs. Separate multiple topic IDs with commas (,). | - | - |
consumer_group_name | Consumer Group Name | - | - |
internal | Whether to access over the private network. Private network: TRUE. Public network: FALSE. | FALSE | TRUE/FALSE |
consumer_name | Consumer name. Within the same consumer group, consumer names must be unique. | - | A string consisting of 0-9, a-z, A-Z, '-', '_', '.'. |
heartbeat_interval | The interval of heartbeats. If consumers fail to report a heartbeat for two intervals, they will be considered offline. | 20 | 0-30 minutes |
data_fetch_interval | The interval of consumer data pulling. Cannot be less than 1 second. | 2 | - |
offset_start_time | The start time for data pulling: a UNIX timestamp passed as a string, with second-level precision (for example, "1711607794"). It can also be set to "begin" or "end". begin: the earliest data within the log topic's lifetime. end: the latest data within the log topic's lifetime. | "end" | "begin"/"end"/UNIX timestamp |
max_fetch_log_group_size | The maximum size of data a consumer pulls in a single request, in bytes. Defaults to 2 MB; the maximum is 10 MB. | 2097152 | 2 MB - 10 MB |
offset_end_time | The end time for data pulling: a UNIX timestamp passed as a string, with second-level precision (for example, "1711607794"). If this field is left empty, data is pulled continuously. | - | - |
class App:
    """Sample application: starts one consumer worker and runs it until SIGTERM/SIGINT."""

    def __init__(self):
        """Read the configuration from environment variables and install signal handlers.

        Raises:
            ValueError: If the endpoint, access key ID, access key or logset ID
                is empty.
        """
        self.shutdown_flag = False
        # Created by consume(); None until the worker exists.
        self.consumer = None
        # access endpoint
        self.endpoint = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_ENDPOINT', '')
        # region
        self.region = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_REGION', '')
        # secret id
        self.access_key_id = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_ACCESSID', '')
        # secret key
        self.access_key = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_ACCESSKEY', '')
        # logset id
        self.logset_id = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_LOGSET_ID', '')
        # topic ids (comma-separated in the environment variable)
        self.topic_ids = os.environ.get('TENCENTCLOUD_LOG_SAMPLE_TOPICS', '').split(',')
        # consumer group name
        self.consumer_group = 'consumer-group-1'
        # consumer id, we recommend setting the consumer count equal to the
        # log topic partition count.
        self.consumer_name1 = "consumer-group-1-A"

        # Explicit raise instead of `assert`: assertions are stripped under
        # `python -O`, which would silently skip this validation.
        if not (self.endpoint and self.access_key_id
                and self.access_key and self.logset_id):
            raise ValueError("endpoint/access_id/access_key and "
                             "logset_id cannot be empty")

        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)

    def signal_handler(self, signum, frame):
        """Request shutdown; run() notices the flag on its next one-second poll."""
        print(f"catch signal {signum},cleanup...")
        self.shutdown_flag = True

    def run(self):
        """Start consuming, block until a shutdown signal arrives, then exit with status 0."""
        print("*** start to run consumer...")
        self.consume()
        # waiting for exit signal
        while not self.shutdown_flag:
            time.sleep(1)
        # shutdown consumer
        print("*** stopping workers")
        self.consumer.shutdown()
        sys.exit(0)

    def consume(self):
        """Build the consumer configuration, create the worker and start it.

        Any exception is printed with its traceback and then re-raised.
        """
        try:
            # consumer config
            option1 = LogHubConfig(
                self.endpoint, self.access_key_id, self.access_key,
                self.region, self.logset_id, self.topic_ids,
                self.consumer_group, self.consumer_name1,
                heartbeat_interval=3, data_fetch_interval=1,
                offset_start_time='begin', max_fetch_log_group_size=1048576,
            )
            # init consumer
            self.consumer = ConsumerWorker(SampleConsumer, consumer_option=option1)
            # start consumer
            print("*** start to consume data...")
            self.consumer.start()
        except Exception:
            import traceback
            traceback.print_exc()
            # Bare raise keeps the original exception and traceback intact.
            raise
Feedback